Commit d60cf39f authored by Ben Shi's avatar Ben Shi Committed by Cherry Zhang

cmd/compile: optimize arm64's MADD and MSUB

This CL implements constant folding for MADD/MSUB on arm64.

1. The total size of pkg/android_arm64/ decreases about 4KB,
   excluding cmd/compile/ .

2. There is no regression in the go1 benchmark, excluding noise.
name                     old time/op    new time/op    delta
BinaryTree17-4              16.4s ± 1%     16.5s ± 1%  +0.24%  (p=0.008 n=29+29)
Fannkuch11-4                8.73s ± 0%     8.71s ± 0%  -0.15%  (p=0.000 n=29+29)
FmtFprintfEmpty-4           174ns ± 0%     174ns ± 0%    ~     (all equal)
FmtFprintfString-4          370ns ± 0%     372ns ± 2%  +0.53%  (p=0.007 n=24+30)
FmtFprintfInt-4             419ns ± 0%     419ns ± 0%    ~     (all equal)
FmtFprintfIntInt-4          673ns ± 1%     661ns ± 1%  -1.81%  (p=0.000 n=30+27)
FmtFprintfPrefixedInt-4     806ns ± 0%     805ns ± 0%    ~     (p=0.957 n=28+27)
FmtFprintfFloat-4          1.09µs ± 0%    1.09µs ± 0%  -0.04%  (p=0.001 n=22+30)
FmtManyArgs-4              2.67µs ± 0%    2.68µs ± 0%  +0.03%  (p=0.045 n=29+28)
GobDecode-4                33.2ms ± 1%    32.5ms ± 1%  -2.11%  (p=0.000 n=29+29)
GobEncode-4                29.5ms ± 0%    29.2ms ± 0%  -1.04%  (p=0.000 n=28+28)
Gzip-4                      1.39s ± 2%     1.38s ± 1%  -0.48%  (p=0.023 n=30+30)
Gunzip-4                    139ms ± 0%     139ms ± 0%    ~     (p=0.616 n=30+28)
HTTPClientServer-4          766µs ± 4%     758µs ± 3%  -1.03%  (p=0.013 n=28+29)
JSONEncode-4               49.7ms ± 0%    49.6ms ± 0%  -0.24%  (p=0.000 n=30+30)
JSONDecode-4                266ms ± 0%     268ms ± 1%  +1.07%  (p=0.000 n=29+30)
Mandelbrot200-4            16.6ms ± 0%    16.6ms ± 0%    ~     (p=0.248 n=30+29)
GoParse-4                  15.9ms ± 0%    16.0ms ± 0%  +0.76%  (p=0.000 n=29+29)
RegexpMatchEasy0_32-4       381ns ± 0%     380ns ± 0%  -0.14%  (p=0.000 n=30+30)
RegexpMatchEasy0_1K-4      1.18µs ± 0%    1.19µs ± 1%  +0.30%  (p=0.000 n=29+30)
RegexpMatchEasy1_32-4       357ns ± 0%     357ns ± 0%    ~     (all equal)
RegexpMatchEasy1_1K-4      2.04µs ± 0%    2.05µs ± 0%  +0.50%  (p=0.000 n=26+28)
RegexpMatchMedium_32-4      590ns ± 0%     589ns ± 0%  -0.12%  (p=0.000 n=30+23)
RegexpMatchMedium_1K-4      162µs ± 0%     162µs ± 0%    ~     (p=0.318 n=28+25)
RegexpMatchHard_32-4       9.56µs ± 0%    9.56µs ± 0%    ~     (p=0.072 n=30+29)
RegexpMatchHard_1K-4        287µs ± 0%     287µs ± 0%  -0.02%  (p=0.005 n=28+28)
Revcomp-4                   2.50s ± 0%     2.51s ± 0%    ~     (p=0.246 n=29+29)
Template-4                  312ms ± 1%     313ms ± 1%  +0.46%  (p=0.002 n=30+30)
TimeParse-4                1.68µs ± 0%    1.67µs ± 0%  -0.31%  (p=0.000 n=27+29)
TimeFormat-4               1.66µs ± 0%    1.64µs ± 0%  -0.92%  (p=0.000 n=29+26)
[Geo mean]                  247µs          246µs       -0.15%

name                     old speed      new speed      delta
GobDecode-4              23.1MB/s ± 1%  23.6MB/s ± 0%  +2.17%  (p=0.000 n=29+28)
GobEncode-4              26.0MB/s ± 0%  26.3MB/s ± 0%  +1.05%  (p=0.000 n=28+28)
Gzip-4                   14.0MB/s ± 2%  14.1MB/s ± 1%  +0.47%  (p=0.026 n=30+30)
Gunzip-4                  139MB/s ± 0%   139MB/s ± 0%    ~     (p=0.624 n=30+28)
JSONEncode-4             39.1MB/s ± 0%  39.2MB/s ± 0%  +0.24%  (p=0.000 n=30+30)
JSONDecode-4             7.31MB/s ± 0%  7.23MB/s ± 1%  -1.07%  (p=0.000 n=28+30)
GoParse-4                3.65MB/s ± 0%  3.62MB/s ± 0%  -0.77%  (p=0.000 n=29+29)
RegexpMatchEasy0_32-4    84.0MB/s ± 0%  84.1MB/s ± 0%  +0.18%  (p=0.000 n=28+30)
RegexpMatchEasy0_1K-4     864MB/s ± 0%   861MB/s ± 1%  -0.29%  (p=0.000 n=29+30)
RegexpMatchEasy1_32-4    89.5MB/s ± 0%  89.5MB/s ± 0%    ~     (p=0.841 n=28+28)
RegexpMatchEasy1_1K-4     502MB/s ± 0%   500MB/s ± 0%  -0.51%  (p=0.000 n=29+29)
RegexpMatchMedium_32-4   1.69MB/s ± 0%  1.70MB/s ± 0%  +0.41%  (p=0.000 n=26+30)
RegexpMatchMedium_1K-4   6.31MB/s ± 0%  6.30MB/s ± 0%    ~     (p=0.129 n=30+25)
RegexpMatchHard_32-4     3.35MB/s ± 0%  3.35MB/s ± 0%    ~     (p=0.657 n=30+29)
RegexpMatchHard_1K-4     3.57MB/s ± 0%  3.57MB/s ± 0%    ~     (all equal)
Revcomp-4                 102MB/s ± 0%   101MB/s ± 0%    ~     (p=0.213 n=29+29)
Template-4               6.22MB/s ± 1%  6.19MB/s ± 1%  -0.42%  (p=0.005 n=30+29)
[Geo mean]               24.1MB/s       24.2MB/s       +0.08%

Change-Id: I6c02d3c9975f6bd8bc215cb1fc14d29602b45649
Reviewed-on: https://go-review.googlesource.com/138095
Run-TryBot: Ben Shi <powerman1st@163.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: default avatarCherry Zhang <cherryyz@google.com>
parent bf8e6b70
......@@ -1154,15 +1154,15 @@
(MULW (NEG x) y) -> (MNEGW x y)
// madd/msub
(ADD a l:(MUL x y)) && l.Uses==1 && x.Op!=OpARM64MOVDconst && y.Op!=OpARM64MOVDconst && a.Op!=OpARM64MOVDconst && clobber(l) -> (MADD a x y)
(SUB a l:(MUL x y)) && l.Uses==1 && x.Op!=OpARM64MOVDconst && y.Op!=OpARM64MOVDconst && a.Op!=OpARM64MOVDconst && clobber(l) -> (MSUB a x y)
(ADD a l:(MNEG x y)) && l.Uses==1 && x.Op!=OpARM64MOVDconst && y.Op!=OpARM64MOVDconst && a.Op!=OpARM64MOVDconst && clobber(l) -> (MSUB a x y)
(SUB a l:(MNEG x y)) && l.Uses==1 && x.Op!=OpARM64MOVDconst && y.Op!=OpARM64MOVDconst && a.Op!=OpARM64MOVDconst && clobber(l) -> (MADD a x y)
(ADD a l:(MUL x y)) && l.Uses==1 && clobber(l) -> (MADD a x y)
(SUB a l:(MUL x y)) && l.Uses==1 && clobber(l) -> (MSUB a x y)
(ADD a l:(MNEG x y)) && l.Uses==1 && clobber(l) -> (MSUB a x y)
(SUB a l:(MNEG x y)) && l.Uses==1 && clobber(l) -> (MADD a x y)
(ADD a l:(MULW x y)) && l.Uses==1 && a.Type.Size() != 8 && x.Op!=OpARM64MOVDconst && y.Op!=OpARM64MOVDconst && a.Op!=OpARM64MOVDconst && clobber(l) -> (MADDW a x y)
(SUB a l:(MULW x y)) && l.Uses==1 && a.Type.Size() != 8 && x.Op!=OpARM64MOVDconst && y.Op!=OpARM64MOVDconst && a.Op!=OpARM64MOVDconst && clobber(l) -> (MSUBW a x y)
(ADD a l:(MNEGW x y)) && l.Uses==1 && a.Type.Size() != 8 && x.Op!=OpARM64MOVDconst && y.Op!=OpARM64MOVDconst && a.Op!=OpARM64MOVDconst && clobber(l) -> (MSUBW a x y)
(SUB a l:(MNEGW x y)) && l.Uses==1 && a.Type.Size() != 8 && x.Op!=OpARM64MOVDconst && y.Op!=OpARM64MOVDconst && a.Op!=OpARM64MOVDconst && clobber(l) -> (MADDW a x y)
(ADD a l:(MULW x y)) && a.Type.Size() != 8 && l.Uses==1 && clobber(l) -> (MADDW a x y)
(SUB a l:(MULW x y)) && a.Type.Size() != 8 && l.Uses==1 && clobber(l) -> (MSUBW a x y)
(ADD a l:(MNEGW x y)) && a.Type.Size() != 8 && l.Uses==1 && clobber(l) -> (MSUBW a x y)
(SUB a l:(MNEGW x y)) && a.Type.Size() != 8 && l.Uses==1 && clobber(l) -> (MADDW a x y)
// mul by constant
(MUL x (MOVDconst [-1])) -> (NEG x)
......@@ -1210,6 +1210,94 @@
(MNEGW x (MOVDconst [c])) && c%7 == 0 && isPowerOfTwo(c/7) && is32Bit(c) -> (SLLconst <x.Type> [log2(c/7)] (SUBshiftLL <x.Type> x x [3]))
(MNEGW x (MOVDconst [c])) && c%9 == 0 && isPowerOfTwo(c/9) && is32Bit(c) -> (NEG (SLLconst <x.Type> [log2(c/9)] (ADDshiftLL <x.Type> x x [3])))
(MADD a x (MOVDconst [-1])) -> (SUB a x)
(MADD a _ (MOVDconst [0])) -> a
(MADD a x (MOVDconst [1])) -> (ADD a x)
(MADD a x (MOVDconst [c])) && isPowerOfTwo(c) -> (ADDshiftLL a x [log2(c)])
(MADD a x (MOVDconst [c])) && isPowerOfTwo(c-1) && c>=3 -> (ADD a (ADDshiftLL <x.Type> x x [log2(c-1)]))
(MADD a x (MOVDconst [c])) && isPowerOfTwo(c+1) && c>=7 -> (SUB a (SUBshiftLL <x.Type> x x [log2(c+1)]))
(MADD a x (MOVDconst [c])) && c%3 == 0 && isPowerOfTwo(c/3) -> (SUBshiftLL a (SUBshiftLL <x.Type> x x [2]) [log2(c/3)])
(MADD a x (MOVDconst [c])) && c%5 == 0 && isPowerOfTwo(c/5) -> (ADDshiftLL a (ADDshiftLL <x.Type> x x [2]) [log2(c/5)])
(MADD a x (MOVDconst [c])) && c%7 == 0 && isPowerOfTwo(c/7) -> (SUBshiftLL a (SUBshiftLL <x.Type> x x [3]) [log2(c/7)])
(MADD a x (MOVDconst [c])) && c%9 == 0 && isPowerOfTwo(c/9) -> (ADDshiftLL a (ADDshiftLL <x.Type> x x [3]) [log2(c/9)])
(MADD a (MOVDconst [-1]) x) -> (SUB a x)
(MADD a (MOVDconst [0]) _) -> a
(MADD a (MOVDconst [1]) x) -> (ADD a x)
(MADD a (MOVDconst [c]) x) && isPowerOfTwo(c) -> (ADDshiftLL a x [log2(c)])
(MADD a (MOVDconst [c]) x) && isPowerOfTwo(c-1) && c>=3 -> (ADD a (ADDshiftLL <x.Type> x x [log2(c-1)]))
(MADD a (MOVDconst [c]) x) && isPowerOfTwo(c+1) && c>=7 -> (SUB a (SUBshiftLL <x.Type> x x [log2(c+1)]))
(MADD a (MOVDconst [c]) x) && c%3 == 0 && isPowerOfTwo(c/3) -> (SUBshiftLL a (SUBshiftLL <x.Type> x x [2]) [log2(c/3)])
(MADD a (MOVDconst [c]) x) && c%5 == 0 && isPowerOfTwo(c/5) -> (ADDshiftLL a (ADDshiftLL <x.Type> x x [2]) [log2(c/5)])
(MADD a (MOVDconst [c]) x) && c%7 == 0 && isPowerOfTwo(c/7) -> (SUBshiftLL a (SUBshiftLL <x.Type> x x [3]) [log2(c/7)])
(MADD a (MOVDconst [c]) x) && c%9 == 0 && isPowerOfTwo(c/9) -> (ADDshiftLL a (ADDshiftLL <x.Type> x x [3]) [log2(c/9)])
(MADDW a x (MOVDconst [c])) && int32(c)==-1 -> (SUB a x)
(MADDW a _ (MOVDconst [c])) && int32(c)==0 -> a
(MADDW a x (MOVDconst [c])) && int32(c)==1 -> (ADD a x)
(MADDW a x (MOVDconst [c])) && isPowerOfTwo(c) -> (ADDshiftLL a x [log2(c)])
(MADDW a x (MOVDconst [c])) && isPowerOfTwo(c-1) && int32(c)>=3 -> (ADD a (ADDshiftLL <x.Type> x x [log2(c-1)]))
(MADDW a x (MOVDconst [c])) && isPowerOfTwo(c+1) && int32(c)>=7 -> (SUB a (SUBshiftLL <x.Type> x x [log2(c+1)]))
(MADDW a x (MOVDconst [c])) && c%3 == 0 && isPowerOfTwo(c/3) && is32Bit(c) -> (SUBshiftLL a (SUBshiftLL <x.Type> x x [2]) [log2(c/3)])
(MADDW a x (MOVDconst [c])) && c%5 == 0 && isPowerOfTwo(c/5) && is32Bit(c) -> (ADDshiftLL a (ADDshiftLL <x.Type> x x [2]) [log2(c/5)])
(MADDW a x (MOVDconst [c])) && c%7 == 0 && isPowerOfTwo(c/7) && is32Bit(c) -> (SUBshiftLL a (SUBshiftLL <x.Type> x x [3]) [log2(c/7)])
(MADDW a x (MOVDconst [c])) && c%9 == 0 && isPowerOfTwo(c/9) && is32Bit(c) -> (ADDshiftLL a (ADDshiftLL <x.Type> x x [3]) [log2(c/9)])
(MADDW a (MOVDconst [c]) x) && int32(c)==-1 -> (SUB a x)
(MADDW a (MOVDconst [c]) _) && int32(c)==0 -> a
(MADDW a (MOVDconst [c]) x) && int32(c)==1 -> (ADD a x)
(MADDW a (MOVDconst [c]) x) && isPowerOfTwo(c) -> (ADDshiftLL a x [log2(c)])
(MADDW a (MOVDconst [c]) x) && isPowerOfTwo(c-1) && int32(c)>=3 -> (ADD a (ADDshiftLL <x.Type> x x [log2(c-1)]))
(MADDW a (MOVDconst [c]) x) && isPowerOfTwo(c+1) && int32(c)>=7 -> (SUB a (SUBshiftLL <x.Type> x x [log2(c+1)]))
(MADDW a (MOVDconst [c]) x) && c%3 == 0 && isPowerOfTwo(c/3) && is32Bit(c) -> (SUBshiftLL a (SUBshiftLL <x.Type> x x [2]) [log2(c/3)])
(MADDW a (MOVDconst [c]) x) && c%5 == 0 && isPowerOfTwo(c/5) && is32Bit(c) -> (ADDshiftLL a (ADDshiftLL <x.Type> x x [2]) [log2(c/5)])
(MADDW a (MOVDconst [c]) x) && c%7 == 0 && isPowerOfTwo(c/7) && is32Bit(c) -> (SUBshiftLL a (SUBshiftLL <x.Type> x x [3]) [log2(c/7)])
(MADDW a (MOVDconst [c]) x) && c%9 == 0 && isPowerOfTwo(c/9) && is32Bit(c) -> (ADDshiftLL a (ADDshiftLL <x.Type> x x [3]) [log2(c/9)])
(MSUB a x (MOVDconst [-1])) -> (ADD a x)
(MSUB a _ (MOVDconst [0])) -> a
(MSUB a x (MOVDconst [1])) -> (SUB a x)
(MSUB a x (MOVDconst [c])) && isPowerOfTwo(c) -> (SUBshiftLL a x [log2(c)])
(MSUB a x (MOVDconst [c])) && isPowerOfTwo(c-1) && c>=3 -> (SUB a (ADDshiftLL <x.Type> x x [log2(c-1)]))
(MSUB a x (MOVDconst [c])) && isPowerOfTwo(c+1) && c>=7 -> (ADD a (SUBshiftLL <x.Type> x x [log2(c+1)]))
(MSUB a x (MOVDconst [c])) && c%3 == 0 && isPowerOfTwo(c/3) -> (ADDshiftLL a (SUBshiftLL <x.Type> x x [2]) [log2(c/3)])
(MSUB a x (MOVDconst [c])) && c%5 == 0 && isPowerOfTwo(c/5) -> (SUBshiftLL a (ADDshiftLL <x.Type> x x [2]) [log2(c/5)])
(MSUB a x (MOVDconst [c])) && c%7 == 0 && isPowerOfTwo(c/7) -> (ADDshiftLL a (SUBshiftLL <x.Type> x x [3]) [log2(c/7)])
(MSUB a x (MOVDconst [c])) && c%9 == 0 && isPowerOfTwo(c/9) -> (SUBshiftLL a (ADDshiftLL <x.Type> x x [3]) [log2(c/9)])
(MSUB a (MOVDconst [-1]) x) -> (ADD a x)
(MSUB a (MOVDconst [0]) _) -> a
(MSUB a (MOVDconst [1]) x) -> (SUB a x)
(MSUB a (MOVDconst [c]) x) && isPowerOfTwo(c) -> (SUBshiftLL a x [log2(c)])
(MSUB a (MOVDconst [c]) x) && isPowerOfTwo(c-1) && c>=3 -> (SUB a (ADDshiftLL <x.Type> x x [log2(c-1)]))
(MSUB a (MOVDconst [c]) x) && isPowerOfTwo(c+1) && c>=7 -> (ADD a (SUBshiftLL <x.Type> x x [log2(c+1)]))
(MSUB a (MOVDconst [c]) x) && c%3 == 0 && isPowerOfTwo(c/3) -> (ADDshiftLL a (SUBshiftLL <x.Type> x x [2]) [log2(c/3)])
(MSUB a (MOVDconst [c]) x) && c%5 == 0 && isPowerOfTwo(c/5) -> (SUBshiftLL a (ADDshiftLL <x.Type> x x [2]) [log2(c/5)])
(MSUB a (MOVDconst [c]) x) && c%7 == 0 && isPowerOfTwo(c/7) -> (ADDshiftLL a (SUBshiftLL <x.Type> x x [3]) [log2(c/7)])
(MSUB a (MOVDconst [c]) x) && c%9 == 0 && isPowerOfTwo(c/9) -> (SUBshiftLL a (ADDshiftLL <x.Type> x x [3]) [log2(c/9)])
(MSUBW a x (MOVDconst [c])) && int32(c)==-1 -> (ADD a x)
(MSUBW a _ (MOVDconst [c])) && int32(c)==0 -> a
(MSUBW a x (MOVDconst [c])) && int32(c)==1 -> (SUB a x)
(MSUBW a x (MOVDconst [c])) && isPowerOfTwo(c) -> (SUBshiftLL a x [log2(c)])
(MSUBW a x (MOVDconst [c])) && isPowerOfTwo(c-1) && int32(c)>=3 -> (SUB a (ADDshiftLL <x.Type> x x [log2(c-1)]))
(MSUBW a x (MOVDconst [c])) && isPowerOfTwo(c+1) && int32(c)>=7 -> (ADD a (SUBshiftLL <x.Type> x x [log2(c+1)]))
(MSUBW a x (MOVDconst [c])) && c%3 == 0 && isPowerOfTwo(c/3) && is32Bit(c) -> (ADDshiftLL a (SUBshiftLL <x.Type> x x [2]) [log2(c/3)])
(MSUBW a x (MOVDconst [c])) && c%5 == 0 && isPowerOfTwo(c/5) && is32Bit(c) -> (SUBshiftLL a (ADDshiftLL <x.Type> x x [2]) [log2(c/5)])
(MSUBW a x (MOVDconst [c])) && c%7 == 0 && isPowerOfTwo(c/7) && is32Bit(c) -> (ADDshiftLL a (SUBshiftLL <x.Type> x x [3]) [log2(c/7)])
(MSUBW a x (MOVDconst [c])) && c%9 == 0 && isPowerOfTwo(c/9) && is32Bit(c) -> (SUBshiftLL a (ADDshiftLL <x.Type> x x [3]) [log2(c/9)])
(MSUBW a (MOVDconst [c]) x) && int32(c)==-1 -> (ADD a x)
(MSUBW a (MOVDconst [c]) _) && int32(c)==0 -> a
(MSUBW a (MOVDconst [c]) x) && int32(c)==1 -> (SUB a x)
(MSUBW a (MOVDconst [c]) x) && isPowerOfTwo(c) -> (SUBshiftLL a x [log2(c)])
(MSUBW a (MOVDconst [c]) x) && isPowerOfTwo(c-1) && int32(c)>=3 -> (SUB a (ADDshiftLL <x.Type> x x [log2(c-1)]))
(MSUBW a (MOVDconst [c]) x) && isPowerOfTwo(c+1) && int32(c)>=7 -> (ADD a (SUBshiftLL <x.Type> x x [log2(c+1)]))
(MSUBW a (MOVDconst [c]) x) && c%3 == 0 && isPowerOfTwo(c/3) && is32Bit(c) -> (ADDshiftLL a (SUBshiftLL <x.Type> x x [2]) [log2(c/3)])
(MSUBW a (MOVDconst [c]) x) && c%5 == 0 && isPowerOfTwo(c/5) && is32Bit(c) -> (SUBshiftLL a (ADDshiftLL <x.Type> x x [2]) [log2(c/5)])
(MSUBW a (MOVDconst [c]) x) && c%7 == 0 && isPowerOfTwo(c/7) && is32Bit(c) -> (ADDshiftLL a (SUBshiftLL <x.Type> x x [3]) [log2(c/7)])
(MSUBW a (MOVDconst [c]) x) && c%9 == 0 && isPowerOfTwo(c/9) && is32Bit(c) -> (SUBshiftLL a (ADDshiftLL <x.Type> x x [3]) [log2(c/9)])
// div by constant
(UDIV x (MOVDconst [1])) -> x
(UDIV x (MOVDconst [c])) && isPowerOfTwo(c) -> (SRLconst [log2(c)] x)
......@@ -1261,6 +1349,14 @@
(MULW (MOVDconst [c]) (MOVDconst [d])) -> (MOVDconst [int64(int32(c)*int32(d))])
(MNEG (MOVDconst [c]) (MOVDconst [d])) -> (MOVDconst [-c*d])
(MNEGW (MOVDconst [c]) (MOVDconst [d])) -> (MOVDconst [-int64(int32(c)*int32(d))])
(MADD (MOVDconst [c]) x y) -> (ADDconst [c] (MUL <x.Type> x y))
(MADDW (MOVDconst [c]) x y) -> (ADDconst [c] (MULW <x.Type> x y))
(MSUB (MOVDconst [c]) x y) -> (ADDconst [c] (MNEG <x.Type> x y))
(MSUBW (MOVDconst [c]) x y) -> (ADDconst [c] (MNEGW <x.Type> x y))
(MADD a (MOVDconst [c]) (MOVDconst [d])) -> (ADDconst [c*d] a)
(MADDW a (MOVDconst [c]) (MOVDconst [d])) -> (ADDconst [int64(int32(c)*int32(d))] a)
(MSUB a (MOVDconst [c]) (MOVDconst [d])) -> (SUBconst [c*d] a)
(MSUBW a (MOVDconst [c]) (MOVDconst [d])) -> (SUBconst [int64(int32(c)*int32(d))] a)
(DIV (MOVDconst [c]) (MOVDconst [d])) -> (MOVDconst [c/d])
(UDIV (MOVDconst [c]) (MOVDconst [d])) -> (MOVDconst [int64(uint64(c)/uint64(d))])
(DIVW (MOVDconst [c]) (MOVDconst [d])) -> (MOVDconst [int64(int32(c)/int32(d))])
......
This source diff could not be displayed because it is too large. You can view the blob instead.
......@@ -206,8 +206,15 @@ func AddMul(x int) int {
return 2*x + 1
}
func MULA(a, b, c uint32) uint32 {
// arm:`MULA`
// arm64:`MADDW`
return a*b + c
func MULA(a, b, c uint32) (uint32, uint32, uint32) {
// arm:`MULA`,-`MUL\s`
// arm64:`MADDW`,-`MULW`
r0 := a*b + c
// arm:`MULA`-`MUL\s`
// arm64:`MADDW`,-`MULW`
r1 := c*79 + a
// arm:`ADD`,-`MULA`-`MUL\s`
// arm64:`ADD`,-`MADD`,-`MULW`
r2 := b*64 + c
return r0, r1, r2
}
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment