Commit f524268c authored by Balaram Makam, committed by Cherry Zhang

cmd/compile: optimize ARM64 code with CMN/TST

Use CMN/TST to simplify comparisons. This can reduce register
pressure by removing registers that have a single def and use, for example:
ADDW R0, R1, R8 -> CMNW R1, R0 ; CMN is an alias of ADDS.
CBZW R8, label  -> BEQ  label  ; single def/use of R8 removed.

Little change in performance of go1 benchmark on Amberwing:
name                   old time/op    new time/op    delta
RegexpMatchEasy0_32       247ns ± 0%     246ns ± 0%  -0.40%  (p=0.008 n=5+5)
RegexpMatchEasy0_1K       581ns ± 0%     580ns ± 0%    ~     (p=0.079 n=4+5)
RegexpMatchEasy1_32       244ns ± 0%     243ns ± 0%  -0.41%  (p=0.008 n=5+5)
RegexpMatchEasy1_1K       804ns ± 0%     806ns ± 0%  +0.25%  (p=0.016 n=5+4)
RegexpMatchMedium_32      313ns ± 0%     311ns ± 0%  -0.64%  (p=0.008 n=5+5)
RegexpMatchMedium_1K     52.2µs ± 0%    51.9µs ± 0%  -0.51%  (p=0.008 n=5+5)
RegexpMatchHard_32       2.76µs ± 3%    2.74µs ± 0%    ~     (p=0.683 n=5+5)
RegexpMatchHard_1K       78.8µs ± 0%    78.9µs ± 0%  +0.04%  (p=0.008 n=5+5)
FmtFprintfEmpty          58.6ns ± 0%    57.7ns ± 0%  -1.54%  (p=0.008 n=5+5)
FmtFprintfString          118ns ± 0%     115ns ± 0%  -2.54%  (p=0.008 n=5+5)
FmtFprintfInt             119ns ± 0%     119ns ± 0%    ~     (all equal)
FmtFprintfIntInt          192ns ± 0%     192ns ± 0%    ~     (all equal)
FmtFprintfPrefixedInt     224ns ± 0%     205ns ± 0%  -8.48%  (p=0.008 n=5+5)
FmtFprintfFloat           336ns ± 0%     333ns ± 1%    ~     (p=0.683 n=5+5)
FmtManyArgs               779ns ± 1%     760ns ± 1%  -2.41%  (p=0.008 n=5+5)
Gzip                      437ms ± 0%     436ms ± 0%  -0.27%  (p=0.008 n=5+5)
HTTPClientServer         90.1µs ± 1%    91.1µs ± 0%  +1.19%  (p=0.008 n=5+5)
JSONEncode               20.1ms ± 0%    20.2ms ± 1%    ~     (p=0.690 n=5+5)
JSONDecode               94.5ms ± 1%    94.1ms ± 1%    ~     (p=0.095 n=5+5)
Mandelbrot200            5.37ms ± 0%    5.37ms ± 0%    ~     (p=0.421 n=5+5)
TimeParse                 450ns ± 0%     446ns ± 0%  -0.89%  (p=0.000 n=5+4)
TimeFormat                483ns ± 1%     473ns ± 0%  -2.19%  (p=0.008 n=5+5)
Template                 90.6ms ± 0%    89.7ms ± 0%  -0.93%  (p=0.008 n=5+5)
GoParse                  5.97ms ± 0%    6.01ms ± 0%  +0.65%  (p=0.008 n=5+5)
BinaryTree17              11.8s ± 0%     11.7s ± 0%  -0.28%  (p=0.016 n=5+5)
Revcomp                   669ms ± 0%     669ms ± 0%    ~     (p=0.222 n=5+5)
Fannkuch11                3.28s ± 0%     3.34s ± 0%  +1.72%  (p=0.016 n=4+5)
[Geo mean]               46.6µs         46.3µs       -0.74%

name                   old speed      new speed      delta
RegexpMatchEasy0_32     129MB/s ± 0%   130MB/s ± 0%  +0.32%  (p=0.016 n=5+4)
RegexpMatchEasy0_1K    1.76GB/s ± 0%  1.76GB/s ± 0%  +0.13%  (p=0.016 n=4+5)
RegexpMatchEasy1_32     131MB/s ± 0%   132MB/s ± 0%  +0.32%  (p=0.008 n=5+5)
RegexpMatchEasy1_1K    1.27GB/s ± 0%  1.27GB/s ± 0%  -0.24%  (p=0.016 n=5+4)
RegexpMatchMedium_32   3.19MB/s ± 0%  3.21MB/s ± 0%  +0.63%  (p=0.008 n=5+5)
RegexpMatchMedium_1K   19.6MB/s ± 0%  19.7MB/s ± 0%  +0.51%  (p=0.029 n=4+4)
RegexpMatchHard_32     11.6MB/s ± 2%  11.7MB/s ± 0%    ~     (p=1.000 n=5+5)
RegexpMatchHard_1K     13.0MB/s ± 0%  13.0MB/s ± 0%    ~     (p=0.079 n=4+5)
Gzip                   44.4MB/s ± 0%  44.5MB/s ± 0%  +0.27%  (p=0.008 n=5+5)
JSONEncode             96.4MB/s ± 0%  96.2MB/s ± 1%    ~     (p=0.579 n=5+5)
JSONDecode             20.5MB/s ± 1%  20.6MB/s ± 1%    ~     (p=0.111 n=5+5)
Template               21.4MB/s ± 0%  21.6MB/s ± 0%  +0.94%  (p=0.008 n=5+5)
GoParse                9.70MB/s ± 0%  9.63MB/s ± 0%  -0.68%  (p=0.016 n=4+5)
Revcomp                 380MB/s ± 0%   380MB/s ± 0%    ~     (p=0.222 n=5+5)
[Geo mean]             55.3MB/s       55.4MB/s       +0.23%

Change-Id: I2e5338138991d9bc984e67b51212aa5d1b0f2a6b
Reviewed-on: https://go-review.googlesource.com/97335
Reviewed-by: Cherry Zhang <cherryyz@google.com>
Run-TryBot: Cherry Zhang <cherryyz@google.com>
parent ebb67d99
...@@ -33,7 +33,12 @@ TEXT asmtest(SB),DUPOK|NOSPLIT,$-8 ...@@ -33,7 +33,12 @@ TEXT asmtest(SB),DUPOK|NOSPLIT,$-8
AND $34903429696192636, R12, R19 // 93910e92 AND $34903429696192636, R12, R19 // 93910e92
ANDW R9@>7, R19, R26 // 7a1ec90a ANDW R9@>7, R19, R26 // 7a1ec90a
AND R9@>7, R19, R26 // 7a1ec98a AND R9@>7, R19, R26 // 7a1ec98a
//TODO TST $2863311530, R24 // 1ff32972 TSTW $2863311530, R24 // 1ff30172
TST R2, R0 // 1f0002ea
TST $7, R2 // 5f0840f2
ANDS R2, R0, ZR // 1f0002ea
ANDS $7, R2, ZR // 5f0840f2
ANDSW $2863311530, R24, ZR // 1ff30172
ANDSW $2863311530, R24, R23 // 17f30172 ANDSW $2863311530, R24, R23 // 17f30172
ANDS $-140737488289793, R2, R5 // 458051f2 ANDS $-140737488289793, R2, R5 // 458051f2
ANDSW R26->24, R21, R15 // af629a6a ANDSW R26->24, R21, R15 // af629a6a
...@@ -374,10 +379,14 @@ TEXT asmtest(SB),DUPOK|NOSPLIT,$-8 ...@@ -374,10 +379,14 @@ TEXT asmtest(SB),DUPOK|NOSPLIT,$-8
SXTW R0, R27 // 1b7c4093 SXTW R0, R27 // 1b7c4093
SYSL $285440, R12 // 0c5b2cd5 SYSL $285440, R12 // 0c5b2cd5
//TODO TLBI //TODO TLBI
//TODO TST $0x80000007, R9 // 3f0d0172 TSTW $0x80000007, R9 // TSTW $2147483655, R9 // 3f0d0172
//TODO TST $0xfffffff0, LR // df6f7cf2 TST $0xfffffff0, LR // TST $4294967280, R30 // df6f7cf2
//TODO TSTW R10@>21, R2 // 1f2f11ea TSTW R10@>21, R2 // 5f54ca6a
//TODO TST R17<<11, R24 // 1f2f11ea TST R17<<11, R24 // 1f2f11ea
ANDSW $0x80000007, R9, ZR // ANDSW $2147483655, R9, ZR // 3f0d0172
ANDS $0xfffffff0, LR, ZR // ANDS $4294967280, R30, ZR // df6f7cf2
ANDSW R10@>21, R2, ZR // 5f54ca6a
ANDS R17<<11, R24, ZR // 1f2f11ea
UBFIZW $3, R19, $14, R14 // 6e361d53 UBFIZW $3, R19, $14, R14 // 6e361d53
UBFIZ $3, R22, $14, R4 // c4367dd3 UBFIZ $3, R22, $14, R4 // c4367dd3
UBFXW $3, R7, $20, R15 // ef580353 UBFXW $3, R7, $20, R15 // ef580353
......
...@@ -276,6 +276,8 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) { ...@@ -276,6 +276,8 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
ssa.OpARM64CMPW, ssa.OpARM64CMPW,
ssa.OpARM64CMN, ssa.OpARM64CMN,
ssa.OpARM64CMNW, ssa.OpARM64CMNW,
ssa.OpARM64TST,
ssa.OpARM64TSTW,
ssa.OpARM64FCMPS, ssa.OpARM64FCMPS,
ssa.OpARM64FCMPD: ssa.OpARM64FCMPD:
p := s.Prog(v.Op.Asm()) p := s.Prog(v.Op.Asm())
...@@ -285,7 +287,9 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) { ...@@ -285,7 +287,9 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
case ssa.OpARM64CMPconst, case ssa.OpARM64CMPconst,
ssa.OpARM64CMPWconst, ssa.OpARM64CMPWconst,
ssa.OpARM64CMNconst, ssa.OpARM64CMNconst,
ssa.OpARM64CMNWconst: ssa.OpARM64CMNWconst,
ssa.OpARM64TSTconst,
ssa.OpARM64TSTWconst:
p := s.Prog(v.Op.Asm()) p := s.Prog(v.Op.Asm())
p.From.Type = obj.TYPE_CONST p.From.Type = obj.TYPE_CONST
p.From.Offset = v.AuxInt p.From.Offset = v.AuxInt
......
...@@ -556,6 +556,30 @@ ...@@ -556,6 +556,30 @@
(NZ (GreaterEqual cc) yes no) -> (GE cc yes no) (NZ (GreaterEqual cc) yes no) -> (GE cc yes no)
(NZ (GreaterEqualU cc) yes no) -> (UGE cc yes no) (NZ (GreaterEqualU cc) yes no) -> (UGE cc yes no)
(EQ (CMPWconst [0] x:(ANDconst [c] y)) yes no) && x.Uses == 1 -> (EQ (TSTWconst [c] y) yes no)
(NE (CMPWconst [0] x:(ANDconst [c] y)) yes no) && x.Uses == 1 -> (NE (TSTWconst [c] y) yes no)
(LT (CMPWconst [0] x:(ANDconst [c] y)) yes no) && x.Uses == 1 -> (LT (TSTWconst [c] y) yes no)
(LE (CMPWconst [0] x:(ANDconst [c] y)) yes no) && x.Uses == 1 -> (LE (TSTWconst [c] y) yes no)
(GT (CMPWconst [0] x:(ANDconst [c] y)) yes no) && x.Uses == 1 -> (GT (TSTWconst [c] y) yes no)
(GE (CMPWconst [0] x:(ANDconst [c] y)) yes no) && x.Uses == 1 -> (GE (TSTWconst [c] y) yes no)
(EQ (CMPconst [0] z:(AND x y)) yes no) && z.Uses == 1 -> (EQ (TST x y) yes no)
(NE (CMPconst [0] z:(AND x y)) yes no) && z.Uses == 1 -> (NE (TST x y) yes no)
// 32-bit compare of an AND result against zero: use TSTW (not TST) so that
// only the low 32 bits of x and y take part in setting the flags.
(EQ (CMPWconst [0] z:(AND x y)) yes no) && z.Uses == 1 -> (EQ (TSTW x y) yes no)
(NE (CMPWconst [0] z:(AND x y)) yes no) && z.Uses == 1 -> (NE (TSTW x y) yes no)
(EQ (CMPconst [0] x:(ANDconst [c] y)) yes no) && x.Uses == 1 -> (EQ (TSTconst [c] y) yes no)
(NE (CMPconst [0] x:(ANDconst [c] y)) yes no) && x.Uses == 1 -> (NE (TSTconst [c] y) yes no)
(LT (CMPconst [0] x:(ANDconst [c] y)) yes no) && x.Uses == 1 -> (LT (TSTconst [c] y) yes no)
(LE (CMPconst [0] x:(ANDconst [c] y)) yes no) && x.Uses == 1 -> (LE (TSTconst [c] y) yes no)
(GT (CMPconst [0] x:(ANDconst [c] y)) yes no) && x.Uses == 1 -> (GT (TSTconst [c] y) yes no)
(GE (CMPconst [0] x:(ANDconst [c] y)) yes no) && x.Uses == 1 -> (GE (TSTconst [c] y) yes no)
(EQ (CMPconst [0] z:(ADD x y)) yes no) && z.Uses == 1 -> (EQ (CMN x y) yes no)
(NE (CMPconst [0] z:(ADD x y)) yes no) && z.Uses == 1 -> (NE (CMN x y) yes no)
(EQ (CMP x z:(NEG y)) yes no) && z.Uses == 1 -> (EQ (CMN x y) yes no)
(NE (CMP x z:(NEG y)) yes no) && z.Uses == 1 -> (NE (CMN x y) yes no)
(EQ (CMPconst [0] x) yes no) -> (Z x yes no) (EQ (CMPconst [0] x) yes no) -> (Z x yes no)
(NE (CMPconst [0] x) yes no) -> (NZ x yes no) (NE (CMPconst [0] x) yes no) -> (NZ x yes no)
(EQ (CMPWconst [0] x) yes no) -> (ZW x yes no) (EQ (CMPWconst [0] x) yes no) -> (ZW x yes no)
...@@ -566,6 +590,10 @@ ...@@ -566,6 +590,10 @@
(NZ (ANDconst [c] x) yes no) && oneBit(c) -> (TBNZ {ntz(c)} x yes no) (NZ (ANDconst [c] x) yes no) && oneBit(c) -> (TBNZ {ntz(c)} x yes no)
(ZW (ANDconst [c] x) yes no) && oneBit(int64(uint32(c))) -> (TBZ {ntz(int64(uint32(c)))} x yes no) (ZW (ANDconst [c] x) yes no) && oneBit(int64(uint32(c))) -> (TBZ {ntz(int64(uint32(c)))} x yes no)
(NZW (ANDconst [c] x) yes no) && oneBit(int64(uint32(c))) -> (TBNZ {ntz(int64(uint32(c)))} x yes no) (NZW (ANDconst [c] x) yes no) && oneBit(int64(uint32(c))) -> (TBNZ {ntz(int64(uint32(c)))} x yes no)
(EQ (TSTconst [c] x) yes no) && oneBit(c) -> (TBZ {ntz(c)} x yes no)
(NE (TSTconst [c] x) yes no) && oneBit(c) -> (TBNZ {ntz(c)} x yes no)
(EQ (TSTWconst [c] x) yes no) && oneBit(int64(uint32(c))) -> (TBZ {ntz(int64(uint32(c)))} x yes no)
(NE (TSTWconst [c] x) yes no) && oneBit(int64(uint32(c))) -> (TBNZ {ntz(int64(uint32(c)))} x yes no)
// Test sign-bit for signed comparisons against zero // Test sign-bit for signed comparisons against zero
(GE (CMPWconst [0] x) yes no) -> (TBZ {int64(31)} x yes no) (GE (CMPWconst [0] x) yes no) -> (TBZ {int64(31)} x yes no)
...@@ -910,6 +938,8 @@ ...@@ -910,6 +938,8 @@
(AND x (MOVDconst [c])) -> (ANDconst [c] x) (AND x (MOVDconst [c])) -> (ANDconst [c] x)
(OR x (MOVDconst [c])) -> (ORconst [c] x) (OR x (MOVDconst [c])) -> (ORconst [c] x)
(XOR x (MOVDconst [c])) -> (XORconst [c] x) (XOR x (MOVDconst [c])) -> (XORconst [c] x)
(TST x (MOVDconst [c])) -> (TSTconst [c] x)
(CMN x (MOVDconst [c])) -> (CMNconst [c] x)
(BIC x (MOVDconst [c])) -> (ANDconst [^c] x) (BIC x (MOVDconst [c])) -> (ANDconst [^c] x)
(EON x (MOVDconst [c])) -> (XORconst [^c] x) (EON x (MOVDconst [c])) -> (XORconst [^c] x)
(ORN x (MOVDconst [c])) -> (ORconst [^c] x) (ORN x (MOVDconst [c])) -> (ORconst [^c] x)
...@@ -1067,6 +1097,23 @@ ...@@ -1067,6 +1097,23 @@
(CMPWconst (MOVDconst [x]) [y]) && int32(x)<int32(y) && uint32(x)>uint32(y) -> (FlagLT_UGT) (CMPWconst (MOVDconst [x]) [y]) && int32(x)<int32(y) && uint32(x)>uint32(y) -> (FlagLT_UGT)
(CMPWconst (MOVDconst [x]) [y]) && int32(x)>int32(y) && uint32(x)<uint32(y) -> (FlagGT_ULT) (CMPWconst (MOVDconst [x]) [y]) && int32(x)>int32(y) && uint32(x)<uint32(y) -> (FlagGT_ULT)
(CMPWconst (MOVDconst [x]) [y]) && int32(x)>int32(y) && uint32(x)>uint32(y) -> (FlagGT_UGT) (CMPWconst (MOVDconst [x]) [y]) && int32(x)>int32(y) && uint32(x)>uint32(y) -> (FlagGT_UGT)
(TSTconst (MOVDconst [x]) [y]) && int64(x&y)==0 -> (FlagEQ)
(TSTconst (MOVDconst [x]) [y]) && int64(x&y)<0 -> (FlagLT_UGT)
(TSTconst (MOVDconst [x]) [y]) && int64(x&y)>0 -> (FlagGT_UGT)
(TSTWconst (MOVDconst [x]) [y]) && int32(x&y)==0 -> (FlagEQ)
(TSTWconst (MOVDconst [x]) [y]) && int32(x&y)<0 -> (FlagLT_UGT)
(TSTWconst (MOVDconst [x]) [y]) && int32(x&y)>0 -> (FlagGT_UGT)
(CMNconst (MOVDconst [x]) [y]) && int64(x)==int64(-y) -> (FlagEQ)
(CMNconst (MOVDconst [x]) [y]) && int64(x)<int64(-y) && uint64(x)<uint64(-y) -> (FlagLT_ULT)
(CMNconst (MOVDconst [x]) [y]) && int64(x)<int64(-y) && uint64(x)>uint64(-y) -> (FlagLT_UGT)
(CMNconst (MOVDconst [x]) [y]) && int64(x)>int64(-y) && uint64(x)<uint64(-y) -> (FlagGT_ULT)
(CMNconst (MOVDconst [x]) [y]) && int64(x)>int64(-y) && uint64(x)>uint64(-y) -> (FlagGT_UGT)
(CMNWconst (MOVDconst [x]) [y]) && int32(x)==int32(-y) -> (FlagEQ)
(CMNWconst (MOVDconst [x]) [y]) && int32(x)<int32(-y) && uint32(x)<uint32(-y) -> (FlagLT_ULT)
(CMNWconst (MOVDconst [x]) [y]) && int32(x)<int32(-y) && uint32(x)>uint32(-y) -> (FlagLT_UGT)
(CMNWconst (MOVDconst [x]) [y]) && int32(x)>int32(-y) && uint32(x)<uint32(-y) -> (FlagGT_ULT)
(CMNWconst (MOVDconst [x]) [y]) && int32(x)>int32(-y) && uint32(x)>uint32(-y) -> (FlagGT_UGT)
// other known comparisons // other known comparisons
(CMPconst (MOVBUreg _) [c]) && 0xff < c -> (FlagLT_ULT) (CMPconst (MOVBUreg _) [c]) && 0xff < c -> (FlagLT_ULT)
......
...@@ -255,6 +255,10 @@ func init() { ...@@ -255,6 +255,10 @@ func init() {
{name: "CMNconst", argLength: 1, reg: gp1flags, asm: "CMN", aux: "Int64", typ: "Flags"}, // arg0 compare to -auxInt {name: "CMNconst", argLength: 1, reg: gp1flags, asm: "CMN", aux: "Int64", typ: "Flags"}, // arg0 compare to -auxInt
{name: "CMNW", argLength: 2, reg: gp2flags, asm: "CMNW", typ: "Flags"}, // arg0 compare to -arg1, 32 bit {name: "CMNW", argLength: 2, reg: gp2flags, asm: "CMNW", typ: "Flags"}, // arg0 compare to -arg1, 32 bit
{name: "CMNWconst", argLength: 1, reg: gp1flags, asm: "CMNW", aux: "Int32", typ: "Flags"}, // arg0 compare to -auxInt, 32 bit {name: "CMNWconst", argLength: 1, reg: gp1flags, asm: "CMNW", aux: "Int32", typ: "Flags"}, // arg0 compare to -auxInt, 32 bit
{name: "TST", argLength: 2, reg: gp2flags, asm: "TST", typ: "Flags"}, // arg0 & arg1 compare to 0
{name: "TSTconst", argLength: 1, reg: gp1flags, asm: "TST", aux: "Int64", typ: "Flags"}, // arg0 & auxInt compare to 0
{name: "TSTW", argLength: 2, reg: gp2flags, asm: "TSTW", typ: "Flags"}, // arg0 & arg1 compare to 0, 32 bit
{name: "TSTWconst", argLength: 1, reg: gp1flags, asm: "TSTW", aux: "Int32", typ: "Flags"}, // arg0 & auxInt compare to 0, 32 bit
{name: "FCMPS", argLength: 2, reg: fp2flags, asm: "FCMPS", typ: "Flags"}, // arg0 compare to arg1, float32 {name: "FCMPS", argLength: 2, reg: fp2flags, asm: "FCMPS", typ: "Flags"}, // arg0 compare to arg1, float32
{name: "FCMPD", argLength: 2, reg: fp2flags, asm: "FCMPD", typ: "Flags"}, // arg0 compare to arg1, float64 {name: "FCMPD", argLength: 2, reg: fp2flags, asm: "FCMPD", typ: "Flags"}, // arg0 compare to arg1, float64
......
...@@ -1109,6 +1109,10 @@ const ( ...@@ -1109,6 +1109,10 @@ const (
OpARM64CMNconst OpARM64CMNconst
OpARM64CMNW OpARM64CMNW
OpARM64CMNWconst OpARM64CMNWconst
OpARM64TST
OpARM64TSTconst
OpARM64TSTW
OpARM64TSTWconst
OpARM64FCMPS OpARM64FCMPS
OpARM64FCMPD OpARM64FCMPD
OpARM64ADDshiftLL OpARM64ADDshiftLL
...@@ -14480,6 +14484,50 @@ var opcodeTable = [...]opInfo{ ...@@ -14480,6 +14484,50 @@ var opcodeTable = [...]opInfo{
}, },
}, },
}, },
{
name: "TST",
argLen: 2,
asm: arm64.ATST,
reg: regInfo{
inputs: []inputInfo{
{0, 805044223}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
{1, 805044223}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
},
},
},
{
name: "TSTconst",
auxType: auxInt64,
argLen: 1,
asm: arm64.ATST,
reg: regInfo{
inputs: []inputInfo{
{0, 805044223}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
},
},
},
{
name: "TSTW",
argLen: 2,
asm: arm64.ATSTW,
reg: regInfo{
inputs: []inputInfo{
{0, 805044223}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
{1, 805044223}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
},
},
},
{
name: "TSTWconst",
auxType: auxInt32,
argLen: 1,
asm: arm64.ATSTW,
reg: regInfo{
inputs: []inputInfo{
{0, 805044223}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
},
},
},
{ {
name: "FCMPS", name: "FCMPS",
argLen: 2, argLen: 2,
......
...@@ -213,26 +213,32 @@ var optab = []Optab{ ...@@ -213,26 +213,32 @@ var optab = []Optab{
{AAND, C_REG, C_NONE, C_REG, 1, 4, 0, 0, 0}, {AAND, C_REG, C_NONE, C_REG, 1, 4, 0, 0, 0},
{AANDS, C_REG, C_REG, C_REG, 1, 4, 0, 0, 0}, {AANDS, C_REG, C_REG, C_REG, 1, 4, 0, 0, 0},
{AANDS, C_REG, C_NONE, C_REG, 1, 4, 0, 0, 0}, {AANDS, C_REG, C_NONE, C_REG, 1, 4, 0, 0, 0},
{ATST, C_REG, C_REG, C_NONE, 1, 4, 0, 0, 0},
{AAND, C_MBCON, C_REG, C_RSP, 53, 4, 0, 0, 0}, {AAND, C_MBCON, C_REG, C_RSP, 53, 4, 0, 0, 0},
{AAND, C_MBCON, C_NONE, C_REG, 53, 4, 0, 0, 0}, {AAND, C_MBCON, C_NONE, C_REG, 53, 4, 0, 0, 0},
{AANDS, C_MBCON, C_REG, C_REG, 53, 4, 0, 0, 0}, {AANDS, C_MBCON, C_REG, C_REG, 53, 4, 0, 0, 0},
{AANDS, C_MBCON, C_NONE, C_REG, 53, 4, 0, 0, 0}, {AANDS, C_MBCON, C_NONE, C_REG, 53, 4, 0, 0, 0},
{ATST, C_MBCON, C_REG, C_NONE, 53, 4, 0, 0, 0},
{AAND, C_BITCON, C_REG, C_RSP, 53, 4, 0, 0, 0}, {AAND, C_BITCON, C_REG, C_RSP, 53, 4, 0, 0, 0},
{AAND, C_BITCON, C_NONE, C_REG, 53, 4, 0, 0, 0}, {AAND, C_BITCON, C_NONE, C_REG, 53, 4, 0, 0, 0},
{AANDS, C_BITCON, C_REG, C_REG, 53, 4, 0, 0, 0}, {AANDS, C_BITCON, C_REG, C_REG, 53, 4, 0, 0, 0},
{AANDS, C_BITCON, C_NONE, C_REG, 53, 4, 0, 0, 0}, {AANDS, C_BITCON, C_NONE, C_REG, 53, 4, 0, 0, 0},
{ATST, C_BITCON, C_REG, C_NONE, 53, 4, 0, 0, 0},
{AAND, C_MOVCON, C_REG, C_RSP, 62, 8, 0, 0, 0}, {AAND, C_MOVCON, C_REG, C_RSP, 62, 8, 0, 0, 0},
{AAND, C_MOVCON, C_NONE, C_REG, 62, 8, 0, 0, 0}, {AAND, C_MOVCON, C_NONE, C_REG, 62, 8, 0, 0, 0},
{AANDS, C_MOVCON, C_REG, C_REG, 62, 8, 0, 0, 0}, {AANDS, C_MOVCON, C_REG, C_REG, 62, 8, 0, 0, 0},
{AANDS, C_MOVCON, C_NONE, C_REG, 62, 8, 0, 0, 0}, {AANDS, C_MOVCON, C_NONE, C_REG, 62, 8, 0, 0, 0},
{ATST, C_MOVCON, C_REG, C_NONE, 62, 8, 0, 0, 0},
{AAND, C_VCON, C_REG, C_RSP, 28, 8, 0, LFROM, 0}, {AAND, C_VCON, C_REG, C_RSP, 28, 8, 0, LFROM, 0},
{AAND, C_VCON, C_NONE, C_REG, 28, 8, 0, LFROM, 0}, {AAND, C_VCON, C_NONE, C_REG, 28, 8, 0, LFROM, 0},
{AANDS, C_VCON, C_REG, C_REG, 28, 8, 0, LFROM, 0}, {AANDS, C_VCON, C_REG, C_REG, 28, 8, 0, LFROM, 0},
{AANDS, C_VCON, C_NONE, C_REG, 28, 8, 0, LFROM, 0}, {AANDS, C_VCON, C_NONE, C_REG, 28, 8, 0, LFROM, 0},
{ATST, C_VCON, C_REG, C_NONE, 28, 8, 0, LFROM, 0},
{AAND, C_SHIFT, C_REG, C_REG, 3, 4, 0, 0, 0}, {AAND, C_SHIFT, C_REG, C_REG, 3, 4, 0, 0, 0},
{AAND, C_SHIFT, C_NONE, C_REG, 3, 4, 0, 0, 0}, {AAND, C_SHIFT, C_NONE, C_REG, 3, 4, 0, 0, 0},
{AANDS, C_SHIFT, C_REG, C_REG, 3, 4, 0, 0, 0}, {AANDS, C_SHIFT, C_REG, C_REG, 3, 4, 0, 0, 0},
{AANDS, C_SHIFT, C_NONE, C_REG, 3, 4, 0, 0, 0}, {AANDS, C_SHIFT, C_NONE, C_REG, 3, 4, 0, 0, 0},
{ATST, C_SHIFT, C_REG, C_NONE, 3, 4, 0, 0, 0},
{AMOVD, C_RSP, C_NONE, C_RSP, 24, 4, 0, 0, 0}, {AMOVD, C_RSP, C_NONE, C_RSP, 24, 4, 0, 0, 0},
{AMVN, C_REG, C_NONE, C_REG, 24, 4, 0, 0, 0}, {AMVN, C_REG, C_NONE, C_REG, 24, 4, 0, 0, 0},
{AMOVB, C_REG, C_NONE, C_REG, 45, 4, 0, 0, 0}, {AMOVB, C_REG, C_NONE, C_REG, 45, 4, 0, 0, 0},
...@@ -2980,14 +2986,18 @@ func (c *ctxt7) asmout(p *obj.Prog, o *Optab, out []uint32) { ...@@ -2980,14 +2986,18 @@ func (c *ctxt7) asmout(p *obj.Prog, o *Optab, out []uint32) {
if !(o1 != 0) { if !(o1 != 0) {
break break
} }
rt := int(p.To.Reg)
if p.To.Type == obj.TYPE_NONE {
rt = REGZERO
}
r := int(p.Reg) r := int(p.Reg)
if r == 0 { if r == 0 {
r = int(p.To.Reg) r = rt
} }
o2 = c.oprrr(p, p.As) o2 = c.oprrr(p, p.As)
o2 |= REGTMP & 31 << 16 /* shift is 0 */ o2 |= REGTMP & 31 << 16 /* shift is 0 */
o2 |= uint32(r&31) << 5 o2 |= uint32(r&31) << 5
o2 |= uint32(p.To.Reg & 31) o2 |= uint32(rt & 31)
case 29: /* op Rn, Rd */ case 29: /* op Rn, Rd */
fc := c.aclass(&p.From) fc := c.aclass(&p.From)
...@@ -3378,9 +3388,12 @@ func (c *ctxt7) asmout(p *obj.Prog, o *Optab, out []uint32) { ...@@ -3378,9 +3388,12 @@ func (c *ctxt7) asmout(p *obj.Prog, o *Optab, out []uint32) {
o1 |= uint32((p.From.Offset & 0x7F) << 5) o1 |= uint32((p.From.Offset & 0x7F) << 5)
case 53: /* and/or/eor/bic/... $bitcon, Rn, Rd */ case 53: /* and/or/eor/bic/tst/... $bitcon, Rn, Rd */
a := p.As a := p.As
rt := int(p.To.Reg) rt := int(p.To.Reg)
if p.To.Type == obj.TYPE_NONE {
rt = REGZERO
}
r := int(p.Reg) r := int(p.Reg)
if r == 0 { if r == 0 {
r = rt r = rt
...@@ -3388,7 +3401,7 @@ func (c *ctxt7) asmout(p *obj.Prog, o *Optab, out []uint32) { ...@@ -3388,7 +3401,7 @@ func (c *ctxt7) asmout(p *obj.Prog, o *Optab, out []uint32) {
mode := 64 mode := 64
v := uint64(p.From.Offset) v := uint64(p.From.Offset)
switch p.As { switch p.As {
case AANDW, AORRW, AEORW, AANDSW: case AANDW, AORRW, AEORW, AANDSW, ATSTW:
mode = 32 mode = 32
case ABIC, AORN, AEON, ABICS: case ABIC, AORN, AEON, ABICS:
v = ^v v = ^v
...@@ -4001,7 +4014,7 @@ func (c *ctxt7) asmout(p *obj.Prog, o *Optab, out []uint32) { ...@@ -4001,7 +4014,7 @@ func (c *ctxt7) asmout(p *obj.Prog, o *Optab, out []uint32) {
size = 1 size = 1
} }
o1 |= (Q&1) << 30 | (size&3) << 22 | uint32(rf&31) << 5 | uint32(rt&31) o1 |= (Q&1)<<30 | (size&3)<<22 | uint32(rf&31)<<5 | uint32(rt&31)
case 84: /* vst1 [Vt1.<T>, Vt2.<T>, ...], (Rn) */ case 84: /* vst1 [Vt1.<T>, Vt2.<T>, ...], (Rn) */
r := int(p.To.Reg) r := int(p.To.Reg)
...@@ -4587,10 +4600,10 @@ func (c *ctxt7) oprrr(p *obj.Prog, a obj.As) uint32 { ...@@ -4587,10 +4600,10 @@ func (c *ctxt7) oprrr(p *obj.Prog, a obj.As) uint32 {
case AEORW: case AEORW:
return S32 | 2<<29 | 0xA<<24 return S32 | 2<<29 | 0xA<<24
case AANDS: case AANDS, ATST:
return S64 | 3<<29 | 0xA<<24 return S64 | 3<<29 | 0xA<<24
case AANDSW: case AANDSW, ATSTW:
return S32 | 3<<29 | 0xA<<24 return S32 | 3<<29 | 0xA<<24
case ABIC: case ABIC:
...@@ -5165,10 +5178,10 @@ func (c *ctxt7) opirr(p *obj.Prog, a obj.As) uint32 { ...@@ -5165,10 +5178,10 @@ func (c *ctxt7) opirr(p *obj.Prog, a obj.As) uint32 {
case AEORW, AEONW: case AEORW, AEONW:
return S32 | 2<<29 | 0x24<<23 | 0<<22 return S32 | 2<<29 | 0x24<<23 | 0<<22
case AANDS, ABICS: case AANDS, ABICS, ATST:
return S64 | 3<<29 | 0x24<<23 return S64 | 3<<29 | 0x24<<23
case AANDSW, ABICSW: case AANDSW, ABICSW, ATSTW:
return S32 | 3<<29 | 0x24<<23 | 0<<22 return S32 | 3<<29 | 0x24<<23 | 0<<22
case AASR: case AASR:
......
...@@ -311,7 +311,7 @@ func progedit(ctxt *obj.Link, p *obj.Prog, newprog obj.ProgAlloc) { ...@@ -311,7 +311,7 @@ func progedit(ctxt *obj.Link, p *obj.Prog, newprog obj.ProgAlloc) {
// will zero the high 32-bit of the destination // will zero the high 32-bit of the destination
// register anyway. // register anyway.
switch p.As { switch p.As {
case AANDW, AORRW, AEORW, AANDSW: case AANDW, AORRW, AEORW, AANDSW, ATSTW:
if p.From.Type == obj.TYPE_CONST { if p.From.Type == obj.TYPE_CONST {
v := p.From.Offset & 0xffffffff v := p.From.Offset & 0xffffffff
p.From.Offset = v | v<<32 p.From.Offset = v | v<<32
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment