Commit c6bf9a81 authored by Ben Shi

cmd/compile: optimize AMD64's bitwise operations

Currently "arr[idx] |= 0x80" is compiled to MOVLload->BTSL->MOVLstore.
And this CL optimizes it to a single BTSLconstmodify. Other bit wise
operations with a direct memory operand are also implemented.
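
As an illustration, here is a sketch of the kind of Go code this CL affects,
modeled on the codegen test added below (the function name, registers, and
offsets are illustrative, not taken from the CL):

	// setBit ORs in a constant with exactly one bit set, so the compiler
	// can use a bit-test instruction with a memory operand.
	// Before this CL (schematically): MOVL 4(AX), CX; BTSL $7, CX; MOVL CX, 4(AX)
	// After this CL:                  BTSL $7, 4(AX)   (a single BTSLconstmodify)
	func setBit(a []uint32) {
		a[1] |= 0x80
	}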

1. The size of the executable bin/go decreases by about 4KB, and the total
size of pkg/linux_amd64 (excluding cmd/compile) decreases by about 0.6KB.

2. There is a little improvement in the go1 benchmarks (excluding noise).
name                     old time/op    new time/op    delta
BinaryTree17-4              2.66s ± 4%     2.66s ± 3%    ~     (p=0.596 n=49+49)
Fannkuch11-4                2.38s ± 2%     2.32s ± 2%  -2.69%  (p=0.000 n=50+50)
FmtFprintfEmpty-4          42.7ns ± 4%    43.2ns ± 7%  +1.31%  (p=0.009 n=50+50)
FmtFprintfString-4         71.0ns ± 5%    72.0ns ± 3%  +1.33%  (p=0.000 n=50+50)
FmtFprintfInt-4            80.7ns ± 4%    80.6ns ± 3%    ~     (p=0.931 n=50+50)
FmtFprintfIntInt-4          125ns ± 3%     126ns ± 4%    ~     (p=0.051 n=50+50)
FmtFprintfPrefixedInt-4     158ns ± 1%     142ns ± 3%  -9.84%  (p=0.000 n=36+50)
FmtFprintfFloat-4           215ns ± 4%     212ns ± 4%  -1.23%  (p=0.002 n=50+50)
FmtManyArgs-4               519ns ± 3%     510ns ± 3%  -1.77%  (p=0.000 n=50+50)
GobDecode-4                6.49ms ± 6%    6.52ms ± 5%    ~     (p=0.866 n=50+50)
GobEncode-4                5.93ms ± 8%    6.01ms ± 7%    ~     (p=0.076 n=50+50)
Gzip-4                      222ms ± 4%     224ms ± 8%  +0.80%  (p=0.001 n=50+50)
Gunzip-4                   36.6ms ± 5%    36.4ms ± 4%    ~     (p=0.093 n=50+50)
HTTPClientServer-4         59.1µs ± 1%    58.9µs ± 2%  -0.24%  (p=0.039 n=49+48)
JSONEncode-4               9.23ms ± 4%    9.21ms ± 5%    ~     (p=0.244 n=50+50)
JSONDecode-4               48.8ms ± 4%    48.7ms ± 4%    ~     (p=0.653 n=50+50)
Mandelbrot200-4            3.81ms ± 4%    3.80ms ± 3%    ~     (p=0.834 n=50+50)
GoParse-4                  3.20ms ± 5%    3.19ms ± 5%    ~     (p=0.494 n=50+50)
RegexpMatchEasy0_32-4      78.1ns ± 2%    77.4ns ± 3%  -0.86%  (p=0.005 n=50+50)
RegexpMatchEasy0_1K-4       233ns ± 3%     233ns ± 3%    ~     (p=0.074 n=50+50)
RegexpMatchEasy1_32-4      74.2ns ± 3%    73.4ns ± 3%  -1.06%  (p=0.000 n=50+50)
RegexpMatchEasy1_1K-4       369ns ± 2%     364ns ± 4%  -1.41%  (p=0.000 n=36+50)
RegexpMatchMedium_32-4      109ns ± 4%     107ns ± 3%  -2.06%  (p=0.001 n=50+50)
RegexpMatchMedium_1K-4     31.5µs ± 3%    30.8µs ± 3%  -2.20%  (p=0.000 n=50+50)
RegexpMatchHard_32-4       1.57µs ± 3%    1.56µs ± 2%  -0.57%  (p=0.016 n=50+50)
RegexpMatchHard_1K-4       47.4µs ± 4%    47.0µs ± 3%  -0.82%  (p=0.008 n=50+50)
Revcomp-4                   414ms ± 7%     412ms ± 7%    ~     (p=0.285 n=50+50)
Template-4                 64.3ms ± 4%    62.7ms ± 3%  -2.44%  (p=0.000 n=50+50)
TimeParse-4                 316ns ± 3%     313ns ± 3%    ~     (p=0.122 n=50+50)
TimeFormat-4                291ns ± 3%     293ns ± 3%  +0.80%  (p=0.001 n=50+50)
[Geo mean]                 46.5µs         46.2µs       -0.81%

name                     old speed      new speed      delta
GobDecode-4               118MB/s ± 6%   118MB/s ± 5%    ~     (p=0.863 n=50+50)
GobEncode-4               130MB/s ± 9%   128MB/s ± 8%    ~     (p=0.076 n=50+50)
Gzip-4                   87.4MB/s ± 4%  86.8MB/s ± 7%  -0.78%  (p=0.002 n=50+50)
Gunzip-4                  531MB/s ± 5%   533MB/s ± 4%    ~     (p=0.093 n=50+50)
JSONEncode-4              210MB/s ± 4%   211MB/s ± 5%    ~     (p=0.247 n=50+50)
JSONDecode-4             39.8MB/s ± 4%  39.9MB/s ± 4%    ~     (p=0.654 n=50+50)
GoParse-4                18.1MB/s ± 5%  18.2MB/s ± 5%    ~     (p=0.493 n=50+50)
RegexpMatchEasy0_32-4     410MB/s ± 2%   413MB/s ± 3%  +0.86%  (p=0.004 n=50+50)
RegexpMatchEasy0_1K-4    4.39GB/s ± 3%  4.38GB/s ± 3%    ~     (p=0.063 n=50+50)
RegexpMatchEasy1_32-4     432MB/s ± 3%   436MB/s ± 3%  +1.07%  (p=0.000 n=50+50)
RegexpMatchEasy1_1K-4    2.77GB/s ± 2%  2.81GB/s ± 4%  +1.46%  (p=0.000 n=36+50)
RegexpMatchMedium_32-4   9.16MB/s ± 3%  9.35MB/s ± 4%  +2.09%  (p=0.001 n=50+50)
RegexpMatchMedium_1K-4   32.5MB/s ± 3%  33.2MB/s ± 3%  +2.25%  (p=0.000 n=50+50)
RegexpMatchHard_32-4     20.4MB/s ± 3%  20.5MB/s ± 2%  +0.56%  (p=0.017 n=50+50)
RegexpMatchHard_1K-4     21.6MB/s ± 4%  21.8MB/s ± 3%  +0.83%  (p=0.008 n=50+50)
Revcomp-4                 613MB/s ± 4%   618MB/s ± 7%    ~     (p=0.152 n=48+50)
Template-4               30.2MB/s ± 4%  30.9MB/s ± 3%  +2.49%  (p=0.000 n=50+50)
[Geo mean]                127MB/s        128MB/s       +0.64%

Change-Id: If405198283855d75697f66cf894b2bef458f620e
Reviewed-on: https://go-review.googlesource.com/135422
Reviewed-by: Keith Randall <khr@golang.org>
parent 713edf8b
@@ -695,6 +695,7 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
p.To.Type = obj.TYPE_REG
p.To.Reg = v.Reg()
case ssa.OpAMD64MOVQstore, ssa.OpAMD64MOVSSstore, ssa.OpAMD64MOVSDstore, ssa.OpAMD64MOVLstore, ssa.OpAMD64MOVWstore, ssa.OpAMD64MOVBstore, ssa.OpAMD64MOVOstore,
ssa.OpAMD64BTCQmodify, ssa.OpAMD64BTCLmodify, ssa.OpAMD64BTRQmodify, ssa.OpAMD64BTRLmodify, ssa.OpAMD64BTSQmodify, ssa.OpAMD64BTSLmodify,
ssa.OpAMD64ADDQmodify, ssa.OpAMD64SUBQmodify, ssa.OpAMD64ANDQmodify, ssa.OpAMD64ORQmodify, ssa.OpAMD64XORQmodify,
ssa.OpAMD64ADDLmodify, ssa.OpAMD64SUBLmodify, ssa.OpAMD64ANDLmodify, ssa.OpAMD64ORLmodify, ssa.OpAMD64XORLmodify:
p := s.Prog(v.Op.Asm())
@@ -763,7 +764,8 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
}
fallthrough
case ssa.OpAMD64ANDQconstmodify, ssa.OpAMD64ANDLconstmodify, ssa.OpAMD64ORQconstmodify, ssa.OpAMD64ORLconstmodify,
ssa.OpAMD64XORQconstmodify, ssa.OpAMD64XORLconstmodify:
ssa.OpAMD64BTCQconstmodify, ssa.OpAMD64BTCLconstmodify, ssa.OpAMD64BTSQconstmodify, ssa.OpAMD64BTSLconstmodify,
ssa.OpAMD64BTRQconstmodify, ssa.OpAMD64BTRLconstmodify, ssa.OpAMD64XORQconstmodify, ssa.OpAMD64XORLconstmodify:
sc := v.AuxValAndOff()
off := sc.Off()
val := sc.Val()
...
@@ -709,7 +709,17 @@
(ANDL x (MOVLconst [c])) -> (ANDLconst [c] x)
(AND(L|Q)const [c] (AND(L|Q)const [d] x)) -> (AND(L|Q)const [c & d] x)
(BTR(L|Q)const [c] (AND(L|Q)const [d] x)) -> (AND(L|Q)const [d &^ 1<<uint32(c)] x)
(AND(L|Q)const [c] (BTR(L|Q)const [d] x)) -> (AND(L|Q)const [c &^ 1<<uint32(d)] x)
(BTR(L|Q)const [c] (BTR(L|Q)const [d] x)) -> (AND(L|Q)const [^(1<<uint32(c) | 1<<uint32(d))] x)
(XOR(L|Q)const [c] (XOR(L|Q)const [d] x)) -> (XOR(L|Q)const [c ^ d] x)
(BTC(L|Q)const [c] (XOR(L|Q)const [d] x)) -> (XOR(L|Q)const [d ^ 1<<uint32(c)] x)
(XOR(L|Q)const [c] (BTC(L|Q)const [d] x)) -> (XOR(L|Q)const [c ^ 1<<uint32(d)] x)
(BTC(L|Q)const [c] (BTC(L|Q)const [d] x)) -> (XOR(L|Q)const [1<<uint32(c) ^ 1<<uint32(d)] x)
(OR(L|Q)const [c] (OR(L|Q)const [d] x)) -> (OR(L|Q)const [c | d] x)
(OR(L|Q)const [c] (BTS(L|Q)const [d] x)) -> (OR(L|Q)const [c | 1<<uint32(d)] x)
(BTS(L|Q)const [c] (OR(L|Q)const [d] x)) -> (OR(L|Q)const [d | 1<<uint32(c)] x)
(BTS(L|Q)const [c] (BTS(L|Q)const [d] x)) -> (OR(L|Q)const [1<<uint32(d) | 1<<uint32(c)] x)
(MULLconst [c] (MULLconst [d] x)) -> (MULLconst [int64(int32(c * d))] x)
(MULQconst [c] (MULQconst [d] x)) && is32Bit(c*d) -> (MULQconst [c * d] x)
@@ -1051,14 +1061,14 @@
((ADD|SUB|MUL|DIV)SSload [off1+off2] {sym} val base mem)
((ADD|SUB|MUL|DIV)SDload [off1] {sym} val (ADDQconst [off2] base) mem) && is32Bit(off1+off2) ->
((ADD|SUB|MUL|DIV)SDload [off1+off2] {sym} val base mem)
((ADD|AND|OR|XOR)Qconstmodify [valoff1] {sym} (ADDQconst [off2] base) mem) && ValAndOff(valoff1).canAdd(off2) ->
((ADD|AND|OR|XOR|BTC|BTR|BTS)Qconstmodify [valoff1] {sym} (ADDQconst [off2] base) mem) && ValAndOff(valoff1).canAdd(off2) ->
((ADD|AND|OR|XOR)Qconstmodify [ValAndOff(valoff1).add(off2)] {sym} base mem)
((ADD|AND|OR|XOR|BTC|BTR|BTS)Qconstmodify [ValAndOff(valoff1).add(off2)] {sym} base mem)
((ADD|AND|OR|XOR)Lconstmodify [valoff1] {sym} (ADDQconst [off2] base) mem) && ValAndOff(valoff1).canAdd(off2) ->
((ADD|AND|OR|XOR|BTC|BTR|BTS)Lconstmodify [valoff1] {sym} (ADDQconst [off2] base) mem) && ValAndOff(valoff1).canAdd(off2) ->
((ADD|AND|OR|XOR)Lconstmodify [ValAndOff(valoff1).add(off2)] {sym} base mem)
((ADD|AND|OR|XOR|BTC|BTR|BTS)Lconstmodify [ValAndOff(valoff1).add(off2)] {sym} base mem)
((ADD|SUB|AND|OR|XOR)Qmodify [off1] {sym} (ADDQconst [off2] base) val mem) && is32Bit(off1+off2) ->
((ADD|SUB|AND|OR|XOR|BTC|BTR|BTS)Qmodify [off1] {sym} (ADDQconst [off2] base) val mem) && is32Bit(off1+off2) ->
((ADD|SUB|AND|OR|XOR)Qmodify [off1+off2] {sym} base val mem)
((ADD|SUB|AND|OR|XOR|BTC|BTR|BTS)Qmodify [off1+off2] {sym} base val mem)
((ADD|SUB|AND|OR|XOR)Lmodify [off1] {sym} (ADDQconst [off2] base) val mem) && is32Bit(off1+off2) ->
((ADD|SUB|AND|OR|XOR|BTC|BTR|BTS)Lmodify [off1] {sym} (ADDQconst [off2] base) val mem) && is32Bit(off1+off2) ->
((ADD|SUB|AND|OR|XOR)Lmodify [off1+off2] {sym} base val mem)
((ADD|SUB|AND|OR|XOR|BTC|BTR|BTS)Lmodify [off1+off2] {sym} base val mem)
// Fold constants into stores.
(MOVQstore [off] {sym} ptr (MOVQconst [c]) mem) && validValAndOff(c,off) ->
@@ -1106,18 +1116,18 @@
((ADD|SUB|MUL|DIV)SDload [off1] {sym1} val (LEAQ [off2] {sym2} base) mem)
&& is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
((ADD|SUB|MUL|DIV)SDload [off1+off2] {mergeSym(sym1,sym2)} val base mem)
((ADD|AND|OR|XOR)Qconstmodify [valoff1] {sym1} (LEAQ [off2] {sym2} base) mem)
((ADD|AND|OR|XOR|BTC|BTR|BTS)Qconstmodify [valoff1] {sym1} (LEAQ [off2] {sym2} base) mem)
&& ValAndOff(valoff1).canAdd(off2) && canMergeSym(sym1, sym2) ->
((ADD|AND|OR|XOR)Qconstmodify [ValAndOff(valoff1).add(off2)] {mergeSym(sym1,sym2)} base mem)
((ADD|AND|OR|XOR|BTC|BTR|BTS)Qconstmodify [ValAndOff(valoff1).add(off2)] {mergeSym(sym1,sym2)} base mem)
((ADD|AND|OR|XOR)Lconstmodify [valoff1] {sym1} (LEAQ [off2] {sym2} base) mem)
((ADD|AND|OR|XOR|BTC|BTR|BTS)Lconstmodify [valoff1] {sym1} (LEAQ [off2] {sym2} base) mem)
&& ValAndOff(valoff1).canAdd(off2) && canMergeSym(sym1, sym2) ->
((ADD|AND|OR|XOR)Lconstmodify [ValAndOff(valoff1).add(off2)] {mergeSym(sym1,sym2)} base mem)
((ADD|AND|OR|XOR|BTC|BTR|BTS)Lconstmodify [ValAndOff(valoff1).add(off2)] {mergeSym(sym1,sym2)} base mem)
((ADD|SUB|AND|OR|XOR)Qmodify [off1] {sym1} (LEAQ [off2] {sym2} base) val mem)
((ADD|SUB|AND|OR|XOR|BTC|BTR|BTS)Qmodify [off1] {sym1} (LEAQ [off2] {sym2} base) val mem)
&& is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
((ADD|SUB|AND|OR|XOR)Qmodify [off1+off2] {mergeSym(sym1,sym2)} base val mem)
((ADD|SUB|AND|OR|XOR|BTC|BTR|BTS)Qmodify [off1+off2] {mergeSym(sym1,sym2)} base val mem)
((ADD|SUB|AND|OR|XOR)Lmodify [off1] {sym1} (LEAQ [off2] {sym2} base) val mem)
((ADD|SUB|AND|OR|XOR|BTC|BTR|BTS)Lmodify [off1] {sym1} (LEAQ [off2] {sym2} base) val mem)
&& is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
((ADD|SUB|AND|OR|XOR)Lmodify [off1+off2] {mergeSym(sym1,sym2)} base val mem)
((ADD|SUB|AND|OR|XOR|BTC|BTR|BTS)Lmodify [off1+off2] {mergeSym(sym1,sym2)} base val mem)
// generating indexed loads and stores
(MOV(B|W|L|Q|SS|SD)load [off1] {sym1} (LEAQ1 [off2] {sym2} ptr idx) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
@@ -1424,6 +1434,12 @@
(XORLconst [c] (MOVLconst [d])) -> (MOVLconst [c^d])
(NOTQ (MOVQconst [c])) -> (MOVQconst [^c])
(NOTL (MOVLconst [c])) -> (MOVLconst [^c])
(BTSQconst [c] (MOVQconst [d])) -> (MOVQconst [d|(1<<uint32(c))])
(BTSLconst [c] (MOVLconst [d])) -> (MOVLconst [d|(1<<uint32(c))])
(BTRQconst [c] (MOVQconst [d])) -> (MOVQconst [d&^(1<<uint32(c))])
(BTRLconst [c] (MOVLconst [d])) -> (MOVLconst [d&^(1<<uint32(c))])
(BTCQconst [c] (MOVQconst [d])) -> (MOVQconst [d^(1<<uint32(c))])
(BTCLconst [c] (MOVLconst [d])) -> (MOVLconst [d^(1<<uint32(c))])
// generic simplifications
// TODO: more of this
@@ -2304,11 +2320,11 @@
((ADD|SUB|MUL|DIV)SD x l:(MOVSDload [off] {sym} ptr mem)) && canMergeLoad(v, l, x) && clobber(l) -> ((ADD|SUB|MUL|DIV)SDload x [off] {sym} ptr mem)
((ADD|SUB|MUL|DIV)SS x l:(MOVSSload [off] {sym} ptr mem)) && canMergeLoad(v, l, x) && clobber(l) -> ((ADD|SUB|MUL|DIV)SSload x [off] {sym} ptr mem)
(MOVLstore {sym} [off] ptr y:((ADD|AND|OR|XOR)Lload x [off] {sym} ptr mem) mem) && y.Uses==1 && clobber(y) -> ((ADD|AND|OR|XOR)Lmodify [off] {sym} ptr x mem)
(MOVLstore {sym} [off] ptr y:((ADD|SUB|AND|OR|XOR)L l:(MOVLload [off] {sym} ptr mem) x) mem) && y.Uses==1 && l.Uses==1 && clobber(y) && clobber(l) ->
(MOVLstore {sym} [off] ptr y:((ADD|SUB|AND|OR|XOR|BTC|BTR|BTS)L l:(MOVLload [off] {sym} ptr mem) x) mem) && y.Uses==1 && l.Uses==1 && clobber(y) && clobber(l) ->
((ADD|SUB|AND|OR|XOR)Lmodify [off] {sym} ptr x mem)
((ADD|SUB|AND|OR|XOR|BTC|BTR|BTS)Lmodify [off] {sym} ptr x mem)
(MOVQstore {sym} [off] ptr y:((ADD|AND|OR|XOR)Qload x [off] {sym} ptr mem) mem) && y.Uses==1 && clobber(y) -> ((ADD|AND|OR|XOR)Qmodify [off] {sym} ptr x mem)
(MOVQstore {sym} [off] ptr y:((ADD|SUB|AND|OR|XOR)Q l:(MOVQload [off] {sym} ptr mem) x) mem) && y.Uses==1 && l.Uses==1 && clobber(y) && clobber(l) ->
(MOVQstore {sym} [off] ptr y:((ADD|SUB|AND|OR|XOR|BTC|BTR|BTS)Q l:(MOVQload [off] {sym} ptr mem) x) mem) && y.Uses==1 && l.Uses==1 && clobber(y) && clobber(l) ->
((ADD|SUB|AND|OR|XOR)Qmodify [off] {sym} ptr x mem)
((ADD|SUB|AND|OR|XOR|BTC|BTR|BTS)Qmodify [off] {sym} ptr x mem)
// Merge ADDQconst and LEAQ into atomic loads.
(MOVQatomicload [off1] {sym} (ADDQconst [off2] ptr) mem) && is32Bit(off1+off2) ->
@@ -2392,12 +2408,12 @@
(MOVWQZX (MOVBQZX x)) -> (MOVBQZX x)
(MOVBQZX (MOVBQZX x)) -> (MOVBQZX x)
(MOVQstore [off] {sym} ptr a:((ADD|AND|OR|XOR)Qconst [c] l:(MOVQload [off] {sym} ptr2 mem)) mem)
(MOVQstore [off] {sym} ptr a:((ADD|AND|OR|XOR|BTC|BTR|BTS)Qconst [c] l:(MOVQload [off] {sym} ptr2 mem)) mem)
&& isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c,off) ->
&& isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c,off) && clobber(l) && clobber(a) ->
((ADD|AND|OR|XOR)Qconstmodify {sym} [makeValAndOff(c,off)] ptr mem)
((ADD|AND|OR|XOR|BTC|BTR|BTS)Qconstmodify {sym} [makeValAndOff(c,off)] ptr mem)
(MOVLstore [off] {sym} ptr a:((ADD|AND|OR|XOR)Lconst [c] l:(MOVLload [off] {sym} ptr2 mem)) mem)
(MOVLstore [off] {sym} ptr a:((ADD|AND|OR|XOR|BTC|BTR|BTS)Lconst [c] l:(MOVLload [off] {sym} ptr2 mem)) mem)
&& isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c,off) ->
&& isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c,off) && clobber(l) && clobber(a) ->
((ADD|AND|OR|XOR)Lconstmodify {sym} [makeValAndOff(c,off)] ptr mem)
((ADD|AND|OR|XOR|BTC|BTR|BTS)Lconstmodify {sym} [makeValAndOff(c,off)] ptr mem)
// float <-> int register moves, with no conversion.
// These come up when compiling math.{Float{32,64}bits,Float{32,64}frombits}.
...
@@ -289,6 +289,20 @@ func init() {
{name: "BTSLconst", argLength: 1, reg: gp11, asm: "BTSL", resultInArg0: true, clobberFlags: true, aux: "Int8"}, // set bit auxint in arg0, 0 <= auxint < 32
{name: "BTSQconst", argLength: 1, reg: gp11, asm: "BTSQ", resultInArg0: true, clobberFlags: true, aux: "Int8"}, // set bit auxint in arg0, 0 <= auxint < 64
// direct bit operation on memory operand
{name: "BTCQmodify", argLength: 3, reg: gpstore, asm: "BTCQ", aux: "SymOff", typ: "Mem", clobberFlags: true, faultOnNilArg0: true, symEffect: "Read,Write"}, // complement bit arg1 in 64-bit arg0+auxint+aux, arg2=mem
{name: "BTCLmodify", argLength: 3, reg: gpstore, asm: "BTCL", aux: "SymOff", typ: "Mem", clobberFlags: true, faultOnNilArg0: true, symEffect: "Read,Write"}, // complement bit arg1 in 32-bit arg0+auxint+aux, arg2=mem
{name: "BTSQmodify", argLength: 3, reg: gpstore, asm: "BTSQ", aux: "SymOff", typ: "Mem", clobberFlags: true, faultOnNilArg0: true, symEffect: "Read,Write"}, // set bit arg1 in 64-bit arg0+auxint+aux, arg2=mem
{name: "BTSLmodify", argLength: 3, reg: gpstore, asm: "BTSL", aux: "SymOff", typ: "Mem", clobberFlags: true, faultOnNilArg0: true, symEffect: "Read,Write"}, // set bit arg1 in 32-bit arg0+auxint+aux, arg2=mem
{name: "BTRQmodify", argLength: 3, reg: gpstore, asm: "BTRQ", aux: "SymOff", typ: "Mem", clobberFlags: true, faultOnNilArg0: true, symEffect: "Read,Write"}, // reset bit arg1 in 64-bit arg0+auxint+aux, arg2=mem
{name: "BTRLmodify", argLength: 3, reg: gpstore, asm: "BTRL", aux: "SymOff", typ: "Mem", clobberFlags: true, faultOnNilArg0: true, symEffect: "Read,Write"}, // reset bit arg1 in 32-bit arg0+auxint+aux, arg2=mem
{name: "BTCQconstmodify", argLength: 2, reg: gpstoreconst, asm: "BTCQ", aux: "SymValAndOff", clobberFlags: true, faultOnNilArg0: true, symEffect: "Read,Write"}, // complement bit ValAndOff(AuxInt).Val() in 64-bit arg0+ValAndOff(AuxInt).Off()+aux, arg1=mem
{name: "BTCLconstmodify", argLength: 2, reg: gpstoreconst, asm: "BTCL", aux: "SymValAndOff", clobberFlags: true, faultOnNilArg0: true, symEffect: "Read,Write"}, // complement bit ValAndOff(AuxInt).Val() in 32-bit arg0+ValAndOff(AuxInt).Off()+aux, arg1=mem
{name: "BTSQconstmodify", argLength: 2, reg: gpstoreconst, asm: "BTSQ", aux: "SymValAndOff", clobberFlags: true, faultOnNilArg0: true, symEffect: "Read,Write"}, // set bit ValAndOff(AuxInt).Val() in 64-bit arg0+ValAndOff(AuxInt).Off()+aux, arg1=mem
{name: "BTSLconstmodify", argLength: 2, reg: gpstoreconst, asm: "BTSL", aux: "SymValAndOff", clobberFlags: true, faultOnNilArg0: true, symEffect: "Read,Write"}, // set bit ValAndOff(AuxInt).Val() in 32-bit arg0+ValAndOff(AuxInt).Off()+aux, arg1=mem
{name: "BTRQconstmodify", argLength: 2, reg: gpstoreconst, asm: "BTRQ", aux: "SymValAndOff", clobberFlags: true, faultOnNilArg0: true, symEffect: "Read,Write"}, // reset bit ValAndOff(AuxInt).Val() in 64-bit arg0+ValAndOff(AuxInt).Off()+aux, arg1=mem
{name: "BTRLconstmodify", argLength: 2, reg: gpstoreconst, asm: "BTRL", aux: "SymValAndOff", clobberFlags: true, faultOnNilArg0: true, symEffect: "Read,Write"}, // reset bit ValAndOff(AuxInt).Val() in 32-bit arg0+ValAndOff(AuxInt).Off()+aux, arg1=mem
{name: "TESTQ", argLength: 2, reg: gp2flags, commutative: true, asm: "TESTQ", typ: "Flags"}, // (arg0 & arg1) compare to 0 {name: "TESTQ", argLength: 2, reg: gp2flags, commutative: true, asm: "TESTQ", typ: "Flags"}, // (arg0 & arg1) compare to 0
{name: "TESTL", argLength: 2, reg: gp2flags, commutative: true, asm: "TESTL", typ: "Flags"}, // (arg0 & arg1) compare to 0 {name: "TESTL", argLength: 2, reg: gp2flags, commutative: true, asm: "TESTL", typ: "Flags"}, // (arg0 & arg1) compare to 0
{name: "TESTW", argLength: 2, reg: gp2flags, commutative: true, asm: "TESTW", typ: "Flags"}, // (arg0 & arg1) compare to 0 {name: "TESTW", argLength: 2, reg: gp2flags, commutative: true, asm: "TESTW", typ: "Flags"}, // (arg0 & arg1) compare to 0
......
...@@ -550,6 +550,18 @@ const ( ...@@ -550,6 +550,18 @@ const (
OpAMD64BTRQconst OpAMD64BTRQconst
OpAMD64BTSLconst OpAMD64BTSLconst
OpAMD64BTSQconst OpAMD64BTSQconst
OpAMD64BTCQmodify
OpAMD64BTCLmodify
OpAMD64BTSQmodify
OpAMD64BTSLmodify
OpAMD64BTRQmodify
OpAMD64BTRLmodify
OpAMD64BTCQconstmodify
OpAMD64BTCLconstmodify
OpAMD64BTSQconstmodify
OpAMD64BTSLconstmodify
OpAMD64BTRQconstmodify
OpAMD64BTRLconstmodify
OpAMD64TESTQ
OpAMD64TESTL
OpAMD64TESTW
@@ -6901,6 +6913,180 @@ var opcodeTable = [...]opInfo{
},
},
},
{
name: "BTCQmodify",
auxType: auxSymOff,
argLen: 3,
clobberFlags: true,
faultOnNilArg0: true,
symEffect: SymRead | SymWrite,
asm: x86.ABTCQ,
reg: regInfo{
inputs: []inputInfo{
{1, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
{0, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 SB
},
},
},
{
name: "BTCLmodify",
auxType: auxSymOff,
argLen: 3,
clobberFlags: true,
faultOnNilArg0: true,
symEffect: SymRead | SymWrite,
asm: x86.ABTCL,
reg: regInfo{
inputs: []inputInfo{
{1, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
{0, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 SB
},
},
},
{
name: "BTSQmodify",
auxType: auxSymOff,
argLen: 3,
clobberFlags: true,
faultOnNilArg0: true,
symEffect: SymRead | SymWrite,
asm: x86.ABTSQ,
reg: regInfo{
inputs: []inputInfo{
{1, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
{0, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 SB
},
},
},
{
name: "BTSLmodify",
auxType: auxSymOff,
argLen: 3,
clobberFlags: true,
faultOnNilArg0: true,
symEffect: SymRead | SymWrite,
asm: x86.ABTSL,
reg: regInfo{
inputs: []inputInfo{
{1, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
{0, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 SB
},
},
},
{
name: "BTRQmodify",
auxType: auxSymOff,
argLen: 3,
clobberFlags: true,
faultOnNilArg0: true,
symEffect: SymRead | SymWrite,
asm: x86.ABTRQ,
reg: regInfo{
inputs: []inputInfo{
{1, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
{0, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 SB
},
},
},
{
name: "BTRLmodify",
auxType: auxSymOff,
argLen: 3,
clobberFlags: true,
faultOnNilArg0: true,
symEffect: SymRead | SymWrite,
asm: x86.ABTRL,
reg: regInfo{
inputs: []inputInfo{
{1, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
{0, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 SB
},
},
},
{
name: "BTCQconstmodify",
auxType: auxSymValAndOff,
argLen: 2,
clobberFlags: true,
faultOnNilArg0: true,
symEffect: SymRead | SymWrite,
asm: x86.ABTCQ,
reg: regInfo{
inputs: []inputInfo{
{0, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 SB
},
},
},
{
name: "BTCLconstmodify",
auxType: auxSymValAndOff,
argLen: 2,
clobberFlags: true,
faultOnNilArg0: true,
symEffect: SymRead | SymWrite,
asm: x86.ABTCL,
reg: regInfo{
inputs: []inputInfo{
{0, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 SB
},
},
},
{
name: "BTSQconstmodify",
auxType: auxSymValAndOff,
argLen: 2,
clobberFlags: true,
faultOnNilArg0: true,
symEffect: SymRead | SymWrite,
asm: x86.ABTSQ,
reg: regInfo{
inputs: []inputInfo{
{0, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 SB
},
},
},
{
name: "BTSLconstmodify",
auxType: auxSymValAndOff,
argLen: 2,
clobberFlags: true,
faultOnNilArg0: true,
symEffect: SymRead | SymWrite,
asm: x86.ABTSL,
reg: regInfo{
inputs: []inputInfo{
{0, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 SB
},
},
},
{
name: "BTRQconstmodify",
auxType: auxSymValAndOff,
argLen: 2,
clobberFlags: true,
faultOnNilArg0: true,
symEffect: SymRead | SymWrite,
asm: x86.ABTRQ,
reg: regInfo{
inputs: []inputInfo{
{0, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 SB
},
},
},
{
name: "BTRLconstmodify",
auxType: auxSymValAndOff,
argLen: 2,
clobberFlags: true,
faultOnNilArg0: true,
symEffect: SymRead | SymWrite,
asm: x86.ABTRL,
reg: regInfo{
inputs: []inputInfo{
{0, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 SB
},
},
},
{
name: "TESTQ",
argLen: 2,
...
@@ -270,6 +270,12 @@ func bitOpOnMem(a []uint32) {
a[1] |= 220
// amd64:`XORL\s[$]240,\s8\([A-Z]+\)`
a[2] ^= 240
// amd64:`BTRL\s[$]15,\s12\([A-Z]+\)`,-`ANDL`
a[3] &= 0xffff7fff
// amd64:`BTSL\s[$]14,\s16\([A-Z]+\)`,-`ORL`
a[4] |= 0x4000
// amd64:`BTCL\s[$]13,\s20\([A-Z]+\)`,-`XORL`
a[5] ^= 0x2000
}
// Check AND masking on arm64 (Issue #19857)
...