Commit b75c5c59 authored by Ben Shi

cmd/compile: optimize AMD64 with more read-modify-write operations

Add six more read-modify-write operations that take a constant
source operand: AND, OR and XOR, each in L and Q widths.

1. The total size of pkg/linux_amd64 decreases about 3KB, excluding
cmd/compile.

2. The go1 benchmark shows a slight improvement.
name                     old time/op    new time/op    delta
BinaryTree17-4              2.61s ± 4%     2.67s ± 2%  +2.26%  (p=0.000 n=30+29)
Fannkuch11-4                2.39s ± 2%     2.32s ± 2%  -2.67%  (p=0.000 n=30+30)
FmtFprintfEmpty-4          44.0ns ± 4%    41.7ns ± 4%  -5.15%  (p=0.000 n=30+30)
FmtFprintfString-4         74.2ns ± 4%    72.3ns ± 4%  -2.59%  (p=0.000 n=30+30)
FmtFprintfInt-4            81.7ns ± 3%    78.8ns ± 4%  -3.54%  (p=0.000 n=27+30)
FmtFprintfIntInt-4          130ns ± 4%     124ns ± 5%  -4.60%  (p=0.000 n=30+30)
FmtFprintfPrefixedInt-4     154ns ± 3%     152ns ± 3%  -1.13%  (p=0.012 n=30+30)
FmtFprintfFloat-4           215ns ± 4%     212ns ± 5%  -1.56%  (p=0.002 n=30+30)
FmtManyArgs-4               522ns ± 3%     512ns ± 3%  -1.84%  (p=0.001 n=30+30)
GobDecode-4                6.42ms ± 5%    6.49ms ± 7%    ~     (p=0.070 n=30+30)
GobEncode-4                6.07ms ± 8%    5.98ms ± 8%    ~     (p=0.150 n=30+30)
Gzip-4                      236ms ± 4%     223ms ± 4%  -5.57%  (p=0.000 n=30+30)
Gunzip-4                   37.4ms ± 3%    36.7ms ± 4%  -2.03%  (p=0.000 n=30+30)
HTTPClientServer-4         58.7µs ± 1%    58.5µs ± 2%  -0.37%  (p=0.018 n=30+29)
JSONEncode-4               12.0ms ± 4%    12.1ms ± 3%    ~     (p=0.112 n=30+30)
JSONDecode-4               54.5ms ± 3%    55.5ms ± 4%  +1.80%  (p=0.006 n=30+30)
Mandelbrot200-4            3.78ms ± 4%    3.78ms ± 4%    ~     (p=0.173 n=30+30)
GoParse-4                  3.16ms ± 5%    3.22ms ± 5%  +1.75%  (p=0.010 n=30+30)
RegexpMatchEasy0_32-4      76.6ns ± 1%    75.9ns ± 3%    ~     (p=0.672 n=25+30)
RegexpMatchEasy0_1K-4       252ns ± 3%     253ns ± 3%  +0.57%  (p=0.027 n=30+30)
RegexpMatchEasy1_32-4      69.8ns ± 4%    70.2ns ± 6%    ~     (p=0.539 n=30+30)
RegexpMatchEasy1_1K-4       374ns ± 3%     373ns ± 5%    ~     (p=0.263 n=30+30)
RegexpMatchMedium_32-4      107ns ± 4%     109ns ± 3%    ~     (p=0.067 n=30+30)
RegexpMatchMedium_1K-4     33.9µs ± 5%    34.1µs ± 4%    ~     (p=0.297 n=30+30)
RegexpMatchHard_32-4       1.54µs ± 3%    1.56µs ± 4%  +1.43%  (p=0.002 n=30+30)
RegexpMatchHard_1K-4       46.6µs ± 3%    47.0µs ± 3%    ~     (p=0.055 n=30+30)
Revcomp-4                   411ms ± 6%     407ms ± 6%    ~     (p=0.219 n=30+30)
Template-4                 66.8ms ± 3%    64.8ms ± 5%  -3.01%  (p=0.000 n=30+30)
TimeParse-4                 312ns ± 2%     319ns ± 3%  +2.50%  (p=0.000 n=30+30)
TimeFormat-4                296ns ± 5%     299ns ± 3%  +0.93%  (p=0.005 n=30+30)
[Geo mean]                 47.5µs         47.1µs       -0.75%

name                     old speed      new speed      delta
GobDecode-4               120MB/s ± 5%   118MB/s ± 6%    ~     (p=0.072 n=30+30)
GobEncode-4               127MB/s ± 8%   129MB/s ± 8%    ~     (p=0.150 n=30+30)
Gzip-4                   82.1MB/s ± 4%  87.0MB/s ± 4%  +5.90%  (p=0.000 n=30+30)
Gunzip-4                  519MB/s ± 4%   529MB/s ± 4%  +2.07%  (p=0.001 n=30+30)
JSONEncode-4              162MB/s ± 4%   161MB/s ± 3%    ~     (p=0.110 n=30+30)
JSONDecode-4             35.6MB/s ± 3%  35.0MB/s ± 4%  -1.77%  (p=0.007 n=30+30)
GoParse-4                18.3MB/s ± 4%  18.0MB/s ± 4%  -1.72%  (p=0.009 n=30+30)
RegexpMatchEasy0_32-4     418MB/s ± 1%   422MB/s ± 3%    ~     (p=0.645 n=25+30)
RegexpMatchEasy0_1K-4    4.06GB/s ± 3%  4.04GB/s ± 3%  -0.57%  (p=0.033 n=30+30)
RegexpMatchEasy1_32-4     459MB/s ± 4%   456MB/s ± 6%    ~     (p=0.530 n=30+30)
RegexpMatchEasy1_1K-4    2.73GB/s ± 3%  2.75GB/s ± 5%    ~     (p=0.279 n=30+30)
RegexpMatchMedium_32-4   9.28MB/s ± 5%  9.18MB/s ± 4%    ~     (p=0.086 n=30+30)
RegexpMatchMedium_1K-4   30.2MB/s ± 4%  30.0MB/s ± 4%    ~     (p=0.300 n=30+30)
RegexpMatchHard_32-4     20.8MB/s ± 3%  20.5MB/s ± 4%  -1.41%  (p=0.002 n=30+30)
RegexpMatchHard_1K-4     22.0MB/s ± 3%  21.8MB/s ± 3%    ~     (p=0.051 n=30+30)
Revcomp-4                 619MB/s ± 7%   625MB/s ± 7%    ~     (p=0.219 n=30+30)
Template-4               29.0MB/s ± 3%  29.9MB/s ± 4%  +3.11%  (p=0.000 n=30+30)
[Geo mean]                123MB/s        123MB/s       +0.28%

Change-Id: I850652cfd53329c1af804b7f57f4393d8097bb0d
Reviewed-on: https://go-review.googlesource.com/121135
Run-TryBot: Ben Shi <powerman1st@163.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Ilya Tocar <ilya.tocar@intel.com>
parent b40db514
...@@ -770,6 +770,17 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) { ...@@ -770,6 +770,17 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
p.To.Reg = v.Args[0].Reg() p.To.Reg = v.Args[0].Reg()
gc.AddAux2(&p.To, v, off) gc.AddAux2(&p.To, v, off)
} }
case ssa.OpAMD64ANDQconstmodify, ssa.OpAMD64ANDLconstmodify, ssa.OpAMD64ORQconstmodify, ssa.OpAMD64ORLconstmodify,
ssa.OpAMD64XORQconstmodify, ssa.OpAMD64XORLconstmodify:
sc := v.AuxValAndOff()
off := sc.Off()
val := sc.Val()
p := s.Prog(v.Op.Asm())
p.From.Type = obj.TYPE_CONST
p.From.Offset = val
p.To.Type = obj.TYPE_MEM
p.To.Reg = v.Args[0].Reg()
gc.AddAux2(&p.To, v, off)
case ssa.OpAMD64MOVQstoreconst, ssa.OpAMD64MOVLstoreconst, ssa.OpAMD64MOVWstoreconst, ssa.OpAMD64MOVBstoreconst: case ssa.OpAMD64MOVQstoreconst, ssa.OpAMD64MOVLstoreconst, ssa.OpAMD64MOVWstoreconst, ssa.OpAMD64MOVBstoreconst:
p := s.Prog(v.Op.Asm()) p := s.Prog(v.Op.Asm())
p.From.Type = obj.TYPE_CONST p.From.Type = obj.TYPE_CONST
......
...@@ -1039,8 +1039,10 @@ ...@@ -1039,8 +1039,10 @@
((ADD|SUB|MUL)SSload [off1+off2] {sym} val base mem) ((ADD|SUB|MUL)SSload [off1+off2] {sym} val base mem)
((ADD|SUB|MUL)SDload [off1] {sym} val (ADDQconst [off2] base) mem) && is32Bit(off1+off2) -> ((ADD|SUB|MUL)SDload [off1] {sym} val (ADDQconst [off2] base) mem) && is32Bit(off1+off2) ->
((ADD|SUB|MUL)SDload [off1+off2] {sym} val base mem) ((ADD|SUB|MUL)SDload [off1+off2] {sym} val base mem)
(ADD(L|Q)constmodify [valoff1] {sym} (ADDQconst [off2] base) mem) && ValAndOff(valoff1).canAdd(off2) -> ((ADD|AND|OR|XOR)Qconstmodify [valoff1] {sym} (ADDQconst [off2] base) mem) && ValAndOff(valoff1).canAdd(off2) ->
(ADD(L|Q)constmodify [ValAndOff(valoff1).add(off2)] {sym} base mem) ((ADD|AND|OR|XOR)Qconstmodify [ValAndOff(valoff1).add(off2)] {sym} base mem)
((ADD|AND|OR|XOR)Lconstmodify [valoff1] {sym} (ADDQconst [off2] base) mem) && ValAndOff(valoff1).canAdd(off2) ->
((ADD|AND|OR|XOR)Lconstmodify [ValAndOff(valoff1).add(off2)] {sym} base mem)
// Fold constants into stores. // Fold constants into stores.
(MOVQstore [off] {sym} ptr (MOVQconst [c]) mem) && validValAndOff(c,off) -> (MOVQstore [off] {sym} ptr (MOVQconst [c]) mem) && validValAndOff(c,off) ->
...@@ -1081,9 +1083,12 @@ ...@@ -1081,9 +1083,12 @@
((ADD|SUB|MUL)SDload [off1] {sym1} val (LEAQ [off2] {sym2} base) mem) ((ADD|SUB|MUL)SDload [off1] {sym1} val (LEAQ [off2] {sym2} base) mem)
&& is32Bit(off1+off2) && canMergeSym(sym1, sym2) -> && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
((ADD|SUB|MUL)SDload [off1+off2] {mergeSym(sym1,sym2)} val base mem) ((ADD|SUB|MUL)SDload [off1+off2] {mergeSym(sym1,sym2)} val base mem)
(ADD(L|Q)constmodify [valoff1] {sym1} (LEAQ [off2] {sym2} base) mem) ((ADD|AND|OR|XOR)Qconstmodify [valoff1] {sym1} (LEAQ [off2] {sym2} base) mem)
&& ValAndOff(valoff1).canAdd(off2) && canMergeSym(sym1, sym2) -> && ValAndOff(valoff1).canAdd(off2) && canMergeSym(sym1, sym2) ->
(ADD(L|Q)constmodify [ValAndOff(valoff1).add(off2)] {mergeSym(sym1,sym2)} base mem) ((ADD|AND|OR|XOR)Qconstmodify [ValAndOff(valoff1).add(off2)] {mergeSym(sym1,sym2)} base mem)
((ADD|AND|OR|XOR)Lconstmodify [valoff1] {sym1} (LEAQ [off2] {sym2} base) mem)
&& ValAndOff(valoff1).canAdd(off2) && canMergeSym(sym1, sym2) ->
((ADD|AND|OR|XOR)Lconstmodify [ValAndOff(valoff1).add(off2)] {mergeSym(sym1,sym2)} base mem)
// generating indexed loads and stores // generating indexed loads and stores
(MOV(B|W|L|Q|SS|SD)load [off1] {sym1} (LEAQ1 [off2] {sym2} ptr idx) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) -> (MOV(B|W|L|Q|SS|SD)load [off1] {sym1} (LEAQ1 [off2] {sym2} ptr idx) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
...@@ -2352,12 +2357,12 @@ ...@@ -2352,12 +2357,12 @@
(MOVWQZX (MOVBQZX x)) -> (MOVBQZX x) (MOVWQZX (MOVBQZX x)) -> (MOVBQZX x)
(MOVBQZX (MOVBQZX x)) -> (MOVBQZX x) (MOVBQZX (MOVBQZX x)) -> (MOVBQZX x)
(MOVQstore [off] {sym} ptr a:(ADDQconst [c] l:(MOVQload [off] {sym} ptr2 mem)) mem) (MOVQstore [off] {sym} ptr a:((ADD|AND|OR|XOR)Qconst [c] l:(MOVQload [off] {sym} ptr2 mem)) mem)
&& isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c,off) -> && isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c,off) ->
(ADDQconstmodify {sym} [makeValAndOff(c,off)] ptr mem) ((ADD|AND|OR|XOR)Qconstmodify {sym} [makeValAndOff(c,off)] ptr mem)
(MOVLstore [off] {sym} ptr a:(ADDLconst [c] l:(MOVLload [off] {sym} ptr2 mem)) mem) (MOVLstore [off] {sym} ptr a:((ADD|AND|OR|XOR)Lconst [c] l:(MOVLload [off] {sym} ptr2 mem)) mem)
&& isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c,off) -> && isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c,off) ->
(ADDLconstmodify {sym} [makeValAndOff(c,off)] ptr mem) ((ADD|AND|OR|XOR)Lconstmodify {sym} [makeValAndOff(c,off)] ptr mem)
// float <-> int register moves, with no conversion. // float <-> int register moves, with no conversion.
// These come up when compiling math.{Float{32,64}bits,Float{32,64}frombits}. // These come up when compiling math.{Float{32,64}bits,Float{32,64}frombits}.
......
...@@ -225,20 +225,26 @@ func init() { ...@@ -225,20 +225,26 @@ func init() {
{name: "MULQU2", argLength: 2, reg: regInfo{inputs: []regMask{ax, gpsp}, outputs: []regMask{dx, ax}}, commutative: true, asm: "MULQ", clobberFlags: true}, // arg0 * arg1, returns (hi, lo) {name: "MULQU2", argLength: 2, reg: regInfo{inputs: []regMask{ax, gpsp}, outputs: []regMask{dx, ax}}, commutative: true, asm: "MULQ", clobberFlags: true}, // arg0 * arg1, returns (hi, lo)
{name: "DIVQU2", argLength: 3, reg: regInfo{inputs: []regMask{dx, ax, gpsp}, outputs: []regMask{ax, dx}}, asm: "DIVQ", clobberFlags: true}, // arg0:arg1 / arg2 (128-bit divided by 64-bit), returns (q, r) {name: "DIVQU2", argLength: 3, reg: regInfo{inputs: []regMask{dx, ax, gpsp}, outputs: []regMask{ax, dx}}, asm: "DIVQ", clobberFlags: true}, // arg0:arg1 / arg2 (128-bit divided by 64-bit), returns (q, r)
{name: "ANDQ", argLength: 2, reg: gp21, asm: "ANDQ", commutative: true, resultInArg0: true, clobberFlags: true}, // arg0 & arg1 {name: "ANDQ", argLength: 2, reg: gp21, asm: "ANDQ", commutative: true, resultInArg0: true, clobberFlags: true}, // arg0 & arg1
{name: "ANDL", argLength: 2, reg: gp21, asm: "ANDL", commutative: true, resultInArg0: true, clobberFlags: true}, // arg0 & arg1 {name: "ANDL", argLength: 2, reg: gp21, asm: "ANDL", commutative: true, resultInArg0: true, clobberFlags: true}, // arg0 & arg1
{name: "ANDQconst", argLength: 1, reg: gp11, asm: "ANDQ", aux: "Int32", resultInArg0: true, clobberFlags: true}, // arg0 & auxint {name: "ANDQconst", argLength: 1, reg: gp11, asm: "ANDQ", aux: "Int32", resultInArg0: true, clobberFlags: true}, // arg0 & auxint
{name: "ANDLconst", argLength: 1, reg: gp11, asm: "ANDL", aux: "Int32", resultInArg0: true, clobberFlags: true}, // arg0 & auxint {name: "ANDLconst", argLength: 1, reg: gp11, asm: "ANDL", aux: "Int32", resultInArg0: true, clobberFlags: true}, // arg0 & auxint
{name: "ANDQconstmodify", argLength: 2, reg: gpstoreconst, asm: "ANDQ", aux: "SymValAndOff", clobberFlags: true, faultOnNilArg0: true, symEffect: "Read,Write"}, // and ValAndOff(AuxInt).Val() to arg0+ValAndOff(AuxInt).Off()+aux, arg1=mem
{name: "ORQ", argLength: 2, reg: gp21, asm: "ORQ", commutative: true, resultInArg0: true, clobberFlags: true}, // arg0 | arg1 {name: "ANDLconstmodify", argLength: 2, reg: gpstoreconst, asm: "ANDL", aux: "SymValAndOff", clobberFlags: true, faultOnNilArg0: true, symEffect: "Read,Write"}, // and ValAndOff(AuxInt).Val() to arg0+ValAndOff(AuxInt).Off()+aux, arg1=mem
{name: "ORL", argLength: 2, reg: gp21, asm: "ORL", commutative: true, resultInArg0: true, clobberFlags: true}, // arg0 | arg1
{name: "ORQconst", argLength: 1, reg: gp11, asm: "ORQ", aux: "Int32", resultInArg0: true, clobberFlags: true}, // arg0 | auxint {name: "ORQ", argLength: 2, reg: gp21, asm: "ORQ", commutative: true, resultInArg0: true, clobberFlags: true}, // arg0 | arg1
{name: "ORLconst", argLength: 1, reg: gp11, asm: "ORL", aux: "Int32", resultInArg0: true, clobberFlags: true}, // arg0 | auxint {name: "ORL", argLength: 2, reg: gp21, asm: "ORL", commutative: true, resultInArg0: true, clobberFlags: true}, // arg0 | arg1
{name: "ORQconst", argLength: 1, reg: gp11, asm: "ORQ", aux: "Int32", resultInArg0: true, clobberFlags: true}, // arg0 | auxint
{name: "XORQ", argLength: 2, reg: gp21, asm: "XORQ", commutative: true, resultInArg0: true, clobberFlags: true}, // arg0 ^ arg1 {name: "ORLconst", argLength: 1, reg: gp11, asm: "ORL", aux: "Int32", resultInArg0: true, clobberFlags: true}, // arg0 | auxint
{name: "XORL", argLength: 2, reg: gp21, asm: "XORL", commutative: true, resultInArg0: true, clobberFlags: true}, // arg0 ^ arg1 {name: "ORQconstmodify", argLength: 2, reg: gpstoreconst, asm: "ORQ", aux: "SymValAndOff", clobberFlags: true, faultOnNilArg0: true, symEffect: "Read,Write"}, // or ValAndOff(AuxInt).Val() to arg0+ValAndOff(AuxInt).Off()+aux, arg1=mem
{name: "XORQconst", argLength: 1, reg: gp11, asm: "XORQ", aux: "Int32", resultInArg0: true, clobberFlags: true}, // arg0 ^ auxint {name: "ORLconstmodify", argLength: 2, reg: gpstoreconst, asm: "ORL", aux: "SymValAndOff", clobberFlags: true, faultOnNilArg0: true, symEffect: "Read,Write"}, // or ValAndOff(AuxInt).Val() to arg0+ValAndOff(AuxInt).Off()+aux, arg1=mem
{name: "XORLconst", argLength: 1, reg: gp11, asm: "XORL", aux: "Int32", resultInArg0: true, clobberFlags: true}, // arg0 ^ auxint
{name: "XORQ", argLength: 2, reg: gp21, asm: "XORQ", commutative: true, resultInArg0: true, clobberFlags: true}, // arg0 ^ arg1
{name: "XORL", argLength: 2, reg: gp21, asm: "XORL", commutative: true, resultInArg0: true, clobberFlags: true}, // arg0 ^ arg1
{name: "XORQconst", argLength: 1, reg: gp11, asm: "XORQ", aux: "Int32", resultInArg0: true, clobberFlags: true}, // arg0 ^ auxint
{name: "XORLconst", argLength: 1, reg: gp11, asm: "XORL", aux: "Int32", resultInArg0: true, clobberFlags: true}, // arg0 ^ auxint
{name: "XORQconstmodify", argLength: 2, reg: gpstoreconst, asm: "XORQ", aux: "SymValAndOff", clobberFlags: true, faultOnNilArg0: true, symEffect: "Read,Write"}, // xor ValAndOff(AuxInt).Val() to arg0+ValAndOff(AuxInt).Off()+aux, arg1=mem
{name: "XORLconstmodify", argLength: 2, reg: gpstoreconst, asm: "XORL", aux: "SymValAndOff", clobberFlags: true, faultOnNilArg0: true, symEffect: "Read,Write"}, // xor ValAndOff(AuxInt).Val() to arg0+ValAndOff(AuxInt).Off()+aux, arg1=mem
{name: "CMPQ", argLength: 2, reg: gp2flags, asm: "CMPQ", typ: "Flags"}, // arg0 compare to arg1 {name: "CMPQ", argLength: 2, reg: gp2flags, asm: "CMPQ", typ: "Flags"}, // arg0 compare to arg1
{name: "CMPL", argLength: 2, reg: gp2flags, asm: "CMPL", typ: "Flags"}, // arg0 compare to arg1 {name: "CMPL", argLength: 2, reg: gp2flags, asm: "CMPL", typ: "Flags"}, // arg0 compare to arg1
......
...@@ -487,14 +487,20 @@ const ( ...@@ -487,14 +487,20 @@ const (
OpAMD64ANDL OpAMD64ANDL
OpAMD64ANDQconst OpAMD64ANDQconst
OpAMD64ANDLconst OpAMD64ANDLconst
OpAMD64ANDQconstmodify
OpAMD64ANDLconstmodify
OpAMD64ORQ OpAMD64ORQ
OpAMD64ORL OpAMD64ORL
OpAMD64ORQconst OpAMD64ORQconst
OpAMD64ORLconst OpAMD64ORLconst
OpAMD64ORQconstmodify
OpAMD64ORLconstmodify
OpAMD64XORQ OpAMD64XORQ
OpAMD64XORL OpAMD64XORL
OpAMD64XORQconst OpAMD64XORQconst
OpAMD64XORLconst OpAMD64XORLconst
OpAMD64XORQconstmodify
OpAMD64XORLconstmodify
OpAMD64CMPQ OpAMD64CMPQ
OpAMD64CMPL OpAMD64CMPL
OpAMD64CMPW OpAMD64CMPW
...@@ -5948,6 +5954,34 @@ var opcodeTable = [...]opInfo{ ...@@ -5948,6 +5954,34 @@ var opcodeTable = [...]opInfo{
}, },
}, },
}, },
{
name: "ANDQconstmodify",
auxType: auxSymValAndOff,
argLen: 2,
clobberFlags: true,
faultOnNilArg0: true,
symEffect: SymRead | SymWrite,
asm: x86.AANDQ,
reg: regInfo{
inputs: []inputInfo{
{0, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 SB
},
},
},
{
name: "ANDLconstmodify",
auxType: auxSymValAndOff,
argLen: 2,
clobberFlags: true,
faultOnNilArg0: true,
symEffect: SymRead | SymWrite,
asm: x86.AANDL,
reg: regInfo{
inputs: []inputInfo{
{0, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 SB
},
},
},
{ {
name: "ORQ", name: "ORQ",
argLen: 2, argLen: 2,
...@@ -6014,6 +6048,34 @@ var opcodeTable = [...]opInfo{ ...@@ -6014,6 +6048,34 @@ var opcodeTable = [...]opInfo{
}, },
}, },
}, },
{
name: "ORQconstmodify",
auxType: auxSymValAndOff,
argLen: 2,
clobberFlags: true,
faultOnNilArg0: true,
symEffect: SymRead | SymWrite,
asm: x86.AORQ,
reg: regInfo{
inputs: []inputInfo{
{0, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 SB
},
},
},
{
name: "ORLconstmodify",
auxType: auxSymValAndOff,
argLen: 2,
clobberFlags: true,
faultOnNilArg0: true,
symEffect: SymRead | SymWrite,
asm: x86.AORL,
reg: regInfo{
inputs: []inputInfo{
{0, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 SB
},
},
},
{ {
name: "XORQ", name: "XORQ",
argLen: 2, argLen: 2,
...@@ -6080,6 +6142,34 @@ var opcodeTable = [...]opInfo{ ...@@ -6080,6 +6142,34 @@ var opcodeTable = [...]opInfo{
}, },
}, },
}, },
{
name: "XORQconstmodify",
auxType: auxSymValAndOff,
argLen: 2,
clobberFlags: true,
faultOnNilArg0: true,
symEffect: SymRead | SymWrite,
asm: x86.AXORQ,
reg: regInfo{
inputs: []inputInfo{
{0, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 SB
},
},
},
{
name: "XORLconstmodify",
auxType: auxSymValAndOff,
argLen: 2,
clobberFlags: true,
faultOnNilArg0: true,
symEffect: SymRead | SymWrite,
asm: x86.AXORL,
reg: regInfo{
inputs: []inputInfo{
{0, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 SB
},
},
},
{ {
name: "CMPQ", name: "CMPQ",
argLen: 2, argLen: 2,
......
...@@ -43,12 +43,16 @@ func rewriteValueAMD64(v *Value) bool { ...@@ -43,12 +43,16 @@ func rewriteValueAMD64(v *Value) bool {
return rewriteValueAMD64_OpAMD64ANDL_0(v) return rewriteValueAMD64_OpAMD64ANDL_0(v)
case OpAMD64ANDLconst: case OpAMD64ANDLconst:
return rewriteValueAMD64_OpAMD64ANDLconst_0(v) return rewriteValueAMD64_OpAMD64ANDLconst_0(v)
case OpAMD64ANDLconstmodify:
return rewriteValueAMD64_OpAMD64ANDLconstmodify_0(v)
case OpAMD64ANDLload: case OpAMD64ANDLload:
return rewriteValueAMD64_OpAMD64ANDLload_0(v) return rewriteValueAMD64_OpAMD64ANDLload_0(v)
case OpAMD64ANDQ: case OpAMD64ANDQ:
return rewriteValueAMD64_OpAMD64ANDQ_0(v) return rewriteValueAMD64_OpAMD64ANDQ_0(v)
case OpAMD64ANDQconst: case OpAMD64ANDQconst:
return rewriteValueAMD64_OpAMD64ANDQconst_0(v) return rewriteValueAMD64_OpAMD64ANDQconst_0(v)
case OpAMD64ANDQconstmodify:
return rewriteValueAMD64_OpAMD64ANDQconstmodify_0(v)
case OpAMD64ANDQload: case OpAMD64ANDQload:
return rewriteValueAMD64_OpAMD64ANDQload_0(v) return rewriteValueAMD64_OpAMD64ANDQload_0(v)
case OpAMD64BSFQ: case OpAMD64BSFQ:
...@@ -242,7 +246,7 @@ func rewriteValueAMD64(v *Value) bool { ...@@ -242,7 +246,7 @@ func rewriteValueAMD64(v *Value) bool {
case OpAMD64MOVQloadidx8: case OpAMD64MOVQloadidx8:
return rewriteValueAMD64_OpAMD64MOVQloadidx8_0(v) return rewriteValueAMD64_OpAMD64MOVQloadidx8_0(v)
case OpAMD64MOVQstore: case OpAMD64MOVQstore:
return rewriteValueAMD64_OpAMD64MOVQstore_0(v) return rewriteValueAMD64_OpAMD64MOVQstore_0(v) || rewriteValueAMD64_OpAMD64MOVQstore_10(v)
case OpAMD64MOVQstoreconst: case OpAMD64MOVQstoreconst:
return rewriteValueAMD64_OpAMD64MOVQstoreconst_0(v) return rewriteValueAMD64_OpAMD64MOVQstoreconst_0(v)
case OpAMD64MOVQstoreconstidx1: case OpAMD64MOVQstoreconstidx1:
...@@ -329,12 +333,16 @@ func rewriteValueAMD64(v *Value) bool { ...@@ -329,12 +333,16 @@ func rewriteValueAMD64(v *Value) bool {
return rewriteValueAMD64_OpAMD64ORL_0(v) || rewriteValueAMD64_OpAMD64ORL_10(v) || rewriteValueAMD64_OpAMD64ORL_20(v) || rewriteValueAMD64_OpAMD64ORL_30(v) || rewriteValueAMD64_OpAMD64ORL_40(v) || rewriteValueAMD64_OpAMD64ORL_50(v) || rewriteValueAMD64_OpAMD64ORL_60(v) || rewriteValueAMD64_OpAMD64ORL_70(v) || rewriteValueAMD64_OpAMD64ORL_80(v) || rewriteValueAMD64_OpAMD64ORL_90(v) || rewriteValueAMD64_OpAMD64ORL_100(v) || rewriteValueAMD64_OpAMD64ORL_110(v) || rewriteValueAMD64_OpAMD64ORL_120(v) || rewriteValueAMD64_OpAMD64ORL_130(v) return rewriteValueAMD64_OpAMD64ORL_0(v) || rewriteValueAMD64_OpAMD64ORL_10(v) || rewriteValueAMD64_OpAMD64ORL_20(v) || rewriteValueAMD64_OpAMD64ORL_30(v) || rewriteValueAMD64_OpAMD64ORL_40(v) || rewriteValueAMD64_OpAMD64ORL_50(v) || rewriteValueAMD64_OpAMD64ORL_60(v) || rewriteValueAMD64_OpAMD64ORL_70(v) || rewriteValueAMD64_OpAMD64ORL_80(v) || rewriteValueAMD64_OpAMD64ORL_90(v) || rewriteValueAMD64_OpAMD64ORL_100(v) || rewriteValueAMD64_OpAMD64ORL_110(v) || rewriteValueAMD64_OpAMD64ORL_120(v) || rewriteValueAMD64_OpAMD64ORL_130(v)
case OpAMD64ORLconst: case OpAMD64ORLconst:
return rewriteValueAMD64_OpAMD64ORLconst_0(v) return rewriteValueAMD64_OpAMD64ORLconst_0(v)
case OpAMD64ORLconstmodify:
return rewriteValueAMD64_OpAMD64ORLconstmodify_0(v)
case OpAMD64ORLload: case OpAMD64ORLload:
return rewriteValueAMD64_OpAMD64ORLload_0(v) return rewriteValueAMD64_OpAMD64ORLload_0(v)
case OpAMD64ORQ: case OpAMD64ORQ:
return rewriteValueAMD64_OpAMD64ORQ_0(v) || rewriteValueAMD64_OpAMD64ORQ_10(v) || rewriteValueAMD64_OpAMD64ORQ_20(v) || rewriteValueAMD64_OpAMD64ORQ_30(v) || rewriteValueAMD64_OpAMD64ORQ_40(v) || rewriteValueAMD64_OpAMD64ORQ_50(v) || rewriteValueAMD64_OpAMD64ORQ_60(v) || rewriteValueAMD64_OpAMD64ORQ_70(v) || rewriteValueAMD64_OpAMD64ORQ_80(v) || rewriteValueAMD64_OpAMD64ORQ_90(v) || rewriteValueAMD64_OpAMD64ORQ_100(v) || rewriteValueAMD64_OpAMD64ORQ_110(v) || rewriteValueAMD64_OpAMD64ORQ_120(v) || rewriteValueAMD64_OpAMD64ORQ_130(v) || rewriteValueAMD64_OpAMD64ORQ_140(v) || rewriteValueAMD64_OpAMD64ORQ_150(v) || rewriteValueAMD64_OpAMD64ORQ_160(v) return rewriteValueAMD64_OpAMD64ORQ_0(v) || rewriteValueAMD64_OpAMD64ORQ_10(v) || rewriteValueAMD64_OpAMD64ORQ_20(v) || rewriteValueAMD64_OpAMD64ORQ_30(v) || rewriteValueAMD64_OpAMD64ORQ_40(v) || rewriteValueAMD64_OpAMD64ORQ_50(v) || rewriteValueAMD64_OpAMD64ORQ_60(v) || rewriteValueAMD64_OpAMD64ORQ_70(v) || rewriteValueAMD64_OpAMD64ORQ_80(v) || rewriteValueAMD64_OpAMD64ORQ_90(v) || rewriteValueAMD64_OpAMD64ORQ_100(v) || rewriteValueAMD64_OpAMD64ORQ_110(v) || rewriteValueAMD64_OpAMD64ORQ_120(v) || rewriteValueAMD64_OpAMD64ORQ_130(v) || rewriteValueAMD64_OpAMD64ORQ_140(v) || rewriteValueAMD64_OpAMD64ORQ_150(v) || rewriteValueAMD64_OpAMD64ORQ_160(v)
case OpAMD64ORQconst: case OpAMD64ORQconst:
return rewriteValueAMD64_OpAMD64ORQconst_0(v) return rewriteValueAMD64_OpAMD64ORQconst_0(v)
case OpAMD64ORQconstmodify:
return rewriteValueAMD64_OpAMD64ORQconstmodify_0(v)
case OpAMD64ORQload: case OpAMD64ORQload:
return rewriteValueAMD64_OpAMD64ORQload_0(v) return rewriteValueAMD64_OpAMD64ORQload_0(v)
case OpAMD64ROLB: case OpAMD64ROLB:
...@@ -493,12 +501,16 @@ func rewriteValueAMD64(v *Value) bool { ...@@ -493,12 +501,16 @@ func rewriteValueAMD64(v *Value) bool {
return rewriteValueAMD64_OpAMD64XORL_0(v) || rewriteValueAMD64_OpAMD64XORL_10(v) return rewriteValueAMD64_OpAMD64XORL_0(v) || rewriteValueAMD64_OpAMD64XORL_10(v)
case OpAMD64XORLconst: case OpAMD64XORLconst:
return rewriteValueAMD64_OpAMD64XORLconst_0(v) || rewriteValueAMD64_OpAMD64XORLconst_10(v) return rewriteValueAMD64_OpAMD64XORLconst_0(v) || rewriteValueAMD64_OpAMD64XORLconst_10(v)
case OpAMD64XORLconstmodify:
return rewriteValueAMD64_OpAMD64XORLconstmodify_0(v)
case OpAMD64XORLload: case OpAMD64XORLload:
return rewriteValueAMD64_OpAMD64XORLload_0(v) return rewriteValueAMD64_OpAMD64XORLload_0(v)
case OpAMD64XORQ: case OpAMD64XORQ:
return rewriteValueAMD64_OpAMD64XORQ_0(v) || rewriteValueAMD64_OpAMD64XORQ_10(v) return rewriteValueAMD64_OpAMD64XORQ_0(v) || rewriteValueAMD64_OpAMD64XORQ_10(v)
case OpAMD64XORQconst: case OpAMD64XORQconst:
return rewriteValueAMD64_OpAMD64XORQconst_0(v) return rewriteValueAMD64_OpAMD64XORQconst_0(v)
case OpAMD64XORQconstmodify:
return rewriteValueAMD64_OpAMD64XORQconstmodify_0(v)
case OpAMD64XORQload: case OpAMD64XORQload:
return rewriteValueAMD64_OpAMD64XORQload_0(v) return rewriteValueAMD64_OpAMD64XORQload_0(v)
case OpAdd16: case OpAdd16:
...@@ -3480,6 +3492,58 @@ func rewriteValueAMD64_OpAMD64ANDLconst_0(v *Value) bool { ...@@ -3480,6 +3492,58 @@ func rewriteValueAMD64_OpAMD64ANDLconst_0(v *Value) bool {
} }
return false return false
} }
// rewriteValueAMD64_OpAMD64ANDLconstmodify_0 tries to fold address arithmetic
// that feeds an ANDLconstmodify into the op's own AuxInt/Aux fields, so the
// separate address computation is no longer referenced by v.
// It reports whether v was rewritten in place.
//
// Two rules are attempted in order, as spelled out by the match/cond/result
// comments in the body:
//  1. address is (ADDQconst [off2] base): off2 is folded into the ValAndOff
//     held in AuxInt and base becomes the new address argument;
//  2. address is (LEAQ [off2] {sym2} base): same folding, and the two symbols
//     are combined with mergeSym.
func rewriteValueAMD64_OpAMD64ANDLconstmodify_0(v *Value) bool {
// match: (ANDLconstmodify [valoff1] {sym} (ADDQconst [off2] base) mem)
// cond: ValAndOff(valoff1).canAdd(off2)
// result: (ANDLconstmodify [ValAndOff(valoff1).add(off2)] {sym} base mem)
// The for loop runs at most once: every break abandons this rule and falls
// through to the next one; reaching the end of the body returns true.
for {
valoff1 := v.AuxInt
sym := v.Aux
// NOTE(review): result is discarded; presumably an up-front bounds check
// covering the later v.Args[0]/v.Args[1] accesses — confirm.
_ = v.Args[1]
v_0 := v.Args[0]
if v_0.Op != OpAMD64ADDQconst {
break
}
off2 := v_0.AuxInt
base := v_0.Args[0]
mem := v.Args[1]
// Only fold when the ValAndOff reports that off2 can be added to it.
if !(ValAndOff(valoff1).canAdd(off2)) {
break
}
// Rebuild v as the same op, now addressing base directly with the
// adjusted ValAndOff; the symbol is carried over unchanged.
v.reset(OpAMD64ANDLconstmodify)
v.AuxInt = ValAndOff(valoff1).add(off2)
v.Aux = sym
v.AddArg(base)
v.AddArg(mem)
return true
}
// match: (ANDLconstmodify [valoff1] {sym1} (LEAQ [off2] {sym2} base) mem)
// cond: ValAndOff(valoff1).canAdd(off2) && canMergeSym(sym1, sym2)
// result: (ANDLconstmodify [ValAndOff(valoff1).add(off2)] {mergeSym(sym1,sym2)} base mem)
// Same single-pass structure as above, for an address produced by LEAQ.
for {
valoff1 := v.AuxInt
sym1 := v.Aux
_ = v.Args[1]
v_0 := v.Args[0]
if v_0.Op != OpAMD64LEAQ {
break
}
off2 := v_0.AuxInt
sym2 := v_0.Aux
base := v_0.Args[0]
mem := v.Args[1]
// Besides the offset check, the op's symbol and the LEAQ's symbol must
// be mergeable, since the result can carry only one Aux symbol.
if !(ValAndOff(valoff1).canAdd(off2) && canMergeSym(sym1, sym2)) {
break
}
v.reset(OpAMD64ANDLconstmodify)
v.AuxInt = ValAndOff(valoff1).add(off2)
v.Aux = mergeSym(sym1, sym2)
v.AddArg(base)
v.AddArg(mem)
return true
}
// Neither rule matched; v is left untouched.
return false
}
func rewriteValueAMD64_OpAMD64ANDLload_0(v *Value) bool { func rewriteValueAMD64_OpAMD64ANDLload_0(v *Value) bool {
b := v.Block b := v.Block
_ = b _ = b
...@@ -3893,6 +3957,58 @@ func rewriteValueAMD64_OpAMD64ANDQconst_0(v *Value) bool { ...@@ -3893,6 +3957,58 @@ func rewriteValueAMD64_OpAMD64ANDQconst_0(v *Value) bool {
} }
return false return false
} }
// rewriteValueAMD64_OpAMD64ANDQconstmodify_0 tries to fold address arithmetic
// that feeds an ANDQconstmodify into the op's own AuxInt/Aux fields, so the
// separate address computation is no longer referenced by v.
// It reports whether v was rewritten in place.
//
// Two rules are attempted in order, as spelled out by the match/cond/result
// comments in the body:
//  1. address is (ADDQconst [off2] base): off2 is folded into the ValAndOff
//     held in AuxInt and base becomes the new address argument;
//  2. address is (LEAQ [off2] {sym2} base): same folding, and the two symbols
//     are combined with mergeSym.
func rewriteValueAMD64_OpAMD64ANDQconstmodify_0(v *Value) bool {
// match: (ANDQconstmodify [valoff1] {sym} (ADDQconst [off2] base) mem)
// cond: ValAndOff(valoff1).canAdd(off2)
// result: (ANDQconstmodify [ValAndOff(valoff1).add(off2)] {sym} base mem)
// The for loop runs at most once: every break abandons this rule and falls
// through to the next one; reaching the end of the body returns true.
for {
valoff1 := v.AuxInt
sym := v.Aux
// NOTE(review): result is discarded; presumably an up-front bounds check
// covering the later v.Args[0]/v.Args[1] accesses — confirm.
_ = v.Args[1]
v_0 := v.Args[0]
if v_0.Op != OpAMD64ADDQconst {
break
}
off2 := v_0.AuxInt
base := v_0.Args[0]
mem := v.Args[1]
// Only fold when the ValAndOff reports that off2 can be added to it.
if !(ValAndOff(valoff1).canAdd(off2)) {
break
}
// Rebuild v as the same op, now addressing base directly with the
// adjusted ValAndOff; the symbol is carried over unchanged.
v.reset(OpAMD64ANDQconstmodify)
v.AuxInt = ValAndOff(valoff1).add(off2)
v.Aux = sym
v.AddArg(base)
v.AddArg(mem)
return true
}
// match: (ANDQconstmodify [valoff1] {sym1} (LEAQ [off2] {sym2} base) mem)
// cond: ValAndOff(valoff1).canAdd(off2) && canMergeSym(sym1, sym2)
// result: (ANDQconstmodify [ValAndOff(valoff1).add(off2)] {mergeSym(sym1,sym2)} base mem)
// Same single-pass structure as above, for an address produced by LEAQ.
for {
valoff1 := v.AuxInt
sym1 := v.Aux
_ = v.Args[1]
v_0 := v.Args[0]
if v_0.Op != OpAMD64LEAQ {
break
}
off2 := v_0.AuxInt
sym2 := v_0.Aux
base := v_0.Args[0]
mem := v.Args[1]
// Besides the offset check, the op's symbol and the LEAQ's symbol must
// be mergeable, since the result can carry only one Aux symbol.
if !(ValAndOff(valoff1).canAdd(off2) && canMergeSym(sym1, sym2)) {
break
}
v.reset(OpAMD64ANDQconstmodify)
v.AuxInt = ValAndOff(valoff1).add(off2)
v.Aux = mergeSym(sym1, sym2)
v.AddArg(base)
v.AddArg(mem)
return true
}
// Neither rule matched; v is left untouched.
return false
}
func rewriteValueAMD64_OpAMD64ANDQload_0(v *Value) bool { func rewriteValueAMD64_OpAMD64ANDQload_0(v *Value) bool {
b := v.Block b := v.Block
_ = b _ = b
...@@ -14306,6 +14422,123 @@ func rewriteValueAMD64_OpAMD64MOVLstore_10(v *Value) bool { ...@@ -14306,6 +14422,123 @@ func rewriteValueAMD64_OpAMD64MOVLstore_10(v *Value) bool {
v.AddArg(mem) v.AddArg(mem)
return true return true
} }
// match: (MOVLstore [off] {sym} ptr a:(ANDLconst [c] l:(MOVLload [off] {sym} ptr2 mem)) mem)
// cond: isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c,off)
// result: (ANDLconstmodify {sym} [makeValAndOff(c,off)] ptr mem)
for {
off := v.AuxInt
sym := v.Aux
_ = v.Args[2]
ptr := v.Args[0]
a := v.Args[1]
if a.Op != OpAMD64ANDLconst {
break
}
c := a.AuxInt
l := a.Args[0]
if l.Op != OpAMD64MOVLload {
break
}
if l.AuxInt != off {
break
}
if l.Aux != sym {
break
}
_ = l.Args[1]
ptr2 := l.Args[0]
mem := l.Args[1]
if mem != v.Args[2] {
break
}
if !(isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c, off)) {
break
}
v.reset(OpAMD64ANDLconstmodify)
v.AuxInt = makeValAndOff(c, off)
v.Aux = sym
v.AddArg(ptr)
v.AddArg(mem)
return true
}
// match: (MOVLstore [off] {sym} ptr a:(ORLconst [c] l:(MOVLload [off] {sym} ptr2 mem)) mem)
// cond: isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c,off)
// result: (ORLconstmodify {sym} [makeValAndOff(c,off)] ptr mem)
for {
off := v.AuxInt
sym := v.Aux
_ = v.Args[2]
ptr := v.Args[0]
a := v.Args[1]
if a.Op != OpAMD64ORLconst {
break
}
c := a.AuxInt
l := a.Args[0]
if l.Op != OpAMD64MOVLload {
break
}
if l.AuxInt != off {
break
}
if l.Aux != sym {
break
}
_ = l.Args[1]
ptr2 := l.Args[0]
mem := l.Args[1]
if mem != v.Args[2] {
break
}
if !(isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c, off)) {
break
}
v.reset(OpAMD64ORLconstmodify)
v.AuxInt = makeValAndOff(c, off)
v.Aux = sym
v.AddArg(ptr)
v.AddArg(mem)
return true
}
// match: (MOVLstore [off] {sym} ptr a:(XORLconst [c] l:(MOVLload [off] {sym} ptr2 mem)) mem)
// cond: isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c,off)
// result: (XORLconstmodify {sym} [makeValAndOff(c,off)] ptr mem)
for {
off := v.AuxInt
sym := v.Aux
_ = v.Args[2]
ptr := v.Args[0]
a := v.Args[1]
if a.Op != OpAMD64XORLconst {
break
}
c := a.AuxInt
l := a.Args[0]
if l.Op != OpAMD64MOVLload {
break
}
if l.AuxInt != off {
break
}
if l.Aux != sym {
break
}
_ = l.Args[1]
ptr2 := l.Args[0]
mem := l.Args[1]
if mem != v.Args[2] {
break
}
if !(isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c, off)) {
break
}
v.reset(OpAMD64XORLconstmodify)
v.AuxInt = makeValAndOff(c, off)
v.Aux = sym
v.AddArg(ptr)
v.AddArg(mem)
return true
}
// match: (MOVLstore [off] {sym} ptr (MOVLf2i val) mem) // match: (MOVLstore [off] {sym} ptr (MOVLf2i val) mem)
// cond: // cond:
// result: (MOVSSstore [off] {sym} ptr val mem) // result: (MOVSSstore [off] {sym} ptr val mem)
...@@ -16292,6 +16525,126 @@ func rewriteValueAMD64_OpAMD64MOVQstore_0(v *Value) bool { ...@@ -16292,6 +16525,126 @@ func rewriteValueAMD64_OpAMD64MOVQstore_0(v *Value) bool {
v.AddArg(mem) v.AddArg(mem)
return true return true
} }
// match: (MOVQstore [off] {sym} ptr a:(ANDQconst [c] l:(MOVQload [off] {sym} ptr2 mem)) mem)
// cond: isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c,off)
// result: (ANDQconstmodify {sym} [makeValAndOff(c,off)] ptr mem)
for {
off := v.AuxInt
sym := v.Aux
_ = v.Args[2]
ptr := v.Args[0]
a := v.Args[1]
if a.Op != OpAMD64ANDQconst {
break
}
c := a.AuxInt
l := a.Args[0]
if l.Op != OpAMD64MOVQload {
break
}
if l.AuxInt != off {
break
}
if l.Aux != sym {
break
}
_ = l.Args[1]
ptr2 := l.Args[0]
mem := l.Args[1]
if mem != v.Args[2] {
break
}
if !(isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c, off)) {
break
}
v.reset(OpAMD64ANDQconstmodify)
v.AuxInt = makeValAndOff(c, off)
v.Aux = sym
v.AddArg(ptr)
v.AddArg(mem)
return true
}
return false
}
func rewriteValueAMD64_OpAMD64MOVQstore_10(v *Value) bool {
// match: (MOVQstore [off] {sym} ptr a:(ORQconst [c] l:(MOVQload [off] {sym} ptr2 mem)) mem)
// cond: isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c,off)
// result: (ORQconstmodify {sym} [makeValAndOff(c,off)] ptr mem)
for {
off := v.AuxInt
sym := v.Aux
_ = v.Args[2]
ptr := v.Args[0]
a := v.Args[1]
if a.Op != OpAMD64ORQconst {
break
}
c := a.AuxInt
l := a.Args[0]
if l.Op != OpAMD64MOVQload {
break
}
if l.AuxInt != off {
break
}
if l.Aux != sym {
break
}
_ = l.Args[1]
ptr2 := l.Args[0]
mem := l.Args[1]
if mem != v.Args[2] {
break
}
if !(isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c, off)) {
break
}
v.reset(OpAMD64ORQconstmodify)
v.AuxInt = makeValAndOff(c, off)
v.Aux = sym
v.AddArg(ptr)
v.AddArg(mem)
return true
}
// match: (MOVQstore [off] {sym} ptr a:(XORQconst [c] l:(MOVQload [off] {sym} ptr2 mem)) mem)
// cond: isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c,off)
// result: (XORQconstmodify {sym} [makeValAndOff(c,off)] ptr mem)
for {
off := v.AuxInt
sym := v.Aux
_ = v.Args[2]
ptr := v.Args[0]
a := v.Args[1]
if a.Op != OpAMD64XORQconst {
break
}
c := a.AuxInt
l := a.Args[0]
if l.Op != OpAMD64MOVQload {
break
}
if l.AuxInt != off {
break
}
if l.Aux != sym {
break
}
_ = l.Args[1]
ptr2 := l.Args[0]
mem := l.Args[1]
if mem != v.Args[2] {
break
}
if !(isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c, off)) {
break
}
v.reset(OpAMD64XORQconstmodify)
v.AuxInt = makeValAndOff(c, off)
v.Aux = sym
v.AddArg(ptr)
v.AddArg(mem)
return true
}
// match: (MOVQstore [off] {sym} ptr (MOVQf2i val) mem) // match: (MOVQstore [off] {sym} ptr (MOVQf2i val) mem)
// cond: // cond:
// result: (MOVSDstore [off] {sym} ptr val mem) // result: (MOVSDstore [off] {sym} ptr val mem)
...@@ -30779,6 +31132,58 @@ func rewriteValueAMD64_OpAMD64ORLconst_0(v *Value) bool { ...@@ -30779,6 +31132,58 @@ func rewriteValueAMD64_OpAMD64ORLconst_0(v *Value) bool {
} }
return false return false
} }
// rewriteValueAMD64_OpAMD64ORLconstmodify_0 folds the address computation
// feeding an ORLconstmodify (an ADDQconst or a LEAQ) into the op's own
// value-and-offset / symbol, so the op addresses the underlying base directly.
// It reports whether v was rewritten.
func rewriteValueAMD64_OpAMD64ORLconstmodify_0(v *Value) bool {
	_ = v.Args[1] // bounds-check hint: both operands are read below
	addr, memory := v.Args[0], v.Args[1]
	// Capture the aux fields up front; v.reset below clears them.
	packed := ValAndOff(v.AuxInt)
	symbol := v.Aux

	switch addr.Op {
	case OpAMD64ADDQconst:
		// match: (ORLconstmodify [valoff1] {sym} (ADDQconst [off2] base) mem)
		// cond: ValAndOff(valoff1).canAdd(off2)
		// result: (ORLconstmodify [ValAndOff(valoff1).add(off2)] {sym} base mem)
		delta := addr.AuxInt
		inner := addr.Args[0]
		if packed.canAdd(delta) {
			v.reset(OpAMD64ORLconstmodify)
			v.AuxInt = packed.add(delta)
			v.Aux = symbol
			v.AddArg(inner)
			v.AddArg(memory)
			return true
		}
	case OpAMD64LEAQ:
		// match: (ORLconstmodify [valoff1] {sym1} (LEAQ [off2] {sym2} base) mem)
		// cond: ValAndOff(valoff1).canAdd(off2) && canMergeSym(sym1, sym2)
		// result: (ORLconstmodify [ValAndOff(valoff1).add(off2)] {mergeSym(sym1,sym2)} base mem)
		delta := addr.AuxInt
		addrSym := addr.Aux
		inner := addr.Args[0]
		if packed.canAdd(delta) && canMergeSym(symbol, addrSym) {
			v.reset(OpAMD64ORLconstmodify)
			v.AuxInt = packed.add(delta)
			v.Aux = mergeSym(symbol, addrSym)
			v.AddArg(inner)
			v.AddArg(memory)
			return true
		}
	}
	// Neither rule applied; v is left untouched.
	return false
}
func rewriteValueAMD64_OpAMD64ORLload_0(v *Value) bool { func rewriteValueAMD64_OpAMD64ORLload_0(v *Value) bool {
b := v.Block b := v.Block
_ = b _ = b
...@@ -41688,6 +42093,58 @@ func rewriteValueAMD64_OpAMD64ORQconst_0(v *Value) bool { ...@@ -41688,6 +42093,58 @@ func rewriteValueAMD64_OpAMD64ORQconst_0(v *Value) bool {
} }
return false return false
} }
// rewriteValueAMD64_OpAMD64ORQconstmodify_0 folds the address computation
// feeding an ORQconstmodify (an ADDQconst or a LEAQ) into the op's own
// value-and-offset / symbol, so the op addresses the underlying base directly.
// It reports whether v was rewritten.
func rewriteValueAMD64_OpAMD64ORQconstmodify_0(v *Value) bool {
	_ = v.Args[1] // bounds-check hint: both operands are read below
	addr, memory := v.Args[0], v.Args[1]
	// Capture the aux fields up front; v.reset below clears them.
	packed := ValAndOff(v.AuxInt)
	symbol := v.Aux

	switch addr.Op {
	case OpAMD64ADDQconst:
		// match: (ORQconstmodify [valoff1] {sym} (ADDQconst [off2] base) mem)
		// cond: ValAndOff(valoff1).canAdd(off2)
		// result: (ORQconstmodify [ValAndOff(valoff1).add(off2)] {sym} base mem)
		delta := addr.AuxInt
		inner := addr.Args[0]
		if packed.canAdd(delta) {
			v.reset(OpAMD64ORQconstmodify)
			v.AuxInt = packed.add(delta)
			v.Aux = symbol
			v.AddArg(inner)
			v.AddArg(memory)
			return true
		}
	case OpAMD64LEAQ:
		// match: (ORQconstmodify [valoff1] {sym1} (LEAQ [off2] {sym2} base) mem)
		// cond: ValAndOff(valoff1).canAdd(off2) && canMergeSym(sym1, sym2)
		// result: (ORQconstmodify [ValAndOff(valoff1).add(off2)] {mergeSym(sym1,sym2)} base mem)
		delta := addr.AuxInt
		addrSym := addr.Aux
		inner := addr.Args[0]
		if packed.canAdd(delta) && canMergeSym(symbol, addrSym) {
			v.reset(OpAMD64ORQconstmodify)
			v.AuxInt = packed.add(delta)
			v.Aux = mergeSym(symbol, addrSym)
			v.AddArg(inner)
			v.AddArg(memory)
			return true
		}
	}
	// Neither rule applied; v is left untouched.
	return false
}
func rewriteValueAMD64_OpAMD64ORQload_0(v *Value) bool { func rewriteValueAMD64_OpAMD64ORQload_0(v *Value) bool {
b := v.Block b := v.Block
_ = b _ = b
...@@ -52184,6 +52641,58 @@ func rewriteValueAMD64_OpAMD64XORLconst_10(v *Value) bool { ...@@ -52184,6 +52641,58 @@ func rewriteValueAMD64_OpAMD64XORLconst_10(v *Value) bool {
} }
return false return false
} }
// rewriteValueAMD64_OpAMD64XORLconstmodify_0 folds the address computation
// feeding an XORLconstmodify (an ADDQconst or a LEAQ) into the op's own
// value-and-offset / symbol, so the op addresses the underlying base directly.
// It reports whether v was rewritten.
func rewriteValueAMD64_OpAMD64XORLconstmodify_0(v *Value) bool {
	_ = v.Args[1] // bounds-check hint: both operands are read below
	addr, memory := v.Args[0], v.Args[1]
	// Capture the aux fields up front; v.reset below clears them.
	packed := ValAndOff(v.AuxInt)
	symbol := v.Aux

	switch addr.Op {
	case OpAMD64ADDQconst:
		// match: (XORLconstmodify [valoff1] {sym} (ADDQconst [off2] base) mem)
		// cond: ValAndOff(valoff1).canAdd(off2)
		// result: (XORLconstmodify [ValAndOff(valoff1).add(off2)] {sym} base mem)
		delta := addr.AuxInt
		inner := addr.Args[0]
		if packed.canAdd(delta) {
			v.reset(OpAMD64XORLconstmodify)
			v.AuxInt = packed.add(delta)
			v.Aux = symbol
			v.AddArg(inner)
			v.AddArg(memory)
			return true
		}
	case OpAMD64LEAQ:
		// match: (XORLconstmodify [valoff1] {sym1} (LEAQ [off2] {sym2} base) mem)
		// cond: ValAndOff(valoff1).canAdd(off2) && canMergeSym(sym1, sym2)
		// result: (XORLconstmodify [ValAndOff(valoff1).add(off2)] {mergeSym(sym1,sym2)} base mem)
		delta := addr.AuxInt
		addrSym := addr.Aux
		inner := addr.Args[0]
		if packed.canAdd(delta) && canMergeSym(symbol, addrSym) {
			v.reset(OpAMD64XORLconstmodify)
			v.AuxInt = packed.add(delta)
			v.Aux = mergeSym(symbol, addrSym)
			v.AddArg(inner)
			v.AddArg(memory)
			return true
		}
	}
	// Neither rule applied; v is left untouched.
	return false
}
func rewriteValueAMD64_OpAMD64XORLload_0(v *Value) bool { func rewriteValueAMD64_OpAMD64XORLload_0(v *Value) bool {
b := v.Block b := v.Block
_ = b _ = b
...@@ -52598,6 +53107,58 @@ func rewriteValueAMD64_OpAMD64XORQconst_0(v *Value) bool { ...@@ -52598,6 +53107,58 @@ func rewriteValueAMD64_OpAMD64XORQconst_0(v *Value) bool {
} }
return false return false
} }
// rewriteValueAMD64_OpAMD64XORQconstmodify_0 folds the address computation
// feeding an XORQconstmodify (an ADDQconst or a LEAQ) into the op's own
// value-and-offset / symbol, so the op addresses the underlying base directly.
// It reports whether v was rewritten.
func rewriteValueAMD64_OpAMD64XORQconstmodify_0(v *Value) bool {
	_ = v.Args[1] // bounds-check hint: both operands are read below
	addr, memory := v.Args[0], v.Args[1]
	// Capture the aux fields up front; v.reset below clears them.
	packed := ValAndOff(v.AuxInt)
	symbol := v.Aux

	switch addr.Op {
	case OpAMD64ADDQconst:
		// match: (XORQconstmodify [valoff1] {sym} (ADDQconst [off2] base) mem)
		// cond: ValAndOff(valoff1).canAdd(off2)
		// result: (XORQconstmodify [ValAndOff(valoff1).add(off2)] {sym} base mem)
		delta := addr.AuxInt
		inner := addr.Args[0]
		if packed.canAdd(delta) {
			v.reset(OpAMD64XORQconstmodify)
			v.AuxInt = packed.add(delta)
			v.Aux = symbol
			v.AddArg(inner)
			v.AddArg(memory)
			return true
		}
	case OpAMD64LEAQ:
		// match: (XORQconstmodify [valoff1] {sym1} (LEAQ [off2] {sym2} base) mem)
		// cond: ValAndOff(valoff1).canAdd(off2) && canMergeSym(sym1, sym2)
		// result: (XORQconstmodify [ValAndOff(valoff1).add(off2)] {mergeSym(sym1,sym2)} base mem)
		delta := addr.AuxInt
		addrSym := addr.Aux
		inner := addr.Args[0]
		if packed.canAdd(delta) && canMergeSym(symbol, addrSym) {
			v.reset(OpAMD64XORQconstmodify)
			v.AuxInt = packed.add(delta)
			v.Aux = mergeSym(symbol, addrSym)
			v.AddArg(inner)
			v.AddArg(memory)
			return true
		}
	}
	// Neither rule applied; v is left untouched.
	return false
}
func rewriteValueAMD64_OpAMD64XORQload_0(v *Value) bool { func rewriteValueAMD64_OpAMD64XORQload_0(v *Value) bool {
b := v.Block b := v.Block
_ = b _ = b
...@@ -262,6 +262,16 @@ func bitcompl32(a, b uint32) (n uint32) { ...@@ -262,6 +262,16 @@ func bitcompl32(a, b uint32) (n uint32) {
return n return n
} }
// check direct operation on memory with constant source
//
// bitOpOnMem applies AND, OR and XOR with a constant to the first three
// elements of a. Each `amd64:` comment below is the assembly pattern expected
// for the statement that immediately follows it: a 32-bit op with an immediate
// source and a memory destination at byte offset 0, 4 and 8 respectively.
// Keep every pattern comment directly above its statement and leave the
// statements in compound-assignment form.
func bitOpOnMem(a []uint32) {
// amd64:`ANDL\s[$]200,\s\([A-Z]+\)`
a[0] &= 200
// amd64:`ORL\s[$]220,\s4\([A-Z]+\)`
a[1] |= 220
// amd64:`XORL\s[$]240,\s8\([A-Z]+\)`
a[2] ^= 240
}
// Check AND masking on arm64 (Issue #19857) // Check AND masking on arm64 (Issue #19857)
func and_mask_1(a uint64) uint64 { func and_mask_1(a uint64) uint64 {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment