Commit b75c5c59 authored by Ben Shi's avatar Ben Shi

cmd/compile: optimize AMD64 with more read-modify-write operations

6 more operations which do read-modify-write with a constant
source operand are added.

1. The total size of pkg/linux_amd64 decreases about 3KB, excluding
cmd/compile.

2. The go1 benckmark shows a slight improvement.
name                     old time/op    new time/op    delta
BinaryTree17-4              2.61s ± 4%     2.67s ± 2%  +2.26%  (p=0.000 n=30+29)
Fannkuch11-4                2.39s ± 2%     2.32s ± 2%  -2.67%  (p=0.000 n=30+30)
FmtFprintfEmpty-4          44.0ns ± 4%    41.7ns ± 4%  -5.15%  (p=0.000 n=30+30)
FmtFprintfString-4         74.2ns ± 4%    72.3ns ± 4%  -2.59%  (p=0.000 n=30+30)
FmtFprintfInt-4            81.7ns ± 3%    78.8ns ± 4%  -3.54%  (p=0.000 n=27+30)
FmtFprintfIntInt-4          130ns ± 4%     124ns ± 5%  -4.60%  (p=0.000 n=30+30)
FmtFprintfPrefixedInt-4     154ns ± 3%     152ns ± 3%  -1.13%  (p=0.012 n=30+30)
FmtFprintfFloat-4           215ns ± 4%     212ns ± 5%  -1.56%  (p=0.002 n=30+30)
FmtManyArgs-4               522ns ± 3%     512ns ± 3%  -1.84%  (p=0.001 n=30+30)
GobDecode-4                6.42ms ± 5%    6.49ms ± 7%    ~     (p=0.070 n=30+30)
GobEncode-4                6.07ms ± 8%    5.98ms ± 8%    ~     (p=0.150 n=30+30)
Gzip-4                      236ms ± 4%     223ms ± 4%  -5.57%  (p=0.000 n=30+30)
Gunzip-4                   37.4ms ± 3%    36.7ms ± 4%  -2.03%  (p=0.000 n=30+30)
HTTPClientServer-4         58.7µs ± 1%    58.5µs ± 2%  -0.37%  (p=0.018 n=30+29)
JSONEncode-4               12.0ms ± 4%    12.1ms ± 3%    ~     (p=0.112 n=30+30)
JSONDecode-4               54.5ms ± 3%    55.5ms ± 4%  +1.80%  (p=0.006 n=30+30)
Mandelbrot200-4            3.78ms ± 4%    3.78ms ± 4%    ~     (p=0.173 n=30+30)
GoParse-4                  3.16ms ± 5%    3.22ms ± 5%  +1.75%  (p=0.010 n=30+30)
RegexpMatchEasy0_32-4      76.6ns ± 1%    75.9ns ± 3%    ~     (p=0.672 n=25+30)
RegexpMatchEasy0_1K-4       252ns ± 3%     253ns ± 3%  +0.57%  (p=0.027 n=30+30)
RegexpMatchEasy1_32-4      69.8ns ± 4%    70.2ns ± 6%    ~     (p=0.539 n=30+30)
RegexpMatchEasy1_1K-4       374ns ± 3%     373ns ± 5%    ~     (p=0.263 n=30+30)
RegexpMatchMedium_32-4      107ns ± 4%     109ns ± 3%    ~     (p=0.067 n=30+30)
RegexpMatchMedium_1K-4     33.9µs ± 5%    34.1µs ± 4%    ~     (p=0.297 n=30+30)
RegexpMatchHard_32-4       1.54µs ± 3%    1.56µs ± 4%  +1.43%  (p=0.002 n=30+30)
RegexpMatchHard_1K-4       46.6µs ± 3%    47.0µs ± 3%    ~     (p=0.055 n=30+30)
Revcomp-4                   411ms ± 6%     407ms ± 6%    ~     (p=0.219 n=30+30)
Template-4                 66.8ms ± 3%    64.8ms ± 5%  -3.01%  (p=0.000 n=30+30)
TimeParse-4                 312ns ± 2%     319ns ± 3%  +2.50%  (p=0.000 n=30+30)
TimeFormat-4                296ns ± 5%     299ns ± 3%  +0.93%  (p=0.005 n=30+30)
[Geo mean]                 47.5µs         47.1µs       -0.75%

name                     old speed      new speed      delta
GobDecode-4               120MB/s ± 5%   118MB/s ± 6%    ~     (p=0.072 n=30+30)
GobEncode-4               127MB/s ± 8%   129MB/s ± 8%    ~     (p=0.150 n=30+30)
Gzip-4                   82.1MB/s ± 4%  87.0MB/s ± 4%  +5.90%  (p=0.000 n=30+30)
Gunzip-4                  519MB/s ± 4%   529MB/s ± 4%  +2.07%  (p=0.001 n=30+30)
JSONEncode-4              162MB/s ± 4%   161MB/s ± 3%    ~     (p=0.110 n=30+30)
JSONDecode-4             35.6MB/s ± 3%  35.0MB/s ± 4%  -1.77%  (p=0.007 n=30+30)
GoParse-4                18.3MB/s ± 4%  18.0MB/s ± 4%  -1.72%  (p=0.009 n=30+30)
RegexpMatchEasy0_32-4     418MB/s ± 1%   422MB/s ± 3%    ~     (p=0.645 n=25+30)
RegexpMatchEasy0_1K-4    4.06GB/s ± 3%  4.04GB/s ± 3%  -0.57%  (p=0.033 n=30+30)
RegexpMatchEasy1_32-4     459MB/s ± 4%   456MB/s ± 6%    ~     (p=0.530 n=30+30)
RegexpMatchEasy1_1K-4    2.73GB/s ± 3%  2.75GB/s ± 5%    ~     (p=0.279 n=30+30)
RegexpMatchMedium_32-4   9.28MB/s ± 5%  9.18MB/s ± 4%    ~     (p=0.086 n=30+30)
RegexpMatchMedium_1K-4   30.2MB/s ± 4%  30.0MB/s ± 4%    ~     (p=0.300 n=30+30)
RegexpMatchHard_32-4     20.8MB/s ± 3%  20.5MB/s ± 4%  -1.41%  (p=0.002 n=30+30)
RegexpMatchHard_1K-4     22.0MB/s ± 3%  21.8MB/s ± 3%    ~     (p=0.051 n=30+30)
Revcomp-4                 619MB/s ± 7%   625MB/s ± 7%    ~     (p=0.219 n=30+30)
Template-4               29.0MB/s ± 3%  29.9MB/s ± 4%  +3.11%  (p=0.000 n=30+30)
[Geo mean]                123MB/s        123MB/s       +0.28%

Change-Id: I850652cfd53329c1af804b7f57f4393d8097bb0d
Reviewed-on: https://go-review.googlesource.com/121135
Run-TryBot: Ben Shi <powerman1st@163.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: default avatarIlya Tocar <ilya.tocar@intel.com>
parent b40db514
......@@ -770,6 +770,17 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
p.To.Reg = v.Args[0].Reg()
gc.AddAux2(&p.To, v, off)
}
case ssa.OpAMD64ANDQconstmodify, ssa.OpAMD64ANDLconstmodify, ssa.OpAMD64ORQconstmodify, ssa.OpAMD64ORLconstmodify,
ssa.OpAMD64XORQconstmodify, ssa.OpAMD64XORLconstmodify:
sc := v.AuxValAndOff()
off := sc.Off()
val := sc.Val()
p := s.Prog(v.Op.Asm())
p.From.Type = obj.TYPE_CONST
p.From.Offset = val
p.To.Type = obj.TYPE_MEM
p.To.Reg = v.Args[0].Reg()
gc.AddAux2(&p.To, v, off)
case ssa.OpAMD64MOVQstoreconst, ssa.OpAMD64MOVLstoreconst, ssa.OpAMD64MOVWstoreconst, ssa.OpAMD64MOVBstoreconst:
p := s.Prog(v.Op.Asm())
p.From.Type = obj.TYPE_CONST
......
......@@ -1039,8 +1039,10 @@
((ADD|SUB|MUL)SSload [off1+off2] {sym} val base mem)
((ADD|SUB|MUL)SDload [off1] {sym} val (ADDQconst [off2] base) mem) && is32Bit(off1+off2) ->
((ADD|SUB|MUL)SDload [off1+off2] {sym} val base mem)
(ADD(L|Q)constmodify [valoff1] {sym} (ADDQconst [off2] base) mem) && ValAndOff(valoff1).canAdd(off2) ->
(ADD(L|Q)constmodify [ValAndOff(valoff1).add(off2)] {sym} base mem)
((ADD|AND|OR|XOR)Qconstmodify [valoff1] {sym} (ADDQconst [off2] base) mem) && ValAndOff(valoff1).canAdd(off2) ->
((ADD|AND|OR|XOR)Qconstmodify [ValAndOff(valoff1).add(off2)] {sym} base mem)
((ADD|AND|OR|XOR)Lconstmodify [valoff1] {sym} (ADDQconst [off2] base) mem) && ValAndOff(valoff1).canAdd(off2) ->
((ADD|AND|OR|XOR)Lconstmodify [ValAndOff(valoff1).add(off2)] {sym} base mem)
// Fold constants into stores.
(MOVQstore [off] {sym} ptr (MOVQconst [c]) mem) && validValAndOff(c,off) ->
......@@ -1081,9 +1083,12 @@
((ADD|SUB|MUL)SDload [off1] {sym1} val (LEAQ [off2] {sym2} base) mem)
&& is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
((ADD|SUB|MUL)SDload [off1+off2] {mergeSym(sym1,sym2)} val base mem)
(ADD(L|Q)constmodify [valoff1] {sym1} (LEAQ [off2] {sym2} base) mem)
((ADD|AND|OR|XOR)Qconstmodify [valoff1] {sym1} (LEAQ [off2] {sym2} base) mem)
&& ValAndOff(valoff1).canAdd(off2) && canMergeSym(sym1, sym2) ->
(ADD(L|Q)constmodify [ValAndOff(valoff1).add(off2)] {mergeSym(sym1,sym2)} base mem)
((ADD|AND|OR|XOR)Qconstmodify [ValAndOff(valoff1).add(off2)] {mergeSym(sym1,sym2)} base mem)
((ADD|AND|OR|XOR)Lconstmodify [valoff1] {sym1} (LEAQ [off2] {sym2} base) mem)
&& ValAndOff(valoff1).canAdd(off2) && canMergeSym(sym1, sym2) ->
((ADD|AND|OR|XOR)Lconstmodify [ValAndOff(valoff1).add(off2)] {mergeSym(sym1,sym2)} base mem)
// generating indexed loads and stores
(MOV(B|W|L|Q|SS|SD)load [off1] {sym1} (LEAQ1 [off2] {sym2} ptr idx) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
......@@ -2352,12 +2357,12 @@
(MOVWQZX (MOVBQZX x)) -> (MOVBQZX x)
(MOVBQZX (MOVBQZX x)) -> (MOVBQZX x)
(MOVQstore [off] {sym} ptr a:(ADDQconst [c] l:(MOVQload [off] {sym} ptr2 mem)) mem)
(MOVQstore [off] {sym} ptr a:((ADD|AND|OR|XOR)Qconst [c] l:(MOVQload [off] {sym} ptr2 mem)) mem)
&& isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c,off) ->
(ADDQconstmodify {sym} [makeValAndOff(c,off)] ptr mem)
(MOVLstore [off] {sym} ptr a:(ADDLconst [c] l:(MOVLload [off] {sym} ptr2 mem)) mem)
((ADD|AND|OR|XOR)Qconstmodify {sym} [makeValAndOff(c,off)] ptr mem)
(MOVLstore [off] {sym} ptr a:((ADD|AND|OR|XOR)Lconst [c] l:(MOVLload [off] {sym} ptr2 mem)) mem)
&& isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c,off) ->
(ADDLconstmodify {sym} [makeValAndOff(c,off)] ptr mem)
((ADD|AND|OR|XOR)Lconstmodify {sym} [makeValAndOff(c,off)] ptr mem)
// float <-> int register moves, with no conversion.
// These come up when compiling math.{Float{32,64}bits,Float{32,64}frombits}.
......
......@@ -229,16 +229,22 @@ func init() {
{name: "ANDL", argLength: 2, reg: gp21, asm: "ANDL", commutative: true, resultInArg0: true, clobberFlags: true}, // arg0 & arg1
{name: "ANDQconst", argLength: 1, reg: gp11, asm: "ANDQ", aux: "Int32", resultInArg0: true, clobberFlags: true}, // arg0 & auxint
{name: "ANDLconst", argLength: 1, reg: gp11, asm: "ANDL", aux: "Int32", resultInArg0: true, clobberFlags: true}, // arg0 & auxint
{name: "ANDQconstmodify", argLength: 2, reg: gpstoreconst, asm: "ANDQ", aux: "SymValAndOff", clobberFlags: true, faultOnNilArg0: true, symEffect: "Read,Write"}, // and ValAndOff(AuxInt).Val() to arg0+ValAndOff(AuxInt).Off()+aux, arg1=mem
{name: "ANDLconstmodify", argLength: 2, reg: gpstoreconst, asm: "ANDL", aux: "SymValAndOff", clobberFlags: true, faultOnNilArg0: true, symEffect: "Read,Write"}, // and ValAndOff(AuxInt).Val() to arg0+ValAndOff(AuxInt).Off()+aux, arg1=mem
{name: "ORQ", argLength: 2, reg: gp21, asm: "ORQ", commutative: true, resultInArg0: true, clobberFlags: true}, // arg0 | arg1
{name: "ORL", argLength: 2, reg: gp21, asm: "ORL", commutative: true, resultInArg0: true, clobberFlags: true}, // arg0 | arg1
{name: "ORQconst", argLength: 1, reg: gp11, asm: "ORQ", aux: "Int32", resultInArg0: true, clobberFlags: true}, // arg0 | auxint
{name: "ORLconst", argLength: 1, reg: gp11, asm: "ORL", aux: "Int32", resultInArg0: true, clobberFlags: true}, // arg0 | auxint
{name: "ORQconstmodify", argLength: 2, reg: gpstoreconst, asm: "ORQ", aux: "SymValAndOff", clobberFlags: true, faultOnNilArg0: true, symEffect: "Read,Write"}, // or ValAndOff(AuxInt).Val() to arg0+ValAndOff(AuxInt).Off()+aux, arg1=mem
{name: "ORLconstmodify", argLength: 2, reg: gpstoreconst, asm: "ORL", aux: "SymValAndOff", clobberFlags: true, faultOnNilArg0: true, symEffect: "Read,Write"}, // or ValAndOff(AuxInt).Val() to arg0+ValAndOff(AuxInt).Off()+aux, arg1=mem
{name: "XORQ", argLength: 2, reg: gp21, asm: "XORQ", commutative: true, resultInArg0: true, clobberFlags: true}, // arg0 ^ arg1
{name: "XORL", argLength: 2, reg: gp21, asm: "XORL", commutative: true, resultInArg0: true, clobberFlags: true}, // arg0 ^ arg1
{name: "XORQconst", argLength: 1, reg: gp11, asm: "XORQ", aux: "Int32", resultInArg0: true, clobberFlags: true}, // arg0 ^ auxint
{name: "XORLconst", argLength: 1, reg: gp11, asm: "XORL", aux: "Int32", resultInArg0: true, clobberFlags: true}, // arg0 ^ auxint
{name: "XORQconstmodify", argLength: 2, reg: gpstoreconst, asm: "XORQ", aux: "SymValAndOff", clobberFlags: true, faultOnNilArg0: true, symEffect: "Read,Write"}, // xor ValAndOff(AuxInt).Val() to arg0+ValAndOff(AuxInt).Off()+aux, arg1=mem
{name: "XORLconstmodify", argLength: 2, reg: gpstoreconst, asm: "XORL", aux: "SymValAndOff", clobberFlags: true, faultOnNilArg0: true, symEffect: "Read,Write"}, // xor ValAndOff(AuxInt).Val() to arg0+ValAndOff(AuxInt).Off()+aux, arg1=mem
{name: "CMPQ", argLength: 2, reg: gp2flags, asm: "CMPQ", typ: "Flags"}, // arg0 compare to arg1
{name: "CMPL", argLength: 2, reg: gp2flags, asm: "CMPL", typ: "Flags"}, // arg0 compare to arg1
......
......@@ -487,14 +487,20 @@ const (
OpAMD64ANDL
OpAMD64ANDQconst
OpAMD64ANDLconst
OpAMD64ANDQconstmodify
OpAMD64ANDLconstmodify
OpAMD64ORQ
OpAMD64ORL
OpAMD64ORQconst
OpAMD64ORLconst
OpAMD64ORQconstmodify
OpAMD64ORLconstmodify
OpAMD64XORQ
OpAMD64XORL
OpAMD64XORQconst
OpAMD64XORLconst
OpAMD64XORQconstmodify
OpAMD64XORLconstmodify
OpAMD64CMPQ
OpAMD64CMPL
OpAMD64CMPW
......@@ -5948,6 +5954,34 @@ var opcodeTable = [...]opInfo{
},
},
},
{
name: "ANDQconstmodify",
auxType: auxSymValAndOff,
argLen: 2,
clobberFlags: true,
faultOnNilArg0: true,
symEffect: SymRead | SymWrite,
asm: x86.AANDQ,
reg: regInfo{
inputs: []inputInfo{
{0, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 SB
},
},
},
{
name: "ANDLconstmodify",
auxType: auxSymValAndOff,
argLen: 2,
clobberFlags: true,
faultOnNilArg0: true,
symEffect: SymRead | SymWrite,
asm: x86.AANDL,
reg: regInfo{
inputs: []inputInfo{
{0, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 SB
},
},
},
{
name: "ORQ",
argLen: 2,
......@@ -6014,6 +6048,34 @@ var opcodeTable = [...]opInfo{
},
},
},
{
name: "ORQconstmodify",
auxType: auxSymValAndOff,
argLen: 2,
clobberFlags: true,
faultOnNilArg0: true,
symEffect: SymRead | SymWrite,
asm: x86.AORQ,
reg: regInfo{
inputs: []inputInfo{
{0, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 SB
},
},
},
{
name: "ORLconstmodify",
auxType: auxSymValAndOff,
argLen: 2,
clobberFlags: true,
faultOnNilArg0: true,
symEffect: SymRead | SymWrite,
asm: x86.AORL,
reg: regInfo{
inputs: []inputInfo{
{0, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 SB
},
},
},
{
name: "XORQ",
argLen: 2,
......@@ -6080,6 +6142,34 @@ var opcodeTable = [...]opInfo{
},
},
},
{
name: "XORQconstmodify",
auxType: auxSymValAndOff,
argLen: 2,
clobberFlags: true,
faultOnNilArg0: true,
symEffect: SymRead | SymWrite,
asm: x86.AXORQ,
reg: regInfo{
inputs: []inputInfo{
{0, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 SB
},
},
},
{
name: "XORLconstmodify",
auxType: auxSymValAndOff,
argLen: 2,
clobberFlags: true,
faultOnNilArg0: true,
symEffect: SymRead | SymWrite,
asm: x86.AXORL,
reg: regInfo{
inputs: []inputInfo{
{0, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 SB
},
},
},
{
name: "CMPQ",
argLen: 2,
......
......@@ -43,12 +43,16 @@ func rewriteValueAMD64(v *Value) bool {
return rewriteValueAMD64_OpAMD64ANDL_0(v)
case OpAMD64ANDLconst:
return rewriteValueAMD64_OpAMD64ANDLconst_0(v)
case OpAMD64ANDLconstmodify:
return rewriteValueAMD64_OpAMD64ANDLconstmodify_0(v)
case OpAMD64ANDLload:
return rewriteValueAMD64_OpAMD64ANDLload_0(v)
case OpAMD64ANDQ:
return rewriteValueAMD64_OpAMD64ANDQ_0(v)
case OpAMD64ANDQconst:
return rewriteValueAMD64_OpAMD64ANDQconst_0(v)
case OpAMD64ANDQconstmodify:
return rewriteValueAMD64_OpAMD64ANDQconstmodify_0(v)
case OpAMD64ANDQload:
return rewriteValueAMD64_OpAMD64ANDQload_0(v)
case OpAMD64BSFQ:
......@@ -242,7 +246,7 @@ func rewriteValueAMD64(v *Value) bool {
case OpAMD64MOVQloadidx8:
return rewriteValueAMD64_OpAMD64MOVQloadidx8_0(v)
case OpAMD64MOVQstore:
return rewriteValueAMD64_OpAMD64MOVQstore_0(v)
return rewriteValueAMD64_OpAMD64MOVQstore_0(v) || rewriteValueAMD64_OpAMD64MOVQstore_10(v)
case OpAMD64MOVQstoreconst:
return rewriteValueAMD64_OpAMD64MOVQstoreconst_0(v)
case OpAMD64MOVQstoreconstidx1:
......@@ -329,12 +333,16 @@ func rewriteValueAMD64(v *Value) bool {
return rewriteValueAMD64_OpAMD64ORL_0(v) || rewriteValueAMD64_OpAMD64ORL_10(v) || rewriteValueAMD64_OpAMD64ORL_20(v) || rewriteValueAMD64_OpAMD64ORL_30(v) || rewriteValueAMD64_OpAMD64ORL_40(v) || rewriteValueAMD64_OpAMD64ORL_50(v) || rewriteValueAMD64_OpAMD64ORL_60(v) || rewriteValueAMD64_OpAMD64ORL_70(v) || rewriteValueAMD64_OpAMD64ORL_80(v) || rewriteValueAMD64_OpAMD64ORL_90(v) || rewriteValueAMD64_OpAMD64ORL_100(v) || rewriteValueAMD64_OpAMD64ORL_110(v) || rewriteValueAMD64_OpAMD64ORL_120(v) || rewriteValueAMD64_OpAMD64ORL_130(v)
case OpAMD64ORLconst:
return rewriteValueAMD64_OpAMD64ORLconst_0(v)
case OpAMD64ORLconstmodify:
return rewriteValueAMD64_OpAMD64ORLconstmodify_0(v)
case OpAMD64ORLload:
return rewriteValueAMD64_OpAMD64ORLload_0(v)
case OpAMD64ORQ:
return rewriteValueAMD64_OpAMD64ORQ_0(v) || rewriteValueAMD64_OpAMD64ORQ_10(v) || rewriteValueAMD64_OpAMD64ORQ_20(v) || rewriteValueAMD64_OpAMD64ORQ_30(v) || rewriteValueAMD64_OpAMD64ORQ_40(v) || rewriteValueAMD64_OpAMD64ORQ_50(v) || rewriteValueAMD64_OpAMD64ORQ_60(v) || rewriteValueAMD64_OpAMD64ORQ_70(v) || rewriteValueAMD64_OpAMD64ORQ_80(v) || rewriteValueAMD64_OpAMD64ORQ_90(v) || rewriteValueAMD64_OpAMD64ORQ_100(v) || rewriteValueAMD64_OpAMD64ORQ_110(v) || rewriteValueAMD64_OpAMD64ORQ_120(v) || rewriteValueAMD64_OpAMD64ORQ_130(v) || rewriteValueAMD64_OpAMD64ORQ_140(v) || rewriteValueAMD64_OpAMD64ORQ_150(v) || rewriteValueAMD64_OpAMD64ORQ_160(v)
case OpAMD64ORQconst:
return rewriteValueAMD64_OpAMD64ORQconst_0(v)
case OpAMD64ORQconstmodify:
return rewriteValueAMD64_OpAMD64ORQconstmodify_0(v)
case OpAMD64ORQload:
return rewriteValueAMD64_OpAMD64ORQload_0(v)
case OpAMD64ROLB:
......@@ -493,12 +501,16 @@ func rewriteValueAMD64(v *Value) bool {
return rewriteValueAMD64_OpAMD64XORL_0(v) || rewriteValueAMD64_OpAMD64XORL_10(v)
case OpAMD64XORLconst:
return rewriteValueAMD64_OpAMD64XORLconst_0(v) || rewriteValueAMD64_OpAMD64XORLconst_10(v)
case OpAMD64XORLconstmodify:
return rewriteValueAMD64_OpAMD64XORLconstmodify_0(v)
case OpAMD64XORLload:
return rewriteValueAMD64_OpAMD64XORLload_0(v)
case OpAMD64XORQ:
return rewriteValueAMD64_OpAMD64XORQ_0(v) || rewriteValueAMD64_OpAMD64XORQ_10(v)
case OpAMD64XORQconst:
return rewriteValueAMD64_OpAMD64XORQconst_0(v)
case OpAMD64XORQconstmodify:
return rewriteValueAMD64_OpAMD64XORQconstmodify_0(v)
case OpAMD64XORQload:
return rewriteValueAMD64_OpAMD64XORQload_0(v)
case OpAdd16:
......@@ -3480,6 +3492,58 @@ func rewriteValueAMD64_OpAMD64ANDLconst_0(v *Value) bool {
}
return false
}
func rewriteValueAMD64_OpAMD64ANDLconstmodify_0(v *Value) bool {
// match: (ANDLconstmodify [valoff1] {sym} (ADDQconst [off2] base) mem)
// cond: ValAndOff(valoff1).canAdd(off2)
// result: (ANDLconstmodify [ValAndOff(valoff1).add(off2)] {sym} base mem)
for {
valoff1 := v.AuxInt
sym := v.Aux
_ = v.Args[1]
v_0 := v.Args[0]
if v_0.Op != OpAMD64ADDQconst {
break
}
off2 := v_0.AuxInt
base := v_0.Args[0]
mem := v.Args[1]
if !(ValAndOff(valoff1).canAdd(off2)) {
break
}
v.reset(OpAMD64ANDLconstmodify)
v.AuxInt = ValAndOff(valoff1).add(off2)
v.Aux = sym
v.AddArg(base)
v.AddArg(mem)
return true
}
// match: (ANDLconstmodify [valoff1] {sym1} (LEAQ [off2] {sym2} base) mem)
// cond: ValAndOff(valoff1).canAdd(off2) && canMergeSym(sym1, sym2)
// result: (ANDLconstmodify [ValAndOff(valoff1).add(off2)] {mergeSym(sym1,sym2)} base mem)
for {
valoff1 := v.AuxInt
sym1 := v.Aux
_ = v.Args[1]
v_0 := v.Args[0]
if v_0.Op != OpAMD64LEAQ {
break
}
off2 := v_0.AuxInt
sym2 := v_0.Aux
base := v_0.Args[0]
mem := v.Args[1]
if !(ValAndOff(valoff1).canAdd(off2) && canMergeSym(sym1, sym2)) {
break
}
v.reset(OpAMD64ANDLconstmodify)
v.AuxInt = ValAndOff(valoff1).add(off2)
v.Aux = mergeSym(sym1, sym2)
v.AddArg(base)
v.AddArg(mem)
return true
}
return false
}
func rewriteValueAMD64_OpAMD64ANDLload_0(v *Value) bool {
b := v.Block
_ = b
......@@ -3893,6 +3957,58 @@ func rewriteValueAMD64_OpAMD64ANDQconst_0(v *Value) bool {
}
return false
}
func rewriteValueAMD64_OpAMD64ANDQconstmodify_0(v *Value) bool {
// match: (ANDQconstmodify [valoff1] {sym} (ADDQconst [off2] base) mem)
// cond: ValAndOff(valoff1).canAdd(off2)
// result: (ANDQconstmodify [ValAndOff(valoff1).add(off2)] {sym} base mem)
for {
valoff1 := v.AuxInt
sym := v.Aux
_ = v.Args[1]
v_0 := v.Args[0]
if v_0.Op != OpAMD64ADDQconst {
break
}
off2 := v_0.AuxInt
base := v_0.Args[0]
mem := v.Args[1]
if !(ValAndOff(valoff1).canAdd(off2)) {
break
}
v.reset(OpAMD64ANDQconstmodify)
v.AuxInt = ValAndOff(valoff1).add(off2)
v.Aux = sym
v.AddArg(base)
v.AddArg(mem)
return true
}
// match: (ANDQconstmodify [valoff1] {sym1} (LEAQ [off2] {sym2} base) mem)
// cond: ValAndOff(valoff1).canAdd(off2) && canMergeSym(sym1, sym2)
// result: (ANDQconstmodify [ValAndOff(valoff1).add(off2)] {mergeSym(sym1,sym2)} base mem)
for {
valoff1 := v.AuxInt
sym1 := v.Aux
_ = v.Args[1]
v_0 := v.Args[0]
if v_0.Op != OpAMD64LEAQ {
break
}
off2 := v_0.AuxInt
sym2 := v_0.Aux
base := v_0.Args[0]
mem := v.Args[1]
if !(ValAndOff(valoff1).canAdd(off2) && canMergeSym(sym1, sym2)) {
break
}
v.reset(OpAMD64ANDQconstmodify)
v.AuxInt = ValAndOff(valoff1).add(off2)
v.Aux = mergeSym(sym1, sym2)
v.AddArg(base)
v.AddArg(mem)
return true
}
return false
}
func rewriteValueAMD64_OpAMD64ANDQload_0(v *Value) bool {
b := v.Block
_ = b
......@@ -14306,6 +14422,123 @@ func rewriteValueAMD64_OpAMD64MOVLstore_10(v *Value) bool {
v.AddArg(mem)
return true
}
// match: (MOVLstore [off] {sym} ptr a:(ANDLconst [c] l:(MOVLload [off] {sym} ptr2 mem)) mem)
// cond: isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c,off)
// result: (ANDLconstmodify {sym} [makeValAndOff(c,off)] ptr mem)
for {
off := v.AuxInt
sym := v.Aux
_ = v.Args[2]
ptr := v.Args[0]
a := v.Args[1]
if a.Op != OpAMD64ANDLconst {
break
}
c := a.AuxInt
l := a.Args[0]
if l.Op != OpAMD64MOVLload {
break
}
if l.AuxInt != off {
break
}
if l.Aux != sym {
break
}
_ = l.Args[1]
ptr2 := l.Args[0]
mem := l.Args[1]
if mem != v.Args[2] {
break
}
if !(isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c, off)) {
break
}
v.reset(OpAMD64ANDLconstmodify)
v.AuxInt = makeValAndOff(c, off)
v.Aux = sym
v.AddArg(ptr)
v.AddArg(mem)
return true
}
// match: (MOVLstore [off] {sym} ptr a:(ORLconst [c] l:(MOVLload [off] {sym} ptr2 mem)) mem)
// cond: isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c,off)
// result: (ORLconstmodify {sym} [makeValAndOff(c,off)] ptr mem)
for {
off := v.AuxInt
sym := v.Aux
_ = v.Args[2]
ptr := v.Args[0]
a := v.Args[1]
if a.Op != OpAMD64ORLconst {
break
}
c := a.AuxInt
l := a.Args[0]
if l.Op != OpAMD64MOVLload {
break
}
if l.AuxInt != off {
break
}
if l.Aux != sym {
break
}
_ = l.Args[1]
ptr2 := l.Args[0]
mem := l.Args[1]
if mem != v.Args[2] {
break
}
if !(isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c, off)) {
break
}
v.reset(OpAMD64ORLconstmodify)
v.AuxInt = makeValAndOff(c, off)
v.Aux = sym
v.AddArg(ptr)
v.AddArg(mem)
return true
}
// match: (MOVLstore [off] {sym} ptr a:(XORLconst [c] l:(MOVLload [off] {sym} ptr2 mem)) mem)
// cond: isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c,off)
// result: (XORLconstmodify {sym} [makeValAndOff(c,off)] ptr mem)
for {
off := v.AuxInt
sym := v.Aux
_ = v.Args[2]
ptr := v.Args[0]
a := v.Args[1]
if a.Op != OpAMD64XORLconst {
break
}
c := a.AuxInt
l := a.Args[0]
if l.Op != OpAMD64MOVLload {
break
}
if l.AuxInt != off {
break
}
if l.Aux != sym {
break
}
_ = l.Args[1]
ptr2 := l.Args[0]
mem := l.Args[1]
if mem != v.Args[2] {
break
}
if !(isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c, off)) {
break
}
v.reset(OpAMD64XORLconstmodify)
v.AuxInt = makeValAndOff(c, off)
v.Aux = sym
v.AddArg(ptr)
v.AddArg(mem)
return true
}
// match: (MOVLstore [off] {sym} ptr (MOVLf2i val) mem)
// cond:
// result: (MOVSSstore [off] {sym} ptr val mem)
......@@ -16292,6 +16525,126 @@ func rewriteValueAMD64_OpAMD64MOVQstore_0(v *Value) bool {
v.AddArg(mem)
return true
}
// match: (MOVQstore [off] {sym} ptr a:(ANDQconst [c] l:(MOVQload [off] {sym} ptr2 mem)) mem)
// cond: isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c,off)
// result: (ANDQconstmodify {sym} [makeValAndOff(c,off)] ptr mem)
for {
off := v.AuxInt
sym := v.Aux
_ = v.Args[2]
ptr := v.Args[0]
a := v.Args[1]
if a.Op != OpAMD64ANDQconst {
break
}
c := a.AuxInt
l := a.Args[0]
if l.Op != OpAMD64MOVQload {
break
}
if l.AuxInt != off {
break
}
if l.Aux != sym {
break
}
_ = l.Args[1]
ptr2 := l.Args[0]
mem := l.Args[1]
if mem != v.Args[2] {
break
}
if !(isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c, off)) {
break
}
v.reset(OpAMD64ANDQconstmodify)
v.AuxInt = makeValAndOff(c, off)
v.Aux = sym
v.AddArg(ptr)
v.AddArg(mem)
return true
}
return false
}
func rewriteValueAMD64_OpAMD64MOVQstore_10(v *Value) bool {
// match: (MOVQstore [off] {sym} ptr a:(ORQconst [c] l:(MOVQload [off] {sym} ptr2 mem)) mem)
// cond: isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c,off)
// result: (ORQconstmodify {sym} [makeValAndOff(c,off)] ptr mem)
for {
off := v.AuxInt
sym := v.Aux
_ = v.Args[2]
ptr := v.Args[0]
a := v.Args[1]
if a.Op != OpAMD64ORQconst {
break
}
c := a.AuxInt
l := a.Args[0]
if l.Op != OpAMD64MOVQload {
break
}
if l.AuxInt != off {
break
}
if l.Aux != sym {
break
}
_ = l.Args[1]
ptr2 := l.Args[0]
mem := l.Args[1]
if mem != v.Args[2] {
break
}
if !(isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c, off)) {
break
}
v.reset(OpAMD64ORQconstmodify)
v.AuxInt = makeValAndOff(c, off)
v.Aux = sym
v.AddArg(ptr)
v.AddArg(mem)
return true
}
// match: (MOVQstore [off] {sym} ptr a:(XORQconst [c] l:(MOVQload [off] {sym} ptr2 mem)) mem)
// cond: isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c,off)
// result: (XORQconstmodify {sym} [makeValAndOff(c,off)] ptr mem)
for {
off := v.AuxInt
sym := v.Aux
_ = v.Args[2]
ptr := v.Args[0]
a := v.Args[1]
if a.Op != OpAMD64XORQconst {
break
}
c := a.AuxInt
l := a.Args[0]
if l.Op != OpAMD64MOVQload {
break
}
if l.AuxInt != off {
break
}
if l.Aux != sym {
break
}
_ = l.Args[1]
ptr2 := l.Args[0]
mem := l.Args[1]
if mem != v.Args[2] {
break
}
if !(isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c, off)) {
break
}
v.reset(OpAMD64XORQconstmodify)
v.AuxInt = makeValAndOff(c, off)
v.Aux = sym
v.AddArg(ptr)
v.AddArg(mem)
return true
}
// match: (MOVQstore [off] {sym} ptr (MOVQf2i val) mem)
// cond:
// result: (MOVSDstore [off] {sym} ptr val mem)
......@@ -30779,6 +31132,58 @@ func rewriteValueAMD64_OpAMD64ORLconst_0(v *Value) bool {
}
return false
}
func rewriteValueAMD64_OpAMD64ORLconstmodify_0(v *Value) bool {
// match: (ORLconstmodify [valoff1] {sym} (ADDQconst [off2] base) mem)
// cond: ValAndOff(valoff1).canAdd(off2)
// result: (ORLconstmodify [ValAndOff(valoff1).add(off2)] {sym} base mem)
for {
valoff1 := v.AuxInt
sym := v.Aux
_ = v.Args[1]
v_0 := v.Args[0]
if v_0.Op != OpAMD64ADDQconst {
break
}
off2 := v_0.AuxInt
base := v_0.Args[0]
mem := v.Args[1]
if !(ValAndOff(valoff1).canAdd(off2)) {
break
}
v.reset(OpAMD64ORLconstmodify)
v.AuxInt = ValAndOff(valoff1).add(off2)
v.Aux = sym
v.AddArg(base)
v.AddArg(mem)
return true
}
// match: (ORLconstmodify [valoff1] {sym1} (LEAQ [off2] {sym2} base) mem)
// cond: ValAndOff(valoff1).canAdd(off2) && canMergeSym(sym1, sym2)
// result: (ORLconstmodify [ValAndOff(valoff1).add(off2)] {mergeSym(sym1,sym2)} base mem)
for {
valoff1 := v.AuxInt
sym1 := v.Aux
_ = v.Args[1]
v_0 := v.Args[0]
if v_0.Op != OpAMD64LEAQ {
break
}
off2 := v_0.AuxInt
sym2 := v_0.Aux
base := v_0.Args[0]
mem := v.Args[1]
if !(ValAndOff(valoff1).canAdd(off2) && canMergeSym(sym1, sym2)) {
break
}
v.reset(OpAMD64ORLconstmodify)
v.AuxInt = ValAndOff(valoff1).add(off2)
v.Aux = mergeSym(sym1, sym2)
v.AddArg(base)
v.AddArg(mem)
return true
}
return false
}
func rewriteValueAMD64_OpAMD64ORLload_0(v *Value) bool {
b := v.Block
_ = b
......@@ -41688,6 +42093,58 @@ func rewriteValueAMD64_OpAMD64ORQconst_0(v *Value) bool {
}
return false
}
func rewriteValueAMD64_OpAMD64ORQconstmodify_0(v *Value) bool {
// match: (ORQconstmodify [valoff1] {sym} (ADDQconst [off2] base) mem)
// cond: ValAndOff(valoff1).canAdd(off2)
// result: (ORQconstmodify [ValAndOff(valoff1).add(off2)] {sym} base mem)
for {
valoff1 := v.AuxInt
sym := v.Aux
_ = v.Args[1]
v_0 := v.Args[0]
if v_0.Op != OpAMD64ADDQconst {
break
}
off2 := v_0.AuxInt
base := v_0.Args[0]
mem := v.Args[1]
if !(ValAndOff(valoff1).canAdd(off2)) {
break
}
v.reset(OpAMD64ORQconstmodify)
v.AuxInt = ValAndOff(valoff1).add(off2)
v.Aux = sym
v.AddArg(base)
v.AddArg(mem)
return true
}
// match: (ORQconstmodify [valoff1] {sym1} (LEAQ [off2] {sym2} base) mem)
// cond: ValAndOff(valoff1).canAdd(off2) && canMergeSym(sym1, sym2)
// result: (ORQconstmodify [ValAndOff(valoff1).add(off2)] {mergeSym(sym1,sym2)} base mem)
for {
valoff1 := v.AuxInt
sym1 := v.Aux
_ = v.Args[1]
v_0 := v.Args[0]
if v_0.Op != OpAMD64LEAQ {
break
}
off2 := v_0.AuxInt
sym2 := v_0.Aux
base := v_0.Args[0]
mem := v.Args[1]
if !(ValAndOff(valoff1).canAdd(off2) && canMergeSym(sym1, sym2)) {
break
}
v.reset(OpAMD64ORQconstmodify)
v.AuxInt = ValAndOff(valoff1).add(off2)
v.Aux = mergeSym(sym1, sym2)
v.AddArg(base)
v.AddArg(mem)
return true
}
return false
}
func rewriteValueAMD64_OpAMD64ORQload_0(v *Value) bool {
b := v.Block
_ = b
......@@ -52184,6 +52641,58 @@ func rewriteValueAMD64_OpAMD64XORLconst_10(v *Value) bool {
}
return false
}
func rewriteValueAMD64_OpAMD64XORLconstmodify_0(v *Value) bool {
// match: (XORLconstmodify [valoff1] {sym} (ADDQconst [off2] base) mem)
// cond: ValAndOff(valoff1).canAdd(off2)
// result: (XORLconstmodify [ValAndOff(valoff1).add(off2)] {sym} base mem)
for {
valoff1 := v.AuxInt
sym := v.Aux
_ = v.Args[1]
v_0 := v.Args[0]
if v_0.Op != OpAMD64ADDQconst {
break
}
off2 := v_0.AuxInt
base := v_0.Args[0]
mem := v.Args[1]
if !(ValAndOff(valoff1).canAdd(off2)) {
break
}
v.reset(OpAMD64XORLconstmodify)
v.AuxInt = ValAndOff(valoff1).add(off2)
v.Aux = sym
v.AddArg(base)
v.AddArg(mem)
return true
}
// match: (XORLconstmodify [valoff1] {sym1} (LEAQ [off2] {sym2} base) mem)
// cond: ValAndOff(valoff1).canAdd(off2) && canMergeSym(sym1, sym2)
// result: (XORLconstmodify [ValAndOff(valoff1).add(off2)] {mergeSym(sym1,sym2)} base mem)
for {
valoff1 := v.AuxInt
sym1 := v.Aux
_ = v.Args[1]
v_0 := v.Args[0]
if v_0.Op != OpAMD64LEAQ {
break
}
off2 := v_0.AuxInt
sym2 := v_0.Aux
base := v_0.Args[0]
mem := v.Args[1]
if !(ValAndOff(valoff1).canAdd(off2) && canMergeSym(sym1, sym2)) {
break
}
v.reset(OpAMD64XORLconstmodify)
v.AuxInt = ValAndOff(valoff1).add(off2)
v.Aux = mergeSym(sym1, sym2)
v.AddArg(base)
v.AddArg(mem)
return true
}
return false
}
func rewriteValueAMD64_OpAMD64XORLload_0(v *Value) bool {
b := v.Block
_ = b
......@@ -52598,6 +53107,58 @@ func rewriteValueAMD64_OpAMD64XORQconst_0(v *Value) bool {
}
return false
}
func rewriteValueAMD64_OpAMD64XORQconstmodify_0(v *Value) bool {
// match: (XORQconstmodify [valoff1] {sym} (ADDQconst [off2] base) mem)
// cond: ValAndOff(valoff1).canAdd(off2)
// result: (XORQconstmodify [ValAndOff(valoff1).add(off2)] {sym} base mem)
for {
valoff1 := v.AuxInt
sym := v.Aux
_ = v.Args[1]
v_0 := v.Args[0]
if v_0.Op != OpAMD64ADDQconst {
break
}
off2 := v_0.AuxInt
base := v_0.Args[0]
mem := v.Args[1]
if !(ValAndOff(valoff1).canAdd(off2)) {
break
}
v.reset(OpAMD64XORQconstmodify)
v.AuxInt = ValAndOff(valoff1).add(off2)
v.Aux = sym
v.AddArg(base)
v.AddArg(mem)
return true
}
// match: (XORQconstmodify [valoff1] {sym1} (LEAQ [off2] {sym2} base) mem)
// cond: ValAndOff(valoff1).canAdd(off2) && canMergeSym(sym1, sym2)
// result: (XORQconstmodify [ValAndOff(valoff1).add(off2)] {mergeSym(sym1,sym2)} base mem)
for {
valoff1 := v.AuxInt
sym1 := v.Aux
_ = v.Args[1]
v_0 := v.Args[0]
if v_0.Op != OpAMD64LEAQ {
break
}
off2 := v_0.AuxInt
sym2 := v_0.Aux
base := v_0.Args[0]
mem := v.Args[1]
if !(ValAndOff(valoff1).canAdd(off2) && canMergeSym(sym1, sym2)) {
break
}
v.reset(OpAMD64XORQconstmodify)
v.AuxInt = ValAndOff(valoff1).add(off2)
v.Aux = mergeSym(sym1, sym2)
v.AddArg(base)
v.AddArg(mem)
return true
}
return false
}
func rewriteValueAMD64_OpAMD64XORQload_0(v *Value) bool {
b := v.Block
_ = b
......@@ -262,6 +262,16 @@ func bitcompl32(a, b uint32) (n uint32) {
return n
}
// check direct operation on memory with constant source
func bitOpOnMem(a []uint32) {
// amd64:`ANDL\s[$]200,\s\([A-Z]+\)`
a[0] &= 200
// amd64:`ORL\s[$]220,\s4\([A-Z]+\)`
a[1] |= 220
// amd64:`XORL\s[$]240,\s8\([A-Z]+\)`
a[2] ^= 240
}
// Check AND masking on arm64 (Issue #19857)
func and_mask_1(a uint64) uint64 {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment