Commit 0f2ef0ad authored by Ilya Tocar

cmd/compile/internal/ssa: combine byte stores on amd64

On amd64 we optimize encoding/binary.BigEndian.PutUint{16,32,64}
into bswap + single store, but strangely enough not LittleEndian.PutUint{16,32}.
We have similar rules, but they use 64-bit shifts everywhere,
and fail for the 16/32-bit cases. Add rules that match LittleEndian.PutUint,
and relevant tests. Performance results:

LittleEndianPutUint16-6    1.43ns ± 0%    1.07ns ± 0%   -25.17%  (p=0.000 n=9+9)
LittleEndianPutUint32-6    2.14ns ± 0%    0.94ns ± 0%   -56.07%  (p=0.019 n=6+8)

LittleEndianPutUint16-6  1.40GB/s ± 0%  1.87GB/s ± 0%   +33.24%  (p=0.000 n=9+9)
LittleEndianPutUint32-6  1.87GB/s ± 0%  4.26GB/s ± 0%  +128.54%  (p=0.000 n=8+8)

Discovered while looking at ethereum_ethash from the community benchmarks.

Change-Id: Id86d5443687ecddd2803edf3203dbdd1246f61fe
Reviewed-on: https://go-review.googlesource.com/95475
Run-TryBot: Ilya Tocar <ilya.tocar@intel.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
parent d7cd61ce
...@@ -338,6 +338,54 @@ var linuxAMD64Tests = []*asmTest{ ...@@ -338,6 +338,54 @@ var linuxAMD64Tests = []*asmTest{
`, `,
pos: []string{"\tMOVL\t\\(.*\\)\\(.*\\*1\\),"}, pos: []string{"\tMOVL\t\\(.*\\)\\(.*\\*1\\),"},
}, },
{
fn: `
func $(b []byte, v uint64) {
binary.LittleEndian.PutUint64(b, v)
}
`,
neg: []string{"SHRQ"},
},
{
fn: `
func $(b []byte, i int, v uint64) {
binary.LittleEndian.PutUint64(b[i:], v)
}
`,
neg: []string{"SHRQ"},
},
{
fn: `
func $(b []byte, v uint32) {
binary.LittleEndian.PutUint32(b, v)
}
`,
neg: []string{"SHRL", "SHRQ"},
},
{
fn: `
func $(b []byte, i int, v uint32) {
binary.LittleEndian.PutUint32(b[i:], v)
}
`,
neg: []string{"SHRL", "SHRQ"},
},
{
fn: `
func $(b []byte, v uint16) {
binary.LittleEndian.PutUint16(b, v)
}
`,
neg: []string{"SHRW", "SHRL", "SHRQ"},
},
{
fn: `
func $(b []byte, i int, v uint16) {
binary.LittleEndian.PutUint16(b[i:], v)
}
`,
neg: []string{"SHRW", "SHRL", "SHRQ"},
},
{ {
fn: ` fn: `
func f6(b []byte) uint64 { func f6(b []byte) uint64 {
......
...@@ -2014,19 +2014,19 @@ ...@@ -2014,19 +2014,19 @@
-> (MOVQstoreidx1 [ValAndOff(a).Off()] {s} p (SHLQconst <i.Type> [2] i) (MOVQconst [ValAndOff(a).Val()&0xffffffff | ValAndOff(c).Val()<<32]) mem) -> (MOVQstoreidx1 [ValAndOff(a).Off()] {s} p (SHLQconst <i.Type> [2] i) (MOVQconst [ValAndOff(a).Val()&0xffffffff | ValAndOff(c).Val()<<32]) mem)
// Combine stores into larger (unaligned) stores. // Combine stores into larger (unaligned) stores.
(MOVBstore [i] {s} p (SHRQconst [8] w) x:(MOVBstore [i-1] {s} p w mem)) (MOVBstore [i] {s} p (SHR(W|L|Q)const [8] w) x:(MOVBstore [i-1] {s} p w mem))
&& x.Uses == 1 && x.Uses == 1
&& clobber(x) && clobber(x)
-> (MOVWstore [i-1] {s} p w mem) -> (MOVWstore [i-1] {s} p w mem)
(MOVBstore [i] {s} p (SHRQconst [j] w) x:(MOVBstore [i-1] {s} p w0:(SHRQconst [j-8] w) mem)) (MOVBstore [i] {s} p (SHR(L|Q)const [j] w) x:(MOVBstore [i-1] {s} p w0:(SHR(L|Q)const [j-8] w) mem))
&& x.Uses == 1 && x.Uses == 1
&& clobber(x) && clobber(x)
-> (MOVWstore [i-1] {s} p w0 mem) -> (MOVWstore [i-1] {s} p w0 mem)
(MOVWstore [i] {s} p (SHRQconst [16] w) x:(MOVWstore [i-2] {s} p w mem)) (MOVWstore [i] {s} p (SHR(L|Q)const [16] w) x:(MOVWstore [i-2] {s} p w mem))
&& x.Uses == 1 && x.Uses == 1
&& clobber(x) && clobber(x)
-> (MOVLstore [i-2] {s} p w mem) -> (MOVLstore [i-2] {s} p w mem)
(MOVWstore [i] {s} p (SHRQconst [j] w) x:(MOVWstore [i-2] {s} p w0:(SHRQconst [j-16] w) mem)) (MOVWstore [i] {s} p (SHR(L|Q)const [j] w) x:(MOVWstore [i-2] {s} p w0:(SHR(L|Q)const [j-16] w) mem))
&& x.Uses == 1 && x.Uses == 1
&& clobber(x) && clobber(x)
-> (MOVLstore [i-2] {s} p w0 mem) -> (MOVLstore [i-2] {s} p w0 mem)
...@@ -2039,19 +2039,19 @@ ...@@ -2039,19 +2039,19 @@
&& clobber(x) && clobber(x)
-> (MOVQstore [i-4] {s} p w0 mem) -> (MOVQstore [i-4] {s} p w0 mem)
(MOVBstoreidx1 [i] {s} p idx (SHRQconst [8] w) x:(MOVBstoreidx1 [i-1] {s} p idx w mem)) (MOVBstoreidx1 [i] {s} p idx (SHR(W|L|Q)const [8] w) x:(MOVBstoreidx1 [i-1] {s} p idx w mem))
&& x.Uses == 1 && x.Uses == 1
&& clobber(x) && clobber(x)
-> (MOVWstoreidx1 [i-1] {s} p idx w mem) -> (MOVWstoreidx1 [i-1] {s} p idx w mem)
(MOVBstoreidx1 [i] {s} p idx (SHRQconst [j] w) x:(MOVBstoreidx1 [i-1] {s} p idx w0:(SHRQconst [j-8] w) mem)) (MOVBstoreidx1 [i] {s} p idx (SHR(L|Q)const [j] w) x:(MOVBstoreidx1 [i-1] {s} p idx w0:(SHR(L|Q)const [j-8] w) mem))
&& x.Uses == 1 && x.Uses == 1
&& clobber(x) && clobber(x)
-> (MOVWstoreidx1 [i-1] {s} p idx w0 mem) -> (MOVWstoreidx1 [i-1] {s} p idx w0 mem)
(MOVWstoreidx1 [i] {s} p idx (SHRQconst [16] w) x:(MOVWstoreidx1 [i-2] {s} p idx w mem)) (MOVWstoreidx1 [i] {s} p idx (SHR(L|Q)const [16] w) x:(MOVWstoreidx1 [i-2] {s} p idx w mem))
&& x.Uses == 1 && x.Uses == 1
&& clobber(x) && clobber(x)
-> (MOVLstoreidx1 [i-2] {s} p idx w mem) -> (MOVLstoreidx1 [i-2] {s} p idx w mem)
(MOVWstoreidx1 [i] {s} p idx (SHRQconst [j] w) x:(MOVWstoreidx1 [i-2] {s} p idx w0:(SHRQconst [j-16] w) mem)) (MOVWstoreidx1 [i] {s} p idx (SHR(L|Q)const [j] w) x:(MOVWstoreidx1 [i-2] {s} p idx w0:(SHR(L|Q)const [j-16] w) mem))
&& x.Uses == 1 && x.Uses == 1
&& clobber(x) && clobber(x)
-> (MOVLstoreidx1 [i-2] {s} p idx w0 mem) -> (MOVLstoreidx1 [i-2] {s} p idx w0 mem)
...@@ -2064,7 +2064,7 @@ ...@@ -2064,7 +2064,7 @@
&& clobber(x) && clobber(x)
-> (MOVQstoreidx1 [i-4] {s} p idx w0 mem) -> (MOVQstoreidx1 [i-4] {s} p idx w0 mem)
(MOVWstoreidx2 [i] {s} p idx (SHRQconst [16] w) x:(MOVWstoreidx2 [i-2] {s} p idx w mem)) (MOVWstoreidx2 [i] {s} p idx (SHR(L|Q)const [16] w) x:(MOVWstoreidx2 [i-2] {s} p idx w mem))
&& x.Uses == 1 && x.Uses == 1
&& clobber(x) && clobber(x)
-> (MOVLstoreidx1 [i-2] {s} p (SHLQconst <idx.Type> [1] idx) w mem) -> (MOVLstoreidx1 [i-2] {s} p (SHLQconst <idx.Type> [1] idx) w mem)
......
...@@ -6462,6 +6462,96 @@ func rewriteValueAMD64_OpAMD64MOVBstore_20(v *Value) bool { ...@@ -6462,6 +6462,96 @@ func rewriteValueAMD64_OpAMD64MOVBstore_20(v *Value) bool {
_ = b _ = b
typ := &b.Func.Config.Types typ := &b.Func.Config.Types
_ = typ _ = typ
// match: (MOVBstore [i] {s} p (SHRWconst [8] w) x:(MOVBstore [i-1] {s} p w mem))
// cond: x.Uses == 1 && clobber(x)
// result: (MOVWstore [i-1] {s} p w mem)
for {
i := v.AuxInt
s := v.Aux
_ = v.Args[2]
p := v.Args[0]
v_1 := v.Args[1]
if v_1.Op != OpAMD64SHRWconst {
break
}
if v_1.AuxInt != 8 {
break
}
w := v_1.Args[0]
x := v.Args[2]
if x.Op != OpAMD64MOVBstore {
break
}
if x.AuxInt != i-1 {
break
}
if x.Aux != s {
break
}
_ = x.Args[2]
if p != x.Args[0] {
break
}
if w != x.Args[1] {
break
}
mem := x.Args[2]
if !(x.Uses == 1 && clobber(x)) {
break
}
v.reset(OpAMD64MOVWstore)
v.AuxInt = i - 1
v.Aux = s
v.AddArg(p)
v.AddArg(w)
v.AddArg(mem)
return true
}
// match: (MOVBstore [i] {s} p (SHRLconst [8] w) x:(MOVBstore [i-1] {s} p w mem))
// cond: x.Uses == 1 && clobber(x)
// result: (MOVWstore [i-1] {s} p w mem)
for {
i := v.AuxInt
s := v.Aux
_ = v.Args[2]
p := v.Args[0]
v_1 := v.Args[1]
if v_1.Op != OpAMD64SHRLconst {
break
}
if v_1.AuxInt != 8 {
break
}
w := v_1.Args[0]
x := v.Args[2]
if x.Op != OpAMD64MOVBstore {
break
}
if x.AuxInt != i-1 {
break
}
if x.Aux != s {
break
}
_ = x.Args[2]
if p != x.Args[0] {
break
}
if w != x.Args[1] {
break
}
mem := x.Args[2]
if !(x.Uses == 1 && clobber(x)) {
break
}
v.reset(OpAMD64MOVWstore)
v.AuxInt = i - 1
v.Aux = s
v.AddArg(p)
v.AddArg(w)
v.AddArg(mem)
return true
}
// match: (MOVBstore [i] {s} p (SHRQconst [8] w) x:(MOVBstore [i-1] {s} p w mem)) // match: (MOVBstore [i] {s} p (SHRQconst [8] w) x:(MOVBstore [i-1] {s} p w mem))
// cond: x.Uses == 1 && clobber(x) // cond: x.Uses == 1 && clobber(x)
// result: (MOVWstore [i-1] {s} p w mem) // result: (MOVWstore [i-1] {s} p w mem)
...@@ -6507,6 +6597,56 @@ func rewriteValueAMD64_OpAMD64MOVBstore_20(v *Value) bool { ...@@ -6507,6 +6597,56 @@ func rewriteValueAMD64_OpAMD64MOVBstore_20(v *Value) bool {
v.AddArg(mem) v.AddArg(mem)
return true return true
} }
// match: (MOVBstore [i] {s} p (SHRLconst [j] w) x:(MOVBstore [i-1] {s} p w0:(SHRLconst [j-8] w) mem))
// cond: x.Uses == 1 && clobber(x)
// result: (MOVWstore [i-1] {s} p w0 mem)
for {
i := v.AuxInt
s := v.Aux
_ = v.Args[2]
p := v.Args[0]
v_1 := v.Args[1]
if v_1.Op != OpAMD64SHRLconst {
break
}
j := v_1.AuxInt
w := v_1.Args[0]
x := v.Args[2]
if x.Op != OpAMD64MOVBstore {
break
}
if x.AuxInt != i-1 {
break
}
if x.Aux != s {
break
}
_ = x.Args[2]
if p != x.Args[0] {
break
}
w0 := x.Args[1]
if w0.Op != OpAMD64SHRLconst {
break
}
if w0.AuxInt != j-8 {
break
}
if w != w0.Args[0] {
break
}
mem := x.Args[2]
if !(x.Uses == 1 && clobber(x)) {
break
}
v.reset(OpAMD64MOVWstore)
v.AuxInt = i - 1
v.Aux = s
v.AddArg(p)
v.AddArg(w0)
v.AddArg(mem)
return true
}
// match: (MOVBstore [i] {s} p (SHRQconst [j] w) x:(MOVBstore [i-1] {s} p w0:(SHRQconst [j-8] w) mem)) // match: (MOVBstore [i] {s} p (SHRQconst [j] w) x:(MOVBstore [i-1] {s} p w0:(SHRQconst [j-8] w) mem))
// cond: x.Uses == 1 && clobber(x) // cond: x.Uses == 1 && clobber(x)
// result: (MOVWstore [i-1] {s} p w0 mem) // result: (MOVWstore [i-1] {s} p w0 mem)
...@@ -7385,7 +7525,7 @@ func rewriteValueAMD64_OpAMD64MOVBstoreidx1_0(v *Value) bool { ...@@ -7385,7 +7525,7 @@ func rewriteValueAMD64_OpAMD64MOVBstoreidx1_0(v *Value) bool {
v.AddArg(mem) v.AddArg(mem)
return true return true
} }
// match: (MOVBstoreidx1 [i] {s} p idx (SHRQconst [8] w) x:(MOVBstoreidx1 [i-1] {s} p idx w mem)) // match: (MOVBstoreidx1 [i] {s} p idx (SHRWconst [8] w) x:(MOVBstoreidx1 [i-1] {s} p idx w mem))
// cond: x.Uses == 1 && clobber(x) // cond: x.Uses == 1 && clobber(x)
// result: (MOVWstoreidx1 [i-1] {s} p idx w mem) // result: (MOVWstoreidx1 [i-1] {s} p idx w mem)
for { for {
...@@ -7395,7 +7535,7 @@ func rewriteValueAMD64_OpAMD64MOVBstoreidx1_0(v *Value) bool { ...@@ -7395,7 +7535,7 @@ func rewriteValueAMD64_OpAMD64MOVBstoreidx1_0(v *Value) bool {
p := v.Args[0] p := v.Args[0]
idx := v.Args[1] idx := v.Args[1]
v_2 := v.Args[2] v_2 := v.Args[2]
if v_2.Op != OpAMD64SHRQconst { if v_2.Op != OpAMD64SHRWconst {
break break
} }
if v_2.AuxInt != 8 { if v_2.AuxInt != 8 {
...@@ -7435,9 +7575,9 @@ func rewriteValueAMD64_OpAMD64MOVBstoreidx1_0(v *Value) bool { ...@@ -7435,9 +7575,9 @@ func rewriteValueAMD64_OpAMD64MOVBstoreidx1_0(v *Value) bool {
v.AddArg(mem) v.AddArg(mem)
return true return true
} }
// match: (MOVBstoreidx1 [i] {s} p idx (SHRQconst [j] w) x:(MOVBstoreidx1 [i-1] {s} p idx w0:(SHRQconst [j-8] w) mem)) // match: (MOVBstoreidx1 [i] {s} p idx (SHRLconst [8] w) x:(MOVBstoreidx1 [i-1] {s} p idx w mem))
// cond: x.Uses == 1 && clobber(x) // cond: x.Uses == 1 && clobber(x)
// result: (MOVWstoreidx1 [i-1] {s} p idx w0 mem) // result: (MOVWstoreidx1 [i-1] {s} p idx w mem)
for { for {
i := v.AuxInt i := v.AuxInt
s := v.Aux s := v.Aux
...@@ -7445,10 +7585,12 @@ func rewriteValueAMD64_OpAMD64MOVBstoreidx1_0(v *Value) bool { ...@@ -7445,10 +7585,12 @@ func rewriteValueAMD64_OpAMD64MOVBstoreidx1_0(v *Value) bool {
p := v.Args[0] p := v.Args[0]
idx := v.Args[1] idx := v.Args[1]
v_2 := v.Args[2] v_2 := v.Args[2]
if v_2.Op != OpAMD64SHRQconst { if v_2.Op != OpAMD64SHRLconst {
break
}
if v_2.AuxInt != 8 {
break break
} }
j := v_2.AuxInt
w := v_2.Args[0] w := v_2.Args[0]
x := v.Args[3] x := v.Args[3]
if x.Op != OpAMD64MOVBstoreidx1 { if x.Op != OpAMD64MOVBstoreidx1 {
...@@ -7467,14 +7609,7 @@ func rewriteValueAMD64_OpAMD64MOVBstoreidx1_0(v *Value) bool { ...@@ -7467,14 +7609,7 @@ func rewriteValueAMD64_OpAMD64MOVBstoreidx1_0(v *Value) bool {
if idx != x.Args[1] { if idx != x.Args[1] {
break break
} }
w0 := x.Args[2] if w != x.Args[2] {
if w0.Op != OpAMD64SHRQconst {
break
}
if w0.AuxInt != j-8 {
break
}
if w != w0.Args[0] {
break break
} }
mem := x.Args[3] mem := x.Args[3]
...@@ -7486,108 +7621,268 @@ func rewriteValueAMD64_OpAMD64MOVBstoreidx1_0(v *Value) bool { ...@@ -7486,108 +7621,268 @@ func rewriteValueAMD64_OpAMD64MOVBstoreidx1_0(v *Value) bool {
v.Aux = s v.Aux = s
v.AddArg(p) v.AddArg(p)
v.AddArg(idx) v.AddArg(idx)
v.AddArg(w0) v.AddArg(w)
v.AddArg(mem) v.AddArg(mem)
return true return true
} }
return false // match: (MOVBstoreidx1 [i] {s} p idx (SHRQconst [8] w) x:(MOVBstoreidx1 [i-1] {s} p idx w mem))
}
func rewriteValueAMD64_OpAMD64MOVLQSX_0(v *Value) bool {
b := v.Block
_ = b
// match: (MOVLQSX x:(MOVLload [off] {sym} ptr mem))
// cond: x.Uses == 1 && clobber(x) // cond: x.Uses == 1 && clobber(x)
// result: @x.Block (MOVLQSXload <v.Type> [off] {sym} ptr mem) // result: (MOVWstoreidx1 [i-1] {s} p idx w mem)
for { for {
x := v.Args[0] i := v.AuxInt
if x.Op != OpAMD64MOVLload { s := v.Aux
_ = v.Args[3]
p := v.Args[0]
idx := v.Args[1]
v_2 := v.Args[2]
if v_2.Op != OpAMD64SHRQconst {
break break
} }
off := x.AuxInt if v_2.AuxInt != 8 {
sym := x.Aux
_ = x.Args[1]
ptr := x.Args[0]
mem := x.Args[1]
if !(x.Uses == 1 && clobber(x)) {
break break
} }
b = x.Block w := v_2.Args[0]
v0 := b.NewValue0(v.Pos, OpAMD64MOVLQSXload, v.Type) x := v.Args[3]
v.reset(OpCopy) if x.Op != OpAMD64MOVBstoreidx1 {
v.AddArg(v0)
v0.AuxInt = off
v0.Aux = sym
v0.AddArg(ptr)
v0.AddArg(mem)
return true
}
// match: (MOVLQSX x:(MOVQload [off] {sym} ptr mem))
// cond: x.Uses == 1 && clobber(x)
// result: @x.Block (MOVLQSXload <v.Type> [off] {sym} ptr mem)
for {
x := v.Args[0]
if x.Op != OpAMD64MOVQload {
break break
} }
off := x.AuxInt if x.AuxInt != i-1 {
sym := x.Aux
_ = x.Args[1]
ptr := x.Args[0]
mem := x.Args[1]
if !(x.Uses == 1 && clobber(x)) {
break break
} }
b = x.Block if x.Aux != s {
v0 := b.NewValue0(v.Pos, OpAMD64MOVLQSXload, v.Type)
v.reset(OpCopy)
v.AddArg(v0)
v0.AuxInt = off
v0.Aux = sym
v0.AddArg(ptr)
v0.AddArg(mem)
return true
}
// match: (MOVLQSX (ANDLconst [c] x))
// cond: c & 0x80000000 == 0
// result: (ANDLconst [c & 0x7fffffff] x)
for {
v_0 := v.Args[0]
if v_0.Op != OpAMD64ANDLconst {
break break
} }
c := v_0.AuxInt _ = x.Args[3]
x := v_0.Args[0] if p != x.Args[0] {
if !(c&0x80000000 == 0) {
break break
} }
v.reset(OpAMD64ANDLconst) if idx != x.Args[1] {
v.AuxInt = c & 0x7fffffff
v.AddArg(x)
return true
}
// match: (MOVLQSX (MOVLQSX x))
// cond:
// result: (MOVLQSX x)
for {
v_0 := v.Args[0]
if v_0.Op != OpAMD64MOVLQSX {
break break
} }
x := v_0.Args[0] if w != x.Args[2] {
v.reset(OpAMD64MOVLQSX)
v.AddArg(x)
return true
}
// match: (MOVLQSX (MOVWQSX x))
// cond:
// result: (MOVWQSX x)
for {
v_0 := v.Args[0]
if v_0.Op != OpAMD64MOVWQSX {
break break
} }
x := v_0.Args[0] mem := x.Args[3]
v.reset(OpAMD64MOVWQSX) if !(x.Uses == 1 && clobber(x)) {
break
}
v.reset(OpAMD64MOVWstoreidx1)
v.AuxInt = i - 1
v.Aux = s
v.AddArg(p)
v.AddArg(idx)
v.AddArg(w)
v.AddArg(mem)
return true
}
// match: (MOVBstoreidx1 [i] {s} p idx (SHRLconst [j] w) x:(MOVBstoreidx1 [i-1] {s} p idx w0:(SHRLconst [j-8] w) mem))
// cond: x.Uses == 1 && clobber(x)
// result: (MOVWstoreidx1 [i-1] {s} p idx w0 mem)
for {
i := v.AuxInt
s := v.Aux
_ = v.Args[3]
p := v.Args[0]
idx := v.Args[1]
v_2 := v.Args[2]
if v_2.Op != OpAMD64SHRLconst {
break
}
j := v_2.AuxInt
w := v_2.Args[0]
x := v.Args[3]
if x.Op != OpAMD64MOVBstoreidx1 {
break
}
if x.AuxInt != i-1 {
break
}
if x.Aux != s {
break
}
_ = x.Args[3]
if p != x.Args[0] {
break
}
if idx != x.Args[1] {
break
}
w0 := x.Args[2]
if w0.Op != OpAMD64SHRLconst {
break
}
if w0.AuxInt != j-8 {
break
}
if w != w0.Args[0] {
break
}
mem := x.Args[3]
if !(x.Uses == 1 && clobber(x)) {
break
}
v.reset(OpAMD64MOVWstoreidx1)
v.AuxInt = i - 1
v.Aux = s
v.AddArg(p)
v.AddArg(idx)
v.AddArg(w0)
v.AddArg(mem)
return true
}
// match: (MOVBstoreidx1 [i] {s} p idx (SHRQconst [j] w) x:(MOVBstoreidx1 [i-1] {s} p idx w0:(SHRQconst [j-8] w) mem))
// cond: x.Uses == 1 && clobber(x)
// result: (MOVWstoreidx1 [i-1] {s} p idx w0 mem)
for {
i := v.AuxInt
s := v.Aux
_ = v.Args[3]
p := v.Args[0]
idx := v.Args[1]
v_2 := v.Args[2]
if v_2.Op != OpAMD64SHRQconst {
break
}
j := v_2.AuxInt
w := v_2.Args[0]
x := v.Args[3]
if x.Op != OpAMD64MOVBstoreidx1 {
break
}
if x.AuxInt != i-1 {
break
}
if x.Aux != s {
break
}
_ = x.Args[3]
if p != x.Args[0] {
break
}
if idx != x.Args[1] {
break
}
w0 := x.Args[2]
if w0.Op != OpAMD64SHRQconst {
break
}
if w0.AuxInt != j-8 {
break
}
if w != w0.Args[0] {
break
}
mem := x.Args[3]
if !(x.Uses == 1 && clobber(x)) {
break
}
v.reset(OpAMD64MOVWstoreidx1)
v.AuxInt = i - 1
v.Aux = s
v.AddArg(p)
v.AddArg(idx)
v.AddArg(w0)
v.AddArg(mem)
return true
}
return false
}
func rewriteValueAMD64_OpAMD64MOVLQSX_0(v *Value) bool {
b := v.Block
_ = b
// match: (MOVLQSX x:(MOVLload [off] {sym} ptr mem))
// cond: x.Uses == 1 && clobber(x)
// result: @x.Block (MOVLQSXload <v.Type> [off] {sym} ptr mem)
for {
x := v.Args[0]
if x.Op != OpAMD64MOVLload {
break
}
off := x.AuxInt
sym := x.Aux
_ = x.Args[1]
ptr := x.Args[0]
mem := x.Args[1]
if !(x.Uses == 1 && clobber(x)) {
break
}
b = x.Block
v0 := b.NewValue0(v.Pos, OpAMD64MOVLQSXload, v.Type)
v.reset(OpCopy)
v.AddArg(v0)
v0.AuxInt = off
v0.Aux = sym
v0.AddArg(ptr)
v0.AddArg(mem)
return true
}
// match: (MOVLQSX x:(MOVQload [off] {sym} ptr mem))
// cond: x.Uses == 1 && clobber(x)
// result: @x.Block (MOVLQSXload <v.Type> [off] {sym} ptr mem)
for {
x := v.Args[0]
if x.Op != OpAMD64MOVQload {
break
}
off := x.AuxInt
sym := x.Aux
_ = x.Args[1]
ptr := x.Args[0]
mem := x.Args[1]
if !(x.Uses == 1 && clobber(x)) {
break
}
b = x.Block
v0 := b.NewValue0(v.Pos, OpAMD64MOVLQSXload, v.Type)
v.reset(OpCopy)
v.AddArg(v0)
v0.AuxInt = off
v0.Aux = sym
v0.AddArg(ptr)
v0.AddArg(mem)
return true
}
// match: (MOVLQSX (ANDLconst [c] x))
// cond: c & 0x80000000 == 0
// result: (ANDLconst [c & 0x7fffffff] x)
for {
v_0 := v.Args[0]
if v_0.Op != OpAMD64ANDLconst {
break
}
c := v_0.AuxInt
x := v_0.Args[0]
if !(c&0x80000000 == 0) {
break
}
v.reset(OpAMD64ANDLconst)
v.AuxInt = c & 0x7fffffff
v.AddArg(x)
return true
}
// match: (MOVLQSX (MOVLQSX x))
// cond:
// result: (MOVLQSX x)
for {
v_0 := v.Args[0]
if v_0.Op != OpAMD64MOVLQSX {
break
}
x := v_0.Args[0]
v.reset(OpAMD64MOVLQSX)
v.AddArg(x)
return true
}
// match: (MOVLQSX (MOVWQSX x))
// cond:
// result: (MOVWQSX x)
for {
v_0 := v.Args[0]
if v_0.Op != OpAMD64MOVWQSX {
break
}
x := v_0.Args[0]
v.reset(OpAMD64MOVWQSX)
v.AddArg(x) v.AddArg(x)
return true return true
} }
...@@ -13570,6 +13865,51 @@ func rewriteValueAMD64_OpAMD64MOVWstore_0(v *Value) bool { ...@@ -13570,6 +13865,51 @@ func rewriteValueAMD64_OpAMD64MOVWstore_0(v *Value) bool {
v.AddArg(mem) v.AddArg(mem)
return true return true
} }
// match: (MOVWstore [i] {s} p (SHRLconst [16] w) x:(MOVWstore [i-2] {s} p w mem))
// cond: x.Uses == 1 && clobber(x)
// result: (MOVLstore [i-2] {s} p w mem)
for {
i := v.AuxInt
s := v.Aux
_ = v.Args[2]
p := v.Args[0]
v_1 := v.Args[1]
if v_1.Op != OpAMD64SHRLconst {
break
}
if v_1.AuxInt != 16 {
break
}
w := v_1.Args[0]
x := v.Args[2]
if x.Op != OpAMD64MOVWstore {
break
}
if x.AuxInt != i-2 {
break
}
if x.Aux != s {
break
}
_ = x.Args[2]
if p != x.Args[0] {
break
}
if w != x.Args[1] {
break
}
mem := x.Args[2]
if !(x.Uses == 1 && clobber(x)) {
break
}
v.reset(OpAMD64MOVLstore)
v.AuxInt = i - 2
v.Aux = s
v.AddArg(p)
v.AddArg(w)
v.AddArg(mem)
return true
}
// match: (MOVWstore [i] {s} p (SHRQconst [16] w) x:(MOVWstore [i-2] {s} p w mem)) // match: (MOVWstore [i] {s} p (SHRQconst [16] w) x:(MOVWstore [i-2] {s} p w mem))
// cond: x.Uses == 1 && clobber(x) // cond: x.Uses == 1 && clobber(x)
// result: (MOVLstore [i-2] {s} p w mem) // result: (MOVLstore [i-2] {s} p w mem)
...@@ -13615,6 +13955,63 @@ func rewriteValueAMD64_OpAMD64MOVWstore_0(v *Value) bool { ...@@ -13615,6 +13955,63 @@ func rewriteValueAMD64_OpAMD64MOVWstore_0(v *Value) bool {
v.AddArg(mem) v.AddArg(mem)
return true return true
} }
return false
}
func rewriteValueAMD64_OpAMD64MOVWstore_10(v *Value) bool {
b := v.Block
_ = b
typ := &b.Func.Config.Types
_ = typ
// match: (MOVWstore [i] {s} p (SHRLconst [j] w) x:(MOVWstore [i-2] {s} p w0:(SHRLconst [j-16] w) mem))
// cond: x.Uses == 1 && clobber(x)
// result: (MOVLstore [i-2] {s} p w0 mem)
for {
i := v.AuxInt
s := v.Aux
_ = v.Args[2]
p := v.Args[0]
v_1 := v.Args[1]
if v_1.Op != OpAMD64SHRLconst {
break
}
j := v_1.AuxInt
w := v_1.Args[0]
x := v.Args[2]
if x.Op != OpAMD64MOVWstore {
break
}
if x.AuxInt != i-2 {
break
}
if x.Aux != s {
break
}
_ = x.Args[2]
if p != x.Args[0] {
break
}
w0 := x.Args[1]
if w0.Op != OpAMD64SHRLconst {
break
}
if w0.AuxInt != j-16 {
break
}
if w != w0.Args[0] {
break
}
mem := x.Args[2]
if !(x.Uses == 1 && clobber(x)) {
break
}
v.reset(OpAMD64MOVLstore)
v.AuxInt = i - 2
v.Aux = s
v.AddArg(p)
v.AddArg(w0)
v.AddArg(mem)
return true
}
// match: (MOVWstore [i] {s} p (SHRQconst [j] w) x:(MOVWstore [i-2] {s} p w0:(SHRQconst [j-16] w) mem)) // match: (MOVWstore [i] {s} p (SHRQconst [j] w) x:(MOVWstore [i-2] {s} p w0:(SHRQconst [j-16] w) mem))
// cond: x.Uses == 1 && clobber(x) // cond: x.Uses == 1 && clobber(x)
// result: (MOVLstore [i-2] {s} p w0 mem) // result: (MOVLstore [i-2] {s} p w0 mem)
...@@ -13665,13 +14062,6 @@ func rewriteValueAMD64_OpAMD64MOVWstore_0(v *Value) bool { ...@@ -13665,13 +14062,6 @@ func rewriteValueAMD64_OpAMD64MOVWstore_0(v *Value) bool {
v.AddArg(mem) v.AddArg(mem)
return true return true
} }
return false
}
func rewriteValueAMD64_OpAMD64MOVWstore_10(v *Value) bool {
b := v.Block
_ = b
typ := &b.Func.Config.Types
_ = typ
// match: (MOVWstore [i] {s} p x1:(MOVWload [j] {s2} p2 mem) mem2:(MOVWstore [i-2] {s} p x2:(MOVWload [j-2] {s2} p2 mem) mem)) // match: (MOVWstore [i] {s} p x1:(MOVWload [j] {s2} p2 mem) mem2:(MOVWstore [i-2] {s} p x2:(MOVWload [j-2] {s2} p2 mem) mem))
// cond: x1.Uses == 1 && x2.Uses == 1 && mem2.Uses == 1 && clobber(x1) && clobber(x2) && clobber(mem2) // cond: x1.Uses == 1 && x2.Uses == 1 && mem2.Uses == 1 && clobber(x1) && clobber(x2) && clobber(mem2)
// result: (MOVLstore [i-2] {s} p (MOVLload [j-2] {s2} p2 mem) mem) // result: (MOVLstore [i-2] {s} p (MOVLload [j-2] {s2} p2 mem) mem)
...@@ -14301,6 +14691,56 @@ func rewriteValueAMD64_OpAMD64MOVWstoreidx1_0(v *Value) bool { ...@@ -14301,6 +14691,56 @@ func rewriteValueAMD64_OpAMD64MOVWstoreidx1_0(v *Value) bool {
v.AddArg(mem) v.AddArg(mem)
return true return true
} }
// match: (MOVWstoreidx1 [i] {s} p idx (SHRLconst [16] w) x:(MOVWstoreidx1 [i-2] {s} p idx w mem))
// cond: x.Uses == 1 && clobber(x)
// result: (MOVLstoreidx1 [i-2] {s} p idx w mem)
for {
i := v.AuxInt
s := v.Aux
_ = v.Args[3]
p := v.Args[0]
idx := v.Args[1]
v_2 := v.Args[2]
if v_2.Op != OpAMD64SHRLconst {
break
}
if v_2.AuxInt != 16 {
break
}
w := v_2.Args[0]
x := v.Args[3]
if x.Op != OpAMD64MOVWstoreidx1 {
break
}
if x.AuxInt != i-2 {
break
}
if x.Aux != s {
break
}
_ = x.Args[3]
if p != x.Args[0] {
break
}
if idx != x.Args[1] {
break
}
if w != x.Args[2] {
break
}
mem := x.Args[3]
if !(x.Uses == 1 && clobber(x)) {
break
}
v.reset(OpAMD64MOVLstoreidx1)
v.AuxInt = i - 2
v.Aux = s
v.AddArg(p)
v.AddArg(idx)
v.AddArg(w)
v.AddArg(mem)
return true
}
// match: (MOVWstoreidx1 [i] {s} p idx (SHRQconst [16] w) x:(MOVWstoreidx1 [i-2] {s} p idx w mem)) // match: (MOVWstoreidx1 [i] {s} p idx (SHRQconst [16] w) x:(MOVWstoreidx1 [i-2] {s} p idx w mem))
// cond: x.Uses == 1 && clobber(x) // cond: x.Uses == 1 && clobber(x)
// result: (MOVLstoreidx1 [i-2] {s} p idx w mem) // result: (MOVLstoreidx1 [i-2] {s} p idx w mem)
...@@ -14351,6 +14791,61 @@ func rewriteValueAMD64_OpAMD64MOVWstoreidx1_0(v *Value) bool { ...@@ -14351,6 +14791,61 @@ func rewriteValueAMD64_OpAMD64MOVWstoreidx1_0(v *Value) bool {
v.AddArg(mem) v.AddArg(mem)
return true return true
} }
// match: (MOVWstoreidx1 [i] {s} p idx (SHRLconst [j] w) x:(MOVWstoreidx1 [i-2] {s} p idx w0:(SHRLconst [j-16] w) mem))
// cond: x.Uses == 1 && clobber(x)
// result: (MOVLstoreidx1 [i-2] {s} p idx w0 mem)
for {
i := v.AuxInt
s := v.Aux
_ = v.Args[3]
p := v.Args[0]
idx := v.Args[1]
v_2 := v.Args[2]
if v_2.Op != OpAMD64SHRLconst {
break
}
j := v_2.AuxInt
w := v_2.Args[0]
x := v.Args[3]
if x.Op != OpAMD64MOVWstoreidx1 {
break
}
if x.AuxInt != i-2 {
break
}
if x.Aux != s {
break
}
_ = x.Args[3]
if p != x.Args[0] {
break
}
if idx != x.Args[1] {
break
}
w0 := x.Args[2]
if w0.Op != OpAMD64SHRLconst {
break
}
if w0.AuxInt != j-16 {
break
}
if w != w0.Args[0] {
break
}
mem := x.Args[3]
if !(x.Uses == 1 && clobber(x)) {
break
}
v.reset(OpAMD64MOVLstoreidx1)
v.AuxInt = i - 2
v.Aux = s
v.AddArg(p)
v.AddArg(idx)
v.AddArg(w0)
v.AddArg(mem)
return true
}
// match: (MOVWstoreidx1 [i] {s} p idx (SHRQconst [j] w) x:(MOVWstoreidx1 [i-2] {s} p idx w0:(SHRQconst [j-16] w) mem)) // match: (MOVWstoreidx1 [i] {s} p idx (SHRQconst [j] w) x:(MOVWstoreidx1 [i-2] {s} p idx w0:(SHRQconst [j-16] w) mem))
// cond: x.Uses == 1 && clobber(x) // cond: x.Uses == 1 && clobber(x)
// result: (MOVLstoreidx1 [i-2] {s} p idx w0 mem) // result: (MOVLstoreidx1 [i-2] {s} p idx w0 mem)
...@@ -14467,6 +14962,59 @@ func rewriteValueAMD64_OpAMD64MOVWstoreidx2_0(v *Value) bool { ...@@ -14467,6 +14962,59 @@ func rewriteValueAMD64_OpAMD64MOVWstoreidx2_0(v *Value) bool {
v.AddArg(mem) v.AddArg(mem)
return true return true
} }
// match: (MOVWstoreidx2 [i] {s} p idx (SHRLconst [16] w) x:(MOVWstoreidx2 [i-2] {s} p idx w mem))
// cond: x.Uses == 1 && clobber(x)
// result: (MOVLstoreidx1 [i-2] {s} p (SHLQconst <idx.Type> [1] idx) w mem)
for {
i := v.AuxInt
s := v.Aux
_ = v.Args[3]
p := v.Args[0]
idx := v.Args[1]
v_2 := v.Args[2]
if v_2.Op != OpAMD64SHRLconst {
break
}
if v_2.AuxInt != 16 {
break
}
w := v_2.Args[0]
x := v.Args[3]
if x.Op != OpAMD64MOVWstoreidx2 {
break
}
if x.AuxInt != i-2 {
break
}
if x.Aux != s {
break
}
_ = x.Args[3]
if p != x.Args[0] {
break
}
if idx != x.Args[1] {
break
}
if w != x.Args[2] {
break
}
mem := x.Args[3]
if !(x.Uses == 1 && clobber(x)) {
break
}
v.reset(OpAMD64MOVLstoreidx1)
v.AuxInt = i - 2
v.Aux = s
v.AddArg(p)
v0 := b.NewValue0(v.Pos, OpAMD64SHLQconst, idx.Type)
v0.AuxInt = 1
v0.AddArg(idx)
v.AddArg(v0)
v.AddArg(w)
v.AddArg(mem)
return true
}
// match: (MOVWstoreidx2 [i] {s} p idx (SHRQconst [16] w) x:(MOVWstoreidx2 [i-2] {s} p idx w mem)) // match: (MOVWstoreidx2 [i] {s} p idx (SHRQconst [16] w) x:(MOVWstoreidx2 [i-2] {s} p idx w mem))
// cond: x.Uses == 1 && clobber(x) // cond: x.Uses == 1 && clobber(x)
// result: (MOVLstoreidx1 [i-2] {s} p (SHLQconst <idx.Type> [1] idx) w mem) // result: (MOVLstoreidx1 [i-2] {s} p (SHLQconst <idx.Type> [1] idx) w mem)
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment