Commit 39fefa07 authored by Chad Rosier's avatar Chad Rosier Committed by Cherry Zhang

cmd/compile/internal/ssa: combine consecutive BigEndian stores on arm64

This optimization mirrors that which is already implemented for AMD64.  The
optimization specifically targets the binary.BigEndian.PutUint* functions.

encoding-binary results on Amberwing:
name                   old time/op    new time/op    delta
ReadSlice1000Int32s      9.83µs ± 2%    9.78µs ± 1%     ~     (p=0.362 n=9+10)
ReadStruct               5.24µs ± 3%    5.19µs ± 2%     ~     (p=0.285 n=10+10)
ReadInts                 8.35µs ± 8%    8.44µs ± 3%     ~     (p=0.323 n=10+10)
WriteInts                3.38µs ± 3%    3.44µs ±15%     ~     (p=0.921 n=9+10)
WriteSlice1000Int32s     11.4µs ± 6%    10.2µs ± 4%   -9.94%  (p=0.000 n=10+10)
PutUint16                 510ns ±12%     500ns ± 0%     ~     (p=0.586 n=10+7)
PutUint32                 530ns ±15%     490ns ±12%     ~     (p=0.086 n=10+10)
PutUint64                 550ns ± 0%     470ns ± 6%  -14.52%  (p=0.000 n=7+10)
LittleEndianPutUint16     500ns ± 0%     475ns ±16%     ~     (p=0.120 n=7+10)
LittleEndianPutUint32     450ns ± 0%     517ns ±16%  +14.81%  (p=0.004 n=8+9)
LittleEndianPutUint64     550ns ± 0%     485ns ±13%  -11.82%  (p=0.000 n=8+10)
PutUvarint32              685ns ±12%     622ns ± 4%   -9.17%  (p=0.005 n=10+9)
PutUvarint64              735ns ± 9%     711ns ± 9%     ~     (p=0.272 n=10+9)
[Geo mean]               1.47µs         1.42µs        -3.87%

name                   old speed      new speed      delta
ReadSlice1000Int32s     407MB/s ± 2%   409MB/s ± 1%     ~     (p=0.362 n=9+10)
ReadStruct             14.3MB/s ± 3%  14.4MB/s ± 2%     ~     (p=0.250 n=10+10)
ReadInts               3.59MB/s ± 7%  3.56MB/s ± 4%     ~     (p=0.340 n=10+10)
WriteInts              8.87MB/s ± 3%  8.74MB/s ±13%     ~     (p=0.890 n=9+10)
WriteSlice1000Int32s    352MB/s ± 6%   391MB/s ± 4%  +11.03%  (p=0.000 n=10+10)
PutUint16              3.95MB/s ±13%  4.00MB/s ± 0%     ~     (p=0.312 n=10+7)
PutUint32              7.62MB/s ±17%  8.21MB/s ±11%     ~     (p=0.086 n=10+10)
PutUint64              14.6MB/s ± 0%  17.1MB/s ± 6%  +17.28%  (p=0.000 n=7+10)
LittleEndianPutUint16  4.00MB/s ± 0%  4.23MB/s ±18%     ~     (p=0.176 n=7+10)
LittleEndianPutUint32  8.89MB/s ± 0%  7.64MB/s ±20%  -14.05%  (p=0.001 n=8+10)
LittleEndianPutUint64  14.6MB/s ± 0%  16.6MB/s ±12%  +13.86%  (p=0.000 n=8+10)
PutUvarint32           5.86MB/s ±14%  6.44MB/s ± 5%   +9.84%  (p=0.006 n=10+9)
PutUvarint64           10.9MB/s ± 8%  11.3MB/s ± 9%     ~     (p=0.373 n=10+9)
[Geo mean]             14.2MB/s       14.8MB/s        +3.93%

go1 results on Amberwing:
RegexpMatchEasy0_32       254ns ± 0%     254ns ± 0%    ~     (all equal)
RegexpMatchEasy0_1K       547ns ± 0%     547ns ± 0%    ~     (all equal)
RegexpMatchEasy1_32       252ns ± 0%     253ns ± 1%    ~     (p=0.294 n=8+10)
RegexpMatchEasy1_1K       782ns ± 0%     783ns ± 1%    ~     (p=0.529 n=8+9)
RegexpMatchMedium_32      316ns ± 0%     316ns ± 0%    ~     (all equal)
RegexpMatchMedium_1K     51.5µs ± 0%    51.5µs ± 0%    ~     (p=0.645 n=10+9)
RegexpMatchHard_32       2.75µs ± 0%    2.75µs ± 0%    ~     (all equal)
RegexpMatchHard_1K       78.7µs ± 0%    78.7µs ± 0%    ~     (p=0.754 n=10+10)
FmtFprintfEmpty          57.0ns ± 0%    57.0ns ± 0%    ~     (all equal)
FmtFprintfString          111ns ± 0%     111ns ± 0%    ~     (all equal)
FmtFprintfInt             114ns ± 0%     114ns ± 1%    ~     (p=0.065 n=9+10)
FmtFprintfIntInt          182ns ± 0%     178ns ± 0%  -2.20%  (p=0.000 n=10+10)
FmtFprintfPrefixedInt     225ns ± 0%     227ns ± 0%  +0.89%  (p=0.000 n=10+10)
FmtFprintfFloat           307ns ± 0%     307ns ± 0%    ~     (p=1.000 n=9+9)
FmtManyArgs               697ns ± 0%     701ns ± 2%    ~     (p=0.108 n=9+10)
Gzip                      436ms ± 0%     437ms ± 0%  +0.23%  (p=0.000 n=10+8)
HTTPClientServer         88.8µs ± 2%    89.6µs ± 1%  +0.98%  (p=0.019 n=10+10)
JSONEncode               20.1ms ± 1%    20.2ms ± 1%  +0.48%  (p=0.007 n=10+10)
JSONDecode               94.7ms ± 1%    94.1ms ± 0%  -0.62%  (p=0.000 n=10+9)
GobDecode                12.6ms ± 2%    12.6ms ± 1%    ~     (p=0.360 n=10+8)
GobEncode                12.0ms ± 1%    11.9ms ± 1%  -1.34%  (p=0.000 n=10+10)
Mandelbrot200            5.05ms ± 0%    5.05ms ± 0%  +0.12%  (p=0.000 n=10+10)
TimeParse                 448ns ± 0%     448ns ± 0%    ~     (p=0.529 n=8+9)
TimeFormat                501ns ± 1%     501ns ± 1%    ~     (p=1.000 n=10+9)
Template                 90.6ms ± 0%    89.1ms ± 0%  -1.67%  (p=0.000 n=9+9)
GoParse                  6.01ms ± 0%    5.96ms ± 0%  -0.83%  (p=0.000 n=10+9)
BinaryTree17              11.7s ± 0%     11.7s ± 0%    ~     (p=0.481 n=10+10)
Revcomp                   675ms ± 0%     675ms ± 0%    ~     (p=0.436 n=9+9)
Fannkuch11                3.26s ± 0%     3.27s ± 1%  +0.57%  (p=0.000 n=10+10)
[Geo mean]               67.4µs         67.3µs       -0.10%

name                   old speed      new speed      delta
RegexpMatchEasy0_32     126MB/s ± 0%   126MB/s ± 0%    ~     (p=0.353 n=10+7)
RegexpMatchEasy0_1K    1.87GB/s ± 0%  1.87GB/s ± 0%    ~     (p=0.275 n=8+10)
RegexpMatchEasy1_32     127MB/s ± 0%   126MB/s ± 1%    ~     (p=0.110 n=8+10)
RegexpMatchEasy1_1K    1.31GB/s ± 0%  1.31GB/s ± 1%    ~     (p=0.079 n=8+10)
RegexpMatchMedium_32   3.16MB/s ± 0%  3.16MB/s ± 0%    ~     (all equal)
RegexpMatchMedium_1K   19.9MB/s ± 0%  19.9MB/s ± 0%    ~     (p=0.889 n=10+9)
RegexpMatchHard_32     11.7MB/s ± 0%  11.7MB/s ± 0%    ~     (all equal)
RegexpMatchHard_1K     13.0MB/s ± 0%  13.0MB/s ± 0%    ~     (p=1.000 n=10+10)
Gzip                   44.5MB/s ± 0%  44.4MB/s ± 0%  -0.22%  (p=0.000 n=10+8)
JSONEncode             96.6MB/s ± 1%  96.1MB/s ± 1%  -0.48%  (p=0.007 n=10+10)
JSONDecode             20.5MB/s ± 1%  20.6MB/s ± 0%  +0.63%  (p=0.000 n=10+9)
GobDecode              61.0MB/s ± 2%  61.1MB/s ± 1%    ~     (p=0.372 n=10+8)
GobEncode              63.8MB/s ± 1%  64.7MB/s ± 1%  +1.36%  (p=0.000 n=10+10)
Template               21.4MB/s ± 0%  21.8MB/s ± 0%  +1.69%  (p=0.000 n=9+9)
GoParse                9.63MB/s ± 0%  9.71MB/s ± 0%  +0.84%  (p=0.000 n=9+8)
Revcomp                 377MB/s ± 0%   376MB/s ± 0%    ~     (p=0.399 n=9+9)
[Geo mean]             56.2MB/s       56.3MB/s       +0.20%

Change-Id: Ic915373f5ef512f9fbc45745860e5db7f6de6286
Reviewed-on: https://go-review.googlesource.com/97755
Run-TryBot: Cherry Zhang <cherryyz@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: default avatarCherry Zhang <cherryyz@google.com>
parent 93665c0d
......@@ -3318,6 +3318,33 @@ var linuxARM64Tests = []*asmTest{
pos: []string{"MOVD"},
neg: []string{"MOVB", "MOVH", "MOVW"},
},
{
fn: `
func $(b []byte, v uint16) {
binary.BigEndian.PutUint16(b, v)
}
`,
pos: []string{"MOVH"},
neg: []string{"MOVB"},
},
{
fn: `
func $(b []byte, v uint32) {
binary.BigEndian.PutUint32(b, v)
}
`,
pos: []string{"MOVW"},
neg: []string{"MOVB", "MOVH"},
},
{
fn: `
func $(b []byte, v uint64) {
binary.BigEndian.PutUint64(b, v)
}
`,
pos: []string{"MOVD"},
neg: []string{"MOVB", "MOVH", "MOVW"},
},
}
var linuxMIPSTests = []*asmTest{
......
......@@ -1545,6 +1545,63 @@
&& isSamePtr(ptr0, ptr1)
&& clobber(x)
-> (MOVDstore [i-4] {s} ptr0 w0 mem)
(MOVBstore [i] {s} ptr w
x0:(MOVBstore [i-1] {s} ptr (SRLconst [8] w)
x1:(MOVBstore [i-2] {s} ptr (SRLconst [16] w)
x2:(MOVBstore [i-3] {s} ptr (SRLconst [24] w)
x3:(MOVBstore [i-4] {s} ptr (SRLconst [32] w)
x4:(MOVBstore [i-5] {s} ptr (SRLconst [40] w)
x5:(MOVBstore [i-6] {s} ptr (SRLconst [48] w)
x6:(MOVBstore [i-7] {s} ptr (SRLconst [56] w) mem))))))))
&& x0.Uses == 1
&& x1.Uses == 1
&& x2.Uses == 1
&& x3.Uses == 1
&& x4.Uses == 1
&& x5.Uses == 1
&& x6.Uses == 1
&& clobber(x0)
&& clobber(x1)
&& clobber(x2)
&& clobber(x3)
&& clobber(x4)
&& clobber(x5)
&& clobber(x6)
-> (MOVDstore [i-7] {s} ptr (REV <w.Type> w) mem)
(MOVBstore [i] {s} ptr w
x0:(MOVBstore [i-1] {s} ptr (SRLconst [8] (MOVWUreg w))
x1:(MOVBstore [i-2] {s} ptr (SRLconst [16] (MOVWUreg w))
x2:(MOVBstore [i-3] {s} ptr (SRLconst [24] (MOVWUreg w)) mem))))
&& x0.Uses == 1
&& x1.Uses == 1
&& x2.Uses == 1
&& clobber(x0)
&& clobber(x1)
&& clobber(x2)
-> (MOVWstore [i-3] {s} ptr (REVW <w.Type> w) mem)
(MOVBstore [i] {s} ptr w
x0:(MOVBstore [i-1] {s} ptr (SRLconst [8] w)
x1:(MOVBstore [i-2] {s} ptr (SRLconst [16] w)
x2:(MOVBstore [i-3] {s} ptr (SRLconst [24] w) mem))))
&& x0.Uses == 1
&& x1.Uses == 1
&& x2.Uses == 1
&& clobber(x0)
&& clobber(x1)
&& clobber(x2)
-> (MOVWstore [i-3] {s} ptr (REVW <w.Type> w) mem)
(MOVBstore [i] {s} ptr w x:(MOVBstore [i-1] {s} ptr (SRLconst [8] w) mem))
&& x.Uses == 1
&& clobber(x)
-> (MOVHstore [i-1] {s} ptr (REV16W <w.Type> w) mem)
(MOVBstore [i] {s} ptr w x:(MOVBstore [i-1] {s} ptr (SRLconst [8] (MOVHUreg w)) mem))
&& x.Uses == 1
&& clobber(x)
-> (MOVHstore [i-1] {s} ptr (REV16W <w.Type> w) mem)
(MOVBstore [i] {s} ptr w x:(MOVBstore [i-1] {s} ptr (SRLconst [8] (MOVWUreg w)) mem))
&& x.Uses == 1
&& clobber(x)
-> (MOVHstore [i-1] {s} ptr (REV16W <w.Type> w) mem)
// FP simplification
(FNEGS (FMULS x y)) -> (FNMULS x y)
......
......@@ -6125,6 +6125,8 @@ func rewriteValueARM64_OpARM64MOVBstore_0(v *Value) bool {
return false
}
func rewriteValueARM64_OpARM64MOVBstore_10(v *Value) bool {
b := v.Block
_ = b
// match: (MOVBstore [i] {s} ptr0 (SRLconst [8] (MOVHUreg w)) x:(MOVBstore [i-1] {s} ptr1 w mem))
// cond: x.Uses == 1 && isSamePtr(ptr0, ptr1) && clobber(x)
// result: (MOVHstore [i-1] {s} ptr0 w mem)
......@@ -6323,6 +6325,548 @@ func rewriteValueARM64_OpARM64MOVBstore_10(v *Value) bool {
v.AddArg(mem)
return true
}
// match: (MOVBstore [i] {s} ptr w x0:(MOVBstore [i-1] {s} ptr (SRLconst [8] w) x1:(MOVBstore [i-2] {s} ptr (SRLconst [16] w) x2:(MOVBstore [i-3] {s} ptr (SRLconst [24] w) x3:(MOVBstore [i-4] {s} ptr (SRLconst [32] w) x4:(MOVBstore [i-5] {s} ptr (SRLconst [40] w) x5:(MOVBstore [i-6] {s} ptr (SRLconst [48] w) x6:(MOVBstore [i-7] {s} ptr (SRLconst [56] w) mem))))))))
// cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)
// result: (MOVDstore [i-7] {s} ptr (REV <w.Type> w) mem)
for {
i := v.AuxInt
s := v.Aux
_ = v.Args[2]
ptr := v.Args[0]
w := v.Args[1]
x0 := v.Args[2]
if x0.Op != OpARM64MOVBstore {
break
}
if x0.AuxInt != i-1 {
break
}
if x0.Aux != s {
break
}
_ = x0.Args[2]
if ptr != x0.Args[0] {
break
}
x0_1 := x0.Args[1]
if x0_1.Op != OpARM64SRLconst {
break
}
if x0_1.AuxInt != 8 {
break
}
if w != x0_1.Args[0] {
break
}
x1 := x0.Args[2]
if x1.Op != OpARM64MOVBstore {
break
}
if x1.AuxInt != i-2 {
break
}
if x1.Aux != s {
break
}
_ = x1.Args[2]
if ptr != x1.Args[0] {
break
}
x1_1 := x1.Args[1]
if x1_1.Op != OpARM64SRLconst {
break
}
if x1_1.AuxInt != 16 {
break
}
if w != x1_1.Args[0] {
break
}
x2 := x1.Args[2]
if x2.Op != OpARM64MOVBstore {
break
}
if x2.AuxInt != i-3 {
break
}
if x2.Aux != s {
break
}
_ = x2.Args[2]
if ptr != x2.Args[0] {
break
}
x2_1 := x2.Args[1]
if x2_1.Op != OpARM64SRLconst {
break
}
if x2_1.AuxInt != 24 {
break
}
if w != x2_1.Args[0] {
break
}
x3 := x2.Args[2]
if x3.Op != OpARM64MOVBstore {
break
}
if x3.AuxInt != i-4 {
break
}
if x3.Aux != s {
break
}
_ = x3.Args[2]
if ptr != x3.Args[0] {
break
}
x3_1 := x3.Args[1]
if x3_1.Op != OpARM64SRLconst {
break
}
if x3_1.AuxInt != 32 {
break
}
if w != x3_1.Args[0] {
break
}
x4 := x3.Args[2]
if x4.Op != OpARM64MOVBstore {
break
}
if x4.AuxInt != i-5 {
break
}
if x4.Aux != s {
break
}
_ = x4.Args[2]
if ptr != x4.Args[0] {
break
}
x4_1 := x4.Args[1]
if x4_1.Op != OpARM64SRLconst {
break
}
if x4_1.AuxInt != 40 {
break
}
if w != x4_1.Args[0] {
break
}
x5 := x4.Args[2]
if x5.Op != OpARM64MOVBstore {
break
}
if x5.AuxInt != i-6 {
break
}
if x5.Aux != s {
break
}
_ = x5.Args[2]
if ptr != x5.Args[0] {
break
}
x5_1 := x5.Args[1]
if x5_1.Op != OpARM64SRLconst {
break
}
if x5_1.AuxInt != 48 {
break
}
if w != x5_1.Args[0] {
break
}
x6 := x5.Args[2]
if x6.Op != OpARM64MOVBstore {
break
}
if x6.AuxInt != i-7 {
break
}
if x6.Aux != s {
break
}
_ = x6.Args[2]
if ptr != x6.Args[0] {
break
}
x6_1 := x6.Args[1]
if x6_1.Op != OpARM64SRLconst {
break
}
if x6_1.AuxInt != 56 {
break
}
if w != x6_1.Args[0] {
break
}
mem := x6.Args[2]
if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
break
}
v.reset(OpARM64MOVDstore)
v.AuxInt = i - 7
v.Aux = s
v.AddArg(ptr)
v0 := b.NewValue0(v.Pos, OpARM64REV, w.Type)
v0.AddArg(w)
v.AddArg(v0)
v.AddArg(mem)
return true
}
// match: (MOVBstore [i] {s} ptr w x0:(MOVBstore [i-1] {s} ptr (SRLconst [8] (MOVWUreg w)) x1:(MOVBstore [i-2] {s} ptr (SRLconst [16] (MOVWUreg w)) x2:(MOVBstore [i-3] {s} ptr (SRLconst [24] (MOVWUreg w)) mem))))
// cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2)
// result: (MOVWstore [i-3] {s} ptr (REVW <w.Type> w) mem)
for {
i := v.AuxInt
s := v.Aux
_ = v.Args[2]
ptr := v.Args[0]
w := v.Args[1]
x0 := v.Args[2]
if x0.Op != OpARM64MOVBstore {
break
}
if x0.AuxInt != i-1 {
break
}
if x0.Aux != s {
break
}
_ = x0.Args[2]
if ptr != x0.Args[0] {
break
}
x0_1 := x0.Args[1]
if x0_1.Op != OpARM64SRLconst {
break
}
if x0_1.AuxInt != 8 {
break
}
x0_1_0 := x0_1.Args[0]
if x0_1_0.Op != OpARM64MOVWUreg {
break
}
if w != x0_1_0.Args[0] {
break
}
x1 := x0.Args[2]
if x1.Op != OpARM64MOVBstore {
break
}
if x1.AuxInt != i-2 {
break
}
if x1.Aux != s {
break
}
_ = x1.Args[2]
if ptr != x1.Args[0] {
break
}
x1_1 := x1.Args[1]
if x1_1.Op != OpARM64SRLconst {
break
}
if x1_1.AuxInt != 16 {
break
}
x1_1_0 := x1_1.Args[0]
if x1_1_0.Op != OpARM64MOVWUreg {
break
}
if w != x1_1_0.Args[0] {
break
}
x2 := x1.Args[2]
if x2.Op != OpARM64MOVBstore {
break
}
if x2.AuxInt != i-3 {
break
}
if x2.Aux != s {
break
}
_ = x2.Args[2]
if ptr != x2.Args[0] {
break
}
x2_1 := x2.Args[1]
if x2_1.Op != OpARM64SRLconst {
break
}
if x2_1.AuxInt != 24 {
break
}
x2_1_0 := x2_1.Args[0]
if x2_1_0.Op != OpARM64MOVWUreg {
break
}
if w != x2_1_0.Args[0] {
break
}
mem := x2.Args[2]
if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2)) {
break
}
v.reset(OpARM64MOVWstore)
v.AuxInt = i - 3
v.Aux = s
v.AddArg(ptr)
v0 := b.NewValue0(v.Pos, OpARM64REVW, w.Type)
v0.AddArg(w)
v.AddArg(v0)
v.AddArg(mem)
return true
}
// match: (MOVBstore [i] {s} ptr w x0:(MOVBstore [i-1] {s} ptr (SRLconst [8] w) x1:(MOVBstore [i-2] {s} ptr (SRLconst [16] w) x2:(MOVBstore [i-3] {s} ptr (SRLconst [24] w) mem))))
// cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2)
// result: (MOVWstore [i-3] {s} ptr (REVW <w.Type> w) mem)
for {
i := v.AuxInt
s := v.Aux
_ = v.Args[2]
ptr := v.Args[0]
w := v.Args[1]
x0 := v.Args[2]
if x0.Op != OpARM64MOVBstore {
break
}
if x0.AuxInt != i-1 {
break
}
if x0.Aux != s {
break
}
_ = x0.Args[2]
if ptr != x0.Args[0] {
break
}
x0_1 := x0.Args[1]
if x0_1.Op != OpARM64SRLconst {
break
}
if x0_1.AuxInt != 8 {
break
}
if w != x0_1.Args[0] {
break
}
x1 := x0.Args[2]
if x1.Op != OpARM64MOVBstore {
break
}
if x1.AuxInt != i-2 {
break
}
if x1.Aux != s {
break
}
_ = x1.Args[2]
if ptr != x1.Args[0] {
break
}
x1_1 := x1.Args[1]
if x1_1.Op != OpARM64SRLconst {
break
}
if x1_1.AuxInt != 16 {
break
}
if w != x1_1.Args[0] {
break
}
x2 := x1.Args[2]
if x2.Op != OpARM64MOVBstore {
break
}
if x2.AuxInt != i-3 {
break
}
if x2.Aux != s {
break
}
_ = x2.Args[2]
if ptr != x2.Args[0] {
break
}
x2_1 := x2.Args[1]
if x2_1.Op != OpARM64SRLconst {
break
}
if x2_1.AuxInt != 24 {
break
}
if w != x2_1.Args[0] {
break
}
mem := x2.Args[2]
if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2)) {
break
}
v.reset(OpARM64MOVWstore)
v.AuxInt = i - 3
v.Aux = s
v.AddArg(ptr)
v0 := b.NewValue0(v.Pos, OpARM64REVW, w.Type)
v0.AddArg(w)
v.AddArg(v0)
v.AddArg(mem)
return true
}
// match: (MOVBstore [i] {s} ptr w x:(MOVBstore [i-1] {s} ptr (SRLconst [8] w) mem))
// cond: x.Uses == 1 && clobber(x)
// result: (MOVHstore [i-1] {s} ptr (REV16W <w.Type> w) mem)
for {
i := v.AuxInt
s := v.Aux
_ = v.Args[2]
ptr := v.Args[0]
w := v.Args[1]
x := v.Args[2]
if x.Op != OpARM64MOVBstore {
break
}
if x.AuxInt != i-1 {
break
}
if x.Aux != s {
break
}
_ = x.Args[2]
if ptr != x.Args[0] {
break
}
x_1 := x.Args[1]
if x_1.Op != OpARM64SRLconst {
break
}
if x_1.AuxInt != 8 {
break
}
if w != x_1.Args[0] {
break
}
mem := x.Args[2]
if !(x.Uses == 1 && clobber(x)) {
break
}
v.reset(OpARM64MOVHstore)
v.AuxInt = i - 1
v.Aux = s
v.AddArg(ptr)
v0 := b.NewValue0(v.Pos, OpARM64REV16W, w.Type)
v0.AddArg(w)
v.AddArg(v0)
v.AddArg(mem)
return true
}
// match: (MOVBstore [i] {s} ptr w x:(MOVBstore [i-1] {s} ptr (SRLconst [8] (MOVHUreg w)) mem))
// cond: x.Uses == 1 && clobber(x)
// result: (MOVHstore [i-1] {s} ptr (REV16W <w.Type> w) mem)
for {
i := v.AuxInt
s := v.Aux
_ = v.Args[2]
ptr := v.Args[0]
w := v.Args[1]
x := v.Args[2]
if x.Op != OpARM64MOVBstore {
break
}
if x.AuxInt != i-1 {
break
}
if x.Aux != s {
break
}
_ = x.Args[2]
if ptr != x.Args[0] {
break
}
x_1 := x.Args[1]
if x_1.Op != OpARM64SRLconst {
break
}
if x_1.AuxInt != 8 {
break
}
x_1_0 := x_1.Args[0]
if x_1_0.Op != OpARM64MOVHUreg {
break
}
if w != x_1_0.Args[0] {
break
}
mem := x.Args[2]
if !(x.Uses == 1 && clobber(x)) {
break
}
v.reset(OpARM64MOVHstore)
v.AuxInt = i - 1
v.Aux = s
v.AddArg(ptr)
v0 := b.NewValue0(v.Pos, OpARM64REV16W, w.Type)
v0.AddArg(w)
v.AddArg(v0)
v.AddArg(mem)
return true
}
// match: (MOVBstore [i] {s} ptr w x:(MOVBstore [i-1] {s} ptr (SRLconst [8] (MOVWUreg w)) mem))
// cond: x.Uses == 1 && clobber(x)
// result: (MOVHstore [i-1] {s} ptr (REV16W <w.Type> w) mem)
for {
i := v.AuxInt
s := v.Aux
_ = v.Args[2]
ptr := v.Args[0]
w := v.Args[1]
x := v.Args[2]
if x.Op != OpARM64MOVBstore {
break
}
if x.AuxInt != i-1 {
break
}
if x.Aux != s {
break
}
_ = x.Args[2]
if ptr != x.Args[0] {
break
}
x_1 := x.Args[1]
if x_1.Op != OpARM64SRLconst {
break
}
if x_1.AuxInt != 8 {
break
}
x_1_0 := x_1.Args[0]
if x_1_0.Op != OpARM64MOVWUreg {
break
}
if w != x_1_0.Args[0] {
break
}
mem := x.Args[2]
if !(x.Uses == 1 && clobber(x)) {
break
}
v.reset(OpARM64MOVHstore)
v.AuxInt = i - 1
v.Aux = s
v.AddArg(ptr)
v0 := b.NewValue0(v.Pos, OpARM64REV16W, w.Type)
v0.AddArg(w)
v.AddArg(v0)
v.AddArg(mem)
return true
}
return false
}
func rewriteValueARM64_OpARM64MOVBstorezero_0(v *Value) bool {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment