Commit 77ba071e authored by Chad Rosier, committed by Cherry Zhang

cmd/compile/internal/ssa: combine consecutive LittleEndian stores on arm64

This optimization mirrors that which is already implemented for AMD64.  The
optimization specifically targets the binary.LittleEndian.PutUint* functions.

encoding/binary results on Amberwing:
name                   old time/op    new time/op    delta
ReadSlice1000Int32s      9.67µs ± 1%    9.64µs ± 1%     ~     (p=0.185 n=9+9)
ReadStruct               5.24µs ± 2%    5.36µs ± 2%   +2.24%  (p=0.002 n=10+8)
ReadInts                 8.69µs ± 5%    8.88µs ± 5%     ~     (p=0.083 n=10+10)
WriteInts                3.90µs ±10%    3.71µs ± 9%     ~     (p=0.077 n=10+10)
WriteSlice1000Int32s     10.9µs ± 1%    10.9µs ± 1%     ~     (p=0.701 n=9+9)
PutUint16                 572ns ±14%     505ns ±11%  -11.75%  (p=0.006 n=9+10)
PutUint32                 550ns ±18%     540ns ±11%     ~     (p=0.692 n=10+10)
PutUint64                 565ns ±15%     540ns ±17%     ~     (p=0.248 n=10+10)
LittleEndianPutUint16     540ns ±11%     500ns ±10%     ~     (p=0.094 n=10+10)
LittleEndianPutUint32     520ns ±15%     480ns ±15%     ~     (p=0.087 n=10+10)
LittleEndianPutUint64     505ns ±29%     470ns ±17%     ~     (p=0.208 n=10+10)
PutUvarint32              700ns ±21%     635ns ±10%   -9.29%  (p=0.028 n=10+10)
PutUvarint64              740ns ± 8%     740ns ± 8%     ~     (p=0.713 n=10+10)
[Geo mean]               1.53µs         1.47µs        -3.93%

name                   old speed      new speed      delta
ReadSlice1000Int32s     414MB/s ± 1%   415MB/s ± 1%     ~     (p=0.185 n=9+9)
ReadStruct             14.3MB/s ± 2%  14.0MB/s ± 2%   -2.21%  (p=0.000 n=10+8)
ReadInts               3.45MB/s ± 4%  3.38MB/s ± 6%     ~     (p=0.085 n=10+10)
WriteInts              7.71MB/s ± 9%  8.09MB/s ± 8%   +4.93%  (p=0.048 n=10+10)
WriteSlice1000Int32s    367MB/s ± 1%   366MB/s ± 1%     ~     (p=0.701 n=9+9)
PutUint16              3.51MB/s ±14%  3.99MB/s ±11%  +13.47%  (p=0.009 n=9+10)
PutUint32              7.35MB/s ±21%  7.44MB/s ±10%     ~     (p=0.692 n=10+10)
PutUint64              14.3MB/s ±14%  15.0MB/s ±19%     ~     (p=0.248 n=10+10)
LittleEndianPutUint16  3.72MB/s ±11%  4.03MB/s ±10%     ~     (p=0.094 n=10+10)
LittleEndianPutUint32  7.75MB/s ±15%  8.39MB/s ±13%     ~     (p=0.087 n=10+10)
LittleEndianPutUint64  16.1MB/s ±23%  17.2MB/s ±16%     ~     (p=0.208 n=10+10)
PutUvarint32           5.76MB/s ±18%  6.32MB/s ±10%   +9.72%  (p=0.028 n=10+10)
PutUvarint64           10.8MB/s ± 8%  10.8MB/s ± 8%     ~     (p=0.713 n=10+10)
[Geo mean]             13.7MB/s       14.3MB/s        +4.02%

go1 results on Amberwing:
name                   old time/op    new time/op    delta
RegexpMatchEasy0_32       249ns ± 0%     249ns ± 0%    ~     (p=0.087 n=10+10)
RegexpMatchEasy0_1K       584ns ± 0%     584ns ± 0%    ~     (all equal)
RegexpMatchEasy1_32       246ns ± 0%     246ns ± 0%    ~     (p=1.000 n=10+10)
RegexpMatchEasy1_1K       806ns ± 0%     806ns ± 0%    ~     (p=0.706 n=10+9)
RegexpMatchMedium_32      314ns ± 0%     314ns ± 0%    ~     (all equal)
RegexpMatchMedium_1K     52.1µs ± 0%    52.1µs ± 0%    ~     (p=0.245 n=10+8)
RegexpMatchHard_32       2.75µs ± 1%    2.75µs ± 1%    ~     (p=0.690 n=10+10)
RegexpMatchHard_1K       78.9µs ± 0%    78.9µs ± 1%    ~     (p=0.295 n=9+9)
FmtFprintfEmpty          58.5ns ± 0%    58.5ns ± 0%    ~     (all equal)
FmtFprintfString          112ns ± 0%     112ns ± 0%    ~     (all equal)
FmtFprintfInt             117ns ± 0%     116ns ± 0%  -0.85%  (p=0.000 n=10+10)
FmtFprintfIntInt          181ns ± 0%     181ns ± 0%    ~     (all equal)
FmtFprintfPrefixedInt     222ns ± 0%     224ns ± 0%  +0.90%  (p=0.000 n=9+10)
FmtFprintfFloat           318ns ± 1%     322ns ± 0%    ~     (p=0.059 n=10+8)
FmtManyArgs               736ns ± 1%     735ns ± 0%    ~     (p=0.206 n=9+9)
Gzip                      437ms ± 0%     436ms ± 0%  -0.25%  (p=0.000 n=10+10)
HTTPClientServer         89.8µs ± 1%    90.2µs ± 2%    ~     (p=0.393 n=10+10)
JSONEncode               20.1ms ± 1%    20.2ms ± 1%    ~     (p=0.065 n=9+10)
JSONDecode               94.2ms ± 1%    93.9ms ± 1%  -0.42%  (p=0.043 n=10+10)
GobDecode                12.7ms ± 1%    12.8ms ± 2%  +0.94%  (p=0.019 n=10+10)
GobEncode                12.1ms ± 0%    12.1ms ± 0%    ~     (p=0.052 n=10+10)
Mandelbrot200            5.06ms ± 0%    5.05ms ± 0%  -0.04%  (p=0.000 n=9+10)
TimeParse                 450ns ± 3%     446ns ± 0%    ~     (p=0.238 n=10+9)
TimeFormat                485ns ± 1%     483ns ± 1%    ~     (p=0.073 n=10+10)
Template                 90.4ms ± 0%    90.7ms ± 0%  +0.29%  (p=0.000 n=8+10)
GoParse                  6.01ms ± 0%    6.03ms ± 0%  +0.35%  (p=0.000 n=10+10)
BinaryTree17              11.7s ± 0%     11.7s ± 0%    ~     (p=0.481 n=10+10)
Revcomp                   669ms ± 0%     669ms ± 0%    ~     (p=0.315 n=10+10)
Fannkuch11                3.40s ± 0%     3.37s ± 0%  -0.92%  (p=0.000 n=10+10)
[Geo mean]               67.9µs         67.9µs       +0.02%

name                   old speed      new speed      delta
RegexpMatchEasy0_32     128MB/s ± 0%   128MB/s ± 0%  -0.08%  (p=0.003 n=8+10)
RegexpMatchEasy0_1K    1.75GB/s ± 0%  1.75GB/s ± 0%    ~     (p=0.642 n=8+10)
RegexpMatchEasy1_32     130MB/s ± 0%   130MB/s ± 0%    ~     (p=0.690 n=10+9)
RegexpMatchEasy1_1K    1.27GB/s ± 0%  1.27GB/s ± 0%    ~     (p=0.661 n=10+9)
RegexpMatchMedium_32   3.18MB/s ± 0%  3.18MB/s ± 0%    ~     (all equal)
RegexpMatchMedium_1K   19.7MB/s ± 0%  19.6MB/s ± 0%    ~     (p=0.190 n=10+9)
RegexpMatchHard_32     11.6MB/s ± 0%  11.6MB/s ± 1%    ~     (p=0.669 n=10+10)
RegexpMatchHard_1K     13.0MB/s ± 0%  13.0MB/s ± 0%    ~     (p=0.718 n=9+9)
Gzip                   44.4MB/s ± 0%  44.5MB/s ± 0%  +0.24%  (p=0.000 n=10+10)
JSONEncode             96.5MB/s ± 1%  96.1MB/s ± 1%    ~     (p=0.065 n=9+10)
JSONDecode             20.6MB/s ± 1%  20.7MB/s ± 1%  +0.42%  (p=0.041 n=10+10)
GobDecode              60.6MB/s ± 1%  60.0MB/s ± 2%  -0.92%  (p=0.016 n=10+10)
GobEncode              63.4MB/s ± 0%  63.6MB/s ± 0%    ~     (p=0.055 n=10+10)
Template               21.5MB/s ± 0%  21.4MB/s ± 0%  -0.30%  (p=0.000 n=9+10)
GoParse                9.64MB/s ± 0%  9.61MB/s ± 0%  -0.36%  (p=0.000 n=10+10)
Revcomp                 380MB/s ± 0%   380MB/s ± 0%    ~     (p=0.323 n=10+10)
[Geo mean]             56.0MB/s       55.9MB/s       -0.07%

Change-Id: I79a4978d42d01a5f72ed5ceec07f5e78ac6b3859
Reviewed-on: https://go-review.googlesource.com/97175
Run-TryBot: Cherry Zhang <cherryyz@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Cherry Zhang <cherryyz@google.com>
parent 562346b7
......@@ -3290,6 +3290,34 @@ var linuxARM64Tests = []*asmTest{
pos: []string{"STP"},
neg: []string{"MOVB", "MOVH", "MOVW"},
},
// Check that stores are combined into larger stores
{
fn: `
func $(b []byte, v uint16) {
binary.LittleEndian.PutUint16(b, v)
}
`,
pos: []string{"MOVH"},
neg: []string{"MOVB"},
},
{
fn: `
func $(b []byte, v uint32) {
binary.LittleEndian.PutUint32(b, v)
}
`,
pos: []string{"MOVW"},
neg: []string{"MOVB", "MOVH"},
},
{
fn: `
func $(b []byte, v uint64) {
binary.LittleEndian.PutUint64(b, v)
}
`,
pos: []string{"MOVD"},
neg: []string{"MOVB", "MOVH", "MOVW"},
},
}
var linuxMIPSTests = []*asmTest{
......@@ -3685,4 +3713,4 @@ package main
func Mod32(x uint32) uint32 {
return x % 3 // frontend rewrites it as HMUL with 2863311531, the LITERAL node has unknown Pos
}
`
\ No newline at end of file
`
......@@ -1494,6 +1494,58 @@
&& clobber(x)
-> (MOVQstorezero [min(i,j)] {s} ptr0 mem)
// Combine stores into larger (unaligned) stores.
//
// Every rule in this group matches a narrow store at offset i whose memory
// argument x is a store of the same width at the adjacent lower offset
// (i-1, i-2 or i-4) with the same symbol {s}, and replaces the pair with one
// store of twice the width at the lower offset. The value stored at the
// higher offset must be the value stored at the lower offset shifted right
// (SRLconst) by the width of the narrow store, i.e. the bytes are laid out
// in little-endian order. Conditions shared by all rules:
//   x.Uses == 1            - x has exactly one use (v uses it as its memory argument)
//   isSamePtr(ptr0, ptr1)  - both stores go through the same base pointer
//   clobber(x)             - NOTE(review): presumably invalidates the absorbed
//                            store x; confirm against the helper's definition.

// byte(w) at i-1 and byte(w>>8) at i  =>  halfword store of w at i-1.
(MOVBstore [i] {s} ptr0 (SRLconst [8] w) x:(MOVBstore [i-1] {s} ptr1 w mem))
&& x.Uses == 1
&& isSamePtr(ptr0, ptr1)
&& clobber(x)
-> (MOVHstore [i-1] {s} ptr0 w mem)
// Same as above, but the shifted operand is wrapped in MOVHUreg (presumably
// the unsigned 16-bit extension op - confirm in the ARM64 op table). The
// result stores w directly, without the extension.
(MOVBstore [i] {s} ptr0 (SRLconst [8] (MOVHUreg w)) x:(MOVBstore [i-1] {s} ptr1 w mem))
&& x.Uses == 1
&& isSamePtr(ptr0, ptr1)
&& clobber(x)
-> (MOVHstore [i-1] {s} ptr0 w mem)
// Same as above, with the shifted operand wrapped in MOVWUreg (presumably
// the unsigned 32-bit extension op - confirm in the ARM64 op table).
(MOVBstore [i] {s} ptr0 (SRLconst [8] (MOVWUreg w)) x:(MOVBstore [i-1] {s} ptr1 w mem))
&& x.Uses == 1
&& isSamePtr(ptr0, ptr1)
&& clobber(x)
-> (MOVHstore [i-1] {s} ptr0 w mem)
// General case for bytes in the middle of a wider value: the lower store
// writes w0 = w>>(j-8) and the upper store writes w>>j, so a halfword store
// of w0 covers both bytes.
(MOVBstore [i] {s} ptr0 (SRLconst [j] w) x:(MOVBstore [i-1] {s} ptr1 w0:(SRLconst [j-8] w) mem))
&& x.Uses == 1
&& isSamePtr(ptr0, ptr1)
&& clobber(x)
-> (MOVHstore [i-1] {s} ptr0 w0 mem)
// General case where both shifts operate on (MOVWUreg w) instead of w.
(MOVBstore [i] {s} ptr0 (SRLconst [j] (MOVWUreg w)) x:(MOVBstore [i-1] {s} ptr1 w0:(SRLconst [j-8] (MOVWUreg w)) mem))
&& x.Uses == 1
&& isSamePtr(ptr0, ptr1)
&& clobber(x)
-> (MOVHstore [i-1] {s} ptr0 w0 mem)
// half(w) at i-2 and half(w>>16) at i  =>  word store of w at i-2.
(MOVHstore [i] {s} ptr0 (SRLconst [16] w) x:(MOVHstore [i-2] {s} ptr1 w mem))
&& x.Uses == 1
&& isSamePtr(ptr0, ptr1)
&& clobber(x)
-> (MOVWstore [i-2] {s} ptr0 w mem)
// Same as above, with the shifted operand wrapped in MOVWUreg; the result
// stores w directly.
(MOVHstore [i] {s} ptr0 (SRLconst [16] (MOVWUreg w)) x:(MOVHstore [i-2] {s} ptr1 w mem))
&& x.Uses == 1
&& isSamePtr(ptr0, ptr1)
&& clobber(x)
-> (MOVWstore [i-2] {s} ptr0 w mem)
// General case: the lower store writes w0 = w>>(j-16) and the upper store
// writes w>>j, so a word store of w0 covers both halfwords.
(MOVHstore [i] {s} ptr0 (SRLconst [j] w) x:(MOVHstore [i-2] {s} ptr1 w0:(SRLconst [j-16] w) mem))
&& x.Uses == 1
&& isSamePtr(ptr0, ptr1)
&& clobber(x)
-> (MOVWstore [i-2] {s} ptr0 w0 mem)
// word(w) at i-4 and word(w>>32) at i  =>  doubleword store of w at i-4.
(MOVWstore [i] {s} ptr0 (SRLconst [32] w) x:(MOVWstore [i-4] {s} ptr1 w mem))
&& x.Uses == 1
&& isSamePtr(ptr0, ptr1)
&& clobber(x)
-> (MOVDstore [i-4] {s} ptr0 w mem)
// General case: the lower store writes w0 = w>>(j-32) and the upper store
// writes w>>j, so a doubleword store of w0 covers both words.
(MOVWstore [i] {s} ptr0 (SRLconst [j] w) x:(MOVWstore [i-4] {s} ptr1 w0:(SRLconst [j-32] w) mem))
&& x.Uses == 1
&& isSamePtr(ptr0, ptr1)
&& clobber(x)
-> (MOVDstore [i-4] {s} ptr0 w0 mem)
// FP simplification
(FNEGS (FMULS x y)) -> (FNMULS x y)
(FNEGD (FMULD x y)) -> (FNMULD x y)
......
......@@ -138,7 +138,7 @@ func rewriteValueARM64(v *Value) bool {
case OpARM64MOVBreg:
return rewriteValueARM64_OpARM64MOVBreg_0(v)
case OpARM64MOVBstore:
return rewriteValueARM64_OpARM64MOVBstore_0(v)
return rewriteValueARM64_OpARM64MOVBstore_0(v) || rewriteValueARM64_OpARM64MOVBstore_10(v)
case OpARM64MOVBstorezero:
return rewriteValueARM64_OpARM64MOVBstorezero_0(v)
case OpARM64MOVDload:
......@@ -6079,6 +6079,250 @@ func rewriteValueARM64_OpARM64MOVBstore_0(v *Value) bool {
v.AddArg(mem)
return true
}
// match: (MOVBstore [i] {s} ptr0 (SRLconst [8] w) x:(MOVBstore [i-1] {s} ptr1 w mem))
// cond: x.Uses == 1 && isSamePtr(ptr0, ptr1) && clobber(x)
// result: (MOVHstore [i-1] {s} ptr0 w mem)
for {
i := v.AuxInt
s := v.Aux
_ = v.Args[2]
ptr0 := v.Args[0]
v_1 := v.Args[1]
if v_1.Op != OpARM64SRLconst {
break
}
if v_1.AuxInt != 8 {
break
}
w := v_1.Args[0]
x := v.Args[2]
if x.Op != OpARM64MOVBstore {
break
}
if x.AuxInt != i-1 {
break
}
if x.Aux != s {
break
}
_ = x.Args[2]
ptr1 := x.Args[0]
if w != x.Args[1] {
break
}
mem := x.Args[2]
if !(x.Uses == 1 && isSamePtr(ptr0, ptr1) && clobber(x)) {
break
}
v.reset(OpARM64MOVHstore)
v.AuxInt = i - 1
v.Aux = s
v.AddArg(ptr0)
v.AddArg(w)
v.AddArg(mem)
return true
}
return false
}
// rewriteValueARM64_OpARM64MOVBstore_10 tries the MOVBstore rules that fuse
// two adjacent byte stores into a single MOVHstore, and reports whether v was
// rewritten.
//
// All four rules need v to store an SRLconst result at offset i, and need v's
// memory argument to be a MOVBstore at offset i-1 carrying the same Aux. They
// differ only in how the two stored values must relate to each other. The
// rules are tried in order and the first one that matches wins.
//
// NOTE(review): the match/cond/result comments suggest this function is
// generated from the rewrite rules; confirm before editing it by hand.
func rewriteValueARM64_OpARM64MOVBstore_10(v *Value) bool {
	off, sym := v.AuxInt, v.Aux
	_ = v.Args[2]
	base, hi, lo := v.Args[0], v.Args[1], v.Args[2]

	// Every rule requires the value stored by v to be a constant right shift.
	if hi.Op != OpARM64SRLconst {
		return false
	}
	shift, src := hi.AuxInt, hi.Args[0]

	// fuse turns v in place into a halfword store of val at offset i-1,
	// chained onto mem, and reports success.
	fuse := func(val, mem *Value) bool {
		v.reset(OpARM64MOVHstore)
		v.AuxInt = off - 1
		v.Aux = sym
		v.AddArg(base)
		v.AddArg(val)
		v.AddArg(mem)
		return true
	}

	// match: (MOVBstore [i] {s} ptr0 (SRLconst [8] (MOVHUreg w)) x:(MOVBstore [i-1] {s} ptr1 w mem))
	// cond: x.Uses == 1 && isSamePtr(ptr0, ptr1) && clobber(x)
	// result: (MOVHstore [i-1] {s} ptr0 w mem)
	for {
		if shift != 8 || src.Op != OpARM64MOVHUreg {
			break
		}
		w := src.Args[0]
		if lo.Op != OpARM64MOVBstore || lo.AuxInt != off-1 || lo.Aux != sym {
			break
		}
		_ = lo.Args[2]
		// Operands of lo are captured before the final condition, which
		// calls clobber(lo).
		ptr1, mem := lo.Args[0], lo.Args[2]
		if w != lo.Args[1] {
			break
		}
		if lo.Uses != 1 || !isSamePtr(base, ptr1) || !clobber(lo) {
			break
		}
		return fuse(w, mem)
	}

	// match: (MOVBstore [i] {s} ptr0 (SRLconst [8] (MOVWUreg w)) x:(MOVBstore [i-1] {s} ptr1 w mem))
	// cond: x.Uses == 1 && isSamePtr(ptr0, ptr1) && clobber(x)
	// result: (MOVHstore [i-1] {s} ptr0 w mem)
	for {
		if shift != 8 || src.Op != OpARM64MOVWUreg {
			break
		}
		w := src.Args[0]
		if lo.Op != OpARM64MOVBstore || lo.AuxInt != off-1 || lo.Aux != sym {
			break
		}
		_ = lo.Args[2]
		ptr1, mem := lo.Args[0], lo.Args[2]
		if w != lo.Args[1] {
			break
		}
		if lo.Uses != 1 || !isSamePtr(base, ptr1) || !clobber(lo) {
			break
		}
		return fuse(w, mem)
	}

	// match: (MOVBstore [i] {s} ptr0 (SRLconst [j] w) x:(MOVBstore [i-1] {s} ptr1 w0:(SRLconst [j-8] w) mem))
	// cond: x.Uses == 1 && isSamePtr(ptr0, ptr1) && clobber(x)
	// result: (MOVHstore [i-1] {s} ptr0 w0 mem)
	for {
		if lo.Op != OpARM64MOVBstore || lo.AuxInt != off-1 || lo.Aux != sym {
			break
		}
		_ = lo.Args[2]
		ptr1, w0, mem := lo.Args[0], lo.Args[1], lo.Args[2]
		// The lower byte must come from the same source, shifted 8 bits less.
		if w0.Op != OpARM64SRLconst || w0.AuxInt != shift-8 || src != w0.Args[0] {
			break
		}
		if lo.Uses != 1 || !isSamePtr(base, ptr1) || !clobber(lo) {
			break
		}
		return fuse(w0, mem)
	}

	// match: (MOVBstore [i] {s} ptr0 (SRLconst [j] (MOVWUreg w)) x:(MOVBstore [i-1] {s} ptr1 w0:(SRLconst [j-8] (MOVWUreg w)) mem))
	// cond: x.Uses == 1 && isSamePtr(ptr0, ptr1) && clobber(x)
	// result: (MOVHstore [i-1] {s} ptr0 w0 mem)
	for {
		if src.Op != OpARM64MOVWUreg {
			break
		}
		w := src.Args[0]
		if lo.Op != OpARM64MOVBstore || lo.AuxInt != off-1 || lo.Aux != sym {
			break
		}
		_ = lo.Args[2]
		ptr1, w0, mem := lo.Args[0], lo.Args[1], lo.Args[2]
		if w0.Op != OpARM64SRLconst || w0.AuxInt != shift-8 {
			break
		}
		ext := w0.Args[0]
		if ext.Op != OpARM64MOVWUreg || w != ext.Args[0] {
			break
		}
		if lo.Uses != 1 || !isSamePtr(base, ptr1) || !clobber(lo) {
			break
		}
		return fuse(w0, mem)
	}

	return false
}
func rewriteValueARM64_OpARM64MOVBstorezero_0(v *Value) bool {
......@@ -6943,6 +7187,144 @@ func rewriteValueARM64_OpARM64MOVHstore_0(v *Value) bool {
v.AddArg(mem)
return true
}
// match: (MOVHstore [i] {s} ptr0 (SRLconst [16] w) x:(MOVHstore [i-2] {s} ptr1 w mem))
// cond: x.Uses == 1 && isSamePtr(ptr0, ptr1) && clobber(x)
// result: (MOVWstore [i-2] {s} ptr0 w mem)
for {
i := v.AuxInt
s := v.Aux
_ = v.Args[2]
ptr0 := v.Args[0]
v_1 := v.Args[1]
if v_1.Op != OpARM64SRLconst {
break
}
if v_1.AuxInt != 16 {
break
}
w := v_1.Args[0]
x := v.Args[2]
if x.Op != OpARM64MOVHstore {
break
}
if x.AuxInt != i-2 {
break
}
if x.Aux != s {
break
}
_ = x.Args[2]
ptr1 := x.Args[0]
if w != x.Args[1] {
break
}
mem := x.Args[2]
if !(x.Uses == 1 && isSamePtr(ptr0, ptr1) && clobber(x)) {
break
}
v.reset(OpARM64MOVWstore)
v.AuxInt = i - 2
v.Aux = s
v.AddArg(ptr0)
v.AddArg(w)
v.AddArg(mem)
return true
}
// match: (MOVHstore [i] {s} ptr0 (SRLconst [16] (MOVWUreg w)) x:(MOVHstore [i-2] {s} ptr1 w mem))
// cond: x.Uses == 1 && isSamePtr(ptr0, ptr1) && clobber(x)
// result: (MOVWstore [i-2] {s} ptr0 w mem)
for {
i := v.AuxInt
s := v.Aux
_ = v.Args[2]
ptr0 := v.Args[0]
v_1 := v.Args[1]
if v_1.Op != OpARM64SRLconst {
break
}
if v_1.AuxInt != 16 {
break
}
v_1_0 := v_1.Args[0]
if v_1_0.Op != OpARM64MOVWUreg {
break
}
w := v_1_0.Args[0]
x := v.Args[2]
if x.Op != OpARM64MOVHstore {
break
}
if x.AuxInt != i-2 {
break
}
if x.Aux != s {
break
}
_ = x.Args[2]
ptr1 := x.Args[0]
if w != x.Args[1] {
break
}
mem := x.Args[2]
if !(x.Uses == 1 && isSamePtr(ptr0, ptr1) && clobber(x)) {
break
}
v.reset(OpARM64MOVWstore)
v.AuxInt = i - 2
v.Aux = s
v.AddArg(ptr0)
v.AddArg(w)
v.AddArg(mem)
return true
}
// match: (MOVHstore [i] {s} ptr0 (SRLconst [j] w) x:(MOVHstore [i-2] {s} ptr1 w0:(SRLconst [j-16] w) mem))
// cond: x.Uses == 1 && isSamePtr(ptr0, ptr1) && clobber(x)
// result: (MOVWstore [i-2] {s} ptr0 w0 mem)
for {
i := v.AuxInt
s := v.Aux
_ = v.Args[2]
ptr0 := v.Args[0]
v_1 := v.Args[1]
if v_1.Op != OpARM64SRLconst {
break
}
j := v_1.AuxInt
w := v_1.Args[0]
x := v.Args[2]
if x.Op != OpARM64MOVHstore {
break
}
if x.AuxInt != i-2 {
break
}
if x.Aux != s {
break
}
_ = x.Args[2]
ptr1 := x.Args[0]
w0 := x.Args[1]
if w0.Op != OpARM64SRLconst {
break
}
if w0.AuxInt != j-16 {
break
}
if w != w0.Args[0] {
break
}
mem := x.Args[2]
if !(x.Uses == 1 && isSamePtr(ptr0, ptr1) && clobber(x)) {
break
}
v.reset(OpARM64MOVWstore)
v.AuxInt = i - 2
v.Aux = s
v.AddArg(ptr0)
v.AddArg(w0)
v.AddArg(mem)
return true
}
return false
}
func rewriteValueARM64_OpARM64MOVHstorezero_0(v *Value) bool {
......@@ -7604,6 +7986,97 @@ func rewriteValueARM64_OpARM64MOVWstore_0(v *Value) bool {
v.AddArg(mem)
return true
}
// match: (MOVWstore [i] {s} ptr0 (SRLconst [32] w) x:(MOVWstore [i-4] {s} ptr1 w mem))
// cond: x.Uses == 1 && isSamePtr(ptr0, ptr1) && clobber(x)
// result: (MOVDstore [i-4] {s} ptr0 w mem)
for {
i := v.AuxInt
s := v.Aux
_ = v.Args[2]
ptr0 := v.Args[0]
v_1 := v.Args[1]
if v_1.Op != OpARM64SRLconst {
break
}
if v_1.AuxInt != 32 {
break
}
w := v_1.Args[0]
x := v.Args[2]
if x.Op != OpARM64MOVWstore {
break
}
if x.AuxInt != i-4 {
break
}
if x.Aux != s {
break
}
_ = x.Args[2]
ptr1 := x.Args[0]
if w != x.Args[1] {
break
}
mem := x.Args[2]
if !(x.Uses == 1 && isSamePtr(ptr0, ptr1) && clobber(x)) {
break
}
v.reset(OpARM64MOVDstore)
v.AuxInt = i - 4
v.Aux = s
v.AddArg(ptr0)
v.AddArg(w)
v.AddArg(mem)
return true
}
// match: (MOVWstore [i] {s} ptr0 (SRLconst [j] w) x:(MOVWstore [i-4] {s} ptr1 w0:(SRLconst [j-32] w) mem))
// cond: x.Uses == 1 && isSamePtr(ptr0, ptr1) && clobber(x)
// result: (MOVDstore [i-4] {s} ptr0 w0 mem)
for {
i := v.AuxInt
s := v.Aux
_ = v.Args[2]
ptr0 := v.Args[0]
v_1 := v.Args[1]
if v_1.Op != OpARM64SRLconst {
break
}
j := v_1.AuxInt
w := v_1.Args[0]
x := v.Args[2]
if x.Op != OpARM64MOVWstore {
break
}
if x.AuxInt != i-4 {
break
}
if x.Aux != s {
break
}
_ = x.Args[2]
ptr1 := x.Args[0]
w0 := x.Args[1]
if w0.Op != OpARM64SRLconst {
break
}
if w0.AuxInt != j-32 {
break
}
if w != w0.Args[0] {
break
}
mem := x.Args[2]
if !(x.Uses == 1 && isSamePtr(ptr0, ptr1) && clobber(x)) {
break
}
v.reset(OpARM64MOVDstore)
v.AuxInt = i - 4
v.Aux = s
v.AddArg(ptr0)
v.AddArg(w0)
v.AddArg(mem)
return true
}
return false
}
func rewriteValueARM64_OpARM64MOVWstorezero_0(v *Value) bool {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment