Commit b548eee3 authored by Keith Randall's avatar Keith Randall

cmd/compile: fix load-combining rules

CL 33632 reorders args of commutative ops in order to make
CSE for commutative ops more robust.  Unfortunately, that
broke the load-combining rules which depend on a certain ordering
of OR ops' arguments.

Introduce some additional rules that order OR ops' arguments
consistently so that the load-combining rules fire.

Note: there's also something else wrong with the s390x rules.
I've filed #19059 for that.

Fixes #18946

Change-Id: I0a5447196bd88a55ccee683c69a57b943a9972e1
Reviewed-on: https://go-review.googlesource.com/36911
Run-TryBot: Keith Randall <khr@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: default avatarJosh Bleecher Snyder <josharian@gmail.com>
parent 76b4b8c7
...@@ -221,6 +221,39 @@ func f(b []byte, i int) uint32 { ...@@ -221,6 +221,39 @@ func f(b []byte, i int) uint32 {
`, `,
[]string{"\tMOVL\t\\(.*\\)\\(.*\\*1\\),"}, []string{"\tMOVL\t\\(.*\\)\\(.*\\*1\\),"},
}, },
{"s390x", "linux", `
import "encoding/binary"
func f(b []byte) uint32 {
return binary.LittleEndian.Uint32(b)
}
`,
[]string{"\tMOVWZ\t\\(.*\\),"},
},
{"s390x", "linux", `
import "encoding/binary"
func f(b []byte, i int) uint32 {
return binary.LittleEndian.Uint32(b[i:])
}
`,
[]string{"\tMOVWZ\t\\(.*\\)\\(.*\\*1\\),"},
},
{"s390x", "linux", `
import "encoding/binary"
func f(b []byte) uint64 {
return binary.LittleEndian.Uint64(b)
}
`,
[]string{"\tMOVD\t\\(.*\\),"},
},
{"s390x", "linux", `
import "encoding/binary"
func f(b []byte, i int) uint64 {
return binary.LittleEndian.Uint64(b[i:])
}
`,
[]string{"\tMOVD\t\\(.*\\)\\(.*\\*1\\),"},
},
// TODO: s390x big-endian tests.
// Structure zeroing. See issue #18370. // Structure zeroing. See issue #18370.
{"amd64", "linux", ` {"amd64", "linux", `
......
...@@ -1131,6 +1131,9 @@ ...@@ -1131,6 +1131,9 @@
(CMPWconst x [0]) -> (TESTW x x) (CMPWconst x [0]) -> (TESTW x x)
(CMPBconst x [0]) -> (TESTB x x) (CMPBconst x [0]) -> (TESTB x x)
// Move shifts to second argument of ORs. Helps load combining rules below.
(ORL x:(SHLLconst _) y) && y.Op != Op386SHLLconst -> (ORL y x)
// Combining byte loads into larger (unaligned) loads. // Combining byte loads into larger (unaligned) loads.
// There are many ways these combinations could occur. This is // There are many ways these combinations could occur. This is
// designed to match the way encoding/binary.LittleEndian does it. // designed to match the way encoding/binary.LittleEndian does it.
......
...@@ -1383,6 +1383,10 @@ ...@@ -1383,6 +1383,10 @@
(CMPWconst x [0]) -> (TESTW x x) (CMPWconst x [0]) -> (TESTW x x)
(CMPBconst x [0]) -> (TESTB x x) (CMPBconst x [0]) -> (TESTB x x)
// Move shifts to second argument of ORs. Helps load combining rules below.
(ORQ x:(SHLQconst _) y) && y.Op != OpAMD64SHLQconst -> (ORQ y x)
(ORL x:(SHLLconst _) y) && y.Op != OpAMD64SHLLconst -> (ORL y x)
// Combining byte loads into larger (unaligned) loads. // Combining byte loads into larger (unaligned) loads.
// There are many ways these combinations could occur. This is // There are many ways these combinations could occur. This is
// designed to match the way encoding/binary.LittleEndian does it. // designed to match the way encoding/binary.LittleEndian does it.
......
...@@ -1266,6 +1266,10 @@ ...@@ -1266,6 +1266,10 @@
&& clobber(x) && clobber(x)
-> (MOVDBRstoreidx [i-4] {s} p idx w0 mem) -> (MOVDBRstoreidx [i-4] {s} p idx w0 mem)
// Move shifts to second argument of ORs. Helps load combining rules below.
(ORW x:(SLWconst _) y) && y.Op != OpS390XSLWconst -> (ORW y x)
(OR x:(SLDconst _) y) && y.Op != OpS390XSLDconst -> (OR y x)
// Combining byte loads into larger (unaligned) loads. // Combining byte loads into larger (unaligned) loads.
// Little endian loads. // Little endian loads.
......
...@@ -7465,6 +7465,23 @@ func rewriteValue386_Op386ORL(v *Value, config *Config) bool { ...@@ -7465,6 +7465,23 @@ func rewriteValue386_Op386ORL(v *Value, config *Config) bool {
v.AddArg(x) v.AddArg(x)
return true return true
} }
// match: (ORL x:(SHLLconst _) y)
// cond: y.Op != Op386SHLLconst
// result: (ORL y x)
for {
x := v.Args[0]
if x.Op != Op386SHLLconst {
break
}
y := v.Args[1]
if !(y.Op != Op386SHLLconst) {
break
}
v.reset(Op386ORL)
v.AddArg(y)
v.AddArg(x)
return true
}
// match: (ORL x0:(MOVBload [i] {s} p mem) s0:(SHLLconst [8] x1:(MOVBload [i+1] {s} p mem))) // match: (ORL x0:(MOVBload [i] {s} p mem) s0:(SHLLconst [8] x1:(MOVBload [i+1] {s} p mem)))
// cond: x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && mergePoint(b,x0,x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) // cond: x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && mergePoint(b,x0,x1) != nil && clobber(x0) && clobber(x1) && clobber(s0)
// result: @mergePoint(b,x0,x1) (MOVWload [i] {s} p mem) // result: @mergePoint(b,x0,x1) (MOVWload [i] {s} p mem)
......
...@@ -11282,6 +11282,23 @@ func rewriteValueAMD64_OpAMD64ORL(v *Value, config *Config) bool { ...@@ -11282,6 +11282,23 @@ func rewriteValueAMD64_OpAMD64ORL(v *Value, config *Config) bool {
v.AddArg(x) v.AddArg(x)
return true return true
} }
// match: (ORL x:(SHLLconst _) y)
// cond: y.Op != OpAMD64SHLLconst
// result: (ORL y x)
for {
x := v.Args[0]
if x.Op != OpAMD64SHLLconst {
break
}
y := v.Args[1]
if !(y.Op != OpAMD64SHLLconst) {
break
}
v.reset(OpAMD64ORL)
v.AddArg(y)
v.AddArg(x)
return true
}
// match: (ORL x0:(MOVBload [i] {s} p mem) s0:(SHLLconst [8] x1:(MOVBload [i+1] {s} p mem))) // match: (ORL x0:(MOVBload [i] {s} p mem) s0:(SHLLconst [8] x1:(MOVBload [i+1] {s} p mem)))
// cond: x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && mergePoint(b,x0,x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) // cond: x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && mergePoint(b,x0,x1) != nil && clobber(x0) && clobber(x1) && clobber(s0)
// result: @mergePoint(b,x0,x1) (MOVWload [i] {s} p mem) // result: @mergePoint(b,x0,x1) (MOVWload [i] {s} p mem)
...@@ -11909,6 +11926,23 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value, config *Config) bool { ...@@ -11909,6 +11926,23 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value, config *Config) bool {
v.AddArg(x) v.AddArg(x)
return true return true
} }
// match: (ORQ x:(SHLQconst _) y)
// cond: y.Op != OpAMD64SHLQconst
// result: (ORQ y x)
for {
x := v.Args[0]
if x.Op != OpAMD64SHLQconst {
break
}
y := v.Args[1]
if !(y.Op != OpAMD64SHLQconst) {
break
}
v.reset(OpAMD64ORQ)
v.AddArg(y)
v.AddArg(x)
return true
}
// match: (ORQ o0:(ORQ o1:(ORQ o2:(ORQ o3:(ORQ o4:(ORQ o5:(ORQ x0:(MOVBload [i] {s} p mem) s0:(SHLQconst [8] x1:(MOVBload [i+1] {s} p mem))) s1:(SHLQconst [16] x2:(MOVBload [i+2] {s} p mem))) s2:(SHLQconst [24] x3:(MOVBload [i+3] {s} p mem))) s3:(SHLQconst [32] x4:(MOVBload [i+4] {s} p mem))) s4:(SHLQconst [40] x5:(MOVBload [i+5] {s} p mem))) s5:(SHLQconst [48] x6:(MOVBload [i+6] {s} p mem))) s6:(SHLQconst [56] x7:(MOVBload [i+7] {s} p mem))) // match: (ORQ o0:(ORQ o1:(ORQ o2:(ORQ o3:(ORQ o4:(ORQ o5:(ORQ x0:(MOVBload [i] {s} p mem) s0:(SHLQconst [8] x1:(MOVBload [i+1] {s} p mem))) s1:(SHLQconst [16] x2:(MOVBload [i+2] {s} p mem))) s2:(SHLQconst [24] x3:(MOVBload [i+3] {s} p mem))) s3:(SHLQconst [32] x4:(MOVBload [i+4] {s} p mem))) s4:(SHLQconst [40] x5:(MOVBload [i+5] {s} p mem))) s5:(SHLQconst [48] x6:(MOVBload [i+6] {s} p mem))) s6:(SHLQconst [56] x7:(MOVBload [i+7] {s} p mem)))
// cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && s2.Uses == 1 && s3.Uses == 1 && s4.Uses == 1 && s5.Uses == 1 && s6.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1 && o4.Uses == 1 && o5.Uses == 1 && mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6) && clobber(x7) && clobber(s0) && clobber(s1) && clobber(s2) && clobber(s3) && clobber(s4) && clobber(s5) && clobber(s6) && clobber(o0) && clobber(o1) && clobber(o2) && clobber(o3) && clobber(o4) && clobber(o5) // cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && s2.Uses == 1 && s3.Uses == 1 && s4.Uses == 1 && s5.Uses == 1 && s6.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1 && o4.Uses == 1 && o5.Uses == 1 && mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6) && clobber(x7) && clobber(s0) && clobber(s1) && clobber(s2) && clobber(s3) && clobber(s4) && clobber(s5) && clobber(s6) && clobber(o0) && clobber(o1) && clobber(o2) && clobber(o3) && clobber(o4) && clobber(o5)
// result: @mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) (MOVQload [i] {s} p mem) // result: @mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) (MOVQload [i] {s} p mem)
......
...@@ -14324,6 +14324,23 @@ func rewriteValueS390X_OpS390XOR(v *Value, config *Config) bool { ...@@ -14324,6 +14324,23 @@ func rewriteValueS390X_OpS390XOR(v *Value, config *Config) bool {
v.AddArg(mem) v.AddArg(mem)
return true return true
} }
// match: (OR x:(SLDconst _) y)
// cond: y.Op != OpS390XSLDconst
// result: (OR y x)
for {
x := v.Args[0]
if x.Op != OpS390XSLDconst {
break
}
y := v.Args[1]
if !(y.Op != OpS390XSLDconst) {
break
}
v.reset(OpS390XOR)
v.AddArg(y)
v.AddArg(x)
return true
}
// match: (OR o0:(OR o1:(OR o2:(OR o3:(OR o4:(OR o5:(OR x0:(MOVBZload [i] {s} p mem) s0:(SLDconst [8] x1:(MOVBZload [i+1] {s} p mem))) s1:(SLDconst [16] x2:(MOVBZload [i+2] {s} p mem))) s2:(SLDconst [24] x3:(MOVBZload [i+3] {s} p mem))) s3:(SLDconst [32] x4:(MOVBZload [i+4] {s} p mem))) s4:(SLDconst [40] x5:(MOVBZload [i+5] {s} p mem))) s5:(SLDconst [48] x6:(MOVBZload [i+6] {s} p mem))) s6:(SLDconst [56] x7:(MOVBZload [i+7] {s} p mem))) // match: (OR o0:(OR o1:(OR o2:(OR o3:(OR o4:(OR o5:(OR x0:(MOVBZload [i] {s} p mem) s0:(SLDconst [8] x1:(MOVBZload [i+1] {s} p mem))) s1:(SLDconst [16] x2:(MOVBZload [i+2] {s} p mem))) s2:(SLDconst [24] x3:(MOVBZload [i+3] {s} p mem))) s3:(SLDconst [32] x4:(MOVBZload [i+4] {s} p mem))) s4:(SLDconst [40] x5:(MOVBZload [i+5] {s} p mem))) s5:(SLDconst [48] x6:(MOVBZload [i+6] {s} p mem))) s6:(SLDconst [56] x7:(MOVBZload [i+7] {s} p mem)))
// cond: p.Op != OpSB && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && s2.Uses == 1 && s3.Uses == 1 && s4.Uses == 1 && s5.Uses == 1 && s6.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1 && o4.Uses == 1 && o5.Uses == 1 && mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6) && clobber(x7) && clobber(s0) && clobber(s1) && clobber(s2) && clobber(s3) && clobber(s4) && clobber(s5) && clobber(s6) && clobber(o0) && clobber(o1) && clobber(o2) && clobber(o3) && clobber(o4) && clobber(o5) // cond: p.Op != OpSB && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && s2.Uses == 1 && s3.Uses == 1 && s4.Uses == 1 && s5.Uses == 1 && s6.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1 && o4.Uses == 1 && o5.Uses == 1 && mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6) && clobber(x7) && clobber(s0) && clobber(s1) && clobber(s2) && clobber(s3) && clobber(s4) && clobber(s5) && clobber(s6) && clobber(o0) && clobber(o1) && clobber(o2) && clobber(o3) && clobber(o4) && clobber(o5)
// result: @mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) (MOVDBRload [i] {s} p mem) // result: @mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) (MOVDBRload [i] {s} p mem)
...@@ -15412,6 +15429,23 @@ func rewriteValueS390X_OpS390XORW(v *Value, config *Config) bool { ...@@ -15412,6 +15429,23 @@ func rewriteValueS390X_OpS390XORW(v *Value, config *Config) bool {
v.AddArg(mem) v.AddArg(mem)
return true return true
} }
// match: (ORW x:(SLWconst _) y)
// cond: y.Op != OpS390XSLWconst
// result: (ORW y x)
for {
x := v.Args[0]
if x.Op != OpS390XSLWconst {
break
}
y := v.Args[1]
if !(y.Op != OpS390XSLWconst) {
break
}
v.reset(OpS390XORW)
v.AddArg(y)
v.AddArg(x)
return true
}
// match: (ORW x0:(MOVBZload [i] {s} p mem) s0:(SLWconst [8] x1:(MOVBZload [i+1] {s} p mem))) // match: (ORW x0:(MOVBZload [i] {s} p mem) s0:(SLWconst [8] x1:(MOVBZload [i+1] {s} p mem)))
// cond: p.Op != OpSB && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && mergePoint(b,x0,x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) // cond: p.Op != OpSB && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && mergePoint(b,x0,x1) != nil && clobber(x0) && clobber(x1) && clobber(s0)
// result: @mergePoint(b,x0,x1) (MOVHZreg (MOVHBRload [i] {s} p mem)) // result: @mergePoint(b,x0,x1) (MOVHZreg (MOVHBRload [i] {s} p mem))
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment