Commit fee84cc9 authored by erifan01, committed by Cherry Zhang

cmd/compile: add an optimization rule for math/bits.ReverseBytes16 on arm

This CL adds two rules to turn patterns like ((x<<8) | (x>>8)) (where x is of
type uint16, and "|" can also be "+" or "^") into a REV16 instruction on arm v6+.
This optimization applies to math/bits.ReverseBytes16.

Benchmarks on arm v6:
name               old time/op  new time/op  delta
ReverseBytes-32    2.86ns ± 0%  2.86ns ± 0%   ~     (all equal)
ReverseBytes16-32  2.86ns ± 0%  2.86ns ± 0%   ~     (all equal)
ReverseBytes32-32  1.29ns ± 0%  1.29ns ± 0%   ~     (all equal)
ReverseBytes64-32  1.43ns ± 0%  1.43ns ± 0%   ~     (all equal)

Change-Id: I819e633c9a9d308f8e476fb0c82d73fb73dd019f
Reviewed-on: https://go-review.googlesource.com/c/go/+/159019
Reviewed-by: Cherry Zhang <cherryyz@google.com>
Run-TryBot: Cherry Zhang <cherryyz@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
parent a2ace8ec
......@@ -659,6 +659,7 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
case ssa.OpARMMVN,
ssa.OpARMCLZ,
ssa.OpARMREV,
ssa.OpARMREV16,
ssa.OpARMRBIT,
ssa.OpARMSQRTD,
ssa.OpARMNEGF,
......
......@@ -1216,6 +1216,12 @@
( ORshiftRL [c] (SLLconst x [32-c]) x) -> (SRRconst [ c] x)
(XORshiftRL [c] (SLLconst x [32-c]) x) -> (SRRconst [ c] x)
// ((x>>8) | (x<<8)) -> (REV16 x), the type of x is uint16, "|" can also be "^" or "+".
// The UBFX instruction is supported by ARMv6T2, ARMv7 and above, while REV16 is supported by
// ARMv6 and above. So in addition to the BFXU form, we also match the (SRLconst (SLLconst x))
// form under (ADD|OR|XOR)shiftLL, guarded by GOARM>=6, to cover ARMv6.
((ADDshiftLL|ORshiftLL|XORshiftLL) <typ.UInt16> [8] (BFXU <typ.UInt16> [armBFAuxInt(8, 8)] x) x) -> (REV16 x)
((ADDshiftLL|ORshiftLL|XORshiftLL) <typ.UInt16> [8] (SRLconst <typ.UInt16> [24] (SLLconst [16] x)) x) && objabi.GOARM>=6 -> (REV16 x)
// use indexed loads and stores
(MOVWload [0] {sym} (ADD ptr idx) mem) && sym == nil && !config.nacl -> (MOVWloadidx ptr idx mem)
(MOVWstore [0] {sym} (ADD ptr idx) val mem) && sym == nil && !config.nacl -> (MOVWstoreidx ptr idx val mem)
......
......@@ -207,9 +207,10 @@ func init() {
{name: "NEGD", argLength: 1, reg: fp11, asm: "NEGD"}, // -arg0, float64
{name: "SQRTD", argLength: 1, reg: fp11, asm: "SQRTD"}, // sqrt(arg0), float64
{name: "CLZ", argLength: 1, reg: gp11, asm: "CLZ"}, // count leading zero
{name: "REV", argLength: 1, reg: gp11, asm: "REV"}, // reverse byte order
{name: "RBIT", argLength: 1, reg: gp11, asm: "RBIT"}, // reverse bit order
{name: "CLZ", argLength: 1, reg: gp11, asm: "CLZ"}, // count leading zero
{name: "REV", argLength: 1, reg: gp11, asm: "REV"}, // reverse byte order
{name: "REV16", argLength: 1, reg: gp11, asm: "REV16"}, // reverse byte order in 16-bit halfwords
{name: "RBIT", argLength: 1, reg: gp11, asm: "RBIT"}, // reverse bit order
// shifts
{name: "SLL", argLength: 2, reg: gp21, asm: "SLL"}, // arg0 << arg1, shift amount is mod 256
......
......@@ -907,6 +907,7 @@ const (
OpARMSQRTD
OpARMCLZ
OpARMREV
OpARMREV16
OpARMRBIT
OpARMSLL
OpARMSLLconst
......@@ -12036,6 +12037,19 @@ var opcodeTable = [...]opInfo{
},
},
},
{
name: "REV16",
argLen: 1,
asm: arm.AREV16,
reg: regInfo{
inputs: []inputInfo{
{0, 22527}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 g R12 R14
},
outputs: []outputInfo{
{0, 21503}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12 R14
},
},
},
{
name: "RBIT",
argLen: 1,
......
......@@ -1037,13 +1037,13 @@ func isInlinableMemmove(dst, src *Value, sz int64, c *Config) bool {
return false
}
// encodes the lsb and width for arm64 bitfield ops into the expected auxInt format.
func arm64BFAuxInt(lsb, width int64) int64 {
// encodes the lsb and width for arm(64) bitfield ops into the expected auxInt format.
func armBFAuxInt(lsb, width int64) int64 {
if lsb < 0 || lsb > 63 {
panic("ARM64 bit field lsb constant out of range")
panic("ARM(64) bit field lsb constant out of range")
}
if width < 1 || width > 64 {
panic("ARM64 bit field width constant out of range")
panic("ARM(64) bit field width constant out of range")
}
return width | lsb<<8
}
......
......@@ -2933,6 +2933,8 @@ func rewriteValueARM_OpARMADDconst_0(v *Value) bool {
func rewriteValueARM_OpARMADDshiftLL_0(v *Value) bool {
b := v.Block
_ = b
typ := &b.Func.Config.Types
_ = typ
// match: (ADDshiftLL (MOVWconst [c]) x [d])
// cond:
// result: (ADDconst [c] (SLLconst <x.Type> x [d]))
......@@ -2992,6 +2994,74 @@ func rewriteValueARM_OpARMADDshiftLL_0(v *Value) bool {
v.AddArg(x)
return true
}
// match: (ADDshiftLL <typ.UInt16> [8] (BFXU <typ.UInt16> [armBFAuxInt(8, 8)] x) x)
// cond:
// result: (REV16 x)
for {
if v.Type != typ.UInt16 {
break
}
if v.AuxInt != 8 {
break
}
_ = v.Args[1]
v_0 := v.Args[0]
if v_0.Op != OpARMBFXU {
break
}
if v_0.Type != typ.UInt16 {
break
}
if v_0.AuxInt != armBFAuxInt(8, 8) {
break
}
x := v_0.Args[0]
if x != v.Args[1] {
break
}
v.reset(OpARMREV16)
v.AddArg(x)
return true
}
// match: (ADDshiftLL <typ.UInt16> [8] (SRLconst <typ.UInt16> [24] (SLLconst [16] x)) x)
// cond: objabi.GOARM>=6
// result: (REV16 x)
for {
if v.Type != typ.UInt16 {
break
}
if v.AuxInt != 8 {
break
}
_ = v.Args[1]
v_0 := v.Args[0]
if v_0.Op != OpARMSRLconst {
break
}
if v_0.Type != typ.UInt16 {
break
}
if v_0.AuxInt != 24 {
break
}
v_0_0 := v_0.Args[0]
if v_0_0.Op != OpARMSLLconst {
break
}
if v_0_0.AuxInt != 16 {
break
}
x := v_0_0.Args[0]
if x != v.Args[1] {
break
}
if !(objabi.GOARM >= 6) {
break
}
v.reset(OpARMREV16)
v.AddArg(x)
return true
}
return false
}
func rewriteValueARM_OpARMADDshiftLLreg_0(v *Value) bool {
......@@ -11952,6 +12022,8 @@ func rewriteValueARM_OpARMORconst_0(v *Value) bool {
func rewriteValueARM_OpARMORshiftLL_0(v *Value) bool {
b := v.Block
_ = b
typ := &b.Func.Config.Types
_ = typ
// match: (ORshiftLL (MOVWconst [c]) x [d])
// cond:
// result: (ORconst [c] (SLLconst <x.Type> x [d]))
......@@ -12011,6 +12083,74 @@ func rewriteValueARM_OpARMORshiftLL_0(v *Value) bool {
v.AddArg(x)
return true
}
// match: (ORshiftLL <typ.UInt16> [8] (BFXU <typ.UInt16> [armBFAuxInt(8, 8)] x) x)
// cond:
// result: (REV16 x)
for {
if v.Type != typ.UInt16 {
break
}
if v.AuxInt != 8 {
break
}
_ = v.Args[1]
v_0 := v.Args[0]
if v_0.Op != OpARMBFXU {
break
}
if v_0.Type != typ.UInt16 {
break
}
if v_0.AuxInt != armBFAuxInt(8, 8) {
break
}
x := v_0.Args[0]
if x != v.Args[1] {
break
}
v.reset(OpARMREV16)
v.AddArg(x)
return true
}
// match: (ORshiftLL <typ.UInt16> [8] (SRLconst <typ.UInt16> [24] (SLLconst [16] x)) x)
// cond: objabi.GOARM>=6
// result: (REV16 x)
for {
if v.Type != typ.UInt16 {
break
}
if v.AuxInt != 8 {
break
}
_ = v.Args[1]
v_0 := v.Args[0]
if v_0.Op != OpARMSRLconst {
break
}
if v_0.Type != typ.UInt16 {
break
}
if v_0.AuxInt != 24 {
break
}
v_0_0 := v_0.Args[0]
if v_0_0.Op != OpARMSLLconst {
break
}
if v_0_0.AuxInt != 16 {
break
}
x := v_0_0.Args[0]
if x != v.Args[1] {
break
}
if !(objabi.GOARM >= 6) {
break
}
v.reset(OpARMREV16)
v.AddArg(x)
return true
}
// match: (ORshiftLL x y:(SLLconst x [c]) [d])
// cond: c==d
// result: y
......@@ -17230,6 +17370,8 @@ func rewriteValueARM_OpARMXORconst_0(v *Value) bool {
func rewriteValueARM_OpARMXORshiftLL_0(v *Value) bool {
b := v.Block
_ = b
typ := &b.Func.Config.Types
_ = typ
// match: (XORshiftLL (MOVWconst [c]) x [d])
// cond:
// result: (XORconst [c] (SLLconst <x.Type> x [d]))
......@@ -17289,6 +17431,74 @@ func rewriteValueARM_OpARMXORshiftLL_0(v *Value) bool {
v.AddArg(x)
return true
}
// match: (XORshiftLL <typ.UInt16> [8] (BFXU <typ.UInt16> [armBFAuxInt(8, 8)] x) x)
// cond:
// result: (REV16 x)
for {
if v.Type != typ.UInt16 {
break
}
if v.AuxInt != 8 {
break
}
_ = v.Args[1]
v_0 := v.Args[0]
if v_0.Op != OpARMBFXU {
break
}
if v_0.Type != typ.UInt16 {
break
}
if v_0.AuxInt != armBFAuxInt(8, 8) {
break
}
x := v_0.Args[0]
if x != v.Args[1] {
break
}
v.reset(OpARMREV16)
v.AddArg(x)
return true
}
// match: (XORshiftLL <typ.UInt16> [8] (SRLconst <typ.UInt16> [24] (SLLconst [16] x)) x)
// cond: objabi.GOARM>=6
// result: (REV16 x)
for {
if v.Type != typ.UInt16 {
break
}
if v.AuxInt != 8 {
break
}
_ = v.Args[1]
v_0 := v.Args[0]
if v_0.Op != OpARMSRLconst {
break
}
if v_0.Type != typ.UInt16 {
break
}
if v_0.AuxInt != 24 {
break
}
v_0_0 := v_0.Args[0]
if v_0_0.Op != OpARMSLLconst {
break
}
if v_0_0.AuxInt != 16 {
break
}
x := v_0_0.Args[0]
if x != v.Args[1] {
break
}
if !(objabi.GOARM >= 6) {
break
}
v.reset(OpARMREV16)
v.AddArg(x)
return true
}
// match: (XORshiftLL x (SLLconst x [c]) [d])
// cond: c==d
// result: (MOVWconst [0])
......
......@@ -171,6 +171,9 @@ func ReverseBytes32(n uint32) uint32 {
// ReverseBytes16 returns bits.ReverseBytes16(n).
//
// NOTE(review): the architecture-tagged comments inside the body look like
// assembly-check directives (opcodes expected, or with a leading "-" forbidden,
// for each target) that apply to the return statement below — confirm against
// the codegen test harness before moving or reformatting them.
func ReverseBytes16(n uint16) uint16 {
// amd64:"ROLW"
// arm64:"REV16W",-"UBFX",-"ORR"
// arm/5:"SLL","SRL","ORR"
// arm/6:"REV16"
// arm/7:"REV16"
return bits.ReverseBytes16(n)
}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment