Commit 192b675f authored by erifan01, committed by Cherry Zhang

cmd/compile: add an optimization rule for math/bits.ReverseBytes16 on arm64

On amd64 ReverseBytes16 is lowered to a rotate instruction. However, arm64 doesn't
have a 16-bit rotate instruction, but it does have a REV16W instruction that can be used
for ReverseBytes16. This CL adds a rule to turn patterns like (x<<8) | (x>>8)
(where x has type uint16, and "|" can also be "^" or "+") into a REV16W instruction.

Code:
func reverseBytes16(i uint16) uint16 { return bits.ReverseBytes16(i) }

Before:
        0x0004 00004 (test.go:6)        MOVHU   "".i(FP), R0
        0x0008 00008 ($GOROOT/src/math/bits/bits.go:262)        UBFX    $8, R0, $8, R1
        0x000c 00012 ($GOROOT/src/math/bits/bits.go:262)        ORR     R0<<8, R1, R0
        0x0010 00016 (test.go:6)        MOVH    R0, "".~r1+8(FP)
        0x0014 00020 (test.go:6)        RET     (R30)

After:
        0x0000 00000 (test.go:6)        MOVHU   "".i(FP), R0
        0x0004 00004 (test.go:6)        REV16W  R0, R0
        0x0008 00008 (test.go:6)        MOVH    R0, "".~r1+8(FP)
        0x000c 00012 (test.go:6)        RET     (R30)

Benchmarks:
name                old time/op       new time/op       delta
ReverseBytes-224    1.000000ns +- 0%  1.000000ns +- 0%     ~     (all equal)
ReverseBytes16-224  1.500000ns +- 0%  1.000000ns +- 0%  -33.33%  (p=0.000 n=9+10)
ReverseBytes32-224  1.000000ns +- 0%  1.000000ns +- 0%     ~     (all equal)
ReverseBytes64-224  1.000000ns +- 0%  1.000000ns +- 0%     ~     (all equal)

Change-Id: I87cd41b2d8e549bf39c601f185d5775bd42d739c
Reviewed-on: https://go-review.googlesource.com/c/157757
Reviewed-by: Cherry Zhang <cherryyz@google.com>
Run-TryBot: Cherry Zhang <cherryyz@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
parent 40df9cc6
......@@ -1786,6 +1786,9 @@
(CMPconst [64] (SUB <t> (MOVDconst [32]) (ANDconst <t> [31] y))))) && cc.(Op) == OpARM64LessThanU
-> (RORW x y)
// ((x>>8) | (x<<8)) -> (REV16W x), the type of x is uint16, "|" can also be "^" or "+".
((ADDshiftLL|ORshiftLL|XORshiftLL) <typ.UInt16> [8] (UBFX <typ.UInt16> [arm64BFAuxInt(8, 8)] x) x) -> (REV16W x)
// Extract from reg pair
(ADDshiftLL [c] (SRLconst x [64-c]) x2) -> (EXTRconst [64-c] x2 x)
( ORshiftLL [c] (SRLconst x [64-c]) x2) -> (EXTRconst [64-c] x2 x)
......
......@@ -2304,6 +2304,8 @@ func rewriteValueARM64_OpARM64ADDconst_0(v *Value) bool {
func rewriteValueARM64_OpARM64ADDshiftLL_0(v *Value) bool {
b := v.Block
_ = b
typ := &b.Func.Config.Types
_ = typ
// match: (ADDshiftLL (MOVDconst [c]) x [d])
// cond:
// result: (ADDconst [c] (SLLconst <x.Type> x [d]))
......@@ -2387,6 +2389,35 @@ func rewriteValueARM64_OpARM64ADDshiftLL_0(v *Value) bool {
v.AddArg(x)
return true
}
// match: (ADDshiftLL <typ.UInt16> [8] (UBFX <typ.UInt16> [arm64BFAuxInt(8, 8)] x) x)
// cond:
// result: (REV16W x)
for {
if v.Type != typ.UInt16 {
break
}
if v.AuxInt != 8 {
break
}
_ = v.Args[1]
v_0 := v.Args[0]
if v_0.Op != OpARM64UBFX {
break
}
if v_0.Type != typ.UInt16 {
break
}
if v_0.AuxInt != arm64BFAuxInt(8, 8) {
break
}
x := v_0.Args[0]
if x != v.Args[1] {
break
}
v.reset(OpARM64REV16W)
v.AddArg(x)
return true
}
// match: (ADDshiftLL [c] (SRLconst x [64-c]) x2)
// cond:
// result: (EXTRconst [64-c] x2 x)
......@@ -26504,6 +26535,8 @@ func rewriteValueARM64_OpARM64ORconst_0(v *Value) bool {
func rewriteValueARM64_OpARM64ORshiftLL_0(v *Value) bool {
b := v.Block
_ = b
typ := &b.Func.Config.Types
_ = typ
// match: (ORshiftLL (MOVDconst [c]) x [d])
// cond:
// result: (ORconst [c] (SLLconst <x.Type> x [d]))
......@@ -26610,6 +26643,35 @@ func rewriteValueARM64_OpARM64ORshiftLL_0(v *Value) bool {
v.AddArg(x)
return true
}
// match: (ORshiftLL <typ.UInt16> [8] (UBFX <typ.UInt16> [arm64BFAuxInt(8, 8)] x) x)
// cond:
// result: (REV16W x)
for {
if v.Type != typ.UInt16 {
break
}
if v.AuxInt != 8 {
break
}
_ = v.Args[1]
v_0 := v.Args[0]
if v_0.Op != OpARM64UBFX {
break
}
if v_0.Type != typ.UInt16 {
break
}
if v_0.AuxInt != arm64BFAuxInt(8, 8) {
break
}
x := v_0.Args[0]
if x != v.Args[1] {
break
}
v.reset(OpARM64REV16W)
v.AddArg(x)
return true
}
// match: (ORshiftLL [c] (SRLconst x [64-c]) x2)
// cond:
// result: (EXTRconst [64-c] x2 x)
......@@ -26739,6 +26801,11 @@ func rewriteValueARM64_OpARM64ORshiftLL_0(v *Value) bool {
v0.AddArg(mem)
return true
}
return false
}
func rewriteValueARM64_OpARM64ORshiftLL_10(v *Value) bool {
b := v.Block
_ = b
// match: (ORshiftLL <t> [8] y0:(MOVDnop x0:(MOVBUloadidx ptr0 idx0 mem)) y1:(MOVDnop x1:(MOVBUload [1] {s} p1:(ADD ptr1 idx1) mem)))
// cond: s == nil && x0.Uses == 1 && x1.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && mergePoint(b,x0,x1) != nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x0) && clobber(x1) && clobber(y0) && clobber(y1)
// result: @mergePoint(b,x0,x1) (MOVHUloadidx <t> ptr0 idx0 mem)
......@@ -26795,11 +26862,6 @@ func rewriteValueARM64_OpARM64ORshiftLL_0(v *Value) bool {
v0.AddArg(mem)
return true
}
return false
}
func rewriteValueARM64_OpARM64ORshiftLL_10(v *Value) bool {
b := v.Block
_ = b
// match: (ORshiftLL <t> [8] y0:(MOVDnop x0:(MOVBUloadidx ptr idx mem)) y1:(MOVDnop x1:(MOVBUloadidx ptr (ADDconst [1] idx) mem)))
// cond: x0.Uses == 1 && x1.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && mergePoint(b,x0,x1) != nil && clobber(x0) && clobber(x1) && clobber(y0) && clobber(y1)
// result: @mergePoint(b,x0,x1) (MOVHUloadidx <t> ptr idx mem)
......@@ -27754,6 +27816,11 @@ func rewriteValueARM64_OpARM64ORshiftLL_10(v *Value) bool {
v0.AddArg(mem)
return true
}
return false
}
func rewriteValueARM64_OpARM64ORshiftLL_20(v *Value) bool {
b := v.Block
_ = b
// match: (ORshiftLL <t> [8] y0:(MOVDnop x0:(MOVBUload [i1] {s} p mem)) y1:(MOVDnop x1:(MOVBUload [i0] {s} p mem)))
// cond: i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && mergePoint(b,x0,x1) != nil && clobber(x0) && clobber(x1) && clobber(y0) && clobber(y1)
// result: @mergePoint(b,x0,x1) (REV16W <t> (MOVHUload <t> [i0] {s} p mem))
......@@ -27810,11 +27877,6 @@ func rewriteValueARM64_OpARM64ORshiftLL_10(v *Value) bool {
v0.AddArg(v1)
return true
}
return false
}
func rewriteValueARM64_OpARM64ORshiftLL_20(v *Value) bool {
b := v.Block
_ = b
// match: (ORshiftLL <t> [8] y0:(MOVDnop x0:(MOVBUload [1] {s} p1:(ADD ptr1 idx1) mem)) y1:(MOVDnop x1:(MOVBUloadidx ptr0 idx0 mem)))
// cond: s == nil && x0.Uses == 1 && x1.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && mergePoint(b,x0,x1) != nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x0) && clobber(x1) && clobber(y0) && clobber(y1)
// result: @mergePoint(b,x0,x1) (REV16W <t> (MOVHUloadidx <t> ptr0 idx0 mem))
......@@ -31905,6 +31967,8 @@ func rewriteValueARM64_OpARM64XORconst_0(v *Value) bool {
func rewriteValueARM64_OpARM64XORshiftLL_0(v *Value) bool {
b := v.Block
_ = b
typ := &b.Func.Config.Types
_ = typ
// match: (XORshiftLL (MOVDconst [c]) x [d])
// cond:
// result: (XORconst [c] (SLLconst <x.Type> x [d]))
......@@ -32010,6 +32074,35 @@ func rewriteValueARM64_OpARM64XORshiftLL_0(v *Value) bool {
v.AddArg(x)
return true
}
// match: (XORshiftLL <typ.UInt16> [8] (UBFX <typ.UInt16> [arm64BFAuxInt(8, 8)] x) x)
// cond:
// result: (REV16W x)
for {
if v.Type != typ.UInt16 {
break
}
if v.AuxInt != 8 {
break
}
_ = v.Args[1]
v_0 := v.Args[0]
if v_0.Op != OpARM64UBFX {
break
}
if v_0.Type != typ.UInt16 {
break
}
if v_0.AuxInt != arm64BFAuxInt(8, 8) {
break
}
x := v_0.Args[0]
if x != v.Args[1] {
break
}
v.reset(OpARM64REV16W)
v.AddArg(x)
return true
}
// match: (XORshiftLL [c] (SRLconst x [64-c]) x2)
// cond:
// result: (EXTRconst [64-c] x2 x)
......
......@@ -170,6 +170,7 @@ func ReverseBytes32(n uint32) uint32 {
// ReverseBytes16 returns bits.ReverseBytes16(n).
//
// NOTE(review): the architecture-prefixed comments inside the body appear to
// be per-architecture expectations on the generated assembly (a quoted name is
// an instruction that should be present, a leading minus sign marks one that
// should be absent) — confirm against the test harness before editing or
// moving them, since they are presumably tied to the line that follows.
func ReverseBytes16(n uint16) uint16 {
// amd64:"ROLW"
// arm64:"REV16W",-"UBFX",-"ORR"
return bits.ReverseBytes16(n)
}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment