Commit 204cc14b authored by erifan01's avatar erifan01 Committed by Cherry Zhang

cmd/compile: implement non-constant rotates using ROR on arm64

Add some rules to match the Go code like:
	y &= 63
	x << y | x >> (64-y)
or
	y &= 63
	x >> y | x << (64-y)
as a ROR instruction. Make math/bits.RotateLeft faster on arm64.

Extends CL 132435 to arm64.

Benchmarks of math/bits.RotateLeftxxN:
name            old time/op       new time/op       delta
RotateLeft-8    3.548750ns +- 1%  2.003750ns +- 0%  -43.54%  (p=0.000 n=8+8)
RotateLeft8-8   3.925000ns +- 0%  3.925000ns +- 0%     ~     (p=1.000 n=8+8)
RotateLeft16-8  3.925000ns +- 0%  3.927500ns +- 0%     ~     (p=0.608 n=8+8)
RotateLeft32-8  3.925000ns +- 0%  2.002500ns +- 0%  -48.98%  (p=0.000 n=8+8)
RotateLeft64-8  3.536250ns +- 0%  2.003750ns +- 0%  -43.34%  (p=0.000 n=8+8)

Change-Id: I77622cd7f39b917427e060647321f5513973232c
Reviewed-on: https://go-review.googlesource.com/122542
Run-TryBot: Ben Shi <powerman1st@163.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: default avatarCherry Zhang <cherryyz@google.com>
parent d8c8a142
......@@ -195,7 +195,9 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
ssa.OpARM64FNMULS,
ssa.OpARM64FNMULD,
ssa.OpARM64FDIVS,
ssa.OpARM64FDIVD:
ssa.OpARM64FDIVD,
ssa.OpARM64ROR,
ssa.OpARM64RORW:
r := v.Reg()
r1 := v.Args[0].Reg()
r2 := v.Args[1].Reg()
......
......@@ -3361,12 +3361,12 @@ func init() {
func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
return s.newValue2(ssa.OpRotateLeft32, types.Types[TUINT32], args[0], args[1])
},
sys.AMD64, sys.S390X)
sys.AMD64, sys.ARM64, sys.S390X)
addF("math/bits", "RotateLeft64",
func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
return s.newValue2(ssa.OpRotateLeft64, types.Types[TUINT64], args[0], args[1])
},
sys.AMD64, sys.S390X)
sys.AMD64, sys.ARM64, sys.S390X)
alias("math/bits", "RotateLeft", "math/bits", "RotateLeft64", p8...)
makeOnesCountAMD64 := func(op64 ssa.Op, op32 ssa.Op) func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
......
......@@ -89,6 +89,10 @@
(Round x) -> (FRINTAD x)
(Trunc x) -> (FRINTZD x)
// lowering rotates
(RotateLeft32 x y) -> (RORW x (NEG <y.Type> y))
(RotateLeft64 x y) -> (ROR x (NEG <y.Type> y))
(Ctz64NonZero x) -> (Ctz64 x)
(Ctz32NonZero x) -> (Ctz32 x)
......@@ -127,6 +131,8 @@
// shifts
// hardware instruction uses only the low 6 bits of the shift
// we compare to 64 to ensure Go semantics for large shifts
// Rules about rotates with non-const shift are based on the following rules,
// if the following rules change, please also modify the rules based on them.
(Lsh64x64 <t> x y) -> (CSEL {OpARM64LessThanU} (SLL <t> x y) (Const64 <t> [0]) (CMPconst [64] y))
(Lsh64x32 <t> x y) -> (CSEL {OpARM64LessThanU} (SLL <t> x (ZeroExt32to64 y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt32to64 y)))
(Lsh64x16 <t> x y) -> (CSEL {OpARM64LessThanU} (SLL <t> x (ZeroExt16to64 y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt16to64 y)))
......@@ -1592,7 +1598,7 @@
(ORNshiftRL x (SRLconst x [c]) [d]) && c==d -> (MOVDconst [-1])
(ORNshiftRA x (SRAconst x [c]) [d]) && c==d -> (MOVDconst [-1])
// Generate rotates
// Generate rotates with const shift
(ADDshiftLL [c] (SRLconst x [64-c]) x) -> (RORconst [64-c] x)
( ORshiftLL [c] (SRLconst x [64-c]) x) -> (RORconst [64-c] x)
(XORshiftLL [c] (SRLconst x [64-c]) x) -> (RORconst [64-c] x)
......@@ -1610,6 +1616,38 @@
( ORshiftRL <t> [c] (SLLconst x [32-c]) (MOVWUreg x)) && c < 32 && t.Size() == 4 -> (RORWconst [c] x)
(XORshiftRL <t> [c] (SLLconst x [32-c]) (MOVWUreg x)) && c < 32 && t.Size() == 4 -> (RORWconst [c] x)
(RORconst [c] (RORconst [d] x)) -> (RORconst [(c+d)&63] x)
(RORWconst [c] (RORWconst [d] x)) -> (RORWconst [(c+d)&31] x)
// Generate rotates with non-const shift.
// These rules match the Go source code like
// y &= 63
// x << y | x >> (64-y)
// "|" can also be "^" or "+".
// As arm64 does not have a ROL instruction, so ROL(x, y) is replaced by ROR(x, -y).
((ADD|OR|XOR) (SLL x (ANDconst <t> [63] y))
(CSEL0 <typ.UInt64> {cc} (SRL <typ.UInt64> x (SUB <t> (MOVDconst [64]) (ANDconst <t> [63] y)))
(CMPconst [64] (SUB <t> (MOVDconst [64]) (ANDconst <t> [63] y))))) && cc.(Op) == OpARM64LessThanU
-> (ROR x (NEG <t> y))
((ADD|OR|XOR) (SRL <typ.UInt64> x (ANDconst <t> [63] y))
(CSEL0 <typ.UInt64> {cc} (SLL x (SUB <t> (MOVDconst [64]) (ANDconst <t> [63] y)))
(CMPconst [64] (SUB <t> (MOVDconst [64]) (ANDconst <t> [63] y))))) && cc.(Op) == OpARM64LessThanU
-> (ROR x y)
// These rules match the Go source code like
// y &= 31
// x << y | x >> (32-y)
// "|" can also be "^" or "+".
// As arm64 does not have a ROLW instruction, so ROLW(x, y) is replaced by RORW(x, -y).
((ADD|OR|XOR) (SLL x (ANDconst <t> [31] y))
(CSEL0 <typ.UInt32> {cc} (SRL <typ.UInt32> (MOVWUreg x) (SUB <t> (MOVDconst [32]) (ANDconst <t> [31] y)))
(CMPconst [64] (SUB <t> (MOVDconst [32]) (ANDconst <t> [31] y))))) && cc.(Op) == OpARM64LessThanU
-> (RORW x (NEG <t> y))
((ADD|OR|XOR) (SRL <typ.UInt32> (MOVWUreg x) (ANDconst <t> [31] y))
(CSEL0 <typ.UInt32> {cc} (SLL x (SUB <t> (MOVDconst [32]) (ANDconst <t> [31] y)))
(CMPconst [64] (SUB <t> (MOVDconst [32]) (ANDconst <t> [31] y))))) && cc.(Op) == OpARM64LessThanU
-> (RORW x y)
// Extract from reg pair
(ADDshiftLL [c] (SRLconst x [64-c]) x2) -> (EXTRconst [64-c] x2 x)
( ORshiftLL [c] (SRLconst x [64-c]) x2) -> (EXTRconst [64-c] x2 x)
......
......@@ -248,6 +248,8 @@ func init() {
{name: "SRLconst", argLength: 1, reg: gp11, asm: "LSR", aux: "Int64"}, // arg0 >> auxInt, unsigned
{name: "SRA", argLength: 2, reg: gp21, asm: "ASR"}, // arg0 >> arg1, signed, shift amount is mod 64
{name: "SRAconst", argLength: 1, reg: gp11, asm: "ASR", aux: "Int64"}, // arg0 >> auxInt, signed
{name: "ROR", argLength: 2, reg: gp21, asm: "ROR"}, // arg0 right rotate by (arg1 mod 64) bits
{name: "RORW", argLength: 2, reg: gp21, asm: "RORW"}, // arg0 right rotate by (arg1 mod 32) bits
{name: "RORconst", argLength: 1, reg: gp11, asm: "ROR", aux: "Int64"}, // arg0 right rotate by auxInt bits
{name: "RORWconst", argLength: 1, reg: gp11, asm: "RORW", aux: "Int64"}, // uint32(arg0) right rotate by auxInt bits
{name: "EXTRconst", argLength: 2, reg: gp21, asm: "EXTR", aux: "Int64"}, // extract 64 bits from arg0:arg1 starting at lsb auxInt
......
......@@ -1139,6 +1139,8 @@ const (
OpARM64SRLconst
OpARM64SRA
OpARM64SRAconst
OpARM64ROR
OpARM64RORW
OpARM64RORconst
OpARM64RORWconst
OpARM64EXTRconst
......@@ -15104,6 +15106,34 @@ var opcodeTable = [...]opInfo{
},
},
},
{
name: "ROR",
argLen: 2,
asm: arm64.AROR,
reg: regInfo{
inputs: []inputInfo{
{0, 805044223}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
{1, 805044223}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
},
outputs: []outputInfo{
{0, 670826495}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 R30
},
},
},
{
name: "RORW",
argLen: 2,
asm: arm64.ARORW,
reg: regInfo{
inputs: []inputInfo{
{0, 805044223}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
{1, 805044223}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
},
outputs: []outputInfo{
{0, 670826495}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 R30
},
},
},
{
name: "RORconst",
auxType: auxInt64,
......
......@@ -195,6 +195,7 @@ func RotateLeft8(n uint8) uint8 {
func RotateLeftVariable(n uint, m int) uint {
// amd64:"ROLQ"
// arm64:"ROR"
// ppc64:"ROTL"
// s390x:"RLLG"
return bits.RotateLeft(n, m)
......@@ -202,6 +203,7 @@ func RotateLeftVariable(n uint, m int) uint {
func RotateLeftVariable64(n uint64, m int) uint64 {
// amd64:"ROLQ"
// arm64:"ROR"
// ppc64:"ROTL"
// s390x:"RLLG"
return bits.RotateLeft64(n, m)
......@@ -209,6 +211,7 @@ func RotateLeftVariable64(n uint64, m int) uint64 {
func RotateLeftVariable32(n uint32, m int) uint32 {
// amd64:"ROLL"
// arm64:"RORW"
// ppc64:"ROTLW"
// s390x:"RLL"
return bits.RotateLeft32(n, m)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment