Commit fa3fe2e3 authored by Lynn Boger

cmd/compile, math/bits: add rotate rules to PPC64.rules

This adds rules to match the code in math/bits RotateLeft,
RotateLeft32, and RotateLeft64 to allow them to be inlined.

The rules are complicated because the code in these functions
uses different types, and the non-const versions of these
shifts generate Mask and Carry instructions that become
subexpressions during the match process.
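
For context, the shift-and-OR shape these rules target looks roughly
like this (an illustrative sketch of the math/bits rotate pattern; the
function name here is not the stdlib's):

    // rotateLeft32 mirrors the pattern math/bits.RotateLeft32 compiles to:
    // a variable left shift OR'd with the complementary right shift, with
    // the rotate count masked to the word size. The new rules fold this
    // tree into a single ROTLW on PPC64.
    func rotateLeft32(x uint32, k int) uint32 {
        const n = 32
        s := uint(k) & (n - 1) // non-const shift count, masked mod 32
        return x<<s | x>>(n-s) // SLW/SRW pair that becomes ROTLW
    }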

Also adds a testcase to asm_test.go.

Improvement in math/bits:

benchmark                    old ns/op  new ns/op  delta
BenchmarkRotateLeft-16       1.57       1.32       -15.92%
BenchmarkRotateLeft32-16     1.60       1.37       -14.37%
BenchmarkRotateLeft64-16     1.57       1.32       -15.92%
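
These come from the package's own benchmarks; something like the
following should reproduce them on a ppc64le host (exact deltas will
vary by machine):

    go test math/bits -run NONE -bench RotateLeft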

Updates #21390

Change-Id: Ib6f17669ecc9cab54f18d690be27e2225ca654a4
Reviewed-on: https://go-review.googlesource.com/59932
Run-TryBot: Lynn Boger <laboger@linux.vnet.ibm.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: David Chase <drchase@google.com>
parent 4768408e
@@ -265,7 +265,7 @@ var allAsmTests = []*asmTests{
     {
         arch:    "ppc64le",
         os:      "linux",
-        imports: []string{"math"},
+        imports: []string{"math", "math/bits"},
         tests:   linuxPPC64LETests,
     },
     {
@@ -2004,6 +2004,23 @@ var linuxPPC64LETests = []*asmTest{
         `,
         pos: []string{"\tROTL\t"},
     },
+    {
+        fn: `
+        func f10(a uint32) uint32 {
+            return bits.RotateLeft32(a, 9)
+        }
+        `,
+        pos: []string{"\tROTLW\t"},
+    },
+    {
+        fn: `
+        func f11(a uint64) uint64 {
+            return bits.RotateLeft64(a, 37)
+        }
+        `,
+        pos: []string{"\tROTL\t"},
+    },
     {
         // check that stack store is optimized away
         fn: `
@@ -37,6 +37,7 @@ func initssaconfig() {
         Float32: types.Types[TFLOAT32],
         Float64: types.Types[TFLOAT64],
         Int:     types.Types[TINT],
+        UInt:    types.Types[TUINT],
         Uintptr: types.Types[TUINTPTR],
         String:  types.Types[TSTRING],
         BytePtr: types.NewPtr(types.Types[TUINT8]),
@@ -542,6 +542,7 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
     case ssa.OpPPC64ADD, ssa.OpPPC64FADD, ssa.OpPPC64FADDS, ssa.OpPPC64SUB, ssa.OpPPC64FSUB, ssa.OpPPC64FSUBS,
         ssa.OpPPC64MULLD, ssa.OpPPC64MULLW, ssa.OpPPC64DIVDU, ssa.OpPPC64DIVWU,
         ssa.OpPPC64SRAD, ssa.OpPPC64SRAW, ssa.OpPPC64SRD, ssa.OpPPC64SRW, ssa.OpPPC64SLD, ssa.OpPPC64SLW,
+        ssa.OpPPC64ROTL, ssa.OpPPC64ROTLW,
         ssa.OpPPC64MULHD, ssa.OpPPC64MULHW, ssa.OpPPC64MULHDU, ssa.OpPPC64MULHWU,
         ssa.OpPPC64FMUL, ssa.OpPPC64FMULS, ssa.OpPPC64FDIV, ssa.OpPPC64FDIVS,
         ssa.OpPPC64AND, ssa.OpPPC64OR, ssa.OpPPC64ANDN, ssa.OpPPC64ORN, ssa.OpPPC64NOR, ssa.OpPPC64XOR, ssa.OpPPC64EQV:
@@ -59,6 +59,7 @@ type Types struct {
     Int     *types.Type
     Float32 *types.Type
     Float64 *types.Type
+    UInt    *types.Type
     Uintptr *types.Type
     String  *types.Type
     BytePtr *types.Type // TODO: use unsafe.Pointer instead?
@@ -88,7 +88,7 @@
 (ConstNil) -> (MOVDconst [0])
 (ConstBool [b]) -> (MOVDconst [b])
 
-// Rotate generation
+// Rotate generation with const shift
 (ADD (SLDconst x [c]) (SRDconst x [d])) && d == 64-c -> (ROTLconst [c] x)
 ( OR (SLDconst x [c]) (SRDconst x [d])) && d == 64-c -> (ROTLconst [c] x)
 (XOR (SLDconst x [c]) (SRDconst x [d])) && d == 64-c -> (ROTLconst [c] x)
@@ -97,6 +97,16 @@
 ( OR (SLWconst x [c]) (SRWconst x [d])) && d == 32-c -> (ROTLWconst [c] x)
 (XOR (SLWconst x [c]) (SRWconst x [d])) && d == 32-c -> (ROTLWconst [c] x)
 
+// Rotate generation with non-const shift
+// these match patterns from math/bits RotateLeft[32|64], but there could be others
+(ADD (SLD x (ANDconst <typ.Int64> [63] y)) (SRD x (SUB <typ.UInt> (MOVDconst [64]) (ANDconst <typ.UInt> [63] y)))) -> (ROTL x y)
+( OR (SLD x (ANDconst <typ.Int64> [63] y)) (SRD x (SUB <typ.UInt> (MOVDconst [64]) (ANDconst <typ.UInt> [63] y)))) -> (ROTL x y)
+(XOR (SLD x (ANDconst <typ.Int64> [63] y)) (SRD x (SUB <typ.UInt> (MOVDconst [64]) (ANDconst <typ.UInt> [63] y)))) -> (ROTL x y)
+(ADD (SLW x (ANDconst <typ.Int32> [31] y)) (SRW x (SUB <typ.UInt> (MOVDconst [32]) (ANDconst <typ.UInt> [31] y)))) -> (ROTLW x y)
+( OR (SLW x (ANDconst <typ.Int32> [31] y)) (SRW x (SUB <typ.UInt> (MOVDconst [32]) (ANDconst <typ.UInt> [31] y)))) -> (ROTLW x y)
+(XOR (SLW x (ANDconst <typ.Int32> [31] y)) (SRW x (SUB <typ.UInt> (MOVDconst [32]) (ANDconst <typ.UInt> [31] y)))) -> (ROTLW x y)
+
 (Lsh64x64 x (Const64 [c])) && uint64(c) < 64 -> (SLDconst x [c])
 (Rsh64x64 x (Const64 [c])) && uint64(c) < 64 -> (SRADconst x [c])
 (Rsh64Ux64 x (Const64 [c])) && uint64(c) < 64 -> (SRDconst x [c])
@@ -166,10 +176,38 @@
 (Rsh8x32 x (MOVDconst [c])) && uint32(c) < 8 -> (SRAWconst (SignExt8to32 x) [c])
 (Rsh8Ux32 x (MOVDconst [c])) && uint32(c) < 8 -> (SRWconst (ZeroExt8to32 x) [c])
 
+// non-constant rotates
+// These are subexpressions found in statements that can become rotates
+// In these cases the shift count is known to be < 64 so the more complicated expressions
+// with Mask & Carry are not needed
+(Lsh64x64 x (AND y (MOVDconst [63]))) -> (SLD x (ANDconst <typ.Int64> [63] y))
+(Lsh64x64 x (ANDconst <typ.Int64> [63] y)) -> (SLD x (ANDconst <typ.Int64> [63] y))
+(Rsh64Ux64 x (AND y (MOVDconst [63]))) -> (SRD x (ANDconst <typ.Int64> [63] y))
+(Rsh64Ux64 x (ANDconst <typ.UInt> [63] y)) -> (SRD x (ANDconst <typ.UInt> [63] y))
+(Rsh64Ux64 x (SUB <typ.UInt> (MOVDconst [64]) (ANDconst <typ.UInt> [63] y))) -> (SRD x (SUB <typ.UInt> (MOVDconst [64]) (ANDconst <typ.UInt> [63] y)))
+(Rsh64Ux64 x (SUB <typ.UInt> (MOVDconst [64]) (AND <typ.UInt> y (MOVDconst [63])))) -> (SRD x (SUB <typ.UInt> (MOVDconst [64]) (ANDconst <typ.UInt> [63] y)))
+(Rsh64x64 x (AND y (MOVDconst [63]))) -> (SRAD x (ANDconst <typ.Int64> [63] y))
+(Rsh64x64 x (ANDconst <typ.UInt> [63] y)) -> (SRAD x (ANDconst <typ.UInt> [63] y))
+(Rsh64x64 x (SUB <typ.UInt> (MOVDconst [64]) (ANDconst <typ.UInt> [63] y))) -> (SRAD x (SUB <typ.UInt> (MOVDconst [64]) (ANDconst <typ.UInt> [63] y)))
+(Rsh64x64 x (SUB <typ.UInt> (MOVDconst [64]) (AND <typ.UInt> y (MOVDconst [63])))) -> (SRAD x (SUB <typ.UInt> (MOVDconst [64]) (ANDconst <typ.UInt> [63] y)))
 (Rsh64x64 x y) -> (SRAD x (ORN y <typ.Int64> (MaskIfNotCarry (ADDconstForCarry [-64] y))))
 (Rsh64Ux64 x y) -> (SRD x (ORN y <typ.Int64> (MaskIfNotCarry (ADDconstForCarry [-64] y))))
 (Lsh64x64 x y) -> (SLD x (ORN y <typ.Int64> (MaskIfNotCarry (ADDconstForCarry [-64] y))))
 
+(Lsh32x64 x (AND y (MOVDconst [31]))) -> (SLW x (ANDconst <typ.Int32> [31] y))
+(Lsh32x64 x (ANDconst <typ.Int32> [31] y)) -> (SLW x (ANDconst <typ.Int32> [31] y))
+(Rsh32Ux64 x (AND y (MOVDconst [31]))) -> (SRW x (ANDconst <typ.Int32> [31] y))
+(Rsh32Ux64 x (ANDconst <typ.UInt> [31] y)) -> (SRW x (ANDconst <typ.UInt> [31] y))
+(Rsh32Ux64 x (SUB <typ.UInt> (MOVDconst [32]) (ANDconst <typ.UInt> [31] y))) -> (SRW x (SUB <typ.UInt> (MOVDconst [32]) (ANDconst <typ.UInt> [31] y)))
+(Rsh32Ux64 x (SUB <typ.UInt> (MOVDconst [32]) (AND <typ.UInt> y (MOVDconst [31])))) -> (SRW x (SUB <typ.UInt> (MOVDconst [32]) (ANDconst <typ.UInt> [31] y)))
+(Rsh32x64 x (AND y (MOVDconst [31]))) -> (SRAW x (ANDconst <typ.Int32> [31] y))
+(Rsh32x64 x (ANDconst <typ.UInt> [31] y)) -> (SRAW x (ANDconst <typ.UInt> [31] y))
+(Rsh32x64 x (SUB <typ.UInt> (MOVDconst [32]) (ANDconst <typ.UInt> [31] y))) -> (SRAW x (SUB <typ.UInt> (MOVDconst [32]) (ANDconst <typ.UInt> [31] y)))
+(Rsh32x64 x (SUB <typ.UInt> (MOVDconst [32]) (AND <typ.UInt> y (MOVDconst [31])))) -> (SRAW x (SUB <typ.UInt> (MOVDconst [32]) (ANDconst <typ.UInt> [31] y)))
 (Rsh32x64 x y) -> (SRAW x (ORN y <typ.Int64> (MaskIfNotCarry (ADDconstForCarry [-32] y))))
 (Rsh32Ux64 x y) -> (SRW x (ORN y <typ.Int64> (MaskIfNotCarry (ADDconstForCarry [-32] y))))
 (Lsh32x64 x y) -> (SLW x (ORN y <typ.Int64> (MaskIfNotCarry (ADDconstForCarry [-32] y))))
@@ -182,7 +220,6 @@
 (Rsh8Ux64 x y) -> (SRW (ZeroExt8to32 x) (ORN y <typ.Int64> (MaskIfNotCarry (ADDconstForCarry [-8] y))))
 (Lsh8x64 x y) -> (SLW x (ORN y <typ.Int64> (MaskIfNotCarry (ADDconstForCarry [-8] y))))
 
 (Rsh64x32 x y) -> (SRAD x (ORN y <typ.Int64> (MaskIfNotCarry (ADDconstForCarry [-64] (ZeroExt32to64 y)))))
 (Rsh64Ux32 x y) -> (SRD x (ORN y <typ.Int64> (MaskIfNotCarry (ADDconstForCarry [-64] (ZeroExt32to64 y)))))
 (Lsh64x32 x y) -> (SLD x (ORN y <typ.Int64> (MaskIfNotCarry (ADDconstForCarry [-64] (ZeroExt32to64 y)))))
@@ -186,6 +186,9 @@ func init() {
     {name: "SLD", argLength: 2, reg: gp21, asm: "SLD"}, // arg0 << arg1, 64 bits (0 if arg1 & 64 != 0)
     {name: "SLW", argLength: 2, reg: gp21, asm: "SLW"}, // arg0 << arg1, 32 bits (0 if arg1 & 32 != 0)
 
+    {name: "ROTL", argLength: 2, reg: gp21, asm: "ROTL"},   // arg0 rotate left by arg1 mod 64
+    {name: "ROTLW", argLength: 2, reg: gp21, asm: "ROTLW"}, // uint32(arg0) rotate left by arg1 mod 32
+
     {name: "ADDconstForCarry", argLength: 1, reg: regInfo{inputs: []regMask{gp | sp | sb}, clobbers: tmp}, aux: "Int16", asm: "ADDC", typ: "Flags"}, // _, carry := arg0 + aux
     {name: "MaskIfNotCarry", argLength: 1, reg: crgp, asm: "ADDME", typ: "Int64"}, // carry - 1 (if carry then 0 else -1)
@@ -1307,6 +1307,8 @@ const (
     OpPPC64SRW
     OpPPC64SLD
     OpPPC64SLW
+    OpPPC64ROTL
+    OpPPC64ROTLW
     OpPPC64ADDconstForCarry
     OpPPC64MaskIfNotCarry
     OpPPC64SRADconst
@@ -16732,6 +16734,34 @@ var opcodeTable = [...]opInfo{
         },
     },
 },
+{
+    name:   "ROTL",
+    argLen: 2,
+    asm:    ppc64.AROTL,
+    reg: regInfo{
+        inputs: []inputInfo{
+            {0, 1073733630}, // SP SB R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29
+            {1, 1073733630}, // SP SB R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29
+        },
+        outputs: []outputInfo{
+            {0, 1073733624}, // R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29
+        },
+    },
+},
+{
+    name:   "ROTLW",
+    argLen: 2,
+    asm:    ppc64.AROTLW,
+    reg: regInfo{
+        inputs: []inputInfo{
+            {0, 1073733630}, // SP SB R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29
+            {1, 1073733630}, // SP SB R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29
+        },
+        outputs: []outputInfo{
+            {0, 1073733624}, // R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29
+        },
+    },
+},
 {
     name:    "ADDconstForCarry",
     auxType: auxInt16,