Commit b003afe4 authored by Brian Kessler's avatar Brian Kessler Committed by Brad Fitzpatrick

cmd/compile: intrinsify RotateLeft32 on wasm

wasm has 32-bit versions of all integer operations. This change
lowers RotateLeft32 to i32.rotl on wasm and intrinsifies the math/bits
call.  Benchmarking on amd64 under node.js this is ~25% faster.

node v10.15.3/amd64
name          old time/op  new time/op  delta
RotateLeft    8.37ns ± 1%  8.28ns ± 0%   -1.05%  (p=0.029 n=4+4)
RotateLeft8   11.9ns ± 1%  11.8ns ± 0%     ~     (p=0.167 n=5+5)
RotateLeft16  11.8ns ± 0%  11.8ns ± 0%     ~     (all equal)
RotateLeft32  11.9ns ± 1%   8.7ns ± 0%  -26.32%  (p=0.008 n=5+5)
RotateLeft64  8.31ns ± 1%  8.43ns ± 2%     ~     (p=0.063 n=5+5)

Updates #31265

Change-Id: I5b8e155978faeea536c4f6427ac9564d2f096a46
Reviewed-on: https://go-review.googlesource.com/c/go/+/182359
Run-TryBot: Brian Kessler <brian.m.kessler@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: default avatarRichard Musiol <neelance@gmail.com>
parent 12dbd20a
...@@ -3504,7 +3504,7 @@ func init() { ...@@ -3504,7 +3504,7 @@ func init() {
func(s *state, n *Node, args []*ssa.Value) *ssa.Value { func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
return s.newValue2(ssa.OpRotateLeft32, types.Types[TUINT32], args[0], args[1]) return s.newValue2(ssa.OpRotateLeft32, types.Types[TUINT32], args[0], args[1])
}, },
sys.AMD64, sys.ARM, sys.ARM64, sys.S390X, sys.PPC64) sys.AMD64, sys.ARM, sys.ARM64, sys.S390X, sys.PPC64, sys.Wasm)
addF("math/bits", "RotateLeft64", addF("math/bits", "RotateLeft64",
func(s *state, n *Node, args []*ssa.Value) *ssa.Value { func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
return s.newValue2(ssa.OpRotateLeft64, types.Types[TUINT64], args[0], args[1]) return s.newValue2(ssa.OpRotateLeft64, types.Types[TUINT64], args[0], args[1])
......
...@@ -147,7 +147,8 @@ ...@@ -147,7 +147,8 @@
// Lowering rotates // Lowering rotates
(RotateLeft8 <t> x (I64Const [c])) -> (Or8 (Lsh8x64 <t> x (I64Const [c&7])) (Rsh8Ux64 <t> x (I64Const [-c&7]))) (RotateLeft8 <t> x (I64Const [c])) -> (Or8 (Lsh8x64 <t> x (I64Const [c&7])) (Rsh8Ux64 <t> x (I64Const [-c&7])))
(RotateLeft16 <t> x (I64Const [c])) -> (Or16 (Lsh16x64 <t> x (I64Const [c&15])) (Rsh16Ux64 <t> x (I64Const [-c&15]))) (RotateLeft16 <t> x (I64Const [c])) -> (Or16 (Lsh16x64 <t> x (I64Const [c&15])) (Rsh16Ux64 <t> x (I64Const [-c&15])))
(RotateLeft32 <t> x (I64Const [c])) -> (Or32 (Lsh32x64 <t> x (I64Const [c&31])) (Rsh32Ux64 <t> x (I64Const [-c&31]))) (RotateLeft32 x y) -> (I32Rotl x y)
(RotateLeft64 x y) -> (I64Rotl x y)
// Lowering comparisons // Lowering comparisons
(Less64 x y) -> (I64LtS x y) (Less64 x y) -> (I64LtS x y)
...@@ -362,8 +363,6 @@ ...@@ -362,8 +363,6 @@
(BitLen64 x) -> (I64Sub (I64Const [64]) (I64Clz x)) (BitLen64 x) -> (I64Sub (I64Const [64]) (I64Clz x))
(RotateLeft64 x y) -> (I64Rotl x y)
(PopCount64 x) -> (I64Popcnt x) (PopCount64 x) -> (I64Popcnt x)
(PopCount32 x) -> (I64Popcnt (ZeroExt32to64 x)) (PopCount32 x) -> (I64Popcnt (ZeroExt32to64 x))
(PopCount16 x) -> (I64Popcnt (ZeroExt16to64 x)) (PopCount16 x) -> (I64Popcnt (ZeroExt16to64 x))
......
...@@ -206,6 +206,7 @@ func init() { ...@@ -206,6 +206,7 @@ func init() {
{name: "I64Ctz", asm: "I64Ctz", argLength: 1, reg: gp11, typ: "Int64"}, // ctz(arg0) {name: "I64Ctz", asm: "I64Ctz", argLength: 1, reg: gp11, typ: "Int64"}, // ctz(arg0)
{name: "I64Clz", asm: "I64Clz", argLength: 1, reg: gp11, typ: "Int64"}, // clz(arg0) {name: "I64Clz", asm: "I64Clz", argLength: 1, reg: gp11, typ: "Int64"}, // clz(arg0)
{name: "I32Rotl", asm: "I32Rotl", argLength: 2, reg: gp21, typ: "Int32"}, // rotl(arg0, arg1)
{name: "I64Rotl", asm: "I64Rotl", argLength: 2, reg: gp21, typ: "Int64"}, // rotl(arg0, arg1) {name: "I64Rotl", asm: "I64Rotl", argLength: 2, reg: gp21, typ: "Int64"}, // rotl(arg0, arg1)
{name: "I64Popcnt", asm: "I64Popcnt", argLength: 1, reg: gp11, typ: "Int64"}, // popcnt(arg0) {name: "I64Popcnt", asm: "I64Popcnt", argLength: 1, reg: gp11, typ: "Int64"}, // popcnt(arg0)
} }
......
...@@ -2168,6 +2168,7 @@ const ( ...@@ -2168,6 +2168,7 @@ const (
OpWasmF64Copysign OpWasmF64Copysign
OpWasmI64Ctz OpWasmI64Ctz
OpWasmI64Clz OpWasmI64Clz
OpWasmI32Rotl
OpWasmI64Rotl OpWasmI64Rotl
OpWasmI64Popcnt OpWasmI64Popcnt
...@@ -29140,6 +29141,20 @@ var opcodeTable = [...]opInfo{ ...@@ -29140,6 +29141,20 @@ var opcodeTable = [...]opInfo{
}, },
}, },
}, },
{
name: "I32Rotl",
argLen: 2,
asm: wasm.AI32Rotl,
reg: regInfo{
inputs: []inputInfo{
{0, 4295032831}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 SP
{1, 4295032831}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 SP
},
outputs: []outputInfo{
{0, 65535}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15
},
},
},
{ {
name: "I64Rotl", name: "I64Rotl",
argLen: 2, argLen: 2,
......
...@@ -3826,36 +3826,17 @@ func rewriteValueWasm_OpRotateLeft16_0(v *Value) bool { ...@@ -3826,36 +3826,17 @@ func rewriteValueWasm_OpRotateLeft16_0(v *Value) bool {
return false return false
} }
func rewriteValueWasm_OpRotateLeft32_0(v *Value) bool { func rewriteValueWasm_OpRotateLeft32_0(v *Value) bool {
b := v.Block // match: (RotateLeft32 x y)
typ := &b.Func.Config.Types
// match: (RotateLeft32 <t> x (I64Const [c]))
// cond: // cond:
// result: (Or32 (Lsh32x64 <t> x (I64Const [c&31])) (Rsh32Ux64 <t> x (I64Const [-c&31]))) // result: (I32Rotl x y)
for { for {
t := v.Type y := v.Args[1]
_ = v.Args[1]
x := v.Args[0] x := v.Args[0]
v_1 := v.Args[1] v.reset(OpWasmI32Rotl)
if v_1.Op != OpWasmI64Const { v.AddArg(x)
break v.AddArg(y)
}
c := v_1.AuxInt
v.reset(OpOr32)
v0 := b.NewValue0(v.Pos, OpLsh32x64, t)
v0.AddArg(x)
v1 := b.NewValue0(v.Pos, OpWasmI64Const, typ.Int64)
v1.AuxInt = c & 31
v0.AddArg(v1)
v.AddArg(v0)
v2 := b.NewValue0(v.Pos, OpRsh32Ux64, t)
v2.AddArg(x)
v3 := b.NewValue0(v.Pos, OpWasmI64Const, typ.Int64)
v3.AuxInt = -c & 31
v2.AddArg(v3)
v.AddArg(v2)
return true return true
} }
return false
} }
func rewriteValueWasm_OpRotateLeft64_0(v *Value) bool { func rewriteValueWasm_OpRotateLeft64_0(v *Value) bool {
// match: (RotateLeft64 x y) // match: (RotateLeft64 x y)
......
...@@ -299,6 +299,12 @@ func ssaGenValueOnStack(s *gc.SSAGenState, v *ssa.Value, extend bool) { ...@@ -299,6 +299,12 @@ func ssaGenValueOnStack(s *gc.SSAGenState, v *ssa.Value, extend bool) {
getValue64(s, v.Args[1]) getValue64(s, v.Args[1])
s.Prog(v.Op.Asm()) s.Prog(v.Op.Asm())
case ssa.OpWasmI32Rotl:
getValue32(s, v.Args[0])
getValue32(s, v.Args[1])
s.Prog(wasm.AI32Rotl)
s.Prog(wasm.AI64ExtendI32U)
case ssa.OpWasmI64DivS: case ssa.OpWasmI64DivS:
getValue64(s, v.Args[0]) getValue64(s, v.Args[0])
getValue64(s, v.Args[1]) getValue64(s, v.Args[1])
......
...@@ -213,6 +213,7 @@ func RotateLeft32(n uint32) uint32 { ...@@ -213,6 +213,7 @@ func RotateLeft32(n uint32) uint32 {
// ppc64:"ROTLW" // ppc64:"ROTLW"
// ppc64le:"ROTLW" // ppc64le:"ROTLW"
// s390x:"RLL" // s390x:"RLL"
// wasm:"I32Rotl"
return bits.RotateLeft32(n, 9) return bits.RotateLeft32(n, 9)
} }
...@@ -232,6 +233,7 @@ func RotateLeftVariable(n uint, m int) uint { ...@@ -232,6 +233,7 @@ func RotateLeftVariable(n uint, m int) uint {
// ppc64:"ROTL" // ppc64:"ROTL"
// ppc64le:"ROTL" // ppc64le:"ROTL"
// s390x:"RLLG" // s390x:"RLLG"
// wasm:"I64Rotl"
return bits.RotateLeft(n, m) return bits.RotateLeft(n, m)
} }
...@@ -241,6 +243,7 @@ func RotateLeftVariable64(n uint64, m int) uint64 { ...@@ -241,6 +243,7 @@ func RotateLeftVariable64(n uint64, m int) uint64 {
// ppc64:"ROTL" // ppc64:"ROTL"
// ppc64le:"ROTL" // ppc64le:"ROTL"
// s390x:"RLLG" // s390x:"RLLG"
// wasm:"I64Rotl"
return bits.RotateLeft64(n, m) return bits.RotateLeft64(n, m)
} }
...@@ -251,6 +254,7 @@ func RotateLeftVariable32(n uint32, m int) uint32 { ...@@ -251,6 +254,7 @@ func RotateLeftVariable32(n uint32, m int) uint32 {
// ppc64:"ROTLW" // ppc64:"ROTLW"
// ppc64le:"ROTLW" // ppc64le:"ROTLW"
// s390x:"RLL" // s390x:"RLL"
// wasm:"I32Rotl"
return bits.RotateLeft32(n, m) return bits.RotateLeft32(n, m)
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment