Commit 8149db4f authored by erifan01, committed by Cherry Zhang

cmd/compile: intrinsify math.RoundToEven and math.Abs on arm64

math.RoundToEven can be implemented with a single arm64 instruction, FRINTND, so intrinsify it to improve performance.
The current pure Go implementation of the function Abs is translated into five instructions on arm64:
str, ldr, and, str, ldr. The intrinsic implementation requires only one instruction (FABSD), so
intrinsifying it is worthwhile in terms of performance.

Benchmarks:
name           old time/op  new time/op  delta
Abs-8          3.50ns ± 0%  1.50ns ± 0%  -57.14%  (p=0.000 n=10+10)
RoundToEven-8  9.26ns ± 0%  1.50ns ± 0%  -83.80%  (p=0.000 n=10+10)

Change-Id: I9456b26ab282b544dfac0154fc86f17aed96ac3d
Reviewed-on: https://go-review.googlesource.com/116535
Reviewed-by: Cherry Zhang <cherryyz@google.com>
Run-TryBot: Cherry Zhang <cherryyz@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
parent a2a3dd00
...@@ -698,6 +698,7 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) { ...@@ -698,6 +698,7 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
fallthrough fallthrough
case ssa.OpARM64MVN, case ssa.OpARM64MVN,
ssa.OpARM64NEG, ssa.OpARM64NEG,
ssa.OpARM64FABSD,
ssa.OpARM64FMOVDfpgp, ssa.OpARM64FMOVDfpgp,
ssa.OpARM64FMOVDgpfp, ssa.OpARM64FMOVDgpfp,
ssa.OpARM64FNEGS, ssa.OpARM64FNEGS,
...@@ -730,6 +731,7 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) { ...@@ -730,6 +731,7 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
ssa.OpARM64CLZW, ssa.OpARM64CLZW,
ssa.OpARM64FRINTAD, ssa.OpARM64FRINTAD,
ssa.OpARM64FRINTMD, ssa.OpARM64FRINTMD,
ssa.OpARM64FRINTND,
ssa.OpARM64FRINTPD, ssa.OpARM64FRINTPD,
ssa.OpARM64FRINTZD: ssa.OpARM64FRINTZD:
p := s.Prog(v.Op.Asm()) p := s.Prog(v.Op.Asm())
......
...@@ -3149,12 +3149,12 @@ func init() { ...@@ -3149,12 +3149,12 @@ func init() {
func(s *state, n *Node, args []*ssa.Value) *ssa.Value { func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
return s.newValue1(ssa.OpRoundToEven, types.Types[TFLOAT64], args[0]) return s.newValue1(ssa.OpRoundToEven, types.Types[TFLOAT64], args[0])
}, },
sys.S390X) sys.ARM64, sys.S390X)
addF("math", "Abs", addF("math", "Abs",
func(s *state, n *Node, args []*ssa.Value) *ssa.Value { func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
return s.newValue1(ssa.OpAbs, types.Types[TFLOAT64], args[0]) return s.newValue1(ssa.OpAbs, types.Types[TFLOAT64], args[0])
}, },
sys.PPC64) sys.ARM64, sys.PPC64)
addF("math", "Copysign", addF("math", "Copysign",
func(s *state, n *Node, args []*ssa.Value) *ssa.Value { func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
return s.newValue2(ssa.OpCopysign, types.Types[TFLOAT64], args[0], args[1]) return s.newValue2(ssa.OpCopysign, types.Types[TFLOAT64], args[0], args[1])
......
...@@ -83,10 +83,12 @@ ...@@ -83,10 +83,12 @@
(Com8 x) -> (MVN x) (Com8 x) -> (MVN x)
// math package intrinsics // math package intrinsics
(Abs x) -> (FABSD x)
(Sqrt x) -> (FSQRTD x) (Sqrt x) -> (FSQRTD x)
(Ceil x) -> (FRINTPD x) (Ceil x) -> (FRINTPD x)
(Floor x) -> (FRINTMD x) (Floor x) -> (FRINTMD x)
(Round x) -> (FRINTAD x) (Round x) -> (FRINTAD x)
(RoundToEven x) -> (FRINTND x)
(Trunc x) -> (FRINTZD x) (Trunc x) -> (FRINTZD x)
// lowering rotates // lowering rotates
......
...@@ -212,6 +212,7 @@ func init() { ...@@ -212,6 +212,7 @@ func init() {
// unary ops // unary ops
{name: "MVN", argLength: 1, reg: gp11, asm: "MVN"}, // ^arg0 {name: "MVN", argLength: 1, reg: gp11, asm: "MVN"}, // ^arg0
{name: "NEG", argLength: 1, reg: gp11, asm: "NEG"}, // -arg0 {name: "NEG", argLength: 1, reg: gp11, asm: "NEG"}, // -arg0
{name: "FABSD", argLength: 1, reg: fp11, asm: "FABSD"}, // abs(arg0), float64
{name: "FNEGS", argLength: 1, reg: fp11, asm: "FNEGS"}, // -arg0, float32 {name: "FNEGS", argLength: 1, reg: fp11, asm: "FNEGS"}, // -arg0, float32
{name: "FNEGD", argLength: 1, reg: fp11, asm: "FNEGD"}, // -arg0, float64 {name: "FNEGD", argLength: 1, reg: fp11, asm: "FNEGD"}, // -arg0, float64
{name: "FSQRTD", argLength: 1, reg: fp11, asm: "FSQRTD"}, // sqrt(arg0), float64 {name: "FSQRTD", argLength: 1, reg: fp11, asm: "FSQRTD"}, // sqrt(arg0), float64
...@@ -424,6 +425,7 @@ func init() { ...@@ -424,6 +425,7 @@ func init() {
// floating-point round to integral // floating-point round to integral
{name: "FRINTAD", argLength: 1, reg: fp11, asm: "FRINTAD"}, {name: "FRINTAD", argLength: 1, reg: fp11, asm: "FRINTAD"},
{name: "FRINTMD", argLength: 1, reg: fp11, asm: "FRINTMD"}, {name: "FRINTMD", argLength: 1, reg: fp11, asm: "FRINTMD"},
{name: "FRINTND", argLength: 1, reg: fp11, asm: "FRINTND"},
{name: "FRINTPD", argLength: 1, reg: fp11, asm: "FRINTPD"}, {name: "FRINTPD", argLength: 1, reg: fp11, asm: "FRINTPD"},
{name: "FRINTZD", argLength: 1, reg: fp11, asm: "FRINTZD"}, {name: "FRINTZD", argLength: 1, reg: fp11, asm: "FRINTZD"},
......
...@@ -1107,6 +1107,7 @@ const ( ...@@ -1107,6 +1107,7 @@ const (
OpARM64LoweredMuluhilo OpARM64LoweredMuluhilo
OpARM64MVN OpARM64MVN
OpARM64NEG OpARM64NEG
OpARM64FABSD
OpARM64FNEGS OpARM64FNEGS
OpARM64FNEGD OpARM64FNEGD
OpARM64FSQRTD OpARM64FSQRTD
...@@ -1277,6 +1278,7 @@ const ( ...@@ -1277,6 +1278,7 @@ const (
OpARM64FCVTDS OpARM64FCVTDS
OpARM64FRINTAD OpARM64FRINTAD
OpARM64FRINTMD OpARM64FRINTMD
OpARM64FRINTND
OpARM64FRINTPD OpARM64FRINTPD
OpARM64FRINTZD OpARM64FRINTZD
OpARM64CSEL OpARM64CSEL
...@@ -14658,6 +14660,19 @@ var opcodeTable = [...]opInfo{ ...@@ -14658,6 +14660,19 @@ var opcodeTable = [...]opInfo{
}, },
}, },
}, },
{
name: "FABSD",
argLen: 1,
asm: arm64.AFABSD,
reg: regInfo{
inputs: []inputInfo{
{0, 9223372034707292160}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
},
outputs: []outputInfo{
{0, 9223372034707292160}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
},
},
},
{ {
name: "FNEGS", name: "FNEGS",
argLen: 1, argLen: 1,
...@@ -16965,6 +16980,19 @@ var opcodeTable = [...]opInfo{ ...@@ -16965,6 +16980,19 @@ var opcodeTable = [...]opInfo{
}, },
}, },
}, },
{
name: "FRINTND",
argLen: 1,
asm: arm64.AFRINTND,
reg: regInfo{
inputs: []inputInfo{
{0, 9223372034707292160}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
},
outputs: []outputInfo{
{0, 9223372034707292160}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
},
},
},
{ {
name: "FRINTPD", name: "FRINTPD",
argLen: 1, argLen: 1,
......
...@@ -331,6 +331,8 @@ func rewriteValueARM64(v *Value) bool { ...@@ -331,6 +331,8 @@ func rewriteValueARM64(v *Value) bool {
return rewriteValueARM64_OpARM64XORshiftRA_0(v) return rewriteValueARM64_OpARM64XORshiftRA_0(v)
case OpARM64XORshiftRL: case OpARM64XORshiftRL:
return rewriteValueARM64_OpARM64XORshiftRL_0(v) return rewriteValueARM64_OpARM64XORshiftRL_0(v)
case OpAbs:
return rewriteValueARM64_OpAbs_0(v)
case OpAdd16: case OpAdd16:
return rewriteValueARM64_OpAdd16_0(v) return rewriteValueARM64_OpAdd16_0(v)
case OpAdd32: case OpAdd32:
...@@ -747,6 +749,8 @@ func rewriteValueARM64(v *Value) bool { ...@@ -747,6 +749,8 @@ func rewriteValueARM64(v *Value) bool {
return rewriteValueARM64_OpRound32F_0(v) return rewriteValueARM64_OpRound32F_0(v)
case OpRound64F: case OpRound64F:
return rewriteValueARM64_OpRound64F_0(v) return rewriteValueARM64_OpRound64F_0(v)
case OpRoundToEven:
return rewriteValueARM64_OpRoundToEven_0(v)
case OpRsh16Ux16: case OpRsh16Ux16:
return rewriteValueARM64_OpRsh16Ux16_0(v) return rewriteValueARM64_OpRsh16Ux16_0(v)
case OpRsh16Ux32: case OpRsh16Ux32:
...@@ -29214,6 +29218,17 @@ func rewriteValueARM64_OpARM64XORshiftRL_0(v *Value) bool { ...@@ -29214,6 +29218,17 @@ func rewriteValueARM64_OpARM64XORshiftRL_0(v *Value) bool {
} }
return false return false
} }
// rewriteValueARM64_OpAbs_0 rewrites a generic Abs value into the ARM64
// FABSD op, keeping the same single operand. The rule is unconditional,
// so the function always reports that a rewrite happened.
func rewriteValueARM64_OpAbs_0(v *Value) bool {
	// match: (Abs x)
	// cond:
	// result: (FABSD x)
	operand := v.Args[0] // read the operand before v.reset is called
	v.reset(OpARM64FABSD)
	v.AddArg(operand)
	return true
}
func rewriteValueARM64_OpAdd16_0(v *Value) bool { func rewriteValueARM64_OpAdd16_0(v *Value) bool {
// match: (Add16 x y) // match: (Add16 x y)
// cond: // cond:
...@@ -33407,6 +33422,17 @@ func rewriteValueARM64_OpRound64F_0(v *Value) bool { ...@@ -33407,6 +33422,17 @@ func rewriteValueARM64_OpRound64F_0(v *Value) bool {
return true return true
} }
} }
// rewriteValueARM64_OpRoundToEven_0 rewrites a generic RoundToEven value
// into the ARM64 FRINTND op, keeping the same single operand. The rule is
// unconditional, so the function always reports that a rewrite happened.
func rewriteValueARM64_OpRoundToEven_0(v *Value) bool {
	// match: (RoundToEven x)
	// cond:
	// result: (FRINTND x)
	operand := v.Args[0] // read the operand before v.reset is called
	v.reset(OpARM64FRINTND)
	v.AddArg(operand)
	return true
}
func rewriteValueARM64_OpRsh16Ux16_0(v *Value) bool { func rewriteValueARM64_OpRsh16Ux16_0(v *Value) bool {
b := v.Block b := v.Block
_ = b _ = b
......
...@@ -32,6 +32,7 @@ func approx(x float64) { ...@@ -32,6 +32,7 @@ func approx(x float64) {
sink64[3] = math.Trunc(x) sink64[3] = math.Trunc(x)
// s390x:"FIDBR\t[$]4" // s390x:"FIDBR\t[$]4"
// arm64:"FRINTND"
sink64[4] = math.RoundToEven(x) sink64[4] = math.RoundToEven(x)
} }
...@@ -48,6 +49,7 @@ func sqrt(x float64) float64 { ...@@ -48,6 +49,7 @@ func sqrt(x float64) float64 {
// Check that it's using integer registers // Check that it's using integer registers
func abs(x, y float64) { func abs(x, y float64) {
// amd64:"BTRQ\t[$]63" // amd64:"BTRQ\t[$]63"
// arm64:"FABSD\t"
// s390x:"LPDFR\t",-"MOVD\t" (no integer load/store) // s390x:"LPDFR\t",-"MOVD\t" (no integer load/store)
// ppc64le:"FABS\t" // ppc64le:"FABS\t"
sink64[0] = math.Abs(x) sink64[0] = math.Abs(x)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment