Commit 4745604b authored by Michael Munday

cmd/compile: intrinsify math.RoundToEven on s390x

The new RoundToEven function can be implemented as a single FIDBR
instruction on s390x.

name         old time/op  new time/op  delta
RoundToEven  5.32ns ± 1%  0.86ns ± 1%  -83.86%  (p=0.000 n=10+10)

Change-Id: Iaf597e57a0d1085961701e3c75ff4f6f6dcebb5f
Reviewed-on: https://go-review.googlesource.com/74350
Run-TryBot: Michael Munday <mike.munday@ibm.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
parent 51daa25c
...@@ -1633,6 +1633,14 @@ var linuxS390XTests = []*asmTest{ ...@@ -1633,6 +1633,14 @@ var linuxS390XTests = []*asmTest{
`, `,
pos: []string{"\tFIDBR\t[$]5"}, pos: []string{"\tFIDBR\t[$]5"},
}, },
{
fn: `
func roundToEven(x float64) float64 {
return math.RoundToEven(x)
}
`,
pos: []string{"\tFIDBR\t[$]4"},
},
{ {
// check that stack store is optimized away // check that stack store is optimized away
fn: ` fn: `
......
...@@ -2807,6 +2807,11 @@ func init() { ...@@ -2807,6 +2807,11 @@ func init() {
return s.newValue1(ssa.OpRound, types.Types[TFLOAT64], args[0]) return s.newValue1(ssa.OpRound, types.Types[TFLOAT64], args[0])
}, },
sys.S390X) sys.S390X)
addF("math", "RoundToEven",
func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
return s.newValue1(ssa.OpRoundToEven, types.Types[TFLOAT64], args[0])
},
sys.S390X)
addF("math", "Abs", addF("math", "Abs",
func(s *state, n *Node, args []*ssa.Value) *ssa.Value { func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
return s.newValue1(ssa.OpAbs, types.Types[TFLOAT64], args[0]) return s.newValue1(ssa.OpAbs, types.Types[TFLOAT64], args[0])
......
...@@ -108,11 +108,12 @@ ...@@ -108,11 +108,12 @@
(Bswap32 x) -> (MOVWBR x) (Bswap32 x) -> (MOVWBR x)
// math package intrinsics // math package intrinsics
(Sqrt x) -> (FSQRT x) (Sqrt x) -> (FSQRT x)
(Floor x) -> (FIDBR [7] x) (Floor x) -> (FIDBR [7] x)
(Ceil x) -> (FIDBR [6] x) (Ceil x) -> (FIDBR [6] x)
(Trunc x) -> (FIDBR [5] x) (Trunc x) -> (FIDBR [5] x)
(Round x) -> (FIDBR [1] x) (RoundToEven x) -> (FIDBR [4] x)
(Round x) -> (FIDBR [1] x)
// Atomic loads. // Atomic loads.
(AtomicLoad32 ptr mem) -> (MOVWZatomicload ptr mem) (AtomicLoad32 ptr mem) -> (MOVWZatomicload ptr mem)
......
...@@ -268,10 +268,11 @@ var genericOps = []opData{ ...@@ -268,10 +268,11 @@ var genericOps = []opData{
// ±∞ → ±∞ (sign preserved) // ±∞ → ±∞ (sign preserved)
// ±0 → ±0 (sign preserved) // ±0 → ±0 (sign preserved)
// NaN → NaN // NaN → NaN
{name: "Floor", argLength: 1}, // round arg0 toward -∞ {name: "Floor", argLength: 1}, // round arg0 toward -∞
{name: "Ceil", argLength: 1}, // round arg0 toward +∞ {name: "Ceil", argLength: 1}, // round arg0 toward +∞
{name: "Trunc", argLength: 1}, // round arg0 toward 0 {name: "Trunc", argLength: 1}, // round arg0 toward 0
{name: "Round", argLength: 1}, // round arg0 to nearest, ties away from 0 {name: "Round", argLength: 1}, // round arg0 to nearest, ties away from 0
{name: "RoundToEven", argLength: 1}, // round arg0 to nearest, ties to even
// Modify the sign bit // Modify the sign bit
{name: "Abs", argLength: 1}, // absolute value arg0 {name: "Abs", argLength: 1}, // absolute value arg0
......
...@@ -1901,6 +1901,7 @@ const ( ...@@ -1901,6 +1901,7 @@ const (
OpCeil OpCeil
OpTrunc OpTrunc
OpRound OpRound
OpRoundToEven
OpAbs OpAbs
OpCopysign OpCopysign
OpPhi OpPhi
...@@ -23318,6 +23319,11 @@ var opcodeTable = [...]opInfo{ ...@@ -23318,6 +23319,11 @@ var opcodeTable = [...]opInfo{
argLen: 1, argLen: 1,
generic: true, generic: true,
}, },
{
name: "RoundToEven",
argLen: 1,
generic: true,
},
{ {
name: "Abs", name: "Abs",
argLen: 1, argLen: 1,
......
...@@ -383,6 +383,8 @@ func rewriteValueS390X(v *Value) bool { ...@@ -383,6 +383,8 @@ func rewriteValueS390X(v *Value) bool {
return rewriteValueS390X_OpRound32F_0(v) return rewriteValueS390X_OpRound32F_0(v)
case OpRound64F: case OpRound64F:
return rewriteValueS390X_OpRound64F_0(v) return rewriteValueS390X_OpRound64F_0(v)
case OpRoundToEven:
return rewriteValueS390X_OpRoundToEven_0(v)
case OpRsh16Ux16: case OpRsh16Ux16:
return rewriteValueS390X_OpRsh16Ux16_0(v) return rewriteValueS390X_OpRsh16Ux16_0(v)
case OpRsh16Ux32: case OpRsh16Ux32:
...@@ -5028,6 +5030,18 @@ func rewriteValueS390X_OpRound64F_0(v *Value) bool { ...@@ -5028,6 +5030,18 @@ func rewriteValueS390X_OpRound64F_0(v *Value) bool {
return true return true
} }
} }
// rewriteValueS390X_OpRoundToEven_0 lowers a generic RoundToEven value to the
// S390X FIDBR op. The rule has no condition, so the rewrite is applied
// unconditionally and the function always reports true.
func rewriteValueS390X_OpRoundToEven_0(v *Value) bool {
	// match: (RoundToEven x)
	// cond:
	// result: (FIDBR [4] x)
	operand := v.Args[0] // capture the sole argument before reset is called on v
	v.reset(OpS390XFIDBR)
	v.AuxInt = 4 // the immediate the rule attaches to FIDBR for RoundToEven
	v.AddArg(operand)
	return true
}
func rewriteValueS390X_OpRsh16Ux16_0(v *Value) bool { func rewriteValueS390X_OpRsh16Ux16_0(v *Value) bool {
b := v.Block b := v.Block
_ = b _ = b
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment