Commit 7582494e authored by Michael Munday's avatar Michael Munday

cmd/compile: add s390x intrinsics for Ceil, Floor, Round and Trunc

Ceil, Floor and Trunc are pre-existing intrinsics. Round is a new
function and has been added as an intrinsic in this CL. All of the
functions can be implemented as a single 'LOAD FP INTEGER'
instruction, FIDBR, on s390x.

name   old time/op  new time/op  delta
Ceil   2.34ns ± 0%  0.85ns ± 0%  -63.74%  (p=0.000 n=5+4)
Floor  2.33ns ± 0%  0.85ns ± 1%  -63.35%  (p=0.008 n=5+5)
Round  4.23ns ± 0%  0.85ns ± 0%  -79.89%  (p=0.000 n=5+4)
Trunc  2.35ns ± 0%  0.85ns ± 0%  -63.83%  (p=0.029 n=4+4)

Change-Id: Idee7ba24a2899d12bf9afee4eedd6b4aaad3c510
Reviewed-on: https://go-review.googlesource.com/63890
Run-TryBot: Michael Munday <mike.munday@ibm.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: default avatarKeith Randall <khr@golang.org>
parent 8802b188
......@@ -1457,6 +1457,39 @@ var linuxS390XTests = []*asmTest{
`,
pos: []string{"\tFLOGR\t"},
},
// Intrinsic tests for math.
{
fn: `
func ceil(x float64) float64 {
return math.Ceil(x)
}
`,
pos: []string{"\tFIDBR\t[$]6"},
},
{
fn: `
func floor(x float64) float64 {
return math.Floor(x)
}
`,
pos: []string{"\tFIDBR\t[$]7"},
},
{
fn: `
func round(x float64) float64 {
return math.Round(x)
}
`,
pos: []string{"\tFIDBR\t[$]1"},
},
{
fn: `
func trunc(x float64) float64 {
return math.Trunc(x)
}
`,
pos: []string{"\tFIDBR\t[$]5"},
},
{
// check that stack store is optimized away
fn: `
......
......@@ -2734,17 +2734,22 @@ func init() {
func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
return s.newValue1(ssa.OpTrunc, types.Types[TFLOAT64], args[0])
},
sys.PPC64)
sys.PPC64, sys.S390X)
addF("math", "Ceil",
func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
return s.newValue1(ssa.OpCeil, types.Types[TFLOAT64], args[0])
},
sys.PPC64)
sys.PPC64, sys.S390X)
addF("math", "Floor",
func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
return s.newValue1(ssa.OpFloor, types.Types[TFLOAT64], args[0])
},
sys.PPC64)
sys.PPC64, sys.S390X)
addF("math", "Round",
func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
return s.newValue1(ssa.OpRound, types.Types[TFLOAT64], args[0])
},
sys.S390X)
/******** math/bits ********/
addF("math/bits", "TrailingZeros64",
......
......@@ -207,6 +207,13 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
p.Reg = r2
p.To.Type = obj.TYPE_REG
p.To.Reg = r
case ssa.OpS390XFIDBR:
switch v.AuxInt {
case 0, 1, 3, 4, 5, 6, 7:
opregregimm(s, v.Op.Asm(), v.Reg(), v.Args[0].Reg(), v.AuxInt)
default:
v.Fatalf("invalid FIDBR mask: %v", v.AuxInt)
}
case ssa.OpS390XDIVD, ssa.OpS390XDIVW,
ssa.OpS390XDIVDU, ssa.OpS390XDIVWU,
ssa.OpS390XMODD, ssa.OpS390XMODW,
......
......@@ -107,7 +107,12 @@
(Bswap64 x) -> (MOVDBR x)
(Bswap32 x) -> (MOVWBR x)
(Sqrt x) -> (FSQRT x)
// math package intrinsics
(Sqrt x) -> (FSQRT x)
(Floor x) -> (FIDBR [7] x)
(Ceil x) -> (FIDBR [6] x)
(Trunc x) -> (FIDBR [5] x)
(Round x) -> (FIDBR [1] x)
// Atomic loads.
(AtomicLoad32 ptr mem) -> (MOVWZatomicload ptr mem)
......
......@@ -206,6 +206,17 @@ func init() {
{name: "FMSUBS", argLength: 3, reg: fp31, asm: "FMSUBS", resultInArg0: true}, // fp32 arg1 * arg2 - arg0
{name: "FMSUB", argLength: 3, reg: fp31, asm: "FMSUB", resultInArg0: true}, // fp64 arg1 * arg2 - arg0
// Round to integer, float64 only.
//
// aux | rounding mode
// ----+-----------------------------------
// 1 | round to nearest, ties away from 0
// 4 | round to nearest, ties to even
// 5 | round toward 0
// 6 | round toward +∞
// 7 | round toward -∞
{name: "FIDBR", argLength: 1, reg: fp11, asm: "FIDBR", aux: "Int8"},
{name: "FMOVSload", argLength: 2, reg: fpload, asm: "FMOVS", aux: "SymOff", faultOnNilArg0: true, symEffect: "Read"}, // fp32 load
{name: "FMOVDload", argLength: 2, reg: fpload, asm: "FMOVD", aux: "SymOff", faultOnNilArg0: true, symEffect: "Read"}, // fp64 load
{name: "FMOVSconst", reg: fp01, asm: "FMOVS", aux: "Float32", rematerializeable: true}, // fp32 constant
......
......@@ -255,10 +255,23 @@ var genericOps = []opData{
{name: "PopCount32", argLength: 1}, // Count bits in arg[0]
{name: "PopCount64", argLength: 1}, // Count bits in arg[0]
{name: "Sqrt", argLength: 1}, // sqrt(arg0), float64 only
{name: "Floor", argLength: 1}, // floor(arg0), float64 only
{name: "Ceil", argLength: 1}, // ceil(arg0), float64 only
{name: "Trunc", argLength: 1}, // trunc(arg0), float64 only
// Square root, float64 only.
// Special cases:
// +∞ → +∞
// ±0 → ±0 (sign preserved)
// x<0 → NaN
// NaN → NaN
{name: "Sqrt", argLength: 1}, // √arg0
// Round to integer, float64 only.
// Special cases:
// ±∞ → ±∞ (sign preserved)
// ±0 → ±0 (sign preserved)
// NaN → NaN
{name: "Floor", argLength: 1}, // round arg0 toward -∞
{name: "Ceil", argLength: 1}, // round arg0 toward +∞
{name: "Trunc", argLength: 1}, // round arg0 toward 0
{name: "Round", argLength: 1}, // round arg0 to nearest, ties away from 0
// Data movement, max argument length for Phi is indefinite so just pick
// a really large number
......
......@@ -1448,6 +1448,7 @@ const (
OpS390XFMADD
OpS390XFMSUBS
OpS390XFMSUB
OpS390XFIDBR
OpS390XFMOVSload
OpS390XFMOVDload
OpS390XFMOVSconst
......@@ -1836,6 +1837,7 @@ const (
OpFloor
OpCeil
OpTrunc
OpRound
OpPhi
OpCopy
OpConvert
......@@ -18602,6 +18604,20 @@ var opcodeTable = [...]opInfo{
},
},
},
{
name: "FIDBR",
auxType: auxInt8,
argLen: 1,
asm: s390x.AFIDBR,
reg: regInfo{
inputs: []inputInfo{
{0, 4294901760}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15
},
outputs: []outputInfo{
{0, 4294901760}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15
},
},
},
{
name: "FMOVSload",
auxType: auxSymOff,
......@@ -22437,6 +22453,11 @@ var opcodeTable = [...]opInfo{
argLen: 1,
generic: true,
},
{
name: "Round",
argLen: 1,
generic: true,
},
{
name: "Phi",
argLen: -1,
......
......@@ -73,6 +73,8 @@ func rewriteValueS390X(v *Value) bool {
return rewriteValueS390X_OpBswap32_0(v)
case OpBswap64:
return rewriteValueS390X_OpBswap64_0(v)
case OpCeil:
return rewriteValueS390X_OpCeil_0(v)
case OpClosureCall:
return rewriteValueS390X_OpClosureCall_0(v)
case OpCom16:
......@@ -161,6 +163,8 @@ func rewriteValueS390X(v *Value) bool {
return rewriteValueS390X_OpEqB_0(v)
case OpEqPtr:
return rewriteValueS390X_OpEqPtr_0(v)
case OpFloor:
return rewriteValueS390X_OpFloor_0(v)
case OpGeq16:
return rewriteValueS390X_OpGeq16_0(v)
case OpGeq16U:
......@@ -371,6 +375,8 @@ func rewriteValueS390X(v *Value) bool {
return rewriteValueS390X_OpOr8_0(v)
case OpOrB:
return rewriteValueS390X_OpOrB_0(v)
case OpRound:
return rewriteValueS390X_OpRound_0(v)
case OpRound32F:
return rewriteValueS390X_OpRound32F_0(v)
case OpRound64F:
......@@ -685,6 +691,8 @@ func rewriteValueS390X(v *Value) bool {
return rewriteValueS390X_OpSub8_0(v)
case OpSubPtr:
return rewriteValueS390X_OpSubPtr_0(v)
case OpTrunc:
return rewriteValueS390X_OpTrunc_0(v)
case OpTrunc16to8:
return rewriteValueS390X_OpTrunc16to8_0(v)
case OpTrunc32to16:
......@@ -1172,6 +1180,18 @@ func rewriteValueS390X_OpBswap64_0(v *Value) bool {
return true
}
}
func rewriteValueS390X_OpCeil_0(v *Value) bool {
// match: (Ceil x)
// cond:
// result: (FIDBR [6] x)
for {
x := v.Args[0]
v.reset(OpS390XFIDBR)
v.AuxInt = 6
v.AddArg(x)
return true
}
}
func rewriteValueS390X_OpClosureCall_0(v *Value) bool {
// match: (ClosureCall [argwid] entry closure mem)
// cond:
......@@ -1911,6 +1931,18 @@ func rewriteValueS390X_OpEqPtr_0(v *Value) bool {
return true
}
}
func rewriteValueS390X_OpFloor_0(v *Value) bool {
// match: (Floor x)
// cond:
// result: (FIDBR [7] x)
for {
x := v.Args[0]
v.reset(OpS390XFIDBR)
v.AuxInt = 7
v.AddArg(x)
return true
}
}
func rewriteValueS390X_OpGeq16_0(v *Value) bool {
b := v.Block
_ = b
......@@ -4913,6 +4945,18 @@ func rewriteValueS390X_OpOrB_0(v *Value) bool {
return true
}
}
func rewriteValueS390X_OpRound_0(v *Value) bool {
// match: (Round x)
// cond:
// result: (FIDBR [1] x)
for {
x := v.Args[0]
v.reset(OpS390XFIDBR)
v.AuxInt = 1
v.AddArg(x)
return true
}
}
func rewriteValueS390X_OpRound32F_0(v *Value) bool {
// match: (Round32F x)
// cond:
......@@ -36200,6 +36244,18 @@ func rewriteValueS390X_OpSubPtr_0(v *Value) bool {
return true
}
}
func rewriteValueS390X_OpTrunc_0(v *Value) bool {
// match: (Trunc x)
// cond:
// result: (FIDBR [5] x)
for {
x := v.Args[0]
v.reset(OpS390XFIDBR)
v.AuxInt = 5
v.AddArg(x)
return true
}
}
func rewriteValueS390X_OpTrunc16to8_0(v *Value) bool {
// match: (Trunc16to8 x)
// cond:
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment