Commit 0f19e24d authored by Lynn Boger

cmd/compile: intrinsics for trunc, floor, ceil on ppc64x

This implements trunc, floor, and ceil in the math package
as intrinsics on ppc64x. The significant improvement is mainly due
to avoiding the call overhead of passing args and the return value.

BenchmarkCeil-16                    5.95          0.69          -88.40%
BenchmarkFloor-16                   5.95          0.69          -88.40%
BenchmarkTrunc-16                   5.82          0.69          -88.14%

Updates #21390

Change-Id: I951e182694f6e0c431da79c577272b81fb0ebad0
Reviewed-on: https://go-review.googlesource.com/54654
Run-TryBot: Lynn Boger <laboger@linux.vnet.ibm.com>
Reviewed-by: Carlos Eduardo Seo <cseo@linux.vnet.ibm.com>
Reviewed-by: David Chase <drchase@google.com>
parent 3cb41be8
...@@ -2724,6 +2724,21 @@ func init() { ...@@ -2724,6 +2724,21 @@ func init() {
return s.newValue1(ssa.OpSqrt, types.Types[TFLOAT64], args[0]) return s.newValue1(ssa.OpSqrt, types.Types[TFLOAT64], args[0])
}, },
sys.AMD64, sys.ARM, sys.ARM64, sys.MIPS, sys.PPC64, sys.S390X) sys.AMD64, sys.ARM, sys.ARM64, sys.MIPS, sys.PPC64, sys.S390X)
addF("math", "Trunc",
func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
return s.newValue1(ssa.OpTrunc, types.Types[TFLOAT64], args[0])
},
sys.PPC64)
addF("math", "Ceil",
func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
return s.newValue1(ssa.OpCeil, types.Types[TFLOAT64], args[0])
},
sys.PPC64)
addF("math", "Floor",
func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
return s.newValue1(ssa.OpFloor, types.Types[TFLOAT64], args[0])
},
sys.PPC64)
/******** math/bits ********/ /******** math/bits ********/
addF("math/bits", "TrailingZeros64", addF("math/bits", "TrailingZeros64",
......
...@@ -596,7 +596,7 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) { ...@@ -596,7 +596,7 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
p.To.Type = obj.TYPE_REG p.To.Type = obj.TYPE_REG
p.To.Reg = ppc64.REGTMP // Ignored; this is for the carry effect. p.To.Reg = ppc64.REGTMP // Ignored; this is for the carry effect.
case ssa.OpPPC64NEG, ssa.OpPPC64FNEG, ssa.OpPPC64FSQRT, ssa.OpPPC64FSQRTS, ssa.OpPPC64FCTIDZ, ssa.OpPPC64FCTIWZ, ssa.OpPPC64FCFID, ssa.OpPPC64FRSP, ssa.OpPPC64CNTLZD, ssa.OpPPC64CNTLZW, ssa.OpPPC64POPCNTD, ssa.OpPPC64POPCNTW, ssa.OpPPC64POPCNTB: case ssa.OpPPC64NEG, ssa.OpPPC64FNEG, ssa.OpPPC64FSQRT, ssa.OpPPC64FSQRTS, ssa.OpPPC64FFLOOR, ssa.OpPPC64FTRUNC, ssa.OpPPC64FCEIL, ssa.OpPPC64FCTIDZ, ssa.OpPPC64FCTIWZ, ssa.OpPPC64FCFID, ssa.OpPPC64FRSP, ssa.OpPPC64CNTLZD, ssa.OpPPC64CNTLZW, ssa.OpPPC64POPCNTD, ssa.OpPPC64POPCNTW, ssa.OpPPC64POPCNTB:
r := v.Reg() r := v.Reg()
p := s.Prog(v.Op.Asm()) p := s.Prog(v.Op.Asm())
p.To.Type = obj.TYPE_REG p.To.Type = obj.TYPE_REG
......
...@@ -74,6 +74,9 @@ ...@@ -74,6 +74,9 @@
(Round64F x) -> (LoweredRound64F x) (Round64F x) -> (LoweredRound64F x)
(Sqrt x) -> (FSQRT x) (Sqrt x) -> (FSQRT x)
(Floor x) -> (FFLOOR x)
(Ceil x) -> (FCEIL x)
(Trunc x) -> (FTRUNC x)
// Lowering constants // Lowering constants
(Const8 [val]) -> (MOVDconst [val]) (Const8 [val]) -> (MOVDconst [val])
......
...@@ -241,6 +241,9 @@ func init() { ...@@ -241,6 +241,9 @@ func init() {
{name: "FNEG", argLength: 1, reg: fp11, asm: "FNEG"}, // -arg0 (floating point) {name: "FNEG", argLength: 1, reg: fp11, asm: "FNEG"}, // -arg0 (floating point)
{name: "FSQRT", argLength: 1, reg: fp11, asm: "FSQRT"}, // sqrt(arg0) (floating point) {name: "FSQRT", argLength: 1, reg: fp11, asm: "FSQRT"}, // sqrt(arg0) (floating point)
{name: "FSQRTS", argLength: 1, reg: fp11, asm: "FSQRTS"}, // sqrt(arg0) (floating point, single precision) {name: "FSQRTS", argLength: 1, reg: fp11, asm: "FSQRTS"}, // sqrt(arg0) (floating point, single precision)
{name: "FFLOOR", argLength: 1, reg: fp11, asm: "FRIM"}, // floor(arg0), float64
{name: "FCEIL", argLength: 1, reg: fp11, asm: "FRIP"}, // ceil(arg0), float64
{name: "FTRUNC", argLength: 1, reg: fp11, asm: "FRIZ"}, // trunc(arg0), float64
{name: "ORconst", argLength: 1, reg: gp11, asm: "OR", aux: "Int64"}, // arg0|aux {name: "ORconst", argLength: 1, reg: gp11, asm: "OR", aux: "Int64"}, // arg0|aux
{name: "XORconst", argLength: 1, reg: gp11, asm: "XOR", aux: "Int64"}, // arg0^aux {name: "XORconst", argLength: 1, reg: gp11, asm: "XOR", aux: "Int64"}, // arg0^aux
......
...@@ -256,6 +256,9 @@ var genericOps = []opData{ ...@@ -256,6 +256,9 @@ var genericOps = []opData{
{name: "PopCount64", argLength: 1}, // Count bits in arg[0] {name: "PopCount64", argLength: 1}, // Count bits in arg[0]
{name: "Sqrt", argLength: 1}, // sqrt(arg0), float64 only {name: "Sqrt", argLength: 1}, // sqrt(arg0), float64 only
{name: "Floor", argLength: 1}, // floor(arg0), float64 only
{name: "Ceil", argLength: 1}, // ceil(arg0), float64 only
{name: "Trunc", argLength: 1}, // trunc(arg0), float64 only
// Data movement, max argument length for Phi is indefinite so just pick // Data movement, max argument length for Phi is indefinite so just pick
// a really large number // a really large number
......
...@@ -1322,6 +1322,9 @@ const ( ...@@ -1322,6 +1322,9 @@ const (
OpPPC64FNEG OpPPC64FNEG
OpPPC64FSQRT OpPPC64FSQRT
OpPPC64FSQRTS OpPPC64FSQRTS
OpPPC64FFLOOR
OpPPC64FCEIL
OpPPC64FTRUNC
OpPPC64ORconst OpPPC64ORconst
OpPPC64XORconst OpPPC64XORconst
OpPPC64ANDconst OpPPC64ANDconst
...@@ -1800,6 +1803,9 @@ const ( ...@@ -1800,6 +1803,9 @@ const (
OpPopCount32 OpPopCount32
OpPopCount64 OpPopCount64
OpSqrt OpSqrt
OpFloor
OpCeil
OpTrunc
OpPhi OpPhi
OpCopy OpCopy
OpConvert OpConvert
...@@ -16955,6 +16961,45 @@ var opcodeTable = [...]opInfo{ ...@@ -16955,6 +16961,45 @@ var opcodeTable = [...]opInfo{
}, },
}, },
}, },
{
name: "FFLOOR",
argLen: 1,
asm: ppc64.AFRIM,
reg: regInfo{
inputs: []inputInfo{
{0, 576460743713488896}, // F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26
},
outputs: []outputInfo{
{0, 576460743713488896}, // F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26
},
},
},
{
name: "FCEIL",
argLen: 1,
asm: ppc64.AFRIP,
reg: regInfo{
inputs: []inputInfo{
{0, 576460743713488896}, // F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26
},
outputs: []outputInfo{
{0, 576460743713488896}, // F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26
},
},
},
{
name: "FTRUNC",
argLen: 1,
asm: ppc64.AFRIZ,
reg: regInfo{
inputs: []inputInfo{
{0, 576460743713488896}, // F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26
},
outputs: []outputInfo{
{0, 576460743713488896}, // F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26
},
},
},
{ {
name: "ORconst", name: "ORconst",
auxType: auxInt64, auxType: auxInt64,
...@@ -21976,6 +22021,21 @@ var opcodeTable = [...]opInfo{ ...@@ -21976,6 +22021,21 @@ var opcodeTable = [...]opInfo{
argLen: 1, argLen: 1,
generic: true, generic: true,
}, },
{
name: "Floor",
argLen: 1,
generic: true,
},
{
name: "Ceil",
argLen: 1,
generic: true,
},
{
name: "Trunc",
argLen: 1,
generic: true,
},
{ {
name: "Phi", name: "Phi",
argLen: -1, argLen: -1,
......
...@@ -73,6 +73,8 @@ func rewriteValuePPC64(v *Value) bool { ...@@ -73,6 +73,8 @@ func rewriteValuePPC64(v *Value) bool {
return rewriteValuePPC64_OpBitLen32_0(v) return rewriteValuePPC64_OpBitLen32_0(v)
case OpBitLen64: case OpBitLen64:
return rewriteValuePPC64_OpBitLen64_0(v) return rewriteValuePPC64_OpBitLen64_0(v)
case OpCeil:
return rewriteValuePPC64_OpCeil_0(v)
case OpClosureCall: case OpClosureCall:
return rewriteValuePPC64_OpClosureCall_0(v) return rewriteValuePPC64_OpClosureCall_0(v)
case OpCom16: case OpCom16:
...@@ -161,6 +163,8 @@ func rewriteValuePPC64(v *Value) bool { ...@@ -161,6 +163,8 @@ func rewriteValuePPC64(v *Value) bool {
return rewriteValuePPC64_OpEqB_0(v) return rewriteValuePPC64_OpEqB_0(v)
case OpEqPtr: case OpEqPtr:
return rewriteValuePPC64_OpEqPtr_0(v) return rewriteValuePPC64_OpEqPtr_0(v)
case OpFloor:
return rewriteValuePPC64_OpFloor_0(v)
case OpGeq16: case OpGeq16:
return rewriteValuePPC64_OpGeq16_0(v) return rewriteValuePPC64_OpGeq16_0(v)
case OpGeq16U: case OpGeq16U:
...@@ -583,6 +587,8 @@ func rewriteValuePPC64(v *Value) bool { ...@@ -583,6 +587,8 @@ func rewriteValuePPC64(v *Value) bool {
return rewriteValuePPC64_OpSub8_0(v) return rewriteValuePPC64_OpSub8_0(v)
case OpSubPtr: case OpSubPtr:
return rewriteValuePPC64_OpSubPtr_0(v) return rewriteValuePPC64_OpSubPtr_0(v)
case OpTrunc:
return rewriteValuePPC64_OpTrunc_0(v)
case OpTrunc16to8: case OpTrunc16to8:
return rewriteValuePPC64_OpTrunc16to8_0(v) return rewriteValuePPC64_OpTrunc16to8_0(v)
case OpTrunc32to16: case OpTrunc32to16:
...@@ -1070,6 +1076,17 @@ func rewriteValuePPC64_OpBitLen64_0(v *Value) bool { ...@@ -1070,6 +1076,17 @@ func rewriteValuePPC64_OpBitLen64_0(v *Value) bool {
return true return true
} }
} }
// rewriteValuePPC64_OpCeil_0 applies the unconditional lowering rule
//
//	(Ceil x) -> (FCEIL x)
//
// by turning v into an OpPPC64FCEIL value with the same single operand.
// The rule has no condition, so it always reports true.
func rewriteValuePPC64_OpCeil_0(v *Value) bool {
	// Capture the operand before reset is called.
	// NOTE(review): presumably reset discards v's existing args — confirm.
	operand := v.Args[0]
	v.reset(OpPPC64FCEIL)
	v.AddArg(operand)
	return true
}
func rewriteValuePPC64_OpClosureCall_0(v *Value) bool { func rewriteValuePPC64_OpClosureCall_0(v *Value) bool {
// match: (ClosureCall [argwid] entry closure mem) // match: (ClosureCall [argwid] entry closure mem)
// cond: // cond:
...@@ -1823,6 +1840,17 @@ func rewriteValuePPC64_OpEqPtr_0(v *Value) bool { ...@@ -1823,6 +1840,17 @@ func rewriteValuePPC64_OpEqPtr_0(v *Value) bool {
return true return true
} }
} }
// rewriteValuePPC64_OpFloor_0 applies the unconditional lowering rule
//
//	(Floor x) -> (FFLOOR x)
//
// by turning v into an OpPPC64FFLOOR value with the same single operand.
// The rule has no condition, so it always reports true.
func rewriteValuePPC64_OpFloor_0(v *Value) bool {
	// Capture the operand before reset is called.
	// NOTE(review): presumably reset discards v's existing args — confirm.
	operand := v.Args[0]
	v.reset(OpPPC64FFLOOR)
	v.AddArg(operand)
	return true
}
func rewriteValuePPC64_OpGeq16_0(v *Value) bool { func rewriteValuePPC64_OpGeq16_0(v *Value) bool {
b := v.Block b := v.Block
_ = b _ = b
...@@ -10463,6 +10491,17 @@ func rewriteValuePPC64_OpSubPtr_0(v *Value) bool { ...@@ -10463,6 +10491,17 @@ func rewriteValuePPC64_OpSubPtr_0(v *Value) bool {
return true return true
} }
} }
// rewriteValuePPC64_OpTrunc_0 applies the unconditional lowering rule
//
//	(Trunc x) -> (FTRUNC x)
//
// by turning v into an OpPPC64FTRUNC value with the same single operand.
// The rule has no condition, so it always reports true.
func rewriteValuePPC64_OpTrunc_0(v *Value) bool {
	// Capture the operand before reset is called.
	// NOTE(review): presumably reset discards v's existing args — confirm.
	operand := v.Args[0]
	v.reset(OpPPC64FTRUNC)
	v.AddArg(operand)
	return true
}
func rewriteValuePPC64_OpTrunc16to8_0(v *Value) bool { func rewriteValuePPC64_OpTrunc16to8_0(v *Value) bool {
// match: (Trunc16to8 x) // match: (Trunc16to8 x)
// cond: // cond:
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment