Commit 58b03194 authored by smasher164, committed by Keith Randall

cmd/compile: add fma intrinsic for arm

This change introduces an arm intrinsic that generates the FMULAD
instruction for the fused-multiply-add operation on systems that
support it. System support is detected via cpu.ARM.HasVFPv4. A rewrite
rule translates the generic intrinsic to FMULAD.

Updates #25819.

Change-Id: I8459e5dd1cdbdca35f88a78dbeb7d387f1e20efa
Reviewed-on: https://go-review.googlesource.com/c/go/+/142117
Run-TryBot: Keith Randall <khr@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
parent 06ac2627
......@@ -226,7 +226,7 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
p.To.Reg = r
case ssa.OpARMSRR:
genregshift(s, arm.AMOVW, 0, v.Args[0].Reg(), v.Args[1].Reg(), v.Reg(), arm.SHIFT_RR)
case ssa.OpARMMULAF, ssa.OpARMMULAD, ssa.OpARMMULSF, ssa.OpARMMULSD:
case ssa.OpARMMULAF, ssa.OpARMMULAD, ssa.OpARMMULSF, ssa.OpARMMULSD, ssa.OpARMFMULAD:
r := v.Reg()
r0 := v.Args[0].Reg()
r1 := v.Args[1].Reg()
......
......@@ -186,6 +186,7 @@ var runtimeDecls = [...]struct {
{"x86HasPOPCNT", varTag, 15},
{"x86HasSSE41", varTag, 15},
{"x86HasFMA", varTag, 15},
{"armHasVFPv4", varTag, 15},
{"arm64HasATOMICS", varTag, 15},
}
......
......@@ -242,4 +242,5 @@ func checkptrArithmetic(unsafe.Pointer, []unsafe.Pointer)
var x86HasPOPCNT bool
var x86HasSSE41 bool
var x86HasFMA bool
var armHasVFPv4 bool
var arm64HasATOMICS bool
......@@ -312,6 +312,7 @@ var (
x86HasPOPCNT,
x86HasSSE41,
x86HasFMA,
armHasVFPv4,
arm64HasATOMICS,
typedmemclr,
typedmemmove,
......
......@@ -92,6 +92,7 @@ func initssaconfig() {
x86HasPOPCNT = sysvar("x86HasPOPCNT") // bool
x86HasSSE41 = sysvar("x86HasSSE41") // bool
x86HasFMA = sysvar("x86HasFMA") // bool
armHasVFPv4 = sysvar("armHasVFPv4") // bool
arm64HasATOMICS = sysvar("arm64HasATOMICS") // bool
typedmemclr = sysfunc("typedmemclr")
typedmemmove = sysfunc("typedmemmove")
......@@ -3357,6 +3358,36 @@ func init() {
return s.variable(n, types.Types[TFLOAT64])
},
sys.AMD64)
// Register an intrinsic for math.Fma, enabled for sys.ARM only.
// The builder emits a run-time test of the armHasVFPv4 variable and then
// either the generic OpFma value or an ordinary call to the function,
// merging both results through s.vars[n].
addF("math", "Fma",
func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
// Take the address of armHasVFPv4 (relative to s.sb) and load it as a bool.
addr := s.entryNewValue1A(ssa.OpAddr, types.Types[TBOOL].PtrTo(), armHasVFPv4, s.sb)
v := s.load(types.Types[TBOOL], addr)
// Close the current block and turn it into a conditional branch on v.
b := s.endBlock()
b.Kind = ssa.BlockIf
b.SetControl(v)
bTrue := s.f.NewBlock(ssa.BlockPlain)
bFalse := s.f.NewBlock(ssa.BlockPlain)
bEnd := s.f.NewBlock(ssa.BlockPlain)
// Edge order matters here: bTrue is added first, bFalse second.
b.AddEdgeTo(bTrue)
b.AddEdgeTo(bFalse)
// NOTE(review): presumably hints that the first successor (bTrue, the
// intrinsic path) is the expected one - confirm against ssa.BranchLikely.
b.Likely = ssa.BranchLikely
// We have the intrinsic - use it directly.
s.startBlock(bTrue)
s.vars[n] = s.newValue3(ssa.OpFma, types.Types[TFLOAT64], args[0], args[1], args[2])
s.endBlock().AddEdgeTo(bEnd)
// Call the pure Go version.
s.startBlock(bFalse)
// s.call yields a value that is loaded as the float64 result below.
a := s.call(n, callNormal)
s.vars[n] = s.load(types.Types[TFLOAT64], a)
s.endBlock().AddEdgeTo(bEnd)
// Merge results.
s.startBlock(bEnd)
// Both branches assigned s.vars[n]; read the merged float64 value in bEnd.
return s.variable(n, types.Types[TFLOAT64])
},
sys.ARM)
makeRoundAMD64 := func(op ssa.Op) func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
return func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
......
......@@ -210,6 +210,9 @@
(Round(32|64)F x) -> x
// fused-multiply-add
(Fma x y z) -> (FMULAD z x y)
// comparisons
(Eq8 x y) -> (Equal (CMP (ZeroExt8to32 x) (ZeroExt8to32 y)))
(Eq16 x y) -> (Equal (CMP (ZeroExt16to32 x) (ZeroExt16to32 y)))
......
......@@ -192,6 +192,10 @@ func init() {
{name: "MULSF", argLength: 3, reg: fp31, asm: "MULSF", resultInArg0: true}, // arg0 - (arg1 * arg2)
{name: "MULSD", argLength: 3, reg: fp31, asm: "MULSD", resultInArg0: true}, // arg0 - (arg1 * arg2)
// FMULAD only exists on platforms with the VFPv4 instruction set.
// Any use must be preceded by a successful check of runtime.armHasVFPv4.
{name: "FMULAD", argLength: 3, reg: fp31, asm: "FMULAD", resultInArg0: true}, // arg0 + (arg1 * arg2)
{name: "AND", argLength: 2, reg: gp21, asm: "AND", commutative: true}, // arg0 & arg1
{name: "ANDconst", argLength: 1, reg: gp11, asm: "AND", aux: "Int32"}, // arg0 & auxInt
{name: "OR", argLength: 2, reg: gp21, asm: "ORR", commutative: true}, // arg0 | arg1
......
......@@ -925,6 +925,7 @@ const (
OpARMMULAD
OpARMMULSF
OpARMMULSD
OpARMFMULAD
OpARMAND
OpARMANDconst
OpARMOR
......@@ -12119,6 +12120,22 @@ var opcodeTable = [...]opInfo{
},
},
},
{
name: "FMULAD",
argLen: 3,
resultInArg0: true,
asm: arm.AFMULAD,
reg: regInfo{
inputs: []inputInfo{
{0, 4294901760}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15
{1, 4294901760}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15
{2, 4294901760}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15
},
outputs: []outputInfo{
{0, 4294901760}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15
},
},
},
{
name: "AND",
argLen: 2,
......
......@@ -538,6 +538,8 @@ func rewriteValueARM(v *Value) bool {
return rewriteValueARM_OpEqB_0(v)
case OpEqPtr:
return rewriteValueARM_OpEqPtr_0(v)
case OpFma:
return rewriteValueARM_OpFma_0(v)
case OpGeq16:
return rewriteValueARM_OpGeq16_0(v)
case OpGeq16U:
......@@ -17159,6 +17161,21 @@ func rewriteValueARM_OpEqPtr_0(v *Value) bool {
return true
}
}
// rewriteValueARM_OpFma_0 rewrites a generic Fma value in place into the
// ARM FMULAD op. The three operands are reordered so that the original
// third argument comes first, followed by the original first and second
// arguments. It always reports true, since the rule has no condition.
func rewriteValueARM_OpFma_0(v *Value) bool {
	// match: (Fma x y z)
	// cond:
	// result: (FMULAD z x y)

	// Capture the operands before reset, reading index 2 first.
	third, first, second := v.Args[2], v.Args[0], v.Args[1]

	v.reset(OpARMFMULAD)
	v.AddArg(third)
	v.AddArg(first)
	v.AddArg(second)
	return true
}
func rewriteValueARM_OpGeq16_0(v *Value) bool {
b := v.Block
typ := &b.Func.Config.Types
......
......@@ -25,5 +25,7 @@ var (
x86HasSSE41 bool
x86HasFMA bool
armHasVFPv4 bool
arm64HasATOMICS bool
)
......@@ -516,6 +516,8 @@ func cpuinit() {
x86HasSSE41 = cpu.X86.HasSSE41
x86HasFMA = cpu.X86.HasFMA
armHasVFPv4 = cpu.ARM.HasVFPv4
arm64HasATOMICS = cpu.ARM64.HasATOMICS
}
......
......@@ -109,6 +109,7 @@ func copysign(a, b, c float64) {
func fma(x, y, z float64) float64 {
// amd64:"VFMADD231SD"
// arm/6:"FMULAD"
// arm64:"FMADDD"
// s390x:"FMADD"
// ppc64:"FMADD"
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment