cmd/compile: fix FP accuracy issue introduced by FMA optimization on ARM64

Two ARM64 rules are added to avoid FP accuracy issue, which causes build failure. https://build.golang.org/log/1360f5c9ef3f37968216350283c1013e9681725d fixes #24033 Change-Id: I9b74b584ab5cc53fa49476de275dc549adf97610 Reviewed-on: https://go-review.googlesource.com/96355Reviewed-by: Cherry Zhang <cherryyz@google.com> Run-TryBot: Cherry Zhang <cherryyz@google.com> TryBot-Result: Gobot Gobot <gobot@golang.org>

cmd/compile: fix FP accuracy issue introduced by FMA optimization on ARM64
Two ARM64 rules are added to avoid FP accuracy issue, which causes build failure. https://build.golang.org/log/1360f5c9ef3f37968216350283c1013e9681725d fixes #24033 Change-Id: I9b74b584ab5cc53fa49476de275dc549adf97610 Reviewed-on: https://go-review.googlesource.com/96355Reviewed-by: Cherry Zhang <cherryyz@google.com> Run-TryBot: Cherry Zhang <cherryyz@google.com> TryBot-Result: Gobot Gobot <gobot@golang.org>
7113d3a5 · Ben Shi · Cherry Zhang · ef3ab3f5 · 7113d3a5 · 7113d3a5
Commit 7113d3a5 authored Feb 22, 2018 by Ben Shi Committed by Cherry Zhang Feb 22, 2018
5 changed files
--- a/src/cmd/compile/internal/arm64/ssa.go
+++ b/src/cmd/compile/internal/arm64/ssa.go
@@ -590,6 +590,8 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
 		p.From.Reg = v.Args[0].Reg()
 		p.To.Type = obj.TYPE_REG
 		p.To.Reg = v.Reg()
+	case ssa.OpARM64LoweredRound32F, ssa.OpARM64LoweredRound64F:
+		// input is already rounded
 	case ssa.OpARM64VCNT:
 		p := s.Prog(v.Op.Asm())
 		p.From.Type = obj.TYPE_REG

--- a/src/cmd/compile/internal/ssa/gen/ARM64.rules
+++ b/src/cmd/compile/internal/ssa/gen/ARM64.rules
@@ -237,8 +237,8 @@
 (Cvt32Fto64F x) -> (FCVTSD x)
 (Cvt64Fto32F x) -> (FCVTDS x)

-(Round32F x) -> x
-(Round64F x) -> x
+(Round32F x) -> (LoweredRound32F x)
+(Round64F x) -> (LoweredRound64F x)

 // comparisons
 (Eq8 x y)  -> (Equal (CMPW (ZeroExt8to32 x) (ZeroExt8to32 y)))

--- a/src/cmd/compile/internal/ssa/gen/ARM64Ops.go
+++ b/src/cmd/compile/internal/ssa/gen/ARM64Ops.go
@@ -216,6 +216,8 @@ func init() {
 		{name: "CLZW", argLength: 1, reg: gp11, asm: "CLZW"},       // count leading zero, 32-bit
 		{name: "VCNT", argLength: 1, reg: fp11, asm: "VCNT"},       // count set bits for each 8-bit unit and store the result in each 8-bit unit
 		{name: "VUADDLV", argLength: 1, reg: fp11, asm: "VUADDLV"}, // unsigned sum of eight bytes in a 64-bit value, zero extended to 64-bit.
+		{name: "LoweredRound32F", argLength: 1, reg: fp11, resultInArg0: true},
+		{name: "LoweredRound64F", argLength: 1, reg: fp11, resultInArg0: true},

 		// 3-operand, the addend comes first
 		{name: "FMADDS", argLength: 3, reg: fp31, asm: "FMADDS"},   // +arg0 + (arg1 * arg2)

--- a/src/cmd/compile/internal/ssa/opGen.go
+++ b/src/cmd/compile/internal/ssa/opGen.go
@@ -1003,6 +1003,8 @@ const (
 	OpARM64CLZW
 	OpARM64VCNT
 	OpARM64VUADDLV
+	OpARM64LoweredRound32F
+	OpARM64LoweredRound64F
 	OpARM64FMADDS
 	OpARM64FMADDD
 	OpARM64FNMADDS
@@ -12765,6 +12767,32 @@ var opcodeTable = [...]opInfo{
 			},
 		},
 	},
+	{
+		name:         "LoweredRound32F",
+		argLen:       1,
+		resultInArg0: true,
+		reg: regInfo{
+			inputs: []inputInfo{
+				{0, 9223372034707292160}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
+			},
+			outputs: []outputInfo{
+				{0, 9223372034707292160}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
+			},
+		},
+	},
+	{
+		name:         "LoweredRound64F",
+		argLen:       1,
+		resultInArg0: true,
+		reg: regInfo{
+			inputs: []inputInfo{
+				{0, 9223372034707292160}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
+			},
+			outputs: []outputInfo{
+				{0, 9223372034707292160}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
+			},
+		},
+	},
 	{
 		name:   "FMADDS",
 		argLen: 3,

--- a/src/cmd/compile/internal/ssa/rewriteARM64.go
+++ b/src/cmd/compile/internal/ssa/rewriteARM64.go
@@ -16318,11 +16318,10 @@ func rewriteValueARM64_OpRound_0(v *Value) bool {
 func rewriteValueARM64_OpRound32F_0(v *Value) bool {
 	// match: (Round32F x)
 	// cond:
-	// result: x
+	// result: (LoweredRound32F x)
 	for {
 		x := v.Args[0]
-		v.reset(OpCopy)
-		v.Type = x.Type
+		v.reset(OpARM64LoweredRound32F)
 		v.AddArg(x)
 		return true
 	}
@@ -16330,11 +16329,10 @@ func rewriteValueARM64_OpRound32F_0(v *Value) bool {
 func rewriteValueARM64_OpRound64F_0(v *Value) bool {
 	// match: (Round64F x)
 	// cond:
-	// result: x
+	// result: (LoweredRound64F x)
 	for {
 		x := v.Args[0]
-		v.reset(OpCopy)
-		v.Type = x.Type
+		v.reset(OpARM64LoweredRound64F)
 		v.AddArg(x)
 		return true
 	}