Commit 93a601dd authored by Akhil Indurti's avatar Akhil Indurti Committed by Keith Randall

math: add guaranteed-precision FMA implementation

Currently, the precision of the float64 multiply-add operation
(x * y) + z varies across architectures. While generated code for
ppc64, s390x, and arm64 can guarantee that there is no intermediate
rounding on those platforms, other architectures like x86, mips, and
arm will exhibit different behavior depending on available instruction
set. Consequently, applications cannot rely on results being identical
across GOARCH-dependent codepaths.

This CL introduces a software implementation that performs an IEEE 754
double-precision fused-multiply-add operation. The only supported
rounding mode is round-to-nearest ties-to-even. Separate CLs include
hardware implementations when available. Otherwise, this software
fallback is given as the default implementation.

Specifically,
    - arm64, ppc64, s390x: Uses the FMA instruction provided by all
      of these ISAs.
    - mips[64][le]: Falls back to this software implementation. Only
      release 6 of the ISA includes a strict FMA instruction with
      MADDF.D (not implementation defined). Because the number of R6
      processors in the wild is scarce, the assembly implementation
      is left as a future optimization.
    - x86: Guards the use of VFMADD213SD by checking cpu.X86.HasFMA.
    - arm: Guards the use of VFMA by checking cpu.ARM.HasVFPv4.
    - software fallback: Uses mostly integer arithmetic except
      for input that involves Inf, NaN, or zero.

Updates #25819.

Change-Id: Iadadff2219638bacc9fec78d3ab885393fea4a08
Reviewed-on: https://go-review.googlesource.com/c/go/+/127458
Run-TryBot: Ian Lance Taylor <iant@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: default avatarKeith Randall <khr@golang.org>
parent 84b0e366
......@@ -2005,6 +2005,64 @@ var logbBC = []float64{
1023,
}
// Test cases were generated with Berkeley TestFloat-3e/testfloat_gen.
// http://www.jhauser.us/arithmetic/TestFloat.html.
// The default rounding mode is selected (nearest/even), and exception flags are ignored.
var fmaC = []struct{ x, y, z, want float64 }{
// Large exponent spread
{-3.999999999999087, -1.1123914289620494e-16, -7.999877929687506, -7.999877929687505},
{-262112.0000004768, -0.06251525855623184, 1.1102230248837136e-16, 16385.99945072085},
{-6.462348523533467e-27, -2.3763644720331857e-211, 4.000000000931324, 4.000000000931324},
// Effective addition
{-2.0000000037252907, 6.7904383376e-313, -3.3951933161e-313, -1.697607001654e-312},
{-0.12499999999999999, 512.007568359375, -1.4193627164960366e-16, -64.00094604492188},
{-2.7550648847397148e-39, -3.4028301595800694e+38, 0.9960937495343386, 1.9335955376735676},
{5.723369164769208e+24, 3.8149300927159385e-06, 1.84489958778182e+19, 4.028324913621874e+19},
{-0.4843749999990904, -3.6893487872543293e+19, 9.223653786709391e+18, 2.7093936974938993e+19},
{-3.8146972665201165e-06, 4.2949672959999385e+09, -2.2204460489938386e-16, -16384.000003844263},
{6.98156394130982e-309, -1.1072962560000002e+09, -4.4414561548793455e-308, -7.73065965765153e-300},
// Effective subtraction
{5e-324, 4.5, -2e-323, 0},
{5e-324, 7, -3.5e-323, 0},
{5e-324, 0.5000000000000001, -5e-324, Copysign(0, -1)},
{-2.1240680525e-314, -1.233647078189316e+308, -0.25781249999954525, -0.25780987964919844},
{8.579992955364441e-308, 0.6037391876780558, -4.4501307410480706e-308, 7.29947236107098e-309},
{-4.450143471986689e-308, -0.9960937499927239, -4.450419332475649e-308, -1.7659233458788e-310},
{1.4932076393918112, -2.2248022430460833e-308, 4.449875571054211e-308, 1.127783865601762e-308},
// Overflow
{-2.288020632214759e+38, -8.98846570988901e+307, 1.7696041796300924e+308, Inf(0)},
{1.4888652783208255e+308, -9.007199254742012e+15, -6.807282911929205e+38, Inf(-1)},
{9.142703268902826e+192, -1.3504889569802838e+296, -1.9082200803806996e-89, Inf(-1)},
// Finite x and y, but non-finite z.
{31.99218749627471, -1.7976930544991702e+308, Inf(0), Inf(0)},
{-1.7976931281784667e+308, -2.0009765625002265, Inf(-1), Inf(-1)},
// Special
{0, 0, 0, 0},
{-1.1754226043408471e-38, NaN(), Inf(0), NaN()},
{0, 0, 2.22507385643494e-308, 2.22507385643494e-308},
{-8.65697792e+09, NaN(), -7.516192799999999e+09, NaN()},
{-0.00012207403779029757, 3.221225471996093e+09, NaN(), NaN()},
{Inf(-1), 0.1252441407414153, -1.387184532981584e-76, Inf(-1)},
{Inf(0), 1.525878907671432e-05, -9.214364835452549e+18, Inf(0)},
// Random
{0.1777916152213626, -32.000015266239636, -2.2204459148334633e-16, -5.689334401293007},
{-2.0816681711722314e-16, -0.4997558592585846, -0.9465627129124969, -0.9465627129124968},
{-1.9999997615814211, 1.8518819259933516e+19, 16.874999999999996, -3.703763410463646e+19},
{-0.12499994039717421, 32767.99999976135, -2.0752587082923246e+19, -2.075258708292325e+19},
{7.705600568510257e-34, -1.801432979000528e+16, -0.17224197722973714, -0.17224197722973716},
{3.8988133103758913e-308, -0.9848632812499999, 3.893879244098556e-308, 5.40811742605814e-310},
{-0.012651981190687427, 6.911985574912436e+38, 6.669240527007144e+18, -8.745031148409496e+36},
{4.612811918325842e+18, 1.4901161193847641e-08, 2.6077032311277997e-08, 6.873625395187494e+10},
{-9.094947033611148e-13, 4.450691014249257e-308, 2.086006742350485e-308, 2.086006742346437e-308},
{-7.751454006381804e-05, 5.588653777189071e-308, -2.2207280111272877e-308, -2.2211612130544025e-308},
}
func tolerance(a, b, e float64) bool {
// Multiplying by e here can underflow denormal values to zero.
// Check a==b so that at least if a and b are small and identical
......@@ -2995,6 +3053,15 @@ func TestYn(t *testing.T) {
}
}
func TestFma(t *testing.T) {
for _, c := range fmaC {
got := Fma(c.x, c.y, c.z)
if !alike(got, c.want) {
t.Errorf("Fma(%g,%g,%g) == %g; want %g", c.x, c.y, c.z, got, c.want)
}
}
}
// Check that math functions of high angle values
// return accurate results. [Since (vf[i] + large) - large != vf[i],
// testing for Trig(vf[i] + large) == Trig(vf[i]), where large is
......@@ -3725,3 +3792,11 @@ func BenchmarkFloat32frombits(b *testing.B) {
}
GlobalF = float64(x)
}
func BenchmarkFma(b *testing.B) {
x := 0.0
for i := 0; i < b.N; i++ {
x = Fma(E, Pi, x)
}
GlobalF = x
}
// Copyright 2019 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package math
import "math/bits"
func zero(x uint64) uint64 {
if x == 0 {
return 1
}
return 0
// branchless:
// return ((x>>1 | x&1) - 1) >> 63
}
func nonzero(x uint64) uint64 {
if x != 0 {
return 1
}
return 0
// branchless:
// return 1 - ((x>>1|x&1)-1)>>63
}
func shl(u1, u2 uint64, n uint) (r1, r2 uint64) {
r1 = u1<<n | u2>>(64-n) | u2<<(n-64)
r2 = u2 << n
return
}
func shr(u1, u2 uint64, n uint) (r1, r2 uint64) {
r2 = u2>>n | u1<<(64-n) | u1>>(n-64)
r1 = u1 >> n
return
}
// shrcompress compresses the bottom n+1 bits of the two-word
// value into a single bit. the result is equal to the value
// shifted to the right by n, except the result's 0th bit is
// set to the bitwise OR of the bottom n+1 bits.
func shrcompress(u1, u2 uint64, n uint) (r1, r2 uint64) {
// TODO: Performance here is really sensitive to the
// order/placement of these branches. n == 0 is common
// enough to be in the fast path. Perhaps more measurement
// needs to be done to find the optimal order/placement?
switch {
case n == 0:
return u1, u2
case n == 64:
return 0, u1 | nonzero(u2)
case n >= 128:
return 0, nonzero(u1 | u2)
case n < 64:
r1, r2 = shr(u1, u2, n)
r2 |= nonzero(u2 & (1<<n - 1))
case n < 128:
r1, r2 = shr(u1, u2, n)
r2 |= nonzero(u1&(1<<(n-64)-1) | u2)
}
return
}
func lz(u1, u2 uint64) (l int32) {
l = int32(bits.LeadingZeros64(u1))
if l == 64 {
l += int32(bits.LeadingZeros64(u2))
}
return l
}
// split splits b into sign, biased exponent, and mantissa.
// It adds the implicit 1 bit to the mantissa for normal values,
// and normalizes subnormal values.
func split(b uint64) (sign uint32, exp int32, mantissa uint64) {
sign = uint32(b >> 63)
exp = int32(b>>52) & mask
mantissa = b & fracMask
if exp == 0 {
// Normalize value if subnormal.
shift := uint(bits.LeadingZeros64(mantissa) - 11)
mantissa <<= shift
exp = 1 - int32(shift)
} else {
// Add implicit 1 bit
mantissa |= 1 << 52
}
return
}
// Fma returns x * y + z, computed with only one rounding.
func Fma(x, y, z float64) float64 {
bx, by, bz := Float64bits(x), Float64bits(y), Float64bits(z)
// Inf or NaN or zero involved. At most one rounding will occur.
if x == 0.0 || y == 0.0 || z == 0.0 || bx&uvinf == uvinf || by&uvinf == uvinf {
return x*y + z
}
// Handle non-finite z separately. Evaluating x*y+z where
// x and y are finite, but z is infinite, should always result in z.
if bz&uvinf == uvinf {
return z
}
// Inputs are (sub)normal.
// Split x, y, z into sign, exponent, mantissa.
xs, xe, xm := split(bx)
ys, ye, ym := split(by)
zs, ze, zm := split(bz)
// Compute product p = x*y as sign, exponent, two-word mantissa.
// Start with exponent. "is normal" bit isn't subtracted yet.
pe := xe + ye - bias + 1
// pm1:pm2 is the double-word mantissa for the product p.
// Shift left to leave top bit in product. Effectively
// shifts the 106-bit product to the left by 21.
pm1, pm2 := bits.Mul64(xm<<10, ym<<11)
zm1, zm2 := zm<<10, uint64(0)
ps := xs ^ ys // product sign
// normalize to 62nd bit
is62zero := uint((^pm1 >> 62) & 1)
pm1, pm2 = shl(pm1, pm2, is62zero)
pe -= int32(is62zero)
// Swap addition operands so |p| >= |z|
if pe < ze || (pe == ze && (pm1 < zm1 || (pm1 == zm1 && pm2 < zm2))) {
ps, pe, pm1, pm2, zs, ze, zm1, zm2 = zs, ze, zm1, zm2, ps, pe, pm1, pm2
}
// Align significands
zm1, zm2 = shrcompress(zm1, zm2, uint(pe-ze))
// Compute resulting significands, normalizing if necessary.
var m, c uint64
if ps == zs {
// Adding (pm1:pm2) + (zm1:zm2)
pm2, c = bits.Add64(pm2, zm2, 0)
pm1, _ = bits.Add64(pm1, zm1, c)
pe -= int32(^pm1 >> 63)
pm1, m = shrcompress(pm1, pm2, uint(64+pm1>>63))
} else {
// Subtracting (pm1:pm2) - (zm1:zm2)
// TODO: should we special-case cancellation?
pm2, c = bits.Sub64(pm2, zm2, 0)
pm1, _ = bits.Sub64(pm1, zm1, c)
nz := lz(pm1, pm2)
pe -= nz
m, pm2 = shl(pm1, pm2, uint(nz-1))
m |= nonzero(pm2)
}
// Round and break ties to even
if pe > 1022+bias || pe == 1022+bias && (m+1<<9)>>63 == 1 {
// rounded value overflows exponent range
return Float64frombits(uint64(ps)<<63 | uvinf)
}
if pe < 0 {
n := uint(-pe)
m = m>>n | nonzero(m&(1<<n-1))
pe = 0
}
m = ((m + 1<<9) >> 10) & ^zero((m&(1<<10-1))^1<<9)
pe &= -int32(nonzero(m))
return Float64frombits(uint64(ps)<<63 + uint64(pe)<<52 + m)
}
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment