Commit e0538833 authored by Robert Griesemer's avatar Robert Griesemer

math/big: implement NaN

This change introduces NaNs (for situations like Inf-Inf, etc.).
The implementation is incomplete (the four basic operations produce
a NaN if any of the operands is an Inf or a NaN); and some operations
produce incorrect accuracy for NaN arguments. These are known bugs
which are documented.

Change-Id: Ia88841209e47930681cef19f113e178f92ceeb33
Reviewed-on: https://go-review.googlesource.com/6540Reviewed-by: default avatarAlan Donovan <adonovan@google.com>
parent 9feb24f3
......@@ -11,6 +11,9 @@
// CAUTION: WORK IN PROGRESS - USE AT YOUR OWN RISK.
// TODO(gri) provide a couple of Example tests showing typical Float initialization
// and use.
package big
import (
......@@ -20,12 +23,12 @@ import (
const debugFloat = true // enable for debugging
// A Float represents a multi-precision floating point number of the form
// A nonzero Float represents a multi-precision floating point number
//
// sign × mantissa × 2**exponent
//
// with 0.5 <= mantissa < 1.0, and MinExp <= exponent <= MaxExp (with the
// exception of 0 and Inf which have a 0 mantissa and special exponents).
// with 0.5 <= mantissa < 1.0, and MinExp <= exponent <= MaxExp.
// A Float may also be +0, -0, +Inf, -Inf, or NaN.
//
// Each Float value also has a precision, rounding mode, and accuracy.
//
......@@ -39,18 +42,19 @@ const debugFloat = true // enable for debugging
// round the numeric result according to the precision and rounding mode
// of the result variable, unless specified otherwise.
//
// If the result precision is 0 (see below), it is set to the precision of
// the argument with the largest precision value before any rounding takes
// place, and the rounding mode remains unchanged. Thus, uninitialized Floats
// provided as result arguments will have their precision set to a reasonable
// value determined by the operands and their mode is the zero value for
// RoundingMode (ToNearestEven).
// If the provided result precision is 0 (see below), it is set to the
// precision of the argument with the largest precision value before any
// rounding takes place, and the rounding mode remains unchanged. Thus,
// uninitialized Floats provided as result arguments will have their
// precision set to a reasonable value determined by the operands and
// their mode is the zero value for RoundingMode (ToNearestEven).
//
// By setting the desired precision to 24 or 53 and using matching rounding
// mode (typically ToNearestEven), Float operations produce the same results
// as the corresponding float32 or float64 IEEE-754 arithmetic for normalized
// operands (no NaNs or denormalized numbers). Additionally, positive and
// negative zeros and infinities are fully supported.
// operands (including +0 and -0). Exponent underflow and overflow lead to a
// 0 or an Infinity for different values than IEEE-754 because Float exponents
// hace a much larger range.
//
// The zero (uninitialized) value for a Float is ready to use and represents
// the number +0.0 exactly, with precision 0 and rounding mode ToNearestEven.
......@@ -64,21 +68,21 @@ type Float struct {
prec uint32
}
// TODO(gri) provide a couple of Example tests showing typical Float intialization
// and use.
// Internal representation: The mantissa bits x.mant of a Float x are stored
// in a nat slice long enough to hold up to x.prec bits; the slice may (but
// doesn't have to) be shorter if the mantissa contains trailing 0 bits.
// Unless x is a zero or an infinity, x.mant is normalized such that the
// Unless x is a zero, infinity, or NaN, x.mant is normalized such that the
// msb of x.mant == 1 (i.e., the msb is shifted all the way "to the left").
// Thus, if the mantissa has trailing 0 bits or x.prec is not a multiple
// of the the Word size _W, x.mant[0] has trailing zero bits. Zero and Inf
// values have an empty mantissa and a 0 or infExp exponent, respectively.
// of the the Word size _W, x.mant[0] has trailing zero bits. Zero, Inf, and
// NaN values have an empty mantissa and a 0, infExp, or NanExp exponent,
// respectively.
const (
MaxExp = math.MaxInt32 // largest supported exponent magnitude
infExp = -MaxExp - 1 // exponent for Inf values
MaxExp = math.MaxInt32 // largest supported exponent
MinExp = math.MinInt32 + 2 // smallest supported exponent
infExp = math.MinInt32 + 1 // exponent for Inf values
nanExp = math.MinInt32 + 0 // exponent for NaN values
MaxPrec = math.MaxUint32 // largest (theoretically) supported precision; likely memory-limited
)
......@@ -145,17 +149,17 @@ func (mode RoundingMode) String() string {
// SetPrec sets z's precision to prec and returns the (possibly) rounded
// value of z. Rounding occurs according to z's rounding mode if the mantissa
// cannot be represented in prec bits without loss of precision.
// If prec == 0, the result is ±0 for finite z, and ±Inf for infinite z,
// with the sign set according to z. If prec > MaxPrec, it is set to MaxPrec.
// SetPrec(0) maps all finite values to ±0; infinite and NaN values remain
// unchanged. If prec > MaxPrec, it is set to MaxPrec.
func (z *Float) SetPrec(prec uint) *Float {
z.acc = Exact // optimistically assume no rounding is needed
// handle special case
// special case
if prec == 0 {
z.prec = 0
if len(z.mant) != 0 {
// truncate and compute accuracy
z.mant = z.mant[:0]
z.exp = 0
z.setZero()
acc := Below
if z.neg {
acc = Above
......@@ -164,6 +168,7 @@ func (z *Float) SetPrec(prec uint) *Float {
}
return z
}
// general case
if prec > MaxPrec {
prec = MaxPrec
......@@ -185,14 +190,14 @@ func (z *Float) SetMode(mode RoundingMode) *Float {
}
// Prec returns the mantissa precision of x in bits.
// The result may be 0 for |x| == 0 or |x| == Inf.
// The result may be 0 for |x| == 0, |x| == Inf, or NaN.
func (x *Float) Prec() uint {
return uint(x.prec)
}
// MinPrec returns the minimum precision required to represent x exactly
// (i.e., the smallest prec before x.SetPrec(prec) would start rounding x).
// The result is 0 for ±0 and ±Inf.
// The result is 0 for ±0, ±Inf, and NaN.
func (x *Float) MinPrec() uint {
return uint(len(x.mant))*_W - x.mant.trailingZeroBits()
}
......@@ -210,18 +215,20 @@ func (x *Float) Mode() RoundingMode {
// Sign returns:
//
// -1 if x < 0
// 0 if x == 0 or x == -0
// 0 if x is ±0 or NaN
// +1 if x > 0
//
func (x *Float) Sign() int {
s := 0
if len(x.mant) != 0 || x.exp == infExp {
s = 1 // non-zero x
if debugFloat {
validate(x)
}
if len(x.mant) == 0 && x.exp != infExp {
return 0
}
if x.neg {
s = -s
return -1
}
return s
return 1
}
// MantExp breaks x into its mantissa and exponent components.
......@@ -235,14 +242,18 @@ func (x *Float) Sign() int {
//
// ( ±0).MantExp() = ±0, 0
// (±Inf).MantExp() = ±Inf, 0
// ( NaN).MantExp() = NaN, 0
//
// MantExp does not modify x; the result mant is a new Float.
func (x *Float) MantExp(z *Float) (mant *Float, exp int) {
if debugFloat {
validate(x)
}
if z == nil {
z = new(Float)
}
mant = z.Copy(x)
if x.exp != infExp {
if len(z.mant) != 0 {
exp = int(x.exp)
mant.exp = 0 // after reading x.exp (x and mant may be aliases)
}
......@@ -260,10 +271,15 @@ func (x *Float) MantExp(z *Float) (mant *Float, exp int) {
//
// z.SetMantExp( ±0, exp) = ±0
// z.SetMantExp(±Inf, exp) = ±Inf
// z.SetMantExp( NaN, exp) = NaN
//
func (z *Float) SetMantExp(mant *Float, exp int) *Float {
if debugFloat {
validate(z)
validate(mant)
}
z.Copy(mant)
if len(z.mant) == 0 || z.exp == infExp {
if len(z.mant) == 0 {
return z
}
z.setExp(int64(z.exp) + int64(exp))
......@@ -271,15 +287,15 @@ func (z *Float) SetMantExp(mant *Float, exp int) *Float {
}
// IsInt reports whether x is an integer.
// ±Inf are not considered integers.
// ±Inf and NaN are not considered integers.
func (x *Float) IsInt() bool {
if debugFloat {
validate(x)
}
// pick off easy cases
if x.exp <= 0 {
// |x| < 1 || |x| == Inf
return len(x.mant) == 0 && x.exp != infExp
// |x| < 1 || |x| == Inf || x is NaN
return len(x.mant) == 0 && x.exp == 0 // x == 0
}
// x.exp > 0
return x.prec <= uint32(x.exp) || x.MinPrec() <= uint(x.exp) // not enough bits for fractional mantissa
......@@ -290,31 +306,57 @@ func (x *Float) IsInt() bool {
// If sign < 0, IsInf reports whether x is negative infinity.
// If sign == 0, IsInf reports whether x is either infinity.
func (x *Float) IsInf(sign int) bool {
if debugFloat {
validate(x)
}
return x.exp == infExp && (sign == 0 || x.neg == (sign < 0))
}
// IsNaN reports whether x is a NaN.
func (x *Float) IsNaN() bool {
if debugFloat {
validate(x)
}
return x.exp == nanExp
}
func (z *Float) setZero() {
z.mant = z.mant[:0]
z.exp = 0
}
func (z *Float) setInf() {
z.mant = z.mant[:0]
z.exp = infExp
}
// setExp sets the exponent for z.
// If the exponent's magnitude is too large, z becomes ±Inf.
// If e < MinExp, z becomes ±0; if e > MaxExp, z becomes ±Inf.
func (z *Float) setExp(e int64) {
if -MaxExp <= e && e <= MaxExp {
switch {
case e < MinExp:
z.setZero()
default:
if len(z.mant) == 0 {
e = 0
}
z.exp = int32(e)
return
case e > MaxExp:
z.setInf()
}
// Inf
z.mant = z.mant[:0]
z.exp = infExp
}
// debugging support
func validate(x *Float) {
if !debugFloat {
// avoid performance bugs
panic("validate called but debugFloat is not set")
}
const msb = 1 << (_W - 1)
m := len(x.mant)
if m == 0 {
// 0.0 or Inf
if x.exp != 0 && x.exp != infExp {
// 0.0, Inf, or NaN
if x.exp != 0 && x.exp >= MinExp {
panic(fmt.Sprintf("empty matissa with invalid exponent %d", x.exp))
}
return
......@@ -342,12 +384,9 @@ func (z *Float) round(sbit uint) {
z.acc = Exact
// handle zero and Inf
// handle zero, Inf, and NaN
m := uint32(len(z.mant)) // present mantissa length in words
if m == 0 {
if z.exp != infExp {
z.exp = 0
}
return
}
// m > 0 implies z.prec > 0 (checked by validate)
......@@ -501,8 +540,7 @@ func (z *Float) setBits64(neg bool, x uint64) *Float {
z.acc = Exact
z.neg = neg
if x == 0 {
z.mant = z.mant[:0]
z.exp = 0
z.setZero()
return z
}
// x != 0
......@@ -538,25 +576,25 @@ func (z *Float) SetInt64(x int64) *Float {
// SetFloat64 sets z to the (possibly rounded) value of x and returns z.
// If z's precision is 0, it is changed to 53 (and rounding will have
// no effect).
// If x is denormalized or NaN, the result is unspecified.
// TODO(gri) should return nil in those cases
func (z *Float) SetFloat64(x float64) *Float {
if z.prec == 0 {
z.prec = 53
}
if math.IsNaN(x) {
z.SetNaN()
return z
}
z.acc = Exact
z.neg = math.Signbit(x) // handle -0 correctly
z.neg = math.Signbit(x) // handle -0, -Inf correctly
if math.IsInf(x, 0) {
z.mant = z.mant[:0]
z.exp = infExp
z.setInf()
return z
}
if x == 0 {
z.mant = z.mant[:0]
z.exp = 0
z.setZero()
return z
}
// x != 0
// normalized x != 0
fmant, exp := math.Frexp(x) // get normalized mantissa
z.mant = z.mant.setUint64(1<<63 | math.Float64bits(fmant)<<11)
z.exp = int32(exp) // always fits
......@@ -633,8 +671,17 @@ func (z *Float) SetRat(x *Rat) *Float {
func (z *Float) SetInf(sign int) *Float {
z.acc = Exact
z.neg = sign < 0
z.setInf()
return z
}
// SetNaN sets z to a NaN value, and returns z.
// The precision of z is unchanged and the result is always Exact.
func (z *Float) SetNaN() *Float {
z.acc = Exact
z.neg = false
z.mant = z.mant[:0]
z.exp = infExp
z.exp = nanExp
return z
}
......@@ -645,12 +692,14 @@ func (z *Float) SetInf(sign int) *Float {
// mode; and z's accuracy reports the result error relative to the
// exact (not rounded) result.
func (z *Float) Set(x *Float) *Float {
// TODO(gri) what about z.acc? should it be always Exact?
if debugFloat {
validate(x)
}
z.acc = Exact
if z != x {
if z.prec == 0 {
z.prec = x.prec
}
z.acc = Exact
z.neg = x.neg
z.exp = x.exp
z.mant = z.mant.set(x.mant)
......@@ -664,6 +713,9 @@ func (z *Float) Set(x *Float) *Float {
// Copy sets z to x, with the same precision and rounding mode as x,
// and returns z.
func (z *Float) Copy(x *Float) *Float {
if debugFloat {
validate(x)
}
// TODO(gri) what about z.acc? should it be always Exact?
if z != x {
z.acc = Exact
......@@ -697,24 +749,37 @@ func high64(x nat) uint64 {
// if x is an integer and Below otherwise.
// The result is (0, Above) for x < 0, and (math.MaxUint64, Below)
// for x > math.MaxUint64.
// BUG(gri) not implemented for NaN
func (x *Float) Uint64() (uint64, Accuracy) {
if debugFloat {
validate(x)
}
switch x.ord() {
case -2, -1:
// x < 0
return 0, Above
// special cases
if len(x.mant) == 0 {
switch x.exp {
case 0:
// x == 0 || x == -0
return 0, Exact
case 1:
// 0 < x < +Inf
return 0, Exact // ±0
case infExp:
if x.neg {
return 0, Above // -Inf
}
return math.MaxUint64, Below // +Inf
case nanExp:
panic("unimplemented")
}
panic("unreachable")
}
if x.neg {
return 0, Above
}
// x > 0
if x.exp <= 0 {
// 0 < x < 1
return 0, Below
}
// 1 <= x < +Inf
// 1 <= x
if x.exp <= 64 {
// u = trunc(x) fits into a uint64
u := high64(x.mant) >> (64 - uint32(x.exp))
......@@ -723,12 +788,8 @@ func (x *Float) Uint64() (uint64, Accuracy) {
}
return u, Below // x truncated
}
fallthrough // x too large
case 2:
// x == +Inf
// x too large
return math.MaxUint64, Below
}
panic("unreachable")
}
// Int64 returns the integer resulting from truncating x towards zero.
......@@ -736,19 +797,29 @@ func (x *Float) Uint64() (uint64, Accuracy) {
// an integer, and Above (x < 0) or Below (x > 0) otherwise.
// The result is (math.MinInt64, Above) for x < math.MinInt64, and
// (math.MaxInt64, Below) for x > math.MaxInt64.
// BUG(gri) incorrect result for NaN
func (x *Float) Int64() (int64, Accuracy) {
if debugFloat {
validate(x)
}
switch x.ord() {
case -2:
// x == -Inf
return math.MinInt64, Above
// special cases
if len(x.mant) == 0 {
switch x.exp {
case 0:
// x == 0 || x == -0
return 0, Exact // ±0
case infExp:
if x.neg {
return math.MinInt64, Above // -Inf
}
return math.MaxInt64, Below // +Inf
case nanExp:
// TODO(gri) fix this
return 0, Exact
case -1, 1:
}
panic("unreachable")
}
// 0 < |x| < +Inf
acc := Below
if x.neg {
......@@ -758,6 +829,8 @@ func (x *Float) Int64() (int64, Accuracy) {
// 0 < |x| < 1
return 0, acc
}
// x.exp > 0
// 1 <= |x| < +Inf
if x.exp <= 63 {
// i = trunc(x) fits into an int64 (excluding math.MinInt64)
......@@ -777,31 +850,40 @@ func (x *Float) Int64() (int64, Accuracy) {
}
return math.MinInt64, acc
}
fallthrough
case 2:
// x == +Inf
return math.MaxInt64, Below
}
panic("unreachable")
}
// Float64 returns the closest float64 value of x
// by rounding to nearest with 53 bits precision.
// TODO(gri) implement/document error scenarios.
// BUG(gri) accuracy incorrect for NaN, doesn't handle exponent overflow
func (x *Float) Float64() (float64, Accuracy) {
// x == ±Inf
if x.exp == infExp {
if debugFloat {
validate(x)
}
// special cases
if len(x.mant) == 0 {
switch x.exp {
case 0:
if x.neg {
var zero float64
return -zero, Exact
}
return 0, Exact
case infExp:
var sign int
if x.neg {
sign = -1
}
return math.Inf(sign), Exact
case nanExp:
return math.NaN(), Exact
}
// x == 0
if len(x.mant) == 0 {
return 0, Exact
panic("unreachable")
}
// x != 0
// 0 < |x| < +Inf
var r Float
r.prec = 53
r.Set(x)
......@@ -815,37 +897,53 @@ func (x *Float) Float64() (float64, Accuracy) {
}
// Int returns the result of truncating x towards zero;
// or nil if x is an infinity.
// or nil if x is an infinity or NaN.
// The result is Exact if x.IsInt(); otherwise it is Below
// for x > 0, and Above for x < 0.
// If a non-nil *Int argument z is provided, Int stores
// the result in z instead of allocating a new Int.
// BUG(gri) accuracy incorrect for for NaN
func (x *Float) Int(z *Int) (*Int, Accuracy) {
if debugFloat {
validate(x)
}
// accuracy for inexact results
acc := Below // truncation
if z == nil {
// no need to do this for Inf and NaN
// but those are rare enough that we
// don't care
z = new(Int)
}
// special cases
if len(x.mant) == 0 {
switch x.exp {
case 0:
return z.SetInt64(0), Exact // 0
case infExp:
if x.neg {
acc = Above
return nil, Above
}
// pick off easy cases
if x.exp <= 0 {
// |x| < 1 || |x| == Inf
if x.exp == infExp {
return nil, acc // ±Inf
return nil, Below
case nanExp:
// TODO(gri) fix accuracy for NaN
return nil, Exact
}
if len(x.mant) == 0 {
acc = Exact // ±0
panic("unreachable")
}
// ±0.xxx
if z == nil {
return new(Int), acc
// 0 < |x| < +Inf
acc := Below
if x.neg {
acc = Above
}
return z.SetUint64(0), acc
if x.exp <= 0 {
// 0 < |x| < 1
return z.SetInt64(0), acc
}
// x.exp > 0
// x.mant[len(x.mant)-1] != 0
// 1 <= |x| < +Inf
// determine minimum required precision for x
allBits := uint(len(x.mant)) * _W
exp := uint(x.exp)
......@@ -870,45 +968,60 @@ func (x *Float) Int(z *Int) (*Int, Accuracy) {
}
// Rat returns the rational number corresponding to x;
// or nil if x is an infinity.
// or nil if x is an infinity or NaN.
// The result is Exact is x is not an Inf or NaN.
// If a non-nil *Rat argument z is provided, Rat stores
// the result in z instead of allocating a new Rat.
func (x *Float) Rat(z *Rat) *Rat {
// BUG(gri) incorrect accuracy for Inf, NaN.
func (x *Float) Rat(z *Rat) (*Rat, Accuracy) {
if debugFloat {
validate(x)
}
// pick off easy cases
switch x.ord() {
case -2, +2:
return nil // ±Inf
case 0:
if z == nil {
return new(Rat)
// no need to do this for Inf and NaN
// but those are rare enough that we
// don't care
z = new(Rat)
}
// special cases
if len(x.mant) == 0 {
switch x.exp {
case 0:
return z.SetInt64(0), Exact // 0
case infExp:
if x.neg {
return nil, Above
}
return nil, Below
case nanExp:
// TODO(gri) fix accuracy
return nil, Exact
}
return z.SetInt64(0)
panic("unreachable")
}
// x != 0 && x != ±Inf
// 0 <= |x| < Inf
allBits := int32(len(x.mant)) * _W
// build up numerator and denominator
if z == nil {
z = new(Rat)
}
z.a.neg = x.neg
switch {
case x.exp > allBits:
z.a.abs = z.a.abs.shl(x.mant, uint(x.exp-allBits))
z.b.abs = z.b.abs[:0] // == 1 (see Rat)
return z // already in normal form
// z already in normal form
default:
z.a.abs = z.a.abs.set(x.mant)
z.b.abs = z.b.abs[:0] // == 1 (see Rat)
return z // already in normal form
// z already in normal form
case x.exp < allBits:
z.a.abs = z.a.abs.set(x.mant)
t := z.b.abs.setUint64(1)
z.b.abs = t.shl(t, uint(allBits-x.exp))
return z.norm()
z.norm()
}
return z, Exact
}
// Abs sets z to the (possibly rounded) value |x| (the absolute value of x)
......@@ -928,7 +1041,7 @@ func (z *Float) Neg(x *Float) *Float {
}
// z = x + y, ignoring signs of x and y.
// x and y must not be 0 or an Inf.
// x and y must not be 0, Inf, or NaN.
func (z *Float) uadd(x, y *Float) {
// Note: This implementation requires 2 shifts most of the
// time. It is also inefficient if exponents or precisions
......@@ -972,7 +1085,7 @@ func (z *Float) uadd(x, y *Float) {
}
// z = x - y for x >= y, ignoring signs of x and y.
// x and y must not be 0 or an Inf.
// x and y must not be 0, Inf, or NaN.
func (z *Float) usub(x, y *Float) {
// This code is symmetric to uadd.
// We have not factored the common code out because
......@@ -1004,7 +1117,7 @@ func (z *Float) usub(x, y *Float) {
// operands may have cancelled each other out
if len(z.mant) == 0 {
z.acc = Exact
z.setExp(0)
z.setZero()
return
}
// len(z.mant) > 0
......@@ -1014,7 +1127,7 @@ func (z *Float) usub(x, y *Float) {
}
// z = x * y, ignoring signs of x and y.
// x and y must not be 0 or an Inf.
// x and y must not be 0, Inf, or NaN.
func (z *Float) umul(x, y *Float) {
if debugFloat && (len(x.mant) == 0 || len(y.mant) == 0) {
panic("umul called with 0 argument")
......@@ -1035,7 +1148,7 @@ func (z *Float) umul(x, y *Float) {
}
// z = x / y, ignoring signs of x and y.
// x and y must not be 0 or an Inf.
// x and y must not be 0, Inf, or NaN.
func (z *Float) uquo(x, y *Float) {
if debugFloat && (len(x.mant) == 0 || len(y.mant) == 0) {
panic("uquo called with 0 argument")
......@@ -1083,7 +1196,7 @@ func (z *Float) uquo(x, y *Float) {
}
// ucmp returns -1, 0, or 1, depending on whether x < y, x == y, or x > y,
// while ignoring the signs of x and y. x and y must not be 0 or an Inf.
// while ignoring the signs of x and y. x and y must not be 0, Inf, or NaN.
func (x *Float) ucmp(y *Float) int {
if debugFloat && (len(x.mant) == 0 || len(y.mant) == 0) {
panic("ucmp called with 0 argument")
......@@ -1138,6 +1251,8 @@ func (x *Float) ucmp(y *Float) int {
// roundTowardNegative; under that attribute, the sign of an exact zero
// sum (or difference) shall be −0. However, x+x = x−(−x) retains the same
// sign as x even when x is zero.
//
// See also: http://play.golang.org/p/RtH3UCt5IH
// Add sets z to the rounded sum x+y and returns z.
// If z's precision is 0, it is changed to the larger
......@@ -1146,6 +1261,7 @@ func (x *Float) ucmp(y *Float) int {
// and rounding mode; and z's accuracy reports the
// result error relative to the exact (not rounded)
// result.
// BUG(gri) If any of the operands is Inf, the result is NaN.
func (z *Float) Add(x, y *Float) *Float {
if debugFloat {
validate(x)
......@@ -1156,14 +1272,21 @@ func (z *Float) Add(x, y *Float) *Float {
z.prec = umax32(x.prec, y.prec)
}
// TODO(gri) what about -0?
if len(y.mant) == 0 {
// TODO(gri) handle Inf
return z.Set(x)
// special cases
if len(x.mant) == 0 || len(y.mant) == 0 {
if x.exp <= infExp || y.exp <= infExp {
// TODO(gri) handle Inf separately
return z.SetNaN()
}
if len(x.mant) == 0 {
// TODO(gri) handle Inf
return z.Set(y)
if len(x.mant) == 0 { // x == ±0
z.Set(y)
if len(z.mant) == 0 && z.exp == 0 {
z.neg = x.neg && y.neg // -0 + -0 == -0
}
return z
}
// y == ±0
return z.Set(x)
}
// x, y != 0
......@@ -1182,11 +1305,18 @@ func (z *Float) Add(x, y *Float) *Float {
z.usub(y, x)
}
}
// -0 is only possible for -0 + -0
if len(z.mant) == 0 {
z.neg = false
}
return z
}
// Sub sets z to the rounded difference x-y and returns z.
// Precision, rounding, and accuracy reporting are as for Add.
// BUG(gri) If any of the operands is Inf, the result is NaN.
func (z *Float) Sub(x, y *Float) *Float {
if debugFloat {
validate(x)
......@@ -1197,13 +1327,21 @@ func (z *Float) Sub(x, y *Float) *Float {
z.prec = umax32(x.prec, y.prec)
}
// TODO(gri) what about -0?
if len(y.mant) == 0 {
// TODO(gri) handle Inf
return z.Set(x)
// special cases
if len(x.mant) == 0 || len(y.mant) == 0 {
if x.exp <= infExp || y.exp <= infExp {
// TODO(gri) handle Inf separately
return z.SetNaN()
}
if len(x.mant) == 0 {
return z.Neg(y)
if len(x.mant) == 0 { // x == ±0
z.Neg(y)
if len(z.mant) == 0 && z.exp == 0 {
z.neg = x.neg && !y.neg // -0 - 0 == -0
}
return z
}
// y == ±0
return z.Set(x)
}
// x, y != 0
......@@ -1222,11 +1360,18 @@ func (z *Float) Sub(x, y *Float) *Float {
z.usub(y, x)
}
}
// -0 is only possible for -0 - 0
if len(z.mant) == 0 {
z.neg = false
}
return z
}
// Mul sets z to the rounded product x*y and returns z.
// Precision, rounding, and accuracy reporting are as for Add.
// BUG(gri) If any of the operands is Inf, the result is NaN.
func (z *Float) Mul(x, y *Float) *Float {
if debugFloat {
validate(x)
......@@ -1237,25 +1382,34 @@ func (z *Float) Mul(x, y *Float) *Float {
z.prec = umax32(x.prec, y.prec)
}
// TODO(gri) handle Inf
z.neg = x.neg != y.neg
// special cases
if len(x.mant) == 0 || len(y.mant) == 0 {
if x.exp <= infExp || y.exp <= infExp {
// TODO(gri) handle Inf separately
return z.SetNaN()
}
// x == ±0 || y == ±0
z.acc = Exact
z.setZero()
return z
}
// TODO(gri) what about -0?
if len(x.mant) == 0 || len(y.mant) == 0 {
z.neg = false
z.mant = z.mant[:0]
z.exp = 0
z.acc = Exact
z.setZero()
return z
}
// x, y != 0
z.neg = x.neg != y.neg
z.umul(x, y)
return z
}
// Quo sets z to the rounded quotient x/y and returns z.
// Precision, rounding, and accuracy reporting are as for Add.
// BUG(gri) If any of the operands is Inf, the result is NaN.
func (z *Float) Quo(x, y *Float) *Float {
if debugFloat {
validate(x)
......@@ -1266,28 +1420,36 @@ func (z *Float) Quo(x, y *Float) *Float {
z.prec = umax32(x.prec, y.prec)
}
// TODO(gri) handle Inf
// TODO(gri) check that this is correct
z.neg = x.neg != y.neg
// special cases
z.acc = Exact
if len(x.mant) == 0 || len(y.mant) == 0 {
if x.exp <= infExp || y.exp <= infExp {
// TODO(gri) handle Inf separately
return z.SetNaN()
}
if len(x.mant) == 0 {
if len(y.mant) == 0 {
z.setExp(infExp)
return z.SetNaN()
}
z.setZero()
return z
}
if len(x.mant) == 0 {
z.mant = z.mant[:0]
z.exp = 0
z.acc = Exact
// x != 0
if len(y.mant) == 0 {
z.setInf()
return z
}
}
// x, y != 0
z.uquo(x, y)
return z
}
// TODO(gri) eliminate Lsh, Rsh? We can do the same with MantExp, SetMantExp.
// Lsh sets z to the rounded x * (1<<s) and returns z.
// If z's precision is 0, it is changed to x's precision.
// Rounding is performed according to z's precision
......@@ -1300,14 +1462,11 @@ func (z *Float) Lsh(x *Float, s uint) *Float {
validate(x)
}
if z.prec == 0 {
z.prec = x.prec
z.Set(x)
if len(x.mant) != 0 {
z.setExp(int64(z.exp) + int64(s))
}
// TODO(gri) handle Inf
z.round(0)
z.setExp(int64(z.exp) + int64(s))
return z
}
......@@ -1319,14 +1478,11 @@ func (z *Float) Rsh(x *Float, s uint) *Float {
validate(x)
}
if z.prec == 0 {
z.prec = x.prec
z.Set(x)
if len(x.mant) != 0 {
z.setExp(int64(z.exp) - int64(s))
}
// TODO(gri) handle Inf
z.round(0)
z.setExp(int64(z.exp) - int64(s))
return z
}
......@@ -1337,6 +1493,8 @@ func (z *Float) Rsh(x *Float, s uint) *Float {
// +1 if x > y
//
// Infinities with matching sign are equal.
// NaN values are never equal.
// BUG(gri) comparing NaN's is not implemented
func (x *Float) Cmp(y *Float) int {
if debugFloat {
validate(x)
......@@ -1387,6 +1545,9 @@ func (x *Float) ord() int {
if x.exp == infExp {
m = 2
}
if x.exp == nanExp {
panic("unimplemented")
}
}
if x.neg {
m = -m
......
......@@ -97,6 +97,9 @@ func makeFloat(s string) *Float {
if s == "-Inf" {
return x.SetInf(-1)
}
if s == "NaN" || s == "-NaN" {
return x.SetNaN()
}
x.SetPrec(1000)
if _, ok := x.SetString(s); !ok {
panic(fmt.Sprintf("%q is not a valid float", s))
......@@ -116,6 +119,7 @@ func TestFloatSetPrec(t *testing.T) {
{"-0", 0, "-0", Exact},
{"-Inf", 0, "-Inf", Exact},
{"+Inf", 0, "+Inf", Exact},
{"NaN", 0, "NaN", Exact},
{"123", 0, "0", Below},
{"-123", 0, "-0", Above},
......@@ -123,7 +127,8 @@ func TestFloatSetPrec(t *testing.T) {
{"0", MaxPrec, "0", Exact},
{"-0", MaxPrec, "-0", Exact},
{"-Inf", MaxPrec, "-Inf", Exact},
{"-Inf", MaxPrec, "-Inf", Exact},
{"+Inf", MaxPrec, "+Inf", Exact},
{"NaN", MaxPrec, "NaN", Exact},
// just a few regular cases - general rounding is tested elsewhere
{"1.5", 1, "2", Above},
......@@ -144,8 +149,8 @@ func TestFloatSetPrec(t *testing.T) {
}
// look inside x and check correct value for x.exp
if len(x.mant) == 0 {
// ±0 or ±Inf
if x.exp != 0 && x.exp != infExp {
// ±0, ±Inf, or NaN
if x.exp != 0 && x.exp > MinExp {
t.Errorf("%s.SetPrec(%d): incorrect exponent %d", test.x, test.prec, x.exp)
}
}
......@@ -162,6 +167,7 @@ func TestFloatMinPrec(t *testing.T) {
{"-0", 0},
{"+Inf", 0},
{"-Inf", 0},
{"NaN", 0},
{"1", 1},
{"2", 1},
{"3", 2},
......@@ -188,6 +194,7 @@ func TestFloatSign(t *testing.T) {
{"+0", 0},
{"+1", +1},
{"+Inf", +1},
{"NaN", 0},
} {
x := makeFloat(test.x)
s := x.Sign()
......@@ -198,7 +205,11 @@ func TestFloatSign(t *testing.T) {
}
// feq(x, y) is like x.Cmp(y) == 0 but it also considers the sign of 0 (0 != -0).
// Caution: Two NaN's are equal with this function!
func feq(x, y *Float) bool {
if x.IsNaN() || y.IsNaN() {
return x.IsNaN() && y.IsNaN()
}
return x.Cmp(y) == 0 && x.neg == y.neg
}
......@@ -214,6 +225,7 @@ func TestFloatMantExp(t *testing.T) {
{"Inf", "+Inf", 0},
{"+Inf", "+Inf", 0},
{"-Inf", "-Inf", 0},
{"NaN", "NaN", 0},
{"1.5", "0.75", 1},
{"1.024e3", "0.5", 11},
{"-0.125", "-0.5", -2},
......@@ -251,8 +263,8 @@ func TestFloatSetMantExp(t *testing.T) {
{"+Inf", -1234, "+Inf"},
{"-Inf", -1234, "-Inf"},
{"0", -MaxExp - 1, "0"},
{"0.5", -MaxExp - 1, "+Inf"}, // exponent overflow
{"-0.5", -MaxExp - 1, "-Inf"}, // exponent overflow
{"0.5", -MaxExp - 1, "+0"}, // exponent underflow
{"-0.5", -MaxExp - 1, "-0"}, // exponent underflow
{"1", MaxExp, "+Inf"}, // exponent overflow
{"2", MaxExp - 1, "+Inf"}, // exponent overflow
{"0.75", 1, "1.5"},
......@@ -291,6 +303,7 @@ func TestFloatIsInt(t *testing.T) {
"Inf",
"+Inf",
"-Inf",
"NaN",
} {
s := strings.TrimSuffix(test, " int")
want := s != test
......@@ -304,6 +317,10 @@ func TestFloatIsInf(t *testing.T) {
// TODO(gri) implement this
}
func TestFloatIsNaN(t *testing.T) {
// TODO(gri) implement this
}
func fromBinary(s string) int64 {
x, err := strconv.ParseInt(s, 2, 64)
if err != nil {
......@@ -604,6 +621,13 @@ func TestFloatSetFloat64(t *testing.T) {
}
}
// test NaN
var f Float
f.SetFloat64(math.NaN())
if got, acc := f.Float64(); !math.IsNaN(got) || acc != Exact {
t.Errorf("got %g (%s, %s); want %g (exact)", got, f.Format('p', 0), acc, math.NaN())
}
// test basic rounding behavior (exhaustive rounding testing is done elsewhere)
const x uint64 = 0x8765432143218 // 53 bits needed
for prec := uint(1); prec <= 52; prec++ {
......@@ -715,6 +739,10 @@ func TestFloatSetInf(t *testing.T) {
}
}
func TestFloatSetNaN(t *testing.T) {
// TODO(gri) implement
}
func TestFloatUint64(t *testing.T) {
for _, test := range []struct {
x string
......@@ -736,6 +764,7 @@ func TestFloatUint64(t *testing.T) {
{"18446744073709551616", math.MaxUint64, Below},
{"1e10000", math.MaxUint64, Below},
{"+Inf", math.MaxUint64, Below},
// {"NaN", 0, Exact}, TODO(gri) enable once implemented
} {
x := makeFloat(test.x)
out, acc := x.Uint64()
......@@ -774,6 +803,7 @@ func TestFloatInt64(t *testing.T) {
{"9223372036854775808", math.MaxInt64, Below},
{"1e10000", math.MaxInt64, Below},
{"+Inf", math.MaxInt64, Below},
// {"NaN", 0, Exact}, TODO(gri) enable once implemented
} {
x := makeFloat(test.x)
out, acc := x.Int64()
......@@ -848,7 +878,7 @@ func TestFloatRat(t *testing.T) {
{"3.14159265", "7244019449799623199/2305843009213693952"},
} {
x := makeFloat(test.x).SetPrec(64)
res := x.Rat(nil)
res, acc := x.Rat(nil)
got := "nil"
if res != nil {
got = res.String()
......@@ -857,6 +887,8 @@ func TestFloatRat(t *testing.T) {
t.Errorf("%s: got %s; want %s", test.x, got, test.want)
continue
}
// TODO(gri) check accuracy
_ = acc
// inverse conversion
if res != nil {
......@@ -871,7 +903,7 @@ func TestFloatRat(t *testing.T) {
for _, f := range []string{"0", "1", "-1", "1234"} {
x := makeFloat(f)
r := new(Rat)
if res := x.Rat(r); res != r {
if res, _ := x.Rat(r); res != r {
t.Errorf("(%s).Rat is not using supplied *Rat", f)
}
}
......@@ -886,6 +918,7 @@ func TestFloatAbs(t *testing.T) {
"1e-1000",
"1e1000",
"Inf",
"NaN",
} {
p := makeFloat(test)
a := new(Float).Abs(p)
......@@ -910,6 +943,7 @@ func TestFloatNeg(t *testing.T) {
"1e-1000",
"1e1000",
"Inf",
"NaN",
} {
p1 := makeFloat(test)
n1 := makeFloat("-" + test)
......@@ -1244,6 +1278,66 @@ func TestFloatQuoSmoke(t *testing.T) {
}
}
// TestFloatArithmeticSpecialValues tests that Float operations produce
// the correct result for all combinations of regular and special value
// arguments (±0, ±Inf, NaN) and ±1 as representative for normal values.
// Operations that produce Inf or NaN results in IEEE, produce an Undef
// since we don't support infinities or NaNs.
func TestFloatArithmeticSpecialValues(t *testing.T) {
zero := 0.0
args := []float64{math.Inf(-1), -1, -zero, zero, 1, math.Inf(1), math.NaN()}
xx := new(Float)
yy := new(Float)
got := new(Float)
want := new(Float)
for i := 0; i < 4; i++ {
for _, x := range args {
xx.SetFloat64(x)
// check conversion is correct
// (no need to do this for y, since we see exactly the
// same values there)
if got, acc := xx.Float64(); !math.IsNaN(x) && (got != x || acc != Exact) {
t.Errorf("Float(%g) == %g (%s)", x, got, acc)
}
for _, y := range args {
yy.SetFloat64(y)
var op string
var z float64
switch i {
case 0:
op = "+"
z = x + y
got.Add(xx, yy)
case 1:
op = "-"
z = x - y
got.Sub(xx, yy)
case 2:
op = "*"
z = x * y
got.Mul(xx, yy)
case 3:
op = "/"
z = x / y
got.Quo(xx, yy)
default:
panic("unreachable")
}
// At the moment an Inf operand always leads to a NaN result (known bug).
// TODO(gri) remove this once the bug is fixed.
if math.IsInf(x, 0) || math.IsInf(y, 0) {
want.SetNaN()
} else {
want.SetFloat64(z)
}
if !feq(got, want) {
t.Errorf("%5g %s %5g = %5s; want %5s", x, op, y, got, want)
}
}
}
}
}
// TODO(gri) Add tests that check correctness in the presence of aliasing.
// For rounding modes ToNegativeInf and ToPositiveInf, rounding is affected
......
......@@ -245,6 +245,11 @@ func (x *Float) Append(buf []byte, format byte, prec int) []byte {
return append(buf, "Inf"...)
}
// NaN
if x.IsNaN() {
return append(buf, "NaN"...)
}
// easy formats
switch format {
case 'b':
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment