Commit 60be4a24 authored by Russ Cox's avatar Russ Cox

cmd/gc: fix float32 const conversion and printing of big float consts

The float32 const conversion used to round to float64
and then use the hardware to round to float32.
Even though there was a range check before this
conversion, the double rounding introduced inaccuracy:
the round to float64 might round the value further away
from the float32 range, reaching a float64 value that
could not actually be rounded to float32. The hardware
appears to give us 0 in that case, but it is probably undefined.
Double rounding also meant that the wrong value might
be used for certain border cases.

Do the rounding the float32 ourselves, just as we already
did the rounding to float64. This makes the conversion
precise and also makes the conversion match the range check.

Finally, add some code to print very large (bigger than float64)
floating point constants in decimal floating point notation instead
of falling back to the precise but human-unreadable binary floating
point notation.

Fixes #8015.

LGTM=iant
R=golang-codereviews, iant
CC=golang-codereviews, r
https://golang.org/cl/100580044
parent 66129835
......@@ -22,19 +22,22 @@ Mpflt*
truncfltlit(Mpflt *oldv, Type *t)
{
double d;
float f;
Mpflt *fv;
Val v;
if(t == T)
return oldv;
memset(&v, 0, sizeof v);
v.ctype = CTFLT;
v.u.fval = oldv;
overflow(v, t);
fv = mal(sizeof *fv);
*fv = *oldv;
// convert large precision literal floating
// into limited precision (float64 or float32)
// botch -- this assumes that compiler fp
// has same precision as runtime fp
switch(t->etype) {
case TFLOAT64:
d = mpgetflt(fv);
......@@ -42,10 +45,9 @@ truncfltlit(Mpflt *oldv, Type *t)
break;
case TFLOAT32:
d = mpgetflt(fv);
f = d;
d = f;
d = mpgetflt32(fv);
mpmovecflt(fv, d);
break;
}
return fv;
......@@ -235,7 +237,6 @@ convlit1(Node **np, Type *t, int explicit)
n->val = toflt(n->val);
// flowthrough
case CTFLT:
overflow(n->val, t);
n->val.u.fval = truncfltlit(n->val.u.fval, t);
break;
}
......
......@@ -1254,6 +1254,7 @@ void mpxorfixfix(Mpint *a, Mpint *b);
void mpaddfltflt(Mpflt *a, Mpflt *b);
void mpdivfltflt(Mpflt *a, Mpflt *b);
double mpgetflt(Mpflt *a);
double mpgetflt32(Mpflt *a);
void mpmovecflt(Mpflt *a, double c);
void mpmulfltflt(Mpflt *a, Mpflt *b);
void mpnegflt(Mpflt *a);
......
......@@ -579,20 +579,40 @@ Fconv(Fmt *fp)
{
char buf[500];
Mpflt *fvp, fv;
double d;
double d, dexp;
int exp;
fvp = va_arg(fp->args, Mpflt*);
if(fp->flags & FmtSharp) {
// alternate form - decimal for error messages.
// for well in range, convert to double and use print's %g
if(-900 < fvp->exp && fvp->exp < 900) {
exp = fvp->exp + sigfig(fvp)*Mpscale;
if(-900 < exp && exp < 900) {
d = mpgetflt(fvp);
if(d >= 0 && (fp->flags & FmtSign))
fmtprint(fp, "+");
return fmtprint(fp, "%g", d);
return fmtprint(fp, "%g", d, exp, fvp);
}
// TODO(rsc): for well out of range, print
// an approximation like 1.234e1000
// very out of range. compute decimal approximation by hand.
// decimal exponent
dexp = fvp->exp * 0.301029995663981195; // log_10(2)
exp = (int)dexp;
// decimal mantissa
fv = *fvp;
fv.val.neg = 0;
fv.exp = 0;
d = mpgetflt(&fv);
d *= pow(10, dexp-exp);
while(d >= 9.99995) {
d /= 10;
exp++;
}
if(fvp->val.neg)
fmtprint(fp, "-");
else if(fp->flags & FmtSign)
fmtprint(fp, "+");
return fmtprint(fp, "%.5fe+%d", d, exp);
}
if(sigfig(fvp) == 0) {
......
......@@ -199,10 +199,10 @@ mpdivfltflt(Mpflt *a, Mpflt *b)
print(" = %F\n\n", a);
}
double
mpgetflt(Mpflt *a)
static double
mpgetfltN(Mpflt *a, int prec, int bias)
{
int s, i, e;
int s, i, e, minexp;
uvlong v, vm;
double f;
......@@ -226,12 +226,8 @@ mpgetflt(Mpflt *a)
return 0;
}
// the magic numbers (64, 63, 53, 10, -1074) are
// IEEE specific. this should be done machine
// independently or in the 6g half of the compiler
// pick up the mantissa and a rounding bit in a uvlong
s = 53+1;
s = prec+1;
v = 0;
for(i=Mpnorm-1; s>=Mpscale; i--) {
v = (v<<Mpscale) | a->val.a[i];
......@@ -251,14 +247,15 @@ mpgetflt(Mpflt *a)
v = (v<<s) | (a->val.a[i]>>(Mpscale-s));
// gradual underflow
e = Mpnorm*Mpscale + a->exp - 53;
if(e < -1074) {
s = -e - 1074;
if(s > 54)
s = 54;
e = Mpnorm*Mpscale + a->exp - prec;
minexp = bias+1-prec+1;
if(e < minexp) {
s = minexp - e;
if(s > prec+1)
s = prec+1;
v |= vm & ((1ULL<<s) - 1);
vm >>= s;
e = -1074;
e = minexp;
}
//print("vm=%.16llux v=%.16llux\n", vm, v);
......@@ -276,6 +273,18 @@ mpgetflt(Mpflt *a)
return f;
}
double
mpgetflt(Mpflt *a)
{
return mpgetfltN(a, 53, -1023);
}
double
mpgetflt32(Mpflt *a)
{
return mpgetfltN(a, 24, -127);
}
void
mpmovecflt(Mpflt *a, double c)
{
......
// run
// Check conversion of constant to float32/float64 near min/max boundaries.
// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package main
import (
"fmt"
)
var cvt = []struct {
val interface{}
binary string
}{
{float32(-340282356779733661637539395458142568447), "-16777215p+104"},
{float32(-340282326356119256160033759537265639424), "-16777214p+104"},
{float32(340282326356119256160033759537265639424), "16777214p+104"},
{float32(340282356779733661637539395458142568447), "16777215p+104"},
{float64(-1.797693134862315807937289714053e+308), "-9007199254740991p+971"},
{float64(-1.797693134862315708145274237317e+308), "-9007199254740991p+971"},
{float64(-1.797693134862315608353258760581e+308), "-9007199254740990p+971"},
{float64(1.797693134862315608353258760581e+308), "9007199254740990p+971"},
{float64(1.797693134862315708145274237317e+308), "9007199254740991p+971"},
{float64(1.797693134862315807937289714053e+308), "9007199254740991p+971"},
}
func main() {
bug := false
for i, c := range cvt {
s := fmt.Sprintf("%b", c.val)
if s != c.binary {
if !bug {
bug = true
fmt.Println("BUG")
}
fmt.Printf("#%d: have %s, want %s\n", i, s, c.binary)
}
}
}
// errorcheck
// Check flagging of invalid conversion of constant to float32/float64 near min/max boundaries.
// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package main
var x = []interface{}{
float32(-340282356779733661637539395458142568448), // ERROR "constant -3\.40282e\+38 overflows float32"
float32(-340282356779733661637539395458142568447),
float32(-340282326356119256160033759537265639424),
float32(340282326356119256160033759537265639424),
float32(340282356779733661637539395458142568447),
float32(340282356779733661637539395458142568448), // ERROR "constant 3\.40282e\+38 overflows float32"
-1e1000, // ERROR "constant -1\.00000e\+1000 overflows float64"
float64(-1.797693134862315907937289714053e+308), // ERROR "constant -1\.79769e\+308 overflows float64"
float64(-1.797693134862315807937289714053e+308),
float64(-1.797693134862315708145274237317e+308),
float64(-1.797693134862315608353258760581e+308),
float64(1.797693134862315608353258760581e+308),
float64(1.797693134862315708145274237317e+308),
float64(1.797693134862315807937289714053e+308),
float64(1.797693134862315907937289714053e+308), // ERROR "constant 1\.79769e\+308 overflows float64"
1e1000, // ERROR "constant 1\.00000e\+1000 overflows float64"
}
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment