Commit cfa93ba5 authored by Robert Griesemer

math/big: add support for underscores '_' in numbers

The primary change is in nat.scan which now accepts underscores for base 0.
While at it, streamlined error handling in that function as well.
Also, improved the corresponding test significantly by checking the
expected result values also in case of scan errors.

The second major change is in scanExponent which now accepts underscores when
the new sepOk argument is set. While at it, essentially rewrote that
function to match error and underscore handling of nat.scan more closely.
Added a new test for scanExponent which until now was only tested
indirectly.

Finally, updated the documentation for several functions and added many
new test cases to clients of nat.scan.

A major portion of this CL is due to much better test coverage.

Updates #28493.

Change-Id: I7f17b361b633fbe6c798619d891bd5e0a045b5c5
Reviewed-on: https://go-review.googlesource.com/c/go/+/166157
Reviewed-by: Emmanuel Odeke <emm.odeke@gmail.com>
parent c4078a19
...@@ -55,7 +55,7 @@ func (z *Float) scan(r io.ByteScanner, base int) (f *Float, b int, err error) { ...@@ -55,7 +55,7 @@ func (z *Float) scan(r io.ByteScanner, base int) (f *Float, b int, err error) {
// exponent // exponent
var exp int64 var exp int64
var ebase int var ebase int
exp, ebase, err = scanExponent(r, true) exp, ebase, err = scanExponent(r, true, base == 0)
if err != nil { if err != nil {
return return
} }
...@@ -216,20 +216,29 @@ func (z *Float) pow5(n uint64) *Float { ...@@ -216,20 +216,29 @@ func (z *Float) pow5(n uint64) *Float {
// point number with a mantissa in the given conversion base (the exponent // point number with a mantissa in the given conversion base (the exponent
// is always a decimal number), or a string representing an infinite value. // is always a decimal number), or a string representing an infinite value.
// //
// For base 0, an underscore character ``_'' may appear between a base
// prefix and an adjacent digit, and between successive digits; such
// underscores do not change the value of the number, or the returned
// digit count. Incorrect placement of underscores is reported as an
// error if there are no other errors. If base != 0, underscores are
// not recognized and thus terminate scanning like any other character
// that is not a valid radix point or digit.
//
// It sets z to the (possibly rounded) value of the corresponding floating- // It sets z to the (possibly rounded) value of the corresponding floating-
// point value, and returns z, the actual base b, and an error err, if any. // point value, and returns z, the actual base b, and an error err, if any.
// The entire string (not just a prefix) must be consumed for success. // The entire string (not just a prefix) must be consumed for success.
// If z's precision is 0, it is changed to 64 before rounding takes effect. // If z's precision is 0, it is changed to 64 before rounding takes effect.
// The number must be of the form: // The number must be of the form:
// //
// number = [ sign ] [ prefix ] mantissa [ exponent ] | infinity . // number = [ sign ] ( float | "inf" | "Inf" ) .
// sign = "+" | "-" . // sign = "+" | "-" .
// prefix = "0" ( "b" | "B" | "o" | "O" | "x" | "X" ) . // float = ( mantissa | prefix pmantissa ) [ exponent ] .
// mantissa = digits | digits "." [ digits ] | "." digits . // prefix = "0" [ "b" | "B" | "o" | "O" | "x" | "X" ] .
// exponent = ( "e" | "E" | "p" | "P" ) [ sign ] digits . // mantissa = digits "." [ digits ] | digits | "." digits .
// digits = digit { digit } . // pmantissa = [ "_" ] digits "." [ digits ] | [ "_" ] digits | "." digits .
// digit = "0" ... "9" | "a" ... "z" | "A" ... "Z" . // exponent = ( "e" | "E" | "p" | "P" ) [ sign ] digits .
// infinity = [ sign ] ( "inf" | "Inf" ) . // digits = digit { [ "_" ] digit } .
// digit = "0" ... "9" | "a" ... "z" | "A" ... "Z" .
// //
// The base argument must be 0, 2, 8, 10, or 16. Providing an invalid base // The base argument must be 0, 2, 8, 10, or 16. Providing an invalid base
// argument will lead to a run-time panic. // argument will lead to a run-time panic.
...@@ -240,11 +249,11 @@ func (z *Float) pow5(n uint64) *Float { ...@@ -240,11 +249,11 @@ func (z *Float) pow5(n uint64) *Float {
// no prefix is accepted. The octal prefix "0" is not supported (a leading // no prefix is accepted. The octal prefix "0" is not supported (a leading
// "0" is simply considered a "0"). // "0" is simply considered a "0").
// //
// A "p" or "P" exponent indicates a binary (rather than decimal) exponent; // A "p" or "P" exponent indicates a base 2 (rather than base 10) exponent;
// for instance "0x1.fffffffffffffp1023" (using base 0) represents the // for instance, "0x1.fffffffffffffp1023" (using base 0) represents the
// maximum float64 value. For hexadecimal mantissae, the exponent must // maximum float64 value. For hexadecimal mantissae, the exponent character
// be binary, if present (an "e" or "E" exponent indicator cannot be // must be one of 'p' or 'P', if present (an "e" or "E" exponent indicator
// distinguished from a mantissa digit). // cannot be distinguished from a mantissa digit).
// //
// The returned *Float f is nil and the value of z is valid but not // The returned *Float f is nil and the value of z is valid but not
// defined if an error is reported. // defined if an error is reported.
......
...@@ -72,6 +72,21 @@ func TestFloatSetFloat64String(t *testing.T) { ...@@ -72,6 +72,21 @@ func TestFloatSetFloat64String(t *testing.T) {
{"infinity", nan}, {"infinity", nan},
{"foobar", nan}, {"foobar", nan},
// invalid underscores
{"_", nan},
{"0_", nan},
{"1__0", nan},
{"123_.", nan},
{"123._", nan},
{"123._4", nan},
{"1_2.3_4_", nan},
{"_.123", nan},
{"_123.456", nan},
{"10._0", nan},
{"10.0e_0", nan},
{"10.0e0_", nan},
{"0P-0__0", nan},
// misc decimal values // misc decimal values
{"3.14159265", 3.14159265}, {"3.14159265", 3.14159265},
{"-687436.79457e-245", -687436.79457e-245}, {"-687436.79457e-245", -687436.79457e-245},
...@@ -142,6 +157,16 @@ func TestFloatSetFloat64String(t *testing.T) { ...@@ -142,6 +157,16 @@ func TestFloatSetFloat64String(t *testing.T) {
{"-0X0.00008P+16", -0.5}, {"-0X0.00008P+16", -0.5},
{"0x0.0000000000001p-1022", math.SmallestNonzeroFloat64}, {"0x0.0000000000001p-1022", math.SmallestNonzeroFloat64},
{"0x1.fffffffffffffp1023", math.MaxFloat64}, {"0x1.fffffffffffffp1023", math.MaxFloat64},
// underscores
{"0_0", 0},
{"1_000.", 1000},
{"1_2_3.4_5_6", 123.456},
{"1.0e0_0", 1},
{"1p+1_0", 1024},
{"0b_1000", 0x8},
{"0b_1011_1101", 0xbd},
{"0x_f0_0d_1eP+0_8", 0xf00d1e00},
} { } {
var x Float var x Float
x.SetPrec(53) x.SetPrec(53)
......
...@@ -401,16 +401,24 @@ func (x *Int) IsUint64() bool { ...@@ -401,16 +401,24 @@ func (x *Int) IsUint64() bool {
// (not just a prefix) must be valid for success. If SetString fails, // (not just a prefix) must be valid for success. If SetString fails,
// the value of z is undefined but the returned value is nil. // the value of z is undefined but the returned value is nil.
// //
// The base argument must be 0 or a value between 2 and MaxBase. If the base // The base argument must be 0 or a value between 2 and MaxBase.
// is 0, the string prefix determines the actual conversion base. A prefix of // For base 0, the number prefix determines the actual base: A prefix of
// ``0x'' or ``0X'' selects base 16; the ``0'' prefix selects base 8, and a // ``0b'' or ``0B'' selects base 2, ``0'', ``0o'' or ``0O'' selects base 8,
// ``0b'' or ``0B'' prefix selects base 2. Otherwise the selected base is 10. // and ``0x'' or ``0X'' selects base 16. Otherwise, the selected base is 10
// and no prefix is accepted.
// //
// For bases <= 36, lower and upper case letters are considered the same: // For bases <= 36, lower and upper case letters are considered the same:
// The letters 'a' to 'z' and 'A' to 'Z' represent digit values 10 to 35. // The letters 'a' to 'z' and 'A' to 'Z' represent digit values 10 to 35.
// For bases > 36, the upper case letters 'A' to 'Z' represent the digit // For bases > 36, the upper case letters 'A' to 'Z' represent the digit
// values 36 to 61. // values 36 to 61.
// //
// For base 0, an underscore character ``_'' may appear between a base
// prefix and an adjacent digit, and between successive digits; such
// underscores do not change the value of the number.
// Incorrect placement of underscores is reported as an error if there
// are no other errors. If base != 0, underscores are not recognized
// and act like any other character that is not a valid digit.
//
func (z *Int) SetString(s string, base int) (*Int, bool) { func (z *Int) SetString(s string, base int) (*Int, bool) {
return z.setFromScanner(strings.NewReader(s), base) return z.setFromScanner(strings.NewReader(s), base)
} }
......
...@@ -34,6 +34,16 @@ var stringTests = []struct { ...@@ -34,6 +34,16 @@ var stringTests = []struct {
{in: "0xg", base: 0}, {in: "0xg", base: 0},
{in: "g", base: 16}, {in: "g", base: 16},
// invalid inputs with separators
// (smoke tests only - a comprehensive set of tests is in natconv_test.go)
{in: "_"},
{in: "0_"},
{in: "_0"},
{in: "-1__0"},
{in: "0x10_"},
{in: "1_000", base: 10}, // separators are not permitted for bases != 0
{in: "d_e_a_d", base: 16},
// valid inputs // valid inputs
{"0", "0", 0, 0, true}, {"0", "0", 0, 0, true},
{"0", "0", 10, 0, true}, {"0", "0", 10, 0, true},
...@@ -67,6 +77,13 @@ var stringTests = []struct { ...@@ -67,6 +77,13 @@ var stringTests = []struct {
{"A", "A", 37, 36, true}, {"A", "A", 37, 36, true},
{"ABCXYZ", "abcxyz", 36, 623741435, true}, {"ABCXYZ", "abcxyz", 36, 623741435, true},
{"ABCXYZ", "ABCXYZ", 62, 33536793425, true}, {"ABCXYZ", "ABCXYZ", 62, 33536793425, true},
// valid input with separators
// (smoke tests only - a comprehensive set of tests is in natconv_test.go)
{"1_000", "1000", 0, 1000, true},
{"0b_1010", "10", 0, 10, true},
{"+0o_660", "432", 0, 0660, true},
{"-0xF00D_1E", "-15731998", 0, -0xf00d1e, true},
} }
func TestIntText(t *testing.T) { func TestIntText(t *testing.T) {
......
...@@ -55,16 +55,31 @@ func pow(x Word, n int) (p Word) { ...@@ -55,16 +55,31 @@ func pow(x Word, n int) (p Word) {
return return
} }
// scan errors
//
// These are sentinel values: the scanners return them unwrapped, and the
// tests compare against them directly with == / !=.
var (
// errNoDigits is reported when a scanned number or exponent contains no digits.
errNoDigits = errors.New("number has no digits")
// errInvalSep is reported when an underscore separator is misplaced,
// i.e. it does not sit between two successive digits (or directly after
// a base prefix). Other errors take precedence over this one.
errInvalSep = errors.New("'_' must separate successive digits")
)
// scan scans the number corresponding to the longest possible prefix // scan scans the number corresponding to the longest possible prefix
// from r representing an unsigned number in a given conversion base. // from r representing an unsigned number in a given conversion base.
// It returns the corresponding natural number res, the actual base b, // scan returns the corresponding natural number res, the actual base b,
// a digit count, and a read or syntax error err, if any. // a digit count, and a read or syntax error err, if any.
// //
// number = [ prefix ] mantissa . // For base 0, an underscore character ``_'' may appear between a base
// prefix = "0" [ "b" | "B" | "o" | "O" | "x" | "X" ] . // prefix and an adjacent digit, and between successive digits; such
// mantissa = digits | digits "." [ digits ] | "." digits . // underscores do not change the value of the number, or the returned
// digits = digit { digit } . // digit count. Incorrect placement of underscores is reported as an
// digit = "0" ... "9" | "a" ... "z" | "A" ... "Z" . // error if there are no other errors. If base != 0, underscores are
// not recognized and thus terminate scanning like any other character
// that is not a valid radix point or digit.
//
// number = mantissa | prefix pmantissa .
// prefix = "0" [ "b" | "B" | "o" | "O" | "x" | "X" ] .
// mantissa = digits "." [ digits ] | digits | "." digits .
// pmantissa = [ "_" ] digits "." [ digits ] | [ "_" ] digits | "." digits .
// digits = digit { [ "_" ] digit } .
// digit = "0" ... "9" | "a" ... "z" | "A" ... "Z" .
// //
// Unless fracOk is set, the base argument must be 0 or a value between // Unless fracOk is set, the base argument must be 0 or a value between
// 2 and MaxBase. If fracOk is set, the base argument must be one of // 2 and MaxBase. If fracOk is set, the base argument must be one of
...@@ -92,53 +107,51 @@ func pow(x Word, n int) (p Word) { ...@@ -92,53 +107,51 @@ func pow(x Word, n int) (p Word) {
// In this case, the actual value of the scanned number is res * b**count. // In this case, the actual value of the scanned number is res * b**count.
// //
func (z nat) scan(r io.ByteScanner, base int, fracOk bool) (res nat, b, count int, err error) { func (z nat) scan(r io.ByteScanner, base int, fracOk bool) (res nat, b, count int, err error) {
// reject illegal bases // reject invalid bases
baseOk := base == 0 || baseOk := base == 0 ||
!fracOk && 2 <= base && base <= MaxBase || !fracOk && 2 <= base && base <= MaxBase ||
fracOk && (base == 2 || base == 8 || base == 10 || base == 16) fracOk && (base == 2 || base == 8 || base == 10 || base == 16)
if !baseOk { if !baseOk {
panic(fmt.Sprintf("illegal number base %d", base)) panic(fmt.Sprintf("invalid number base %d", base))
} }
// prev encodes the previously seen char: it is one
// of '_', '0' (a digit), or '.' (anything else). A
// valid separator '_' may only occur after a digit
// and if base == 0.
prev := '.'
invalSep := false
// one char look-ahead // one char look-ahead
ch, err := r.ReadByte() ch, err := r.ReadByte()
if err != nil {
return // io.EOF is also an error in this case
}
// determine actual base // determine actual base
b, prefix := base, 0 b, prefix := base, 0
if base == 0 { if base == 0 {
// actual base is 10 unless there's a base prefix // actual base is 10 unless there's a base prefix
b = 10 b = 10
if ch == '0' { if err == nil && ch == '0' {
prev = '0'
count = 1 count = 1
ch, err = r.ReadByte() ch, err = r.ReadByte()
if err != nil { if err == nil {
if err == io.EOF { // possibly one of 0b, 0B, 0o, 0O, 0x, 0X
err = nil // not an error; input is "0" switch ch {
res = z[:0] case 'b', 'B':
} b, prefix = 2, 'b'
return case 'o', 'O':
} b, prefix = 8, 'o'
// possibly one of 0b, 0B, 0o, 0O, 0x, 0X case 'x', 'X':
switch ch { b, prefix = 16, 'x'
case 'b', 'B': default:
b, prefix = 2, 'b' if !fracOk {
case 'o', 'O': b, prefix = 8, '0'
b, prefix = 8, 'o' }
case 'x', 'X':
b, prefix = 16, 'x'
default:
if !fracOk {
b, prefix = 8, '0'
} }
} if prefix != 0 {
if prefix != 0 { count = 0 // prefix is not counted
count = 0 // prefix is not counted if prefix != '0' {
if prefix != '0' { ch, err = r.ReadByte()
if ch, err = r.ReadByte(); err != nil {
return // io.EOF is also an error in this case
} }
} }
} }
...@@ -155,76 +168,76 @@ func (z nat) scan(r io.ByteScanner, base int, fracOk bool) (res nat, b, count in ...@@ -155,76 +168,76 @@ func (z nat) scan(r io.ByteScanner, base int, fracOk bool) (res nat, b, count in
di := Word(0) // 0 <= di < b1**i < bn di := Word(0) // 0 <= di < b1**i < bn
i := 0 // 0 <= i < n i := 0 // 0 <= i < n
dp := -1 // position of decimal point dp := -1 // position of decimal point
for { for err == nil {
if fracOk && ch == '.' { if ch == '.' && fracOk {
fracOk = false fracOk = false
if prev == '_' {
invalSep = true
}
prev = '.'
dp = count dp = count
// advance } else if ch == '_' && base == 0 {
if ch, err = r.ReadByte(); err != nil { if prev != '0' {
if err == io.EOF { invalSep = true
err = nil }
break prev = '_'
} else {
// convert rune into digit value d1
var d1 Word
switch {
case '0' <= ch && ch <= '9':
d1 = Word(ch - '0')
case 'a' <= ch && ch <= 'z':
d1 = Word(ch - 'a' + 10)
case 'A' <= ch && ch <= 'Z':
if b <= maxBaseSmall {
d1 = Word(ch - 'A' + 10)
} else {
d1 = Word(ch - 'A' + maxBaseSmall)
} }
return default:
d1 = MaxBase + 1
} }
} if d1 >= b1 {
r.UnreadByte() // ch does not belong to number anymore
break
}
prev = '0'
count++
// convert rune into digit value d1 // collect d1 in di
var d1 Word di = di*b1 + d1
switch { i++
case '0' <= ch && ch <= '9':
d1 = Word(ch - '0') // if di is "full", add it to the result
case 'a' <= ch && ch <= 'z': if i == n {
d1 = Word(ch - 'a' + 10) z = z.mulAddWW(z, bn, di)
case 'A' <= ch && ch <= 'Z': di = 0
if b <= maxBaseSmall { i = 0
d1 = Word(ch - 'A' + 10)
} else {
d1 = Word(ch - 'A' + maxBaseSmall)
} }
default:
d1 = MaxBase + 1
}
if d1 >= b1 {
r.UnreadByte() // ch does not belong to number anymore
break
} }
count++
// collect d1 in di ch, err = r.ReadByte()
di = di*b1 + d1 }
i++
// if di is "full", add it to the result if err == io.EOF {
if i == n { err = nil
z = z.mulAddWW(z, bn, di) }
di = 0
i = 0
}
// advance // other errors take precedence over invalid separators
if ch, err = r.ReadByte(); err != nil { if err == nil && (invalSep || prev == '_') {
if err == io.EOF { err = errInvalSep
err = nil
break
}
return
}
} }
if count == 0 { if count == 0 {
// no digits found // no digits found
if prefix == '0' { if prefix == '0' {
// there was only the octal prefix 0 (possibly followed by digits > 7); // there was only the octal prefix 0 (possibly followed by separators and digits > 7);
// count as one digit and return base 10, not 8 // interpret as decimal 0
count = 1 return z[:0], 10, 1, err
b = 10
} else {
err = errors.New("syntax error scanning number")
} }
return err = errNoDigits // fall through; result will be 0
} }
// count > 0
// add remaining digits to result // add remaining digits to result
if i > 0 { if i > 0 {
...@@ -232,9 +245,9 @@ func (z nat) scan(r io.ByteScanner, base int, fracOk bool) (res nat, b, count in ...@@ -232,9 +245,9 @@ func (z nat) scan(r io.ByteScanner, base int, fracOk bool) (res nat, b, count in
} }
res = z.norm() res = z.norm()
// adjust for fraction, if any // adjust count for fraction, if any
if dp >= 0 { if dp >= 0 {
// 0 <= dp <= count > 0 // 0 <= dp <= count
count = dp - count count = dp - count
} }
......
...@@ -109,92 +109,126 @@ var natScanTests = []struct { ...@@ -109,92 +109,126 @@ var natScanTests = []struct {
x nat // expected nat x nat // expected nat
b int // expected base b int // expected base
count int // expected digit count count int // expected digit count
ok bool // expected success err error // expected error
next rune // next character (or 0, if at EOF) next rune // next character (or 0, if at EOF)
}{ }{
// invalid: no mantissa // invalid: no digits
{}, {"", 0, false, nil, 10, 0, errNoDigits, 0},
{s: "?"}, {"_", 0, false, nil, 10, 0, errNoDigits, 0},
{base: 10}, {"?", 0, false, nil, 10, 0, errNoDigits, '?'},
{base: 36}, {"?", 10, false, nil, 10, 0, errNoDigits, '?'},
{base: 62}, {"", 10, false, nil, 10, 0, errNoDigits, 0},
{s: "?", base: 10}, {"", 36, false, nil, 36, 0, errNoDigits, 0},
{s: "0b"}, {"", 62, false, nil, 62, 0, errNoDigits, 0},
{s: "0o"}, {"0b", 0, false, nil, 2, 0, errNoDigits, 0},
{s: "0x"}, {"0o", 0, false, nil, 8, 0, errNoDigits, 0},
{s: "0b2"}, {"0x", 0, false, nil, 16, 0, errNoDigits, 0},
{s: "0B2"}, {"0x_", 0, false, nil, 16, 0, errNoDigits, 0},
{s: "0o8"}, {"0b2", 0, false, nil, 2, 0, errNoDigits, '2'},
{s: "0O8"}, {"0B2", 0, false, nil, 2, 0, errNoDigits, '2'},
{s: "0xg"}, {"0o8", 0, false, nil, 8, 0, errNoDigits, '8'},
{s: "0Xg"}, {"0O8", 0, false, nil, 8, 0, errNoDigits, '8'},
{s: "345", base: 2}, {"0xg", 0, false, nil, 16, 0, errNoDigits, 'g'},
{"0Xg", 0, false, nil, 16, 0, errNoDigits, 'g'},
{"345", 2, false, nil, 2, 0, errNoDigits, '3'},
// invalid: incorrect use of decimal point // invalid: incorrect use of decimal point
{s: ".0"}, {"._", 0, true, nil, 10, 0, errNoDigits, 0},
{s: ".0", base: 10}, {".0", 0, false, nil, 10, 0, errNoDigits, '.'},
{s: ".", base: 0}, {".0", 10, false, nil, 10, 0, errNoDigits, '.'},
{s: "0x.0"}, {".", 0, true, nil, 10, 0, errNoDigits, 0},
{"0x.", 0, true, nil, 16, 0, errNoDigits, 0},
{"0x.g", 0, true, nil, 16, 0, errNoDigits, 'g'},
{"0x.0", 0, false, nil, 16, 0, errNoDigits, '.'},
// invalid: incorrect use of separators
{"_0", 0, false, nil, 10, 1, errInvalSep, 0},
{"0_", 0, false, nil, 10, 1, errInvalSep, 0},
{"0__0", 0, false, nil, 8, 1, errInvalSep, 0},
{"0x___0", 0, false, nil, 16, 1, errInvalSep, 0},
{"0_x", 0, false, nil, 10, 1, errInvalSep, 'x'},
{"0_8", 0, false, nil, 10, 1, errInvalSep, '8'},
{"123_.", 0, true, nat{123}, 10, 0, errInvalSep, 0},
{"._123", 0, true, nat{123}, 10, -3, errInvalSep, 0},
{"0b__1000", 0, false, nat{0x8}, 2, 4, errInvalSep, 0},
{"0o60___0", 0, false, nat{0600}, 8, 3, errInvalSep, 0},
{"0466_", 0, false, nat{0466}, 8, 3, errInvalSep, 0},
{"01234567_8", 0, false, nat{01234567}, 8, 7, errInvalSep, '8'},
{"1_.", 0, true, nat{1}, 10, 0, errInvalSep, 0},
{"0._1", 0, true, nat{1}, 10, -1, errInvalSep, 0},
{"2.7_", 0, true, nat{27}, 10, -1, errInvalSep, 0},
{"0x1.0_", 0, true, nat{0x10}, 16, -1, errInvalSep, 0},
// valid: separators are not accepted for base != 0
{"0_", 10, false, nil, 10, 1, nil, '_'},
{"1__0", 10, false, nat{1}, 10, 1, nil, '_'},
{"0__8", 10, false, nil, 10, 1, nil, '_'},
{"xy_z_", 36, false, nat{33*36 + 34}, 36, 2, nil, '_'},
// valid, no decimal point // valid, no decimal point
{"0", 0, false, nil, 10, 1, true, 0}, {"0", 0, false, nil, 10, 1, nil, 0},
{"0", 10, false, nil, 10, 1, true, 0}, {"0", 36, false, nil, 36, 1, nil, 0},
{"0", 36, false, nil, 36, 1, true, 0}, {"0", 62, false, nil, 62, 1, nil, 0},
{"0", 62, false, nil, 62, 1, true, 0}, {"1", 0, false, nat{1}, 10, 1, nil, 0},
{"1", 0, false, nat{1}, 10, 1, true, 0}, {"1", 10, false, nat{1}, 10, 1, nil, 0},
{"1", 10, false, nat{1}, 10, 1, true, 0}, {"0 ", 0, false, nil, 10, 1, nil, ' '},
{"0 ", 0, false, nil, 10, 1, true, ' '}, {"00 ", 0, false, nil, 8, 1, nil, ' '}, // octal 0
{"00 ", 0, false, nil, 8, 1, true, ' '}, // octal 0 {"0b1", 0, false, nat{1}, 2, 1, nil, 0},
{"0b1", 0, false, nat{1}, 2, 1, true, 0}, {"0B11000101", 0, false, nat{0xc5}, 2, 8, nil, 0},
{"0B11000101", 0, false, nat{0xc5}, 2, 8, true, 0}, {"0B110001012", 0, false, nat{0xc5}, 2, 8, nil, '2'},
{"0B110001012", 0, false, nat{0xc5}, 2, 8, true, '2'}, {"07", 0, false, nat{7}, 8, 1, nil, 0},
{"07", 0, false, nat{7}, 8, 1, true, 0}, {"08", 0, false, nil, 10, 1, nil, '8'},
{"08", 0, false, nil, 10, 1, true, '8'}, {"08", 10, false, nat{8}, 10, 2, nil, 0},
{"08", 10, false, nat{8}, 10, 2, true, 0}, {"018", 0, false, nat{1}, 8, 1, nil, '8'},
{"018", 0, false, nat{1}, 8, 1, true, '8'}, {"0o7", 0, false, nat{7}, 8, 1, nil, 0},
{"0o7", 0, false, nat{7}, 8, 1, true, 0}, {"0o18", 0, false, nat{1}, 8, 1, nil, '8'},
{"0o18", 0, false, nat{1}, 8, 1, true, '8'}, {"0O17", 0, false, nat{017}, 8, 2, nil, 0},
{"0O17", 0, false, nat{017}, 8, 2, true, 0}, {"03271", 0, false, nat{03271}, 8, 4, nil, 0},
{"03271", 0, false, nat{03271}, 8, 4, true, 0}, {"10ab", 0, false, nat{10}, 10, 2, nil, 'a'},
{"10ab", 0, false, nat{10}, 10, 2, true, 'a'}, {"1234567890", 0, false, nat{1234567890}, 10, 10, nil, 0},
{"1234567890", 0, false, nat{1234567890}, 10, 10, true, 0}, {"A", 36, false, nat{10}, 36, 1, nil, 0},
{"A", 36, false, nat{10}, 36, 1, true, 0}, {"A", 37, false, nat{36}, 37, 1, nil, 0},
{"A", 37, false, nat{36}, 37, 1, true, 0}, {"xyz", 36, false, nat{(33*36+34)*36 + 35}, 36, 3, nil, 0},
{"xyz", 36, false, nat{(33*36+34)*36 + 35}, 36, 3, true, 0}, {"XYZ?", 36, false, nat{(33*36+34)*36 + 35}, 36, 3, nil, '?'},
{"XYZ?", 36, false, nat{(33*36+34)*36 + 35}, 36, 3, true, '?'}, {"XYZ?", 62, false, nat{(59*62+60)*62 + 61}, 62, 3, nil, '?'},
{"XYZ?", 62, false, nat{(59*62+60)*62 + 61}, 62, 3, true, '?'}, {"0x", 16, false, nil, 16, 1, nil, 'x'},
{"0x", 16, false, nil, 16, 1, true, 'x'}, {"0xdeadbeef", 0, false, nat{0xdeadbeef}, 16, 8, nil, 0},
{"0xdeadbeef", 0, false, nat{0xdeadbeef}, 16, 8, true, 0}, {"0XDEADBEEF", 0, false, nat{0xdeadbeef}, 16, 8, nil, 0},
{"0XDEADBEEF", 0, false, nat{0xdeadbeef}, 16, 8, true, 0},
// valid, with decimal point // valid, with decimal point
{"0.", 0, false, nil, 10, 1, true, '.'}, {"0.", 0, false, nil, 10, 1, nil, '.'},
{"0.", 10, true, nil, 10, 0, true, 0}, {"0.", 10, true, nil, 10, 0, nil, 0},
{"0.1.2", 10, true, nat{1}, 10, -1, true, '.'}, {"0.1.2", 10, true, nat{1}, 10, -1, nil, '.'},
{".000", 10, true, nil, 10, -3, true, 0}, {".000", 10, true, nil, 10, -3, nil, 0},
{"12.3", 10, true, nat{123}, 10, -1, true, 0}, {"12.3", 10, true, nat{123}, 10, -1, nil, 0},
{"012.345", 10, true, nat{12345}, 10, -3, true, 0}, {"012.345", 10, true, nat{12345}, 10, -3, nil, 0},
{"0.1", 0, true, nat{1}, 10, -1, true, 0}, {"0.1", 0, true, nat{1}, 10, -1, nil, 0},
{"0.1", 2, true, nat{1}, 2, -1, true, 0}, {"0.1", 2, true, nat{1}, 2, -1, nil, 0},
{"0.12", 2, true, nat{1}, 2, -1, true, '2'}, {"0.12", 2, true, nat{1}, 2, -1, nil, '2'},
{"0b0.1", 0, true, nat{1}, 2, -1, true, 0}, {"0b0.1", 0, true, nat{1}, 2, -1, nil, 0},
{"0B0.12", 0, true, nat{1}, 2, -1, true, '2'}, {"0B0.12", 0, true, nat{1}, 2, -1, nil, '2'},
{"0o0.7", 0, true, nat{7}, 8, -1, true, 0}, {"0o0.7", 0, true, nat{7}, 8, -1, nil, 0},
{"0O0.78", 0, true, nat{7}, 8, -1, true, '8'}, {"0O0.78", 0, true, nat{7}, 8, -1, nil, '8'},
{"0xdead.beef", 0, true, nat{0xdeadbeef}, 16, -4, nil, 0},
// valid, with separators
{"1_000", 0, false, nat{1000}, 10, 4, nil, 0},
{"0_466", 0, false, nat{0466}, 8, 3, nil, 0},
{"0o_600", 0, false, nat{0600}, 8, 3, nil, 0},
{"0x_f0_0d", 0, false, nat{0xf00d}, 16, 4, nil, 0},
{"0b1000_0001", 0, false, nat{0x81}, 2, 8, nil, 0},
{"1_000.000_1", 0, true, nat{10000001}, 10, -4, nil, 0},
{"0x_f00d.1e", 0, true, nat{0xf00d1e}, 16, -2, nil, 0},
{"0x_f00d.1E2", 0, true, nat{0xf00d1e2}, 16, -3, nil, 0},
{"0x_f00d.1eg", 0, true, nat{0xf00d1e}, 16, -2, nil, 'g'},
} }
func TestScanBase(t *testing.T) { func TestScanBase(t *testing.T) {
for _, a := range natScanTests { for _, a := range natScanTests {
r := strings.NewReader(a.s) r := strings.NewReader(a.s)
x, b, count, err := nat(nil).scan(r, a.base, a.frac) x, b, count, err := nat(nil).scan(r, a.base, a.frac)
if err == nil && !a.ok { if err != a.err {
t.Errorf("scan%+v\n\texpected error", a) t.Errorf("scan%+v\n\tgot error = %v; want %v", a, err, a.err)
}
if err != nil {
if a.ok {
t.Errorf("scan%+v\n\tgot error = %s", a, err)
}
continue
} }
if x.cmp(a.x) != 0 { if x.cmp(a.x) != 0 {
t.Errorf("scan%+v\n\tgot z = %v; want %v", a, x, a.x) t.Errorf("scan%+v\n\tgot z = %v; want %v", a, x, a.x)
......
...@@ -87,7 +87,7 @@ func (z *Rat) SetString(s string) (*Rat, bool) { ...@@ -87,7 +87,7 @@ func (z *Rat) SetString(s string) (*Rat, bool) {
// exponent // exponent
var exp int64 var exp int64
exp, _, err = scanExponent(r, false) exp, _, err = scanExponent(r, false, false)
if err != nil { if err != nil {
return nil, false return nil, false
} }
...@@ -129,75 +129,96 @@ func (z *Rat) SetString(s string) (*Rat, bool) { ...@@ -129,75 +129,96 @@ func (z *Rat) SetString(s string) (*Rat, bool) {
return z, true return z, true
} }
// scanExponent scans the longest possible prefix of r representing a decimal // scanExponent scans the longest possible prefix of r representing a base 10
// ('e', 'E') or binary ('p', 'P') exponent, if any. It returns the exponent, // (``e'', ``E'') or a base 2 (``p'', ``P'') exponent, if any. It returns the
// the exponent base (10 or 2), or a read or syntax error, if any. // exponent, the exponent base (10 or 2), or a read or syntax error, if any.
//
// If sepOk is set, an underscore character ``_'' may appear between successive
// exponent digits; such underscores do not change the value of the exponent.
// Incorrect placement of underscores is reported as an error if there are no
// other errors. If sepOk is not set, underscores are not recognized and thus
// terminate scanning like any other character that is not a valid digit.
// //
// exponent = ( "e" | "E" | "p" | "P" ) [ sign ] digits . // exponent = ( "e" | "E" | "p" | "P" ) [ sign ] digits .
// sign = "+" | "-" . // sign = "+" | "-" .
// digits = digit { digit } . // digits = digit { [ "_" ] digit } .
// digit = "0" ... "9" . // digit = "0" ... "9" .
// //
// A binary exponent is only permitted if binExpOk is set. // A base 2 exponent is only permitted if base2ok is set.
func scanExponent(r io.ByteScanner, binExpOk bool) (exp int64, base int, err error) { func scanExponent(r io.ByteScanner, base2ok, sepOk bool) (exp int64, base int, err error) {
base = 10 // one char look-ahead
ch, err := r.ReadByte()
var ch byte if err != nil {
if ch, err = r.ReadByte(); err != nil {
if err == io.EOF { if err == io.EOF {
err = nil // no exponent; same as e0 err = nil
} }
return return 0, 10, err
} }
// exponent char
switch ch { switch ch {
case 'e', 'E': case 'e', 'E':
// ok base = 10
case 'p', 'P': case 'p', 'P':
if binExpOk { if base2ok {
base = 2 base = 2
break // ok break // ok
} }
fallthrough // binary exponent not permitted fallthrough // binary exponent not permitted
default: default:
r.UnreadByte() r.UnreadByte() // ch does not belong to exponent anymore
return // no exponent; same as e0 return 0, 10, nil
}
var neg bool
if neg, err = scanSign(r); err != nil {
return
} }
// sign
var digits []byte var digits []byte
if neg { ch, err = r.ReadByte()
digits = append(digits, '-') if err == nil && (ch == '+' || ch == '-') {
if ch == '-' {
digits = append(digits, '-')
}
ch, err = r.ReadByte()
} }
// no need to use nat.scan for exponent digits // prev encodes the previously seen char: it is one
// since we only care about int64 values - the // of '_', '0' (a digit), or '.' (anything else). A
// from-scratch scan is easy enough and faster // valid separator '_' may only occur after a digit.
for i := 0; ; i++ { prev := '.'
if ch, err = r.ReadByte(); err != nil { invalSep := false
if err != io.EOF || i == 0 {
return // exponent value
hasDigits := false
for err == nil {
if '0' <= ch && ch <= '9' {
digits = append(digits, ch)
prev = '0'
hasDigits = true
} else if ch == '_' && sepOk {
if prev != '0' {
invalSep = true
} }
err = nil prev = '_'
break // i > 0 } else {
} r.UnreadByte() // ch does not belong to number anymore
if ch < '0' || '9' < ch { break
if i == 0 {
r.UnreadByte()
err = fmt.Errorf("invalid exponent (missing digits)")
return
}
break // i > 0
} }
digits = append(digits, ch) ch, err = r.ReadByte()
}
if err == io.EOF {
err = nil
}
if err == nil && !hasDigits {
err = errNoDigits
}
if err == nil {
exp, err = strconv.ParseInt(string(digits), 10, 64)
}
// other errors take precedence over invalid separators
if err == nil && (invalSep || prev == '_') {
err = errInvalSep
} }
// i > 0 => we have at least one digit
exp, err = strconv.ParseInt(string(digits), 10, 64)
return return
} }
......
...@@ -7,25 +7,91 @@ package big ...@@ -7,25 +7,91 @@ package big
import ( import (
"bytes" "bytes"
"fmt" "fmt"
"io"
"math" "math"
"strconv" "strconv"
"strings" "strings"
"testing" "testing"
) )
// exponentTests is the table driving TestScanExponent. Each entry gives an
// input string and the two scanExponent flags, followed by the expected
// exponent value, exponent base, error, and the first character that
// should remain unread in the input afterwards.
var exponentTests = []struct {
s string // string to be scanned
base2ok bool // true if 'p'/'P' exponents are accepted
sepOk bool // true if '_' separators are accepted
x int64 // expected exponent
b int // expected exponent base
err error // expected error
next rune // next character (or 0, if at EOF)
}{
// valid, without separators
// (inputs that do not start with an accepted exponent character are
// left unread and yield exponent 0 with base 10)
{"", false, false, 0, 10, nil, 0},
{"1", false, false, 0, 10, nil, '1'},
{"e0", false, false, 0, 10, nil, 0},
{"E1", false, false, 1, 10, nil, 0},
{"e+10", false, false, 10, 10, nil, 0},
{"e-10", false, false, -10, 10, nil, 0},
{"e123456789a", false, false, 123456789, 10, nil, 'a'},
{"p", false, false, 0, 10, nil, 'p'},
{"P+100", false, false, 0, 10, nil, 'P'},
{"p0", true, false, 0, 2, nil, 0},
{"P-123", true, false, -123, 2, nil, 0},
{"p+0a", true, false, 0, 2, nil, 'a'},
{"p+123__", true, false, 123, 2, nil, '_'}, // '_' is not part of the number anymore
// valid, with separators
{"e+1_0", false, true, 10, 10, nil, 0},
{"e-1_0", false, true, -10, 10, nil, 0},
{"e123_456_789a", false, true, 123456789, 10, nil, 'a'},
{"P+1_00", false, true, 0, 10, nil, 'P'},
{"p-1_2_3", true, true, -123, 2, nil, 0},
// invalid: no digits
{"e", false, false, 0, 10, errNoDigits, 0},
{"ef", false, false, 0, 10, errNoDigits, 'f'},
{"e+", false, false, 0, 10, errNoDigits, 0},
{"E-x", false, false, 0, 10, errNoDigits, 'x'},
{"p", true, false, 0, 2, errNoDigits, 0},
{"P-", true, false, 0, 2, errNoDigits, 0},
{"p+e", true, false, 0, 2, errNoDigits, 'e'},
{"e+_x", false, true, 0, 10, errNoDigits, 'x'},
// invalid: incorrect use of separator
// (the separators are still consumed, so nothing is left unread)
{"e0_", false, true, 0, 10, errInvalSep, 0},
{"e_0", false, true, 0, 10, errInvalSep, 0},
{"e-1_2__3", false, true, -123, 10, errInvalSep, 0},
}
// TestScanExponent feeds every entry of exponentTests to scanExponent and
// verifies the returned exponent, exponent base, and error, as well as the
// first character the scanner left unread in the input.
func TestScanExponent(t *testing.T) {
	for _, tc := range exponentTests {
		input := strings.NewReader(tc.s)
		exp, base, scanErr := scanExponent(input, tc.base2ok, tc.sepOk)

		// Errors are sentinel values, so they are compared by identity.
		if scanErr != tc.err {
			t.Errorf("scanExponent%+v\n\tgot error = %v; want %v", tc, scanErr, tc.err)
		}
		if exp != tc.x {
			t.Errorf("scanExponent%+v\n\tgot z = %v; want %v", tc, exp, tc.x)
		}
		if base != tc.b {
			t.Errorf("scanExponent%+v\n\tgot b = %d; want %d", tc, base, tc.b)
		}

		// Look at what remains in the input; end of input counts as rune 0.
		rest, _, readErr := input.ReadRune()
		switch {
		case readErr == io.EOF:
			rest = 0
		case readErr != nil:
			// Any other read failure leaves nothing meaningful to compare.
			continue
		}
		if rest != tc.next {
			t.Errorf("scanExponent%+v\n\tgot next = %q; want %q", tc, rest, tc.next)
		}
	}
}
type StringTest struct { type StringTest struct {
in, out string in, out string
ok bool ok bool
} }
var setStringTests = []StringTest{ var setStringTests = []StringTest{
{"0", "0", true}, // invalid
{"-0", "0", true},
{"1", "1", true},
{"-1", "-1", true},
{"1.", "1", true},
{"1e0", "1", true},
{"1.e1", "10", true},
{in: "1e"}, {in: "1e"},
{in: "1.e"}, {in: "1.e"},
{in: "1e+14e-5"}, {in: "1e+14e-5"},
...@@ -33,6 +99,20 @@ var setStringTests = []StringTest{ ...@@ -33,6 +99,20 @@ var setStringTests = []StringTest{
{in: "r"}, {in: "r"},
{in: "a/b"}, {in: "a/b"},
{in: "a.b"}, {in: "a.b"},
{in: "1/0"},
{in: "4/3/2"}, // issue 17001
{in: "4/3/"},
{in: "4/3."},
{in: "4/"},
// valid
{"0", "0", true},
{"-0", "0", true},
{"1", "1", true},
{"-1", "-1", true},
{"1.", "1", true},
{"1e0", "1", true},
{"1.e1", "10", true},
{"-0.1", "-1/10", true}, {"-0.1", "-1/10", true},
{"-.1", "-1/10", true}, {"-.1", "-1/10", true},
{"2/4", "1/2", true}, {"2/4", "1/2", true},
...@@ -49,24 +129,35 @@ var setStringTests = []StringTest{ ...@@ -49,24 +129,35 @@ var setStringTests = []StringTest{
{"106/141787961317645621392", "53/70893980658822810696", true}, {"106/141787961317645621392", "53/70893980658822810696", true},
{"204211327800791583.81095", "4084226556015831676219/20000", true}, {"204211327800791583.81095", "4084226556015831676219/20000", true},
{"0e9999999999", "0", true}, // issue #16176 {"0e9999999999", "0", true}, // issue #16176
{in: "1/0"},
{in: "4/3/2"}, // issue 17001
{in: "4/3/"},
{in: "4/3."},
{in: "4/"},
} }
// These are not supported by fmt.Fscanf. // These are not supported by fmt.Fscanf.
var setStringTests2 = []StringTest{ var setStringTests2 = []StringTest{
// invalid
{in: "4/3x"},
// invalid with separators
// (smoke tests only - a comprehensive set of tests is in natconv_test.go)
{in: "10_/1"},
{in: "_10/1"},
{in: "1/1__0"},
{in: "1_000.0"}, // floats are base 10 which doesn't permit separators; see also issue #29799
// valid
{"0b1000/3", "8/3", true}, {"0b1000/3", "8/3", true},
{"0B1000/0x8", "1", true}, {"0B1000/0x8", "1", true},
{"-010/1", "-8", true}, // TODO(gri) should we even permit octal here? {"-010/1", "-8", true},
{"-010.", "-10", true}, {"-010.", "-10", true},
{"-0o10/1", "-8", true}, {"-0o10/1", "-8", true},
{"0x10/1", "16", true}, {"0x10/1", "16", true},
{"0x10/0x20", "1/2", true}, {"0x10/0x20", "1/2", true},
{in: "4/3x"},
// TODO(gri) add more tests // valid with separators
// (smoke tests only - a comprehensive set of tests is in natconv_test.go)
{"0b_1000/3", "8/3", true},
{"0B_10_00/0x8", "1", true},
{"0xdead/0B1101_1110_1010_1101", "1", true},
{"0B1101_1110_1010_1101/0XD_E_A_D", "1", true},
} }
func TestRatSetString(t *testing.T) { func TestRatSetString(t *testing.T) {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment