Commit cfa93ba5 authored by Robert Griesemer's avatar Robert Griesemer

math/big: add support for underscores '_' in numbers

The primary change is in nat.scan which now accepts underscores for base 0.
While at it, streamlined error handling in that function as well.
Also, improved the corresponding test significantly by checking the
expected result values also in case of scan errors.

The second major change is in scanExponent which now accepts underscores when
the new sepOk argument is set. While at it, essentially rewrote that
function to match error and underscore handling of nat.scan more closely.
Added a new test for scanExponent which until now was only tested
indirectly.

Finally, updated the documentation for several functions and added many
new test cases to clients of nat.scan.

A major portion of this CL is due to much better test coverage.

Updates #28493.

Change-Id: I7f17b361b633fbe6c798619d891bd5e0a045b5c5
Reviewed-on: https://go-review.googlesource.com/c/go/+/166157Reviewed-by: default avatarEmmanuel Odeke <emm.odeke@gmail.com>
parent c4078a19
......@@ -55,7 +55,7 @@ func (z *Float) scan(r io.ByteScanner, base int) (f *Float, b int, err error) {
// exponent
var exp int64
var ebase int
exp, ebase, err = scanExponent(r, true)
exp, ebase, err = scanExponent(r, true, base == 0)
if err != nil {
return
}
......@@ -216,20 +216,29 @@ func (z *Float) pow5(n uint64) *Float {
// point number with a mantissa in the given conversion base (the exponent
// is always a decimal number), or a string representing an infinite value.
//
// For base 0, an underscore character ``_'' may appear between a base
// prefix and an adjacent digit, and between successive digits; such
// underscores do not change the value of the number, or the returned
// digit count. Incorrect placement of underscores is reported as an
// error if there are no other errors. If base != 0, underscores are
// not recognized and thus terminate scanning like any other character
// that is not a valid radix point or digit.
//
// It sets z to the (possibly rounded) value of the corresponding floating-
// point value, and returns z, the actual base b, and an error err, if any.
// The entire string (not just a prefix) must be consumed for success.
// If z's precision is 0, it is changed to 64 before rounding takes effect.
// The number must be of the form:
//
// number = [ sign ] [ prefix ] mantissa [ exponent ] | infinity .
// sign = "+" | "-" .
// prefix = "0" ( "b" | "B" | "o" | "O" | "x" | "X" ) .
// mantissa = digits | digits "." [ digits ] | "." digits .
// exponent = ( "e" | "E" | "p" | "P" ) [ sign ] digits .
// digits = digit { digit } .
// digit = "0" ... "9" | "a" ... "z" | "A" ... "Z" .
// infinity = [ sign ] ( "inf" | "Inf" ) .
// number = [ sign ] ( float | "inf" | "Inf" ) .
// sign = "+" | "-" .
// float = ( mantissa | prefix pmantissa ) [ exponent ] .
// prefix = "0" [ "b" | "B" | "o" | "O" | "x" | "X" ] .
// mantissa = digits "." [ digits ] | digits | "." digits .
// pmantissa = [ "_" ] digits "." [ digits ] | [ "_" ] digits | "." digits .
// exponent = ( "e" | "E" | "p" | "P" ) [ sign ] digits .
// digits = digit { [ "_" ] digit } .
// digit = "0" ... "9" | "a" ... "z" | "A" ... "Z" .
//
// The base argument must be 0, 2, 8, 10, or 16. Providing an invalid base
// argument will lead to a run-time panic.
......@@ -240,11 +249,11 @@ func (z *Float) pow5(n uint64) *Float {
// no prefix is accepted. The octal prefix "0" is not supported (a leading
// "0" is simply considered a "0").
//
// A "p" or "P" exponent indicates a binary (rather then decimal) exponent;
// for instance "0x1.fffffffffffffp1023" (using base 0) represents the
// maximum float64 value. For hexadecimal mantissae, the exponent must
// be binary, if present (an "e" or "E" exponent indicator cannot be
// distinguished from a mantissa digit).
// A "p" or "P" exponent indicates a base 2 (rather then base 10) exponent;
// for instance, "0x1.fffffffffffffp1023" (using base 0) represents the
// maximum float64 value. For hexadecimal mantissae, the exponent character
// must be one of 'p' or 'P', if present (an "e" or "E" exponent indicator
// cannot be distinguished from a mantissa digit).
//
// The returned *Float f is nil and the value of z is valid but not
// defined if an error is reported.
......
......@@ -72,6 +72,21 @@ func TestFloatSetFloat64String(t *testing.T) {
{"infinity", nan},
{"foobar", nan},
// invalid underscores
{"_", nan},
{"0_", nan},
{"1__0", nan},
{"123_.", nan},
{"123._", nan},
{"123._4", nan},
{"1_2.3_4_", nan},
{"_.123", nan},
{"_123.456", nan},
{"10._0", nan},
{"10.0e_0", nan},
{"10.0e0_", nan},
{"0P-0__0", nan},
// misc decimal values
{"3.14159265", 3.14159265},
{"-687436.79457e-245", -687436.79457e-245},
......@@ -142,6 +157,16 @@ func TestFloatSetFloat64String(t *testing.T) {
{"-0X0.00008P+16", -0.5},
{"0x0.0000000000001p-1022", math.SmallestNonzeroFloat64},
{"0x1.fffffffffffffp1023", math.MaxFloat64},
// underscores
{"0_0", 0},
{"1_000.", 1000},
{"1_2_3.4_5_6", 123.456},
{"1.0e0_0", 1},
{"1p+1_0", 1024},
{"0b_1000", 0x8},
{"0b_1011_1101", 0xbd},
{"0x_f0_0d_1eP+0_8", 0xf00d1e00},
} {
var x Float
x.SetPrec(53)
......
......@@ -401,16 +401,24 @@ func (x *Int) IsUint64() bool {
// (not just a prefix) must be valid for success. If SetString fails,
// the value of z is undefined but the returned value is nil.
//
// The base argument must be 0 or a value between 2 and MaxBase. If the base
// is 0, the string prefix determines the actual conversion base. A prefix of
// ``0x'' or ``0X'' selects base 16; the ``0'' prefix selects base 8, and a
// ``0b'' or ``0B'' prefix selects base 2. Otherwise the selected base is 10.
// The base argument must be 0 or a value between 2 and MaxBase.
// For base 0, the number prefix determines the actual base: A prefix of
// ``0b'' or ``0B'' selects base 2, ``0'', ``0o'' or ``0O'' selects base 8,
// and ``0x'' or ``0X'' selects base 16. Otherwise, the selected base is 10
// and no prefix is accepted.
//
// For bases <= 36, lower and upper case letters are considered the same:
// The letters 'a' to 'z' and 'A' to 'Z' represent digit values 10 to 35.
// For bases > 36, the upper case letters 'A' to 'Z' represent the digit
// values 36 to 61.
//
// For base 0, an underscore character ``_'' may appear between a base
// prefix and an adjacent digit, and between successive digits; such
// underscores do not change the value of the number.
// Incorrect placement of underscores is reported as an error if there
// are no other errors. If base != 0, underscores are not recognized
// and act like any other character that is not a valid digit.
//
func (z *Int) SetString(s string, base int) (*Int, bool) {
return z.setFromScanner(strings.NewReader(s), base)
}
......
......@@ -34,6 +34,16 @@ var stringTests = []struct {
{in: "0xg", base: 0},
{in: "g", base: 16},
// invalid inputs with separators
// (smoke tests only - a comprehensive set of tests is in natconv_test.go)
{in: "_"},
{in: "0_"},
{in: "_0"},
{in: "-1__0"},
{in: "0x10_"},
{in: "1_000", base: 10}, // separators are not permitted for bases != 0
{in: "d_e_a_d", base: 16},
// valid inputs
{"0", "0", 0, 0, true},
{"0", "0", 10, 0, true},
......@@ -67,6 +77,13 @@ var stringTests = []struct {
{"A", "A", 37, 36, true},
{"ABCXYZ", "abcxyz", 36, 623741435, true},
{"ABCXYZ", "ABCXYZ", 62, 33536793425, true},
// valid input with separators
// (smoke tests only - a comprehensive set of tests is in natconv_test.go)
{"1_000", "1000", 0, 1000, true},
{"0b_1010", "10", 0, 10, true},
{"+0o_660", "432", 0, 0660, true},
{"-0xF00D_1E", "-15731998", 0, -0xf00d1e, true},
}
func TestIntText(t *testing.T) {
......
......@@ -55,16 +55,31 @@ func pow(x Word, n int) (p Word) {
return
}
// scan errors
var (
errNoDigits = errors.New("number has no digits")
errInvalSep = errors.New("'_' must separate successive digits")
)
// scan scans the number corresponding to the longest possible prefix
// from r representing an unsigned number in a given conversion base.
// It returns the corresponding natural number res, the actual base b,
// scan returns the corresponding natural number res, the actual base b,
// a digit count, and a read or syntax error err, if any.
//
// number = [ prefix ] mantissa .
// prefix = "0" [ "b" | "B" | "o" | "O" | "x" | "X" ] .
// mantissa = digits | digits "." [ digits ] | "." digits .
// digits = digit { digit } .
// digit = "0" ... "9" | "a" ... "z" | "A" ... "Z" .
// For base 0, an underscore character ``_'' may appear between a base
// prefix and an adjacent digit, and between successive digits; such
// underscores do not change the value of the number, or the returned
// digit count. Incorrect placement of underscores is reported as an
// error if there are no other errors. If base != 0, underscores are
// not recognized and thus terminate scanning like any other character
// that is not a valid radix point or digit.
//
// number = mantissa | prefix pmantissa .
// prefix = "0" [ "b" | "B" | "o" | "O" | "x" | "X" ] .
// mantissa = digits "." [ digits ] | digits | "." digits .
// pmantissa = [ "_" ] digits "." [ digits ] | [ "_" ] digits | "." digits .
// digits = digit { [ "_" ] digit } .
// digit = "0" ... "9" | "a" ... "z" | "A" ... "Z" .
//
// Unless fracOk is set, the base argument must be 0 or a value between
// 2 and MaxBase. If fracOk is set, the base argument must be one of
......@@ -92,53 +107,51 @@ func pow(x Word, n int) (p Word) {
// In this case, the actual value of the scanned number is res * b**count.
//
func (z nat) scan(r io.ByteScanner, base int, fracOk bool) (res nat, b, count int, err error) {
// reject illegal bases
// reject invalid bases
baseOk := base == 0 ||
!fracOk && 2 <= base && base <= MaxBase ||
fracOk && (base == 2 || base == 8 || base == 10 || base == 16)
if !baseOk {
panic(fmt.Sprintf("illegal number base %d", base))
panic(fmt.Sprintf("invalid number base %d", base))
}
// prev encodes the previously seen char: it is one
// of '_', '0' (a digit), or '.' (anything else). A
// valid separator '_' may only occur after a digit
// and if base == 0.
prev := '.'
invalSep := false
// one char look-ahead
ch, err := r.ReadByte()
if err != nil {
return // io.EOF is also an error in this case
}
// determine actual base
b, prefix := base, 0
if base == 0 {
// actual base is 10 unless there's a base prefix
b = 10
if ch == '0' {
if err == nil && ch == '0' {
prev = '0'
count = 1
ch, err = r.ReadByte()
if err != nil {
if err == io.EOF {
err = nil // not an error; input is "0"
res = z[:0]
}
return
}
// possibly one of 0b, 0B, 0o, 0O, 0x, 0X
switch ch {
case 'b', 'B':
b, prefix = 2, 'b'
case 'o', 'O':
b, prefix = 8, 'o'
case 'x', 'X':
b, prefix = 16, 'x'
default:
if !fracOk {
b, prefix = 8, '0'
if err == nil {
// possibly one of 0b, 0B, 0o, 0O, 0x, 0X
switch ch {
case 'b', 'B':
b, prefix = 2, 'b'
case 'o', 'O':
b, prefix = 8, 'o'
case 'x', 'X':
b, prefix = 16, 'x'
default:
if !fracOk {
b, prefix = 8, '0'
}
}
}
if prefix != 0 {
count = 0 // prefix is not counted
if prefix != '0' {
if ch, err = r.ReadByte(); err != nil {
return // io.EOF is also an error in this case
if prefix != 0 {
count = 0 // prefix is not counted
if prefix != '0' {
ch, err = r.ReadByte()
}
}
}
......@@ -155,76 +168,76 @@ func (z nat) scan(r io.ByteScanner, base int, fracOk bool) (res nat, b, count in
di := Word(0) // 0 <= di < b1**i < bn
i := 0 // 0 <= i < n
dp := -1 // position of decimal point
for {
if fracOk && ch == '.' {
for err == nil {
if ch == '.' && fracOk {
fracOk = false
if prev == '_' {
invalSep = true
}
prev = '.'
dp = count
// advance
if ch, err = r.ReadByte(); err != nil {
if err == io.EOF {
err = nil
break
} else if ch == '_' && base == 0 {
if prev != '0' {
invalSep = true
}
prev = '_'
} else {
// convert rune into digit value d1
var d1 Word
switch {
case '0' <= ch && ch <= '9':
d1 = Word(ch - '0')
case 'a' <= ch && ch <= 'z':
d1 = Word(ch - 'a' + 10)
case 'A' <= ch && ch <= 'Z':
if b <= maxBaseSmall {
d1 = Word(ch - 'A' + 10)
} else {
d1 = Word(ch - 'A' + maxBaseSmall)
}
return
default:
d1 = MaxBase + 1
}
}
if d1 >= b1 {
r.UnreadByte() // ch does not belong to number anymore
break
}
prev = '0'
count++
// convert rune into digit value d1
var d1 Word
switch {
case '0' <= ch && ch <= '9':
d1 = Word(ch - '0')
case 'a' <= ch && ch <= 'z':
d1 = Word(ch - 'a' + 10)
case 'A' <= ch && ch <= 'Z':
if b <= maxBaseSmall {
d1 = Word(ch - 'A' + 10)
} else {
d1 = Word(ch - 'A' + maxBaseSmall)
// collect d1 in di
di = di*b1 + d1
i++
// if di is "full", add it to the result
if i == n {
z = z.mulAddWW(z, bn, di)
di = 0
i = 0
}
default:
d1 = MaxBase + 1
}
if d1 >= b1 {
r.UnreadByte() // ch does not belong to number anymore
break
}
count++
// collect d1 in di
di = di*b1 + d1
i++
ch, err = r.ReadByte()
}
// if di is "full", add it to the result
if i == n {
z = z.mulAddWW(z, bn, di)
di = 0
i = 0
}
if err == io.EOF {
err = nil
}
// advance
if ch, err = r.ReadByte(); err != nil {
if err == io.EOF {
err = nil
break
}
return
}
// other errors take precedence over invalid separators
if err == nil && (invalSep || prev == '_') {
err = errInvalSep
}
if count == 0 {
// no digits found
if prefix == '0' {
// there was only the octal prefix 0 (possibly followed by digits > 7);
// count as one digit and return base 10, not 8
count = 1
b = 10
} else {
err = errors.New("syntax error scanning number")
// there was only the octal prefix 0 (possibly followed by separators and digits > 7);
// interpret as decimal 0
return z[:0], 10, 1, err
}
return
err = errNoDigits // fall through; result will be 0
}
// count > 0
// add remaining digits to result
if i > 0 {
......@@ -232,9 +245,9 @@ func (z nat) scan(r io.ByteScanner, base int, fracOk bool) (res nat, b, count in
}
res = z.norm()
// adjust for fraction, if any
// adjust count for fraction, if any
if dp >= 0 {
// 0 <= dp <= count > 0
// 0 <= dp <= count
count = dp - count
}
......
......@@ -109,92 +109,126 @@ var natScanTests = []struct {
x nat // expected nat
b int // expected base
count int // expected digit count
ok bool // expected success
err error // expected error
next rune // next character (or 0, if at EOF)
}{
// invalid: no mantissa
{},
{s: "?"},
{base: 10},
{base: 36},
{base: 62},
{s: "?", base: 10},
{s: "0b"},
{s: "0o"},
{s: "0x"},
{s: "0b2"},
{s: "0B2"},
{s: "0o8"},
{s: "0O8"},
{s: "0xg"},
{s: "0Xg"},
{s: "345", base: 2},
// invalid: no digits
{"", 0, false, nil, 10, 0, errNoDigits, 0},
{"_", 0, false, nil, 10, 0, errNoDigits, 0},
{"?", 0, false, nil, 10, 0, errNoDigits, '?'},
{"?", 10, false, nil, 10, 0, errNoDigits, '?'},
{"", 10, false, nil, 10, 0, errNoDigits, 0},
{"", 36, false, nil, 36, 0, errNoDigits, 0},
{"", 62, false, nil, 62, 0, errNoDigits, 0},
{"0b", 0, false, nil, 2, 0, errNoDigits, 0},
{"0o", 0, false, nil, 8, 0, errNoDigits, 0},
{"0x", 0, false, nil, 16, 0, errNoDigits, 0},
{"0x_", 0, false, nil, 16, 0, errNoDigits, 0},
{"0b2", 0, false, nil, 2, 0, errNoDigits, '2'},
{"0B2", 0, false, nil, 2, 0, errNoDigits, '2'},
{"0o8", 0, false, nil, 8, 0, errNoDigits, '8'},
{"0O8", 0, false, nil, 8, 0, errNoDigits, '8'},
{"0xg", 0, false, nil, 16, 0, errNoDigits, 'g'},
{"0Xg", 0, false, nil, 16, 0, errNoDigits, 'g'},
{"345", 2, false, nil, 2, 0, errNoDigits, '3'},
// invalid: incorrect use of decimal point
{s: ".0"},
{s: ".0", base: 10},
{s: ".", base: 0},
{s: "0x.0"},
{"._", 0, true, nil, 10, 0, errNoDigits, 0},
{".0", 0, false, nil, 10, 0, errNoDigits, '.'},
{".0", 10, false, nil, 10, 0, errNoDigits, '.'},
{".", 0, true, nil, 10, 0, errNoDigits, 0},
{"0x.", 0, true, nil, 16, 0, errNoDigits, 0},
{"0x.g", 0, true, nil, 16, 0, errNoDigits, 'g'},
{"0x.0", 0, false, nil, 16, 0, errNoDigits, '.'},
// invalid: incorrect use of separators
{"_0", 0, false, nil, 10, 1, errInvalSep, 0},
{"0_", 0, false, nil, 10, 1, errInvalSep, 0},
{"0__0", 0, false, nil, 8, 1, errInvalSep, 0},
{"0x___0", 0, false, nil, 16, 1, errInvalSep, 0},
{"0_x", 0, false, nil, 10, 1, errInvalSep, 'x'},
{"0_8", 0, false, nil, 10, 1, errInvalSep, '8'},
{"123_.", 0, true, nat{123}, 10, 0, errInvalSep, 0},
{"._123", 0, true, nat{123}, 10, -3, errInvalSep, 0},
{"0b__1000", 0, false, nat{0x8}, 2, 4, errInvalSep, 0},
{"0o60___0", 0, false, nat{0600}, 8, 3, errInvalSep, 0},
{"0466_", 0, false, nat{0466}, 8, 3, errInvalSep, 0},
{"01234567_8", 0, false, nat{01234567}, 8, 7, errInvalSep, '8'},
{"1_.", 0, true, nat{1}, 10, 0, errInvalSep, 0},
{"0._1", 0, true, nat{1}, 10, -1, errInvalSep, 0},
{"2.7_", 0, true, nat{27}, 10, -1, errInvalSep, 0},
{"0x1.0_", 0, true, nat{0x10}, 16, -1, errInvalSep, 0},
// valid: separators are not accepted for base != 0
{"0_", 10, false, nil, 10, 1, nil, '_'},
{"1__0", 10, false, nat{1}, 10, 1, nil, '_'},
{"0__8", 10, false, nil, 10, 1, nil, '_'},
{"xy_z_", 36, false, nat{33*36 + 34}, 36, 2, nil, '_'},
// valid, no decimal point
{"0", 0, false, nil, 10, 1, true, 0},
{"0", 10, false, nil, 10, 1, true, 0},
{"0", 36, false, nil, 36, 1, true, 0},
{"0", 62, false, nil, 62, 1, true, 0},
{"1", 0, false, nat{1}, 10, 1, true, 0},
{"1", 10, false, nat{1}, 10, 1, true, 0},
{"0 ", 0, false, nil, 10, 1, true, ' '},
{"00 ", 0, false, nil, 8, 1, true, ' '}, // octal 0
{"0b1", 0, false, nat{1}, 2, 1, true, 0},
{"0B11000101", 0, false, nat{0xc5}, 2, 8, true, 0},
{"0B110001012", 0, false, nat{0xc5}, 2, 8, true, '2'},
{"07", 0, false, nat{7}, 8, 1, true, 0},
{"08", 0, false, nil, 10, 1, true, '8'},
{"08", 10, false, nat{8}, 10, 2, true, 0},
{"018", 0, false, nat{1}, 8, 1, true, '8'},
{"0o7", 0, false, nat{7}, 8, 1, true, 0},
{"0o18", 0, false, nat{1}, 8, 1, true, '8'},
{"0O17", 0, false, nat{017}, 8, 2, true, 0},
{"03271", 0, false, nat{03271}, 8, 4, true, 0},
{"10ab", 0, false, nat{10}, 10, 2, true, 'a'},
{"1234567890", 0, false, nat{1234567890}, 10, 10, true, 0},
{"A", 36, false, nat{10}, 36, 1, true, 0},
{"A", 37, false, nat{36}, 37, 1, true, 0},
{"xyz", 36, false, nat{(33*36+34)*36 + 35}, 36, 3, true, 0},
{"XYZ?", 36, false, nat{(33*36+34)*36 + 35}, 36, 3, true, '?'},
{"XYZ?", 62, false, nat{(59*62+60)*62 + 61}, 62, 3, true, '?'},
{"0x", 16, false, nil, 16, 1, true, 'x'},
{"0xdeadbeef", 0, false, nat{0xdeadbeef}, 16, 8, true, 0},
{"0XDEADBEEF", 0, false, nat{0xdeadbeef}, 16, 8, true, 0},
{"0", 0, false, nil, 10, 1, nil, 0},
{"0", 36, false, nil, 36, 1, nil, 0},
{"0", 62, false, nil, 62, 1, nil, 0},
{"1", 0, false, nat{1}, 10, 1, nil, 0},
{"1", 10, false, nat{1}, 10, 1, nil, 0},
{"0 ", 0, false, nil, 10, 1, nil, ' '},
{"00 ", 0, false, nil, 8, 1, nil, ' '}, // octal 0
{"0b1", 0, false, nat{1}, 2, 1, nil, 0},
{"0B11000101", 0, false, nat{0xc5}, 2, 8, nil, 0},
{"0B110001012", 0, false, nat{0xc5}, 2, 8, nil, '2'},
{"07", 0, false, nat{7}, 8, 1, nil, 0},
{"08", 0, false, nil, 10, 1, nil, '8'},
{"08", 10, false, nat{8}, 10, 2, nil, 0},
{"018", 0, false, nat{1}, 8, 1, nil, '8'},
{"0o7", 0, false, nat{7}, 8, 1, nil, 0},
{"0o18", 0, false, nat{1}, 8, 1, nil, '8'},
{"0O17", 0, false, nat{017}, 8, 2, nil, 0},
{"03271", 0, false, nat{03271}, 8, 4, nil, 0},
{"10ab", 0, false, nat{10}, 10, 2, nil, 'a'},
{"1234567890", 0, false, nat{1234567890}, 10, 10, nil, 0},
{"A", 36, false, nat{10}, 36, 1, nil, 0},
{"A", 37, false, nat{36}, 37, 1, nil, 0},
{"xyz", 36, false, nat{(33*36+34)*36 + 35}, 36, 3, nil, 0},
{"XYZ?", 36, false, nat{(33*36+34)*36 + 35}, 36, 3, nil, '?'},
{"XYZ?", 62, false, nat{(59*62+60)*62 + 61}, 62, 3, nil, '?'},
{"0x", 16, false, nil, 16, 1, nil, 'x'},
{"0xdeadbeef", 0, false, nat{0xdeadbeef}, 16, 8, nil, 0},
{"0XDEADBEEF", 0, false, nat{0xdeadbeef}, 16, 8, nil, 0},
// valid, with decimal point
{"0.", 0, false, nil, 10, 1, true, '.'},
{"0.", 10, true, nil, 10, 0, true, 0},
{"0.1.2", 10, true, nat{1}, 10, -1, true, '.'},
{".000", 10, true, nil, 10, -3, true, 0},
{"12.3", 10, true, nat{123}, 10, -1, true, 0},
{"012.345", 10, true, nat{12345}, 10, -3, true, 0},
{"0.1", 0, true, nat{1}, 10, -1, true, 0},
{"0.1", 2, true, nat{1}, 2, -1, true, 0},
{"0.12", 2, true, nat{1}, 2, -1, true, '2'},
{"0b0.1", 0, true, nat{1}, 2, -1, true, 0},
{"0B0.12", 0, true, nat{1}, 2, -1, true, '2'},
{"0o0.7", 0, true, nat{7}, 8, -1, true, 0},
{"0O0.78", 0, true, nat{7}, 8, -1, true, '8'},
{"0.", 0, false, nil, 10, 1, nil, '.'},
{"0.", 10, true, nil, 10, 0, nil, 0},
{"0.1.2", 10, true, nat{1}, 10, -1, nil, '.'},
{".000", 10, true, nil, 10, -3, nil, 0},
{"12.3", 10, true, nat{123}, 10, -1, nil, 0},
{"012.345", 10, true, nat{12345}, 10, -3, nil, 0},
{"0.1", 0, true, nat{1}, 10, -1, nil, 0},
{"0.1", 2, true, nat{1}, 2, -1, nil, 0},
{"0.12", 2, true, nat{1}, 2, -1, nil, '2'},
{"0b0.1", 0, true, nat{1}, 2, -1, nil, 0},
{"0B0.12", 0, true, nat{1}, 2, -1, nil, '2'},
{"0o0.7", 0, true, nat{7}, 8, -1, nil, 0},
{"0O0.78", 0, true, nat{7}, 8, -1, nil, '8'},
{"0xdead.beef", 0, true, nat{0xdeadbeef}, 16, -4, nil, 0},
// valid, with separators
{"1_000", 0, false, nat{1000}, 10, 4, nil, 0},
{"0_466", 0, false, nat{0466}, 8, 3, nil, 0},
{"0o_600", 0, false, nat{0600}, 8, 3, nil, 0},
{"0x_f0_0d", 0, false, nat{0xf00d}, 16, 4, nil, 0},
{"0b1000_0001", 0, false, nat{0x81}, 2, 8, nil, 0},
{"1_000.000_1", 0, true, nat{10000001}, 10, -4, nil, 0},
{"0x_f00d.1e", 0, true, nat{0xf00d1e}, 16, -2, nil, 0},
{"0x_f00d.1E2", 0, true, nat{0xf00d1e2}, 16, -3, nil, 0},
{"0x_f00d.1eg", 0, true, nat{0xf00d1e}, 16, -2, nil, 'g'},
}
func TestScanBase(t *testing.T) {
for _, a := range natScanTests {
r := strings.NewReader(a.s)
x, b, count, err := nat(nil).scan(r, a.base, a.frac)
if err == nil && !a.ok {
t.Errorf("scan%+v\n\texpected error", a)
}
if err != nil {
if a.ok {
t.Errorf("scan%+v\n\tgot error = %s", a, err)
}
continue
if err != a.err {
t.Errorf("scan%+v\n\tgot error = %v; want %v", a, err, a.err)
}
if x.cmp(a.x) != 0 {
t.Errorf("scan%+v\n\tgot z = %v; want %v", a, x, a.x)
......
......@@ -87,7 +87,7 @@ func (z *Rat) SetString(s string) (*Rat, bool) {
// exponent
var exp int64
exp, _, err = scanExponent(r, false)
exp, _, err = scanExponent(r, false, false)
if err != nil {
return nil, false
}
......@@ -129,75 +129,96 @@ func (z *Rat) SetString(s string) (*Rat, bool) {
return z, true
}
// scanExponent scans the longest possible prefix of r representing a decimal
// ('e', 'E') or binary ('p', 'P') exponent, if any. It returns the exponent,
// the exponent base (10 or 2), or a read or syntax error, if any.
// scanExponent scans the longest possible prefix of r representing a base 10
// (``e'', ``E'') or a base 2 (``p'', ``P'') exponent, if any. It returns the
// exponent, the exponent base (10 or 2), or a read or syntax error, if any.
//
// If sepOk is set, an underscore character ``_'' may appear between successive
// exponent digits; such underscores do not change the value of the exponent.
// Incorrect placement of underscores is reported as an error if there are no
// other errors. If sepOk is not set, underscores are not recognized and thus
// terminate scanning like any other character that is not a valid digit.
//
// exponent = ( "e" | "E" | "p" | "P" ) [ sign ] digits .
// sign = "+" | "-" .
// digits = digit { digit } .
// digits = digit { [ '_' ] digit } .
// digit = "0" ... "9" .
//
// A binary exponent is only permitted if binExpOk is set.
func scanExponent(r io.ByteScanner, binExpOk bool) (exp int64, base int, err error) {
base = 10
var ch byte
if ch, err = r.ReadByte(); err != nil {
// A base 2 exponent is only permitted if base2ok is set.
func scanExponent(r io.ByteScanner, base2ok, sepOk bool) (exp int64, base int, err error) {
// one char look-ahead
ch, err := r.ReadByte()
if err != nil {
if err == io.EOF {
err = nil // no exponent; same as e0
err = nil
}
return
return 0, 10, err
}
// exponent char
switch ch {
case 'e', 'E':
// ok
base = 10
case 'p', 'P':
if binExpOk {
if base2ok {
base = 2
break // ok
}
fallthrough // binary exponent not permitted
default:
r.UnreadByte()
return // no exponent; same as e0
}
var neg bool
if neg, err = scanSign(r); err != nil {
return
r.UnreadByte() // ch does not belong to exponent anymore
return 0, 10, nil
}
// sign
var digits []byte
if neg {
digits = append(digits, '-')
ch, err = r.ReadByte()
if err == nil && (ch == '+' || ch == '-') {
if ch == '-' {
digits = append(digits, '-')
}
ch, err = r.ReadByte()
}
// no need to use nat.scan for exponent digits
// since we only care about int64 values - the
// from-scratch scan is easy enough and faster
for i := 0; ; i++ {
if ch, err = r.ReadByte(); err != nil {
if err != io.EOF || i == 0 {
return
// prev encodes the previously seen char: it is one
// of '_', '0' (a digit), or '.' (anything else). A
// valid separator '_' may only occur after a digit.
prev := '.'
invalSep := false
// exponent value
hasDigits := false
for err == nil {
if '0' <= ch && ch <= '9' {
digits = append(digits, ch)
prev = '0'
hasDigits = true
} else if ch == '_' && sepOk {
if prev != '0' {
invalSep = true
}
err = nil
break // i > 0
}
if ch < '0' || '9' < ch {
if i == 0 {
r.UnreadByte()
err = fmt.Errorf("invalid exponent (missing digits)")
return
}
break // i > 0
prev = '_'
} else {
r.UnreadByte() // ch does not belong to number anymore
break
}
digits = append(digits, ch)
ch, err = r.ReadByte()
}
if err == io.EOF {
err = nil
}
if err == nil && !hasDigits {
err = errNoDigits
}
if err == nil {
exp, err = strconv.ParseInt(string(digits), 10, 64)
}
// other errors take precedence over invalid separators
if err == nil && (invalSep || prev == '_') {
err = errInvalSep
}
// i > 0 => we have at least one digit
exp, err = strconv.ParseInt(string(digits), 10, 64)
return
}
......
......@@ -7,25 +7,91 @@ package big
import (
"bytes"
"fmt"
"io"
"math"
"strconv"
"strings"
"testing"
)
var exponentTests = []struct {
s string // string to be scanned
base2ok bool // true if 'p'/'P' exponents are accepted
sepOk bool // true if '_' separators are accepted
x int64 // expected exponent
b int // expected exponent base
err error // expected error
next rune // next character (or 0, if at EOF)
}{
// valid, without separators
{"", false, false, 0, 10, nil, 0},
{"1", false, false, 0, 10, nil, '1'},
{"e0", false, false, 0, 10, nil, 0},
{"E1", false, false, 1, 10, nil, 0},
{"e+10", false, false, 10, 10, nil, 0},
{"e-10", false, false, -10, 10, nil, 0},
{"e123456789a", false, false, 123456789, 10, nil, 'a'},
{"p", false, false, 0, 10, nil, 'p'},
{"P+100", false, false, 0, 10, nil, 'P'},
{"p0", true, false, 0, 2, nil, 0},
{"P-123", true, false, -123, 2, nil, 0},
{"p+0a", true, false, 0, 2, nil, 'a'},
{"p+123__", true, false, 123, 2, nil, '_'}, // '_' is not part of the number anymore
// valid, with separators
{"e+1_0", false, true, 10, 10, nil, 0},
{"e-1_0", false, true, -10, 10, nil, 0},
{"e123_456_789a", false, true, 123456789, 10, nil, 'a'},
{"P+1_00", false, true, 0, 10, nil, 'P'},
{"p-1_2_3", true, true, -123, 2, nil, 0},
// invalid: no digits
{"e", false, false, 0, 10, errNoDigits, 0},
{"ef", false, false, 0, 10, errNoDigits, 'f'},
{"e+", false, false, 0, 10, errNoDigits, 0},
{"E-x", false, false, 0, 10, errNoDigits, 'x'},
{"p", true, false, 0, 2, errNoDigits, 0},
{"P-", true, false, 0, 2, errNoDigits, 0},
{"p+e", true, false, 0, 2, errNoDigits, 'e'},
{"e+_x", false, true, 0, 10, errNoDigits, 'x'},
// invalid: incorrect use of separator
{"e0_", false, true, 0, 10, errInvalSep, 0},
{"e_0", false, true, 0, 10, errInvalSep, 0},
{"e-1_2__3", false, true, -123, 10, errInvalSep, 0},
}
func TestScanExponent(t *testing.T) {
for _, a := range exponentTests {
r := strings.NewReader(a.s)
x, b, err := scanExponent(r, a.base2ok, a.sepOk)
if err != a.err {
t.Errorf("scanExponent%+v\n\tgot error = %v; want %v", a, err, a.err)
}
if x != a.x {
t.Errorf("scanExponent%+v\n\tgot z = %v; want %v", a, x, a.x)
}
if b != a.b {
t.Errorf("scanExponent%+v\n\tgot b = %d; want %d", a, b, a.b)
}
next, _, err := r.ReadRune()
if err == io.EOF {
next = 0
err = nil
}
if err == nil && next != a.next {
t.Errorf("scanExponent%+v\n\tgot next = %q; want %q", a, next, a.next)
}
}
}
type StringTest struct {
in, out string
ok bool
}
var setStringTests = []StringTest{
{"0", "0", true},
{"-0", "0", true},
{"1", "1", true},
{"-1", "-1", true},
{"1.", "1", true},
{"1e0", "1", true},
{"1.e1", "10", true},
// invalid
{in: "1e"},
{in: "1.e"},
{in: "1e+14e-5"},
......@@ -33,6 +99,20 @@ var setStringTests = []StringTest{
{in: "r"},
{in: "a/b"},
{in: "a.b"},
{in: "1/0"},
{in: "4/3/2"}, // issue 17001
{in: "4/3/"},
{in: "4/3."},
{in: "4/"},
// valid
{"0", "0", true},
{"-0", "0", true},
{"1", "1", true},
{"-1", "-1", true},
{"1.", "1", true},
{"1e0", "1", true},
{"1.e1", "10", true},
{"-0.1", "-1/10", true},
{"-.1", "-1/10", true},
{"2/4", "1/2", true},
......@@ -49,24 +129,35 @@ var setStringTests = []StringTest{
{"106/141787961317645621392", "53/70893980658822810696", true},
{"204211327800791583.81095", "4084226556015831676219/20000", true},
{"0e9999999999", "0", true}, // issue #16176
{in: "1/0"},
{in: "4/3/2"}, // issue 17001
{in: "4/3/"},
{in: "4/3."},
{in: "4/"},
}
// These are not supported by fmt.Fscanf.
var setStringTests2 = []StringTest{
// invalid
{in: "4/3x"},
// invalid with separators
// (smoke tests only - a comprehensive set of tests is in natconv_test.go)
{in: "10_/1"},
{in: "_10/1"},
{in: "1/1__0"},
{in: "1_000.0"}, // floats are base 10 which doesn't permit separators; see also issue #29799
// valid
{"0b1000/3", "8/3", true},
{"0B1000/0x8", "1", true},
{"-010/1", "-8", true}, // TODO(gri) should we even permit octal here?
{"-010/1", "-8", true},
{"-010.", "-10", true},
{"-0o10/1", "-8", true},
{"0x10/1", "16", true},
{"0x10/0x20", "1/2", true},
{in: "4/3x"},
// TODO(gri) add more tests
// valid with separators
// (smoke tests only - a comprehensive set of tests is in natconv_test.go)
{"0b_1000/3", "8/3", true},
{"0B_10_00/0x8", "1", true},
{"0xdead/0B1101_1110_1010_1101", "1", true},
{"0B1101_1110_1010_1101/0XD_E_A_D", "1", true},
}
func TestRatSetString(t *testing.T) {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment