math/big: add support for underscores '_' in numbers

The primary change is in nat.scan which now accepts underscores for base 0. While at it, streamlined error handling in that function as well. Also, improved the corresponding test significantly by checking the expected result values also in case of scan errors. The second major change is in scanExponent which now accepts underscores when the new sepOk argument is set. While at it, essentially rewrote that function to match error and underscore handling of nat.scan more closely. Added a new test for scanExponent which until now was only tested indirectly. Finally, updated the documentation for several functions and added many new test cases to clients of nat.scan. A major portion of this CL is due to much better test coverage. Updates #28493. Change-Id: I7f17b361b633fbe6c798619d891bd5e0a045b5c5 Reviewed-on: https://go-review.googlesource.com/c/go/+/166157Reviewed-by: Emmanuel Odeke <emm.odeke@gmail.com>

math/big: add support for underscores '_' in numbers
The primary change is in nat.scan which now accepts underscores for base 0. While at it, streamlined error handling in that function as well. Also, improved the corresponding test significantly by checking the expected result values also in case of scan errors. The second major change is in scanExponent which now accepts underscores when the new sepOk argument is set. While at it, essentially rewrote that function to match error and underscore handling of nat.scan more closely. Added a new test for scanExponent which until now was only tested indirectly. Finally, updated the documentation for several functions and added many new test cases to clients of nat.scan. A major portion of this CL is due to much better test coverage. Updates #28493. Change-Id: I7f17b361b633fbe6c798619d891bd5e0a045b5c5 Reviewed-on: https://go-review.googlesource.com/c/go/+/166157Reviewed-by: Emmanuel Odeke <emm.odeke@gmail.com>
cfa93ba5 · Robert Griesemer · c4078a19 · cfa93ba5 · cfa93ba5 · cfa93ba5
Commit cfa93ba5 authored Mar 07, 2019 by Robert Griesemer
8 changed files
--- a/src/math/big/floatconv.go
+++ b/src/math/big/floatconv.go
@@ -55,7 +55,7 @@ func (z *Float) scan(r io.ByteScanner, base int) (f *Float, b int, err error) {
 	// exponent
 	var exp int64
 	var ebase int
-	exp, ebase, err = scanExponent(r, true)
+	exp, ebase, err = scanExponent(r, true, base == 0)
 	if err != nil {
 		return
 	}
@@ -216,20 +216,29 @@ func (z *Float) pow5(n uint64) *Float {
 // point number with a mantissa in the given conversion base (the exponent
 // is always a decimal number), or a string representing an infinite value.
 //
+// For base 0, an underscore character ``_'' may appear between a base
+// prefix and an adjacent digit, and between successive digits; such
+// underscores do not change the value of the number, or the returned
+// digit count. Incorrect placement of underscores is reported as an
+// error if there are no other errors. If base != 0, underscores are
+// not recognized and thus terminate scanning like any other character
+// that is not a valid radix point or digit.
+//
 // It sets z to the (possibly rounded) value of the corresponding floating-
 // point value, and returns z, the actual base b, and an error err, if any.
 // The entire string (not just a prefix) must be consumed for success.
 // If z's precision is 0, it is changed to 64 before rounding takes effect.
 // The number must be of the form:
 //
-//	number   = [ sign ] [ prefix ] mantissa [ exponent ] | infinity .
+//     number    = [ sign ] ( float | "inf" | "Inf" ) .
-//	sign     = "+" | "-" .
+//     sign      = "+" | "-" .
-//	prefix   = "0" ( "b" | "B" | "o" | "O" | "x" | "X" ) .
+//     float     = ( mantissa | prefix pmantissa ) [ exponent ] .
-//	mantissa = digits | digits "." [ digits ] | "." digits .
+//     prefix    = "0" [ "b" | "B" | "o" | "O" | "x" | "X" ] .
-//	exponent = ( "e" | "E" | "p" | "P" ) [ sign ] digits .
+//     mantissa  = digits "." [ digits ] | digits | "." digits .
-//	digits   = digit { digit } .
+//     pmantissa = [ "_" ] digits "." [ digits ] | [ "_" ] digits | "." digits .
-//	digit    = "0" ... "9" | "a" ... "z" | "A" ... "Z" .
+//     exponent  = ( "e" | "E" | "p" | "P" ) [ sign ] digits .
-//	infinity = [ sign ] ( "inf" | "Inf" ) .
+//     digits    = digit { [ "_" ] digit } .
+//     digit     = "0" ... "9" | "a" ... "z" | "A" ... "Z" .
 //
 // The base argument must be 0, 2, 8, 10, or 16. Providing an invalid base
 // argument will lead to a run-time panic.
@@ -240,11 +249,11 @@ func (z *Float) pow5(n uint64) *Float {
 // no prefix is accepted. The octal prefix "0" is not supported (a leading
 // "0" is simply considered a "0").
 //
-// A "p" or "P" exponent indicates a binary (rather then decimal) exponent;
+// A "p" or "P" exponent indicates a base 2 (rather then base 10) exponent;
-// for instance "0x1.fffffffffffffp1023" (using base 0) represents the
+// for instance, "0x1.fffffffffffffp1023" (using base 0) represents the
-// maximum float64 value. For hexadecimal mantissae, the exponent must
+// maximum float64 value. For hexadecimal mantissae, the exponent character
-// be binary, if present (an "e" or "E" exponent indicator cannot be
+// must be one of 'p' or 'P', if present (an "e" or "E" exponent indicator
-// distinguished from a mantissa digit).
+// cannot be distinguished from a mantissa digit).
 //
 // The returned *Float f is nil and the value of z is valid but not
 // defined if an error is reported.

--- a/src/math/big/floatconv_test.go
+++ b/src/math/big/floatconv_test.go
@@ -72,6 +72,21 @@ func TestFloatSetFloat64String(t *testing.T) {
 		{"infinity", nan},
 		{"foobar", nan},
+		// invalid underscores
+		{"_", nan},
+		{"0_", nan},
+		{"1__0", nan},
+		{"123_.", nan},
+		{"123._", nan},
+		{"123._4", nan},
+		{"1_2.3_4_", nan},
+		{"_.123", nan},
+		{"_123.456", nan},
+		{"10._0", nan},
+		{"10.0e_0", nan},
+		{"10.0e0_", nan},
+		{"0P-0__0", nan},
 		// misc decimal values
 		{"3.14159265", 3.14159265},
 		{"-687436.79457e-245", -687436.79457e-245},
@@ -142,6 +157,16 @@ func TestFloatSetFloat64String(t *testing.T) {
 		{"-0X0.00008P+16", -0.5},
 		{"0x0.0000000000001p-1022", math.SmallestNonzeroFloat64},
 		{"0x1.fffffffffffffp1023", math.MaxFloat64},
+		// underscores
+		{"0_0", 0},
+		{"1_000.", 1000},
+		{"1_2_3.4_5_6", 123.456},
+		{"1.0e0_0", 1},
+		{"1p+1_0", 1024},
+		{"0b_1000", 0x8},
+		{"0b_1011_1101", 0xbd},
+		{"0x_f0_0d_1eP+0_8", 0xf00d1e00},
 	} {
 		var x Float
 		x.SetPrec(53)

--- a/src/math/big/int.go
+++ b/src/math/big/int.go
@@ -401,16 +401,24 @@ func (x *Int) IsUint64() bool {
 // (not just a prefix) must be valid for success. If SetString fails,
 // the value of z is undefined but the returned value is nil.
 //
-// The base argument must be 0 or a value between 2 and MaxBase. If the base
+// The base argument must be 0 or a value between 2 and MaxBase.
-// is 0, the string prefix determines the actual conversion base. A prefix of
+// For base 0, the number prefix determines the actual base: A prefix of
-// ``0x'' or ``0X'' selects base 16; the ``0'' prefix selects base 8, and a
+// ``0b'' or ``0B'' selects base 2, ``0'', ``0o'' or ``0O'' selects base 8,
-// ``0b'' or ``0B'' prefix selects base 2. Otherwise the selected base is 10.
+// and ``0x'' or ``0X'' selects base 16. Otherwise, the selected base is 10
+// and no prefix is accepted.
 //
 // For bases <= 36, lower and upper case letters are considered the same:
 // The letters 'a' to 'z' and 'A' to 'Z' represent digit values 10 to 35.
 // For bases > 36, the upper case letters 'A' to 'Z' represent the digit
 // values 36 to 61.
 //
+// For base 0, an underscore character ``_'' may appear between a base
+// prefix and an adjacent digit, and between successive digits; such
+// underscores do not change the value of the number.
+// Incorrect placement of underscores is reported as an error if there
+// are no other errors. If base != 0, underscores are not recognized
+// and act like any other character that is not a valid digit.
+//
 func (z *Int) SetString(s string, base int) (*Int, bool) {
 	return z.setFromScanner(strings.NewReader(s), base)
 }

--- a/src/math/big/intconv_test.go
+++ b/src/math/big/intconv_test.go
@@ -34,6 +34,16 @@ var stringTests = []struct {
 	{in: "0xg", base: 0},
 	{in: "g", base: 16},
+	// invalid inputs with separators
+	// (smoke tests only - a comprehensive set of tests is in natconv_test.go)
+	{in: "_"},
+	{in: "0_"},
+	{in: "_0"},
+	{in: "-1__0"},
+	{in: "0x10_"},
+	{in: "1_000", base: 10}, // separators are not permitted for bases != 0
+	{in: "d_e_a_d", base: 16},
 	// valid inputs
 	{"0", "0", 0, 0, true},
 	{"0", "0", 10, 0, true},
@@ -67,6 +77,13 @@ var stringTests = []struct {
 	{"A", "A", 37, 36, true},
 	{"ABCXYZ", "abcxyz", 36, 623741435, true},
 	{"ABCXYZ", "ABCXYZ", 62, 33536793425, true},
+	// valid input with separators
+	// (smoke tests only - a comprehensive set of tests is in natconv_test.go)
+	{"1_000", "1000", 0, 1000, true},
+	{"0b_1010", "10", 0, 10, true},
+	{"+0o_660", "432", 0, 0660, true},
+	{"-0xF00D_1E", "-15731998", 0, -0xf00d1e, true},
 }
 func TestIntText(t *testing.T) {

--- a/src/math/big/natconv.go
+++ b/src/math/big/natconv.go
@@ -55,16 +55,31 @@ func pow(x Word, n int) (p Word) {
 	return
 }
+// scan errors
+var (
+	errNoDigits = errors.New("number has no digits")
+	errInvalSep = errors.New("'_' must separate successive digits")
+)
 // scan scans the number corresponding to the longest possible prefix
 // from r representing an unsigned number in a given conversion base.
-// It returns the corresponding natural number res, the actual base b,
+// scan returns the corresponding natural number res, the actual base b,
 // a digit count, and a read or syntax error err, if any.
 //
-//     number   = [ prefix ] mantissa .
+// For base 0, an underscore character ``_'' may appear between a base
-//     prefix   = "0" [ "b" | "B" | "o" | "O" | "x" | "X" ] .
+// prefix and an adjacent digit, and between successive digits; such
-//     mantissa = digits | digits "." [ digits ] | "." digits .
+// underscores do not change the value of the number, or the returned
-//     digits   = digit { digit } .
+// digit count. Incorrect placement of underscores is reported as an
-//     digit    = "0" ... "9" | "a" ... "z" | "A" ... "Z" .
+// error if there are no other errors. If base != 0, underscores are
+// not recognized and thus terminate scanning like any other character
+// that is not a valid radix point or digit.
+//
+//     number    = mantissa | prefix pmantissa .
+//     prefix    = "0" [ "b" | "B" | "o" | "O" | "x" | "X" ] .
+//     mantissa  = digits "." [ digits ] | digits | "." digits .
+//     pmantissa = [ "_" ] digits "." [ digits ] | [ "_" ] digits | "." digits .
+//     digits    = digit { [ "_" ] digit } .
+//     digit     = "0" ... "9" | "a" ... "z" | "A" ... "Z" .
 //
 // Unless fracOk is set, the base argument must be 0 or a value between
 // 2 and MaxBase. If fracOk is set, the base argument must be one of
@@ -92,53 +107,51 @@ func pow(x Word, n int) (p Word) {
 // In this case, the actual value of the scanned number is res * b**count.
 //
 func (z nat) scan(r io.ByteScanner, base int, fracOk bool) (res nat, b, count int, err error) {
-	// reject illegal bases
+	// reject invalid bases
 	baseOk := base == 0 ||
 		!fracOk && 2 <= base && base <= MaxBase ||
 		fracOk && (base == 2 || base == 8 || base == 10 || base == 16)
 	if !baseOk {
-		panic(fmt.Sprintf("illegal number base %d", base))
+		panic(fmt.Sprintf("invalid number base %d", base))
 	}
+	// prev encodes the previously seen char: it is one
+	// of '_', '0' (a digit), or '.' (anything else). A
+	// valid separator '_' may only occur after a digit
+	// and if base == 0.
+	prev := '.'
+	invalSep := false
 	// one char look-ahead
 	ch, err := r.ReadByte()
-	if err != nil {
-		return // io.EOF is also an error in this case
-	}
 	// determine actual base
 	b, prefix := base, 0
 	if base == 0 {
 		// actual base is 10 unless there's a base prefix
 		b = 10
-		if ch == '0' {
+		if err == nil && ch == '0' {
+			prev = '0'
 			count = 1
 			ch, err = r.ReadByte()
-			if err != nil {
+			if err == nil {
-				if err == io.EOF {
+				// possibly one of 0b, 0B, 0o, 0O, 0x, 0X
-					err = nil // not an error; input is "0"
+				switch ch {
-					res = z[:0]
+				case 'b', 'B':
-				}
+					b, prefix = 2, 'b'
-				return
+				case 'o', 'O':
-			}
+					b, prefix = 8, 'o'
-			// possibly one of 0b, 0B, 0o, 0O, 0x, 0X
+				case 'x', 'X':
-			switch ch {
+					b, prefix = 16, 'x'
-			case 'b', 'B':
+				default:
-				b, prefix = 2, 'b'
+					if !fracOk {
-			case 'o', 'O':
+						b, prefix = 8, '0'
-				b, prefix = 8, 'o'
+					}
-			case 'x', 'X':
-				b, prefix = 16, 'x'
-			default:
-				if !fracOk {
-					b, prefix = 8, '0'
 				}
-			}
+				if prefix != 0 {
-			if prefix != 0 {
+					count = 0 // prefix is not counted
-				count = 0 // prefix is not counted
+					if prefix != '0' {
-				if prefix != '0' {
+						ch, err = r.ReadByte()
-					if ch, err = r.ReadByte(); err != nil {
-						return // io.EOF is also an error in this case
 					}
 				}
 			}
@@ -155,76 +168,76 @@ func (z nat) scan(r io.ByteScanner, base int, fracOk bool) (res nat, b, count in
 	di := Word(0)       // 0 <= di < b1**i < bn
 	i := 0              // 0 <= i < n
 	dp := -1            // position of decimal point
-	for {
+	for err == nil {
-		if fracOk && ch == '.' {
+		if ch == '.' && fracOk {
 			fracOk = false
+			if prev == '_' {
+				invalSep = true
+			}
+			prev = '.'
 			dp = count
-			// advance
+		} else if ch == '_' && base == 0 {
-			if ch, err = r.ReadByte(); err != nil {
+			if prev != '0' {
-				if err == io.EOF {
+				invalSep = true
-					err = nil
+			}
-					break
+			prev = '_'
+		} else {
+			// convert rune into digit value d1
+			var d1 Word
+			switch {
+			case '0' <= ch && ch <= '9':
+				d1 = Word(ch - '0')
+			case 'a' <= ch && ch <= 'z':
+				d1 = Word(ch - 'a' + 10)
+			case 'A' <= ch && ch <= 'Z':
+				if b <= maxBaseSmall {
+					d1 = Word(ch - 'A' + 10)
+				} else {
+					d1 = Word(ch - 'A' + maxBaseSmall)
 				}
-				return
+			default:
+				d1 = MaxBase + 1
 			}
-		}
+			if d1 >= b1 {
+				r.UnreadByte() // ch does not belong to number anymore
+				break
+			}
+			prev = '0'
+			count++
-		// convert rune into digit value d1
+			// collect d1 in di
-		var d1 Word
+			di = di*b1 + d1
-		switch {
+			i++
-		case '0' <= ch && ch <= '9':
-			d1 = Word(ch - '0')
+			// if di is "full", add it to the result
-		case 'a' <= ch && ch <= 'z':
+			if i == n {
-			d1 = Word(ch - 'a' + 10)
+				z = z.mulAddWW(z, bn, di)
-		case 'A' <= ch && ch <= 'Z':
+				di = 0
-			if b <= maxBaseSmall {
+				i = 0
-				d1 = Word(ch - 'A' + 10)
-			} else {
-				d1 = Word(ch - 'A' + maxBaseSmall)
 			}
-		default:
-			d1 = MaxBase + 1
-		}
-		if d1 >= b1 {
-			r.UnreadByte() // ch does not belong to number anymore
-			break
 		}
-		count++
-		// collect d1 in di
+		ch, err = r.ReadByte()
-		di = di*b1 + d1
+	}
-		i++
-		// if di is "full", add it to the result
+	if err == io.EOF {
-		if i == n {
+		err = nil
-			z = z.mulAddWW(z, bn, di)
+	}
-			di = 0
-			i = 0
-		}
-		// advance
+	// other errors take precedence over invalid separators
-		if ch, err = r.ReadByte(); err != nil {
+	if err == nil && (invalSep || prev == '_') {
-			if err == io.EOF {
+		err = errInvalSep
-				err = nil
-				break
-			}
-			return
-		}
 	}
 	if count == 0 {
 		// no digits found
 		if prefix == '0' {
-			// there was only the octal prefix 0 (possibly followed by digits > 7);
+			// there was only the octal prefix 0 (possibly followed by separators and digits > 7);
-			// count as one digit and return base 10, not 8
+			// interpret as decimal 0
-			count = 1
+			return z[:0], 10, 1, err
-			b = 10
-		} else {
-			err = errors.New("syntax error scanning number")
 		}
-		return
+		err = errNoDigits // fall through; result will be 0
 	}
-	// count > 0
 	// add remaining digits to result
 	if i > 0 {
@@ -232,9 +245,9 @@ func (z nat) scan(r io.ByteScanner, base int, fracOk bool) (res nat, b, count in
 	}
 	res = z.norm()
-	// adjust for fraction, if any
+	// adjust count for fraction, if any
 	if dp >= 0 {
-		// 0 <= dp <= count > 0
+		// 0 <= dp <= count
 		count = dp - count
 	}

--- a/src/math/big/natconv_test.go
+++ b/src/math/big/natconv_test.go
@@ -109,92 +109,126 @@ var natScanTests = []struct {
 	x     nat    // expected nat
 	b     int    // expected base
 	count int    // expected digit count
-	ok    bool   // expected success
+	err   error  // expected error
 	next  rune   // next character (or 0, if at EOF)
 }{
-	// invalid: no mantissa
+	// invalid: no digits
-	{},
+	{"", 0, false, nil, 10, 0, errNoDigits, 0},
-	{s: "?"},
+	{"_", 0, false, nil, 10, 0, errNoDigits, 0},
-	{base: 10},
+	{"?", 0, false, nil, 10, 0, errNoDigits, '?'},
-	{base: 36},
+	{"?", 10, false, nil, 10, 0, errNoDigits, '?'},
-	{base: 62},
+	{"", 10, false, nil, 10, 0, errNoDigits, 0},
-	{s: "?", base: 10},
+	{"", 36, false, nil, 36, 0, errNoDigits, 0},
-	{s: "0b"},
+	{"", 62, false, nil, 62, 0, errNoDigits, 0},
-	{s: "0o"},
+	{"0b", 0, false, nil, 2, 0, errNoDigits, 0},
-	{s: "0x"},
+	{"0o", 0, false, nil, 8, 0, errNoDigits, 0},
-	{s: "0b2"},
+	{"0x", 0, false, nil, 16, 0, errNoDigits, 0},
-	{s: "0B2"},
+	{"0x_", 0, false, nil, 16, 0, errNoDigits, 0},
-	{s: "0o8"},
+	{"0b2", 0, false, nil, 2, 0, errNoDigits, '2'},
-	{s: "0O8"},
+	{"0B2", 0, false, nil, 2, 0, errNoDigits, '2'},
-	{s: "0xg"},
+	{"0o8", 0, false, nil, 8, 0, errNoDigits, '8'},
-	{s: "0Xg"},
+	{"0O8", 0, false, nil, 8, 0, errNoDigits, '8'},
-	{s: "345", base: 2},
+	{"0xg", 0, false, nil, 16, 0, errNoDigits, 'g'},
+	{"0Xg", 0, false, nil, 16, 0, errNoDigits, 'g'},
+	{"345", 2, false, nil, 2, 0, errNoDigits, '3'},
 	// invalid: incorrect use of decimal point
-	{s: ".0"},
+	{"._", 0, true, nil, 10, 0, errNoDigits, 0},
-	{s: ".0", base: 10},
+	{".0", 0, false, nil, 10, 0, errNoDigits, '.'},
-	{s: ".", base: 0},
+	{".0", 10, false, nil, 10, 0, errNoDigits, '.'},
-	{s: "0x.0"},
+	{".", 0, true, nil, 10, 0, errNoDigits, 0},
+	{"0x.", 0, true, nil, 16, 0, errNoDigits, 0},
+	{"0x.g", 0, true, nil, 16, 0, errNoDigits, 'g'},
+	{"0x.0", 0, false, nil, 16, 0, errNoDigits, '.'},
+	// invalid: incorrect use of separators
+	{"_0", 0, false, nil, 10, 1, errInvalSep, 0},
+	{"0_", 0, false, nil, 10, 1, errInvalSep, 0},
+	{"0__0", 0, false, nil, 8, 1, errInvalSep, 0},
+	{"0x___0", 0, false, nil, 16, 1, errInvalSep, 0},
+	{"0_x", 0, false, nil, 10, 1, errInvalSep, 'x'},
+	{"0_8", 0, false, nil, 10, 1, errInvalSep, '8'},
+	{"123_.", 0, true, nat{123}, 10, 0, errInvalSep, 0},
+	{"._123", 0, true, nat{123}, 10, -3, errInvalSep, 0},
+	{"0b__1000", 0, false, nat{0x8}, 2, 4, errInvalSep, 0},
+	{"0o60___0", 0, false, nat{0600}, 8, 3, errInvalSep, 0},
+	{"0466_", 0, false, nat{0466}, 8, 3, errInvalSep, 0},
+	{"01234567_8", 0, false, nat{01234567}, 8, 7, errInvalSep, '8'},
+	{"1_.", 0, true, nat{1}, 10, 0, errInvalSep, 0},
+	{"0._1", 0, true, nat{1}, 10, -1, errInvalSep, 0},
+	{"2.7_", 0, true, nat{27}, 10, -1, errInvalSep, 0},
+	{"0x1.0_", 0, true, nat{0x10}, 16, -1, errInvalSep, 0},
+	// valid: separators are not accepted for base != 0
+	{"0_", 10, false, nil, 10, 1, nil, '_'},
+	{"1__0", 10, false, nat{1}, 10, 1, nil, '_'},
+	{"0__8", 10, false, nil, 10, 1, nil, '_'},
+	{"xy_z_", 36, false, nat{33*36 + 34}, 36, 2, nil, '_'},
 	// valid, no decimal point
-	{"0", 0, false, nil, 10, 1, true, 0},
+	{"0", 0, false, nil, 10, 1, nil, 0},
-	{"0", 10, false, nil, 10, 1, true, 0},
+	{"0", 36, false, nil, 36, 1, nil, 0},
-	{"0", 36, false, nil, 36, 1, true, 0},
+	{"0", 62, false, nil, 62, 1, nil, 0},
-	{"0", 62, false, nil, 62, 1, true, 0},
+	{"1", 0, false, nat{1}, 10, 1, nil, 0},
-	{"1", 0, false, nat{1}, 10, 1, true, 0},
+	{"1", 10, false, nat{1}, 10, 1, nil, 0},
-	{"1", 10, false, nat{1}, 10, 1, true, 0},
+	{"0 ", 0, false, nil, 10, 1, nil, ' '},
-	{"0 ", 0, false, nil, 10, 1, true, ' '},
+	{"00 ", 0, false, nil, 8, 1, nil, ' '}, // octal 0
-	{"00 ", 0, false, nil, 8, 1, true, ' '}, // octal 0
+	{"0b1", 0, false, nat{1}, 2, 1, nil, 0},
-	{"0b1", 0, false, nat{1}, 2, 1, true, 0},
+	{"0B11000101", 0, false, nat{0xc5}, 2, 8, nil, 0},
-	{"0B11000101", 0, false, nat{0xc5}, 2, 8, true, 0},
+	{"0B110001012", 0, false, nat{0xc5}, 2, 8, nil, '2'},
-	{"0B110001012", 0, false, nat{0xc5}, 2, 8, true, '2'},
+	{"07", 0, false, nat{7}, 8, 1, nil, 0},
-	{"07", 0, false, nat{7}, 8, 1, true, 0},
+	{"08", 0, false, nil, 10, 1, nil, '8'},
-	{"08", 0, false, nil, 10, 1, true, '8'},
+	{"08", 10, false, nat{8}, 10, 2, nil, 0},
-	{"08", 10, false, nat{8}, 10, 2, true, 0},
+	{"018", 0, false, nat{1}, 8, 1, nil, '8'},
-	{"018", 0, false, nat{1}, 8, 1, true, '8'},
+	{"0o7", 0, false, nat{7}, 8, 1, nil, 0},
-	{"0o7", 0, false, nat{7}, 8, 1, true, 0},
+	{"0o18", 0, false, nat{1}, 8, 1, nil, '8'},
-	{"0o18", 0, false, nat{1}, 8, 1, true, '8'},
+	{"0O17", 0, false, nat{017}, 8, 2, nil, 0},
-	{"0O17", 0, false, nat{017}, 8, 2, true, 0},
+	{"03271", 0, false, nat{03271}, 8, 4, nil, 0},
-	{"03271", 0, false, nat{03271}, 8, 4, true, 0},
+	{"10ab", 0, false, nat{10}, 10, 2, nil, 'a'},
-	{"10ab", 0, false, nat{10}, 10, 2, true, 'a'},
+	{"1234567890", 0, false, nat{1234567890}, 10, 10, nil, 0},
-	{"1234567890", 0, false, nat{1234567890}, 10, 10, true, 0},
+	{"A", 36, false, nat{10}, 36, 1, nil, 0},
-	{"A", 36, false, nat{10}, 36, 1, true, 0},
+	{"A", 37, false, nat{36}, 37, 1, nil, 0},
-	{"A", 37, false, nat{36}, 37, 1, true, 0},
+	{"xyz", 36, false, nat{(33*36+34)*36 + 35}, 36, 3, nil, 0},
-	{"xyz", 36, false, nat{(33*36+34)*36 + 35}, 36, 3, true, 0},
+	{"XYZ?", 36, false, nat{(33*36+34)*36 + 35}, 36, 3, nil, '?'},
-	{"XYZ?", 36, false, nat{(33*36+34)*36 + 35}, 36, 3, true, '?'},
+	{"XYZ?", 62, false, nat{(59*62+60)*62 + 61}, 62, 3, nil, '?'},
-	{"XYZ?", 62, false, nat{(59*62+60)*62 + 61}, 62, 3, true, '?'},
+	{"0x", 16, false, nil, 16, 1, nil, 'x'},
-	{"0x", 16, false, nil, 16, 1, true, 'x'},
+	{"0xdeadbeef", 0, false, nat{0xdeadbeef}, 16, 8, nil, 0},
-	{"0xdeadbeef", 0, false, nat{0xdeadbeef}, 16, 8, true, 0},
+	{"0XDEADBEEF", 0, false, nat{0xdeadbeef}, 16, 8, nil, 0},
-	{"0XDEADBEEF", 0, false, nat{0xdeadbeef}, 16, 8, true, 0},
 	// valid, with decimal point
-	{"0.", 0, false, nil, 10, 1, true, '.'},
+	{"0.", 0, false, nil, 10, 1, nil, '.'},
-	{"0.", 10, true, nil, 10, 0, true, 0},
+	{"0.", 10, true, nil, 10, 0, nil, 0},
-	{"0.1.2", 10, true, nat{1}, 10, -1, true, '.'},
+	{"0.1.2", 10, true, nat{1}, 10, -1, nil, '.'},
-	{".000", 10, true, nil, 10, -3, true, 0},
+	{".000", 10, true, nil, 10, -3, nil, 0},
-	{"12.3", 10, true, nat{123}, 10, -1, true, 0},
+	{"12.3", 10, true, nat{123}, 10, -1, nil, 0},
-	{"012.345", 10, true, nat{12345}, 10, -3, true, 0},
+	{"012.345", 10, true, nat{12345}, 10, -3, nil, 0},
-	{"0.1", 0, true, nat{1}, 10, -1, true, 0},
+	{"0.1", 0, true, nat{1}, 10, -1, nil, 0},
-	{"0.1", 2, true, nat{1}, 2, -1, true, 0},
+	{"0.1", 2, true, nat{1}, 2, -1, nil, 0},
-	{"0.12", 2, true, nat{1}, 2, -1, true, '2'},
+	{"0.12", 2, true, nat{1}, 2, -1, nil, '2'},
-	{"0b0.1", 0, true, nat{1}, 2, -1, true, 0},
+	{"0b0.1", 0, true, nat{1}, 2, -1, nil, 0},
-	{"0B0.12", 0, true, nat{1}, 2, -1, true, '2'},
+	{"0B0.12", 0, true, nat{1}, 2, -1, nil, '2'},
-	{"0o0.7", 0, true, nat{7}, 8, -1, true, 0},
+	{"0o0.7", 0, true, nat{7}, 8, -1, nil, 0},
-	{"0O0.78", 0, true, nat{7}, 8, -1, true, '8'},
+	{"0O0.78", 0, true, nat{7}, 8, -1, nil, '8'},
+	{"0xdead.beef", 0, true, nat{0xdeadbeef}, 16, -4, nil, 0},
+	// valid, with separators
+	{"1_000", 0, false, nat{1000}, 10, 4, nil, 0},
+	{"0_466", 0, false, nat{0466}, 8, 3, nil, 0},
+	{"0o_600", 0, false, nat{0600}, 8, 3, nil, 0},
+	{"0x_f0_0d", 0, false, nat{0xf00d}, 16, 4, nil, 0},
+	{"0b1000_0001", 0, false, nat{0x81}, 2, 8, nil, 0},
+	{"1_000.000_1", 0, true, nat{10000001}, 10, -4, nil, 0},
+	{"0x_f00d.1e", 0, true, nat{0xf00d1e}, 16, -2, nil, 0},
+	{"0x_f00d.1E2", 0, true, nat{0xf00d1e2}, 16, -3, nil, 0},
+	{"0x_f00d.1eg", 0, true, nat{0xf00d1e}, 16, -2, nil, 'g'},
 }
 func TestScanBase(t *testing.T) {
 	for _, a := range natScanTests {
 		r := strings.NewReader(a.s)
 		x, b, count, err := nat(nil).scan(r, a.base, a.frac)
-		if err == nil && !a.ok {
+		if err != a.err {
-			t.Errorf("scan%+v\n\texpected error", a)
+			t.Errorf("scan%+v\n\tgot error = %v; want %v", a, err, a.err)
-		}
-		if err != nil {
-			if a.ok {
-				t.Errorf("scan%+v\n\tgot error = %s", a, err)
-			}
-			continue
 		}
 		if x.cmp(a.x) != 0 {
 			t.Errorf("scan%+v\n\tgot z = %v; want %v", a, x, a.x)

--- a/src/math/big/ratconv.go
+++ b/src/math/big/ratconv.go
@@ -87,7 +87,7 @@ func (z *Rat) SetString(s string) (*Rat, bool) {
 	// exponent
 	var exp int64
-	exp, _, err = scanExponent(r, false)
+	exp, _, err = scanExponent(r, false, false)
 	if err != nil {
 		return nil, false
 	}
@@ -129,75 +129,96 @@ func (z *Rat) SetString(s string) (*Rat, bool) {
 	return z, true
 }
-// scanExponent scans the longest possible prefix of r representing a decimal
+// scanExponent scans the longest possible prefix of r representing a base 10
-// ('e', 'E') or binary ('p', 'P') exponent, if any. It returns the exponent,
+// (``e'', ``E'') or a base 2 (``p'', ``P'') exponent, if any. It returns the
-// the exponent base (10 or 2), or a read or syntax error, if any.
+// exponent, the exponent base (10 or 2), or a read or syntax error, if any.
+//
+// If sepOk is set, an underscore character ``_'' may appear between successive
+// exponent digits; such underscores do not change the value of the exponent.
+// Incorrect placement of underscores is reported as an error if there are no
+// other errors. If sepOk is not set, underscores are not recognized and thus
+// terminate scanning like any other character that is not a valid digit.
 //
 //	exponent = ( "e" | "E" | "p" | "P" ) [ sign ] digits .
 //	sign     = "+" | "-" .
-//	digits   = digit { digit } .
+//	digits   = digit { [ '_' ] digit } .
 //	digit    = "0" ... "9" .
 //
-// A binary exponent is only permitted if binExpOk is set.
+// A base 2 exponent is only permitted if base2ok is set.
-func scanExponent(r io.ByteScanner, binExpOk bool) (exp int64, base int, err error) {
+func scanExponent(r io.ByteScanner, base2ok, sepOk bool) (exp int64, base int, err error) {
-	base = 10
+	// one char look-ahead
+	ch, err := r.ReadByte()
-	var ch byte
+	if err != nil {
-	if ch, err = r.ReadByte(); err != nil {
 		if err == io.EOF {
-			err = nil // no exponent; same as e0
+			err = nil
 		}
-		return
+		return 0, 10, err
 	}
+	// exponent char
 	switch ch {
 	case 'e', 'E':
-		// ok
+		base = 10
 	case 'p', 'P':
-		if binExpOk {
+		if base2ok {
 			base = 2
 			break // ok
 		}
 		fallthrough // binary exponent not permitted
 	default:
-		r.UnreadByte()
+		r.UnreadByte() // ch does not belong to exponent anymore
-		return // no exponent; same as e0
+		return 0, 10, nil
-	}
-	var neg bool
-	if neg, err = scanSign(r); err != nil {
-		return
 	}
+	// sign
 	var digits []byte
-	if neg {
+	ch, err = r.ReadByte()
-		digits = append(digits, '-')
+	if err == nil && (ch == '+' || ch == '-') {
+		if ch == '-' {
+			digits = append(digits, '-')
+		}
+		ch, err = r.ReadByte()
 	}
-	// no need to use nat.scan for exponent digits
+	// prev encodes the previously seen char: it is one
-	// since we only care about int64 values - the
+	// of '_', '0' (a digit), or '.' (anything else). A
-	// from-scratch scan is easy enough and faster
+	// valid separator '_' may only occur after a digit.
-	for i := 0; ; i++ {
+	prev := '.'
-		if ch, err = r.ReadByte(); err != nil {
+	invalSep := false
-			if err != io.EOF || i == 0 {
-				return
+	// exponent value
+	hasDigits := false
+	for err == nil {
+		if '0' <= ch && ch <= '9' {
+			digits = append(digits, ch)
+			prev = '0'
+			hasDigits = true
+		} else if ch == '_' && sepOk {
+			if prev != '0' {
+				invalSep = true
 			}
-			err = nil
+			prev = '_'
-			break // i > 0
+		} else {
-		}
+			r.UnreadByte() // ch does not belong to number anymore
-		if ch < '0' || '9' < ch {
+			break
-			if i == 0 {
-				r.UnreadByte()
-				err = fmt.Errorf("invalid exponent (missing digits)")
-				return
-			}
-			break // i > 0
 		}
-		digits = append(digits, ch)
+		ch, err = r.ReadByte()
+	}
+	if err == io.EOF {
+		err = nil
+	}
+	if err == nil && !hasDigits {
+		err = errNoDigits
+	}
+	if err == nil {
+		exp, err = strconv.ParseInt(string(digits), 10, 64)
+	}
+	// other errors take precedence over invalid separators
+	if err == nil && (invalSep || prev == '_') {
+		err = errInvalSep
 	}
-	// i > 0 => we have at least one digit
-	exp, err = strconv.ParseInt(string(digits), 10, 64)
 	return
 }

--- a/src/math/big/ratconv_test.go
+++ b/src/math/big/ratconv_test.go
@@ -7,25 +7,91 @@ package big
 import (
 	"bytes"
 	"fmt"
+	"io"
 	"math"
 	"strconv"
 	"strings"
 	"testing"
 )
+var exponentTests = []struct {
+	s       string // string to be scanned
+	base2ok bool   // true if 'p'/'P' exponents are accepted
+	sepOk   bool   // true if '_' separators are accepted
+	x       int64  // expected exponent
+	b       int    // expected exponent base
+	err     error  // expected error
+	next    rune   // next character (or 0, if at EOF)
+}{
+	// valid, without separators
+	{"", false, false, 0, 10, nil, 0},
+	{"1", false, false, 0, 10, nil, '1'},
+	{"e0", false, false, 0, 10, nil, 0},
+	{"E1", false, false, 1, 10, nil, 0},
+	{"e+10", false, false, 10, 10, nil, 0},
+	{"e-10", false, false, -10, 10, nil, 0},
+	{"e123456789a", false, false, 123456789, 10, nil, 'a'},
+	{"p", false, false, 0, 10, nil, 'p'},
+	{"P+100", false, false, 0, 10, nil, 'P'},
+	{"p0", true, false, 0, 2, nil, 0},
+	{"P-123", true, false, -123, 2, nil, 0},
+	{"p+0a", true, false, 0, 2, nil, 'a'},
+	{"p+123__", true, false, 123, 2, nil, '_'}, // '_' is not part of the number anymore
+	// valid, with separators
+	{"e+1_0", false, true, 10, 10, nil, 0},
+	{"e-1_0", false, true, -10, 10, nil, 0},
+	{"e123_456_789a", false, true, 123456789, 10, nil, 'a'},
+	{"P+1_00", false, true, 0, 10, nil, 'P'},
+	{"p-1_2_3", true, true, -123, 2, nil, 0},
+	// invalid: no digits
+	{"e", false, false, 0, 10, errNoDigits, 0},
+	{"ef", false, false, 0, 10, errNoDigits, 'f'},
+	{"e+", false, false, 0, 10, errNoDigits, 0},
+	{"E-x", false, false, 0, 10, errNoDigits, 'x'},
+	{"p", true, false, 0, 2, errNoDigits, 0},
+	{"P-", true, false, 0, 2, errNoDigits, 0},
+	{"p+e", true, false, 0, 2, errNoDigits, 'e'},
+	{"e+_x", false, true, 0, 10, errNoDigits, 'x'},
+	// invalid: incorrect use of separator
+	{"e0_", false, true, 0, 10, errInvalSep, 0},
+	{"e_0", false, true, 0, 10, errInvalSep, 0},
+	{"e-1_2__3", false, true, -123, 10, errInvalSep, 0},
+}
+func TestScanExponent(t *testing.T) {
+	for _, a := range exponentTests {
+		r := strings.NewReader(a.s)
+		x, b, err := scanExponent(r, a.base2ok, a.sepOk)
+		if err != a.err {
+			t.Errorf("scanExponent%+v\n\tgot error = %v; want %v", a, err, a.err)
+		}
+		if x != a.x {
+			t.Errorf("scanExponent%+v\n\tgot z = %v; want %v", a, x, a.x)
+		}
+		if b != a.b {
+			t.Errorf("scanExponent%+v\n\tgot b = %d; want %d", a, b, a.b)
+		}
+		next, _, err := r.ReadRune()
+		if err == io.EOF {
+			next = 0
+			err = nil
+		}
+		if err == nil && next != a.next {
+			t.Errorf("scanExponent%+v\n\tgot next = %q; want %q", a, next, a.next)
+		}
+	}
+}
 type StringTest struct {
 	in, out string
 	ok      bool
 }
 var setStringTests = []StringTest{
-	{"0", "0", true},
+	// invalid
-	{"-0", "0", true},
-	{"1", "1", true},
-	{"-1", "-1", true},
-	{"1.", "1", true},
-	{"1e0", "1", true},
-	{"1.e1", "10", true},
 	{in: "1e"},
 	{in: "1.e"},
 	{in: "1e+14e-5"},
@@ -33,6 +99,20 @@ var setStringTests = []StringTest{
 	{in: "r"},
 	{in: "a/b"},
 	{in: "a.b"},
+	{in: "1/0"},
+	{in: "4/3/2"}, // issue 17001
+	{in: "4/3/"},
+	{in: "4/3."},
+	{in: "4/"},
+	// valid
+	{"0", "0", true},
+	{"-0", "0", true},
+	{"1", "1", true},
+	{"-1", "-1", true},
+	{"1.", "1", true},
+	{"1e0", "1", true},
+	{"1.e1", "10", true},
 	{"-0.1", "-1/10", true},
 	{"-.1", "-1/10", true},
 	{"2/4", "1/2", true},
@@ -49,24 +129,35 @@ var setStringTests = []StringTest{
 	{"106/141787961317645621392", "53/70893980658822810696", true},
 	{"204211327800791583.81095", "4084226556015831676219/20000", true},
 	{"0e9999999999", "0", true}, // issue #16176
-	{in: "1/0"},
-	{in: "4/3/2"}, // issue 17001
-	{in: "4/3/"},
-	{in: "4/3."},
-	{in: "4/"},
 }
 // These are not supported by fmt.Fscanf.
 var setStringTests2 = []StringTest{
+	// invalid
+	{in: "4/3x"},
+	// invalid with separators
+	// (smoke tests only - a comprehensive set of tests is in natconv_test.go)
+	{in: "10_/1"},
+	{in: "_10/1"},
+	{in: "1/1__0"},
+	{in: "1_000.0"}, // floats are base 10 which doesn't permit separators; see also issue #29799
+	// valid
 	{"0b1000/3", "8/3", true},
 	{"0B1000/0x8", "1", true},
-	{"-010/1", "-8", true}, // TODO(gri) should we even permit octal here?
+	{"-010/1", "-8", true},
 	{"-010.", "-10", true},
 	{"-0o10/1", "-8", true},
 	{"0x10/1", "16", true},
 	{"0x10/0x20", "1/2", true},
-	{in: "4/3x"},
-	// TODO(gri) add more tests
+	// valid with separators
+	// (smoke tests only - a comprehensive set of tests is in natconv_test.go)
+	{"0b_1000/3", "8/3", true},
+	{"0B_10_00/0x8", "1", true},
+	{"0xdead/0B1101_1110_1010_1101", "1", true},
+	{"0B1101_1110_1010_1101/0XD_E_A_D", "1", true},
 }
 func TestRatSetString(t *testing.T) {