Commit f601d412 authored by Russ Cox

fmt: scan new number syntax

This CL updates fmt's scanner to accept the new number syntaxes:

 - Hexadecimal floating-point values.
 - Digit-separating underscores.
 - Leading 0b and 0o prefixes.

See golang.org/design/19308-number-literals for background.

For #12711.
For #19308.
For #28493.
For #29008.

Change-Id: I5582af5c94059c781e6cf4e862441d3df3006adf
Reviewed-on: https://go-review.googlesource.com/c/160247
Reviewed-by: Robert Griesemer <gri@golang.org>
parent ac51237a
...@@ -286,10 +286,10 @@ ...@@ -286,10 +286,10 @@
For example, %x will scan an integer as a hexadecimal number, For example, %x will scan an integer as a hexadecimal number,
and %v will scan the default representation format for the value. and %v will scan the default representation format for the value.
The Printf verbs %p and %T and the flags # and + are not implemented. The Printf verbs %p and %T and the flags # and + are not implemented.
The verbs %e %E %f %F %g and %G are all equivalent and scan any For floating-point and complex values, all valid formatting verbs
floating-point or complex value. For float and complex literals in (%b %e %E %f %F %g %G %x %X and %v) are equivalent and accept
scientific notation, both the decimal (e) and binary (p) exponent both decimal and hexadecimal notation (for example: "2.3e+7", "0x4.5p-8")
formats are supported (for example: "2.3e+7" and "4.5p-8"). and digit-separating underscores (for example: "3.14159_26535_89793").
Input processed by verbs is implicitly space-delimited: the Input processed by verbs is implicitly space-delimited: the
implementation of every verb except %c starts by discarding implementation of every verb except %c starts by discarding
...@@ -297,9 +297,10 @@ ...@@ -297,9 +297,10 @@
(and %v reading into a string) stops consuming input at the first (and %v reading into a string) stops consuming input at the first
space or newline character. space or newline character.
The familiar base-setting prefixes 0 (octal) and 0x The familiar base-setting prefixes 0b (binary), 0o and 0 (octal),
(hexadecimal) are accepted when scanning integers without and 0x (hexadecimal) are accepted when scanning integers
a format or with the %v verb. without a format or with the %v verb, as are digit-separating
underscores.
Width is interpreted in the input text but there is no Width is interpreted in the input text but there is no
syntax for scanning with a precision (no %5.2f, just %5f). syntax for scanning with a precision (no %5.2f, just %5f).
......
...@@ -562,7 +562,7 @@ const ( ...@@ -562,7 +562,7 @@ const (
hexadecimalDigits = "0123456789aAbBcCdDeEfF" hexadecimalDigits = "0123456789aAbBcCdDeEfF"
sign = "+-" sign = "+-"
period = "." period = "."
exponent = "eEp" exponent = "eEpP"
) )
// getBase returns the numeric base represented by the verb and its digit string. // getBase returns the numeric base represented by the verb and its digit string.
...@@ -609,20 +609,26 @@ func (s *ss) scanRune(bitSize int) int64 { ...@@ -609,20 +609,26 @@ func (s *ss) scanRune(bitSize int) int64 {
return r return r
} }
// scanBasePrefix reports whether the integer begins with a 0 or 0x, // scanBasePrefix reports whether the integer begins with a base prefix
// and returns the base, digit string, and whether a zero was found. // and returns the base, digit string, and whether a zero was found.
// It is called only if the verb is %v. // It is called only if the verb is %v.
func (s *ss) scanBasePrefix() (base int, digits string, found bool) { func (s *ss) scanBasePrefix() (base int, digits string, found bool) {
if !s.peek("0") { if !s.peek("0") {
return 10, decimalDigits, false return 0, decimalDigits + "_", false
} }
s.accept("0") s.accept("0")
found = true // We've put a digit into the token buffer. found = true // We've put a digit into the token buffer.
// Special cases for '0' && '0x' // Special cases for 0, 0b, 0o, 0x.
base, digits = 8, octalDigits base, digits = 0, octalDigits+"_"
if s.peek("xX") { if s.peek("bB") {
s.consume("xX", false) s.consume("bB", true)
base, digits = 16, hexadecimalDigits base, digits = 0, binaryDigits+"_"
} else if s.peek("oO") {
s.consume("oO", true)
base, digits = 0, octalDigits+"_"
} else if s.peek("xX") {
s.consume("xX", true)
base, digits = 0, hexadecimalDigits+"_"
} }
return return
} }
...@@ -705,21 +711,27 @@ func (s *ss) floatToken() string { ...@@ -705,21 +711,27 @@ func (s *ss) floatToken() string {
if s.accept("iI") && s.accept("nN") && s.accept("fF") { if s.accept("iI") && s.accept("nN") && s.accept("fF") {
return string(s.buf) return string(s.buf)
} }
digits := decimalDigits + "_"
exp := exponent
if s.accept("0") && s.accept("xX") {
digits = hexadecimalDigits + "_"
exp = "pP"
}
// digits? // digits?
for s.accept(decimalDigits) { for s.accept(digits) {
} }
// decimal point? // decimal point?
if s.accept(period) { if s.accept(period) {
// fraction? // fraction?
for s.accept(decimalDigits) { for s.accept(digits) {
} }
} }
// exponent? // exponent?
if s.accept(exponent) { if s.accept(exp) {
// leading sign? // leading sign?
s.accept(sign) s.accept(sign)
// digits? // digits?
for s.accept(decimalDigits) { for s.accept(decimalDigits + "_") {
} }
} }
return string(s.buf) return string(s.buf)
...@@ -749,9 +761,21 @@ func (s *ss) complexTokens() (real, imag string) { ...@@ -749,9 +761,21 @@ func (s *ss) complexTokens() (real, imag string) {
return real, imagSign + imag return real, imagSign + imag
} }
// hasX reports whether s contains the byte 'x' or 'X' anywhere.
// convertFloat uses it to tell hexadecimal input such as "0x1.fp+2"
// apart from the decimal-mantissa, binary-exponent form such as "1.2p4".
func hasX(s string) bool {
	for _, c := range []byte(s) {
		switch c {
		case 'x', 'X':
			return true
		}
	}
	return false
}
// convertFloat converts the string to a float64 value. // convertFloat converts the string to a float64 value.
func (s *ss) convertFloat(str string, n int) float64 { func (s *ss) convertFloat(str string, n int) float64 {
if p := indexRune(str, 'p'); p >= 0 { // strconv.ParseFloat will handle "+0x1.fp+2",
// but we have to implement our non-standard
// decimal+binary exponent mix (1.2p4) ourselves.
if p := indexRune(str, 'p'); p >= 0 && !hasX(str) {
// Atof doesn't handle power-of-2 exponents, // Atof doesn't handle power-of-2 exponents,
// but they're easy to evaluate. // but they're easy to evaluate.
f, err := strconv.ParseFloat(str[:p], n) f, err := strconv.ParseFloat(str[:p], n)
......
...@@ -124,12 +124,18 @@ var scanTests = []ScanTest{ ...@@ -124,12 +124,18 @@ var scanTests = []ScanTest{
{"T\n", &boolVal, true}, // boolean test vals toggle to be sure they are written {"T\n", &boolVal, true}, // boolean test vals toggle to be sure they are written
{"F\n", &boolVal, false}, // restored to zero value {"F\n", &boolVal, false}, // restored to zero value
{"21\n", &intVal, 21}, {"21\n", &intVal, 21},
{"2_1\n", &intVal, 21},
{"0\n", &intVal, 0}, {"0\n", &intVal, 0},
{"000\n", &intVal, 0}, {"000\n", &intVal, 0},
{"0x10\n", &intVal, 0x10}, {"0x10\n", &intVal, 0x10},
{"0x_1_0\n", &intVal, 0x10},
{"-0x10\n", &intVal, -0x10}, {"-0x10\n", &intVal, -0x10},
{"0377\n", &intVal, 0377}, {"0377\n", &intVal, 0377},
{"0_3_7_7\n", &intVal, 0377},
{"0o377\n", &intVal, 0377},
{"0o_3_7_7\n", &intVal, 0377},
{"-0377\n", &intVal, -0377}, {"-0377\n", &intVal, -0377},
{"-0o377\n", &intVal, -0377},
{"0\n", &uintVal, uint(0)}, {"0\n", &uintVal, uint(0)},
{"000\n", &uintVal, uint(0)}, {"000\n", &uintVal, uint(0)},
{"0x10\n", &uintVal, uint(0x10)}, {"0x10\n", &uintVal, uint(0x10)},
...@@ -163,13 +169,20 @@ var scanTests = []ScanTest{ ...@@ -163,13 +169,20 @@ var scanTests = []ScanTest{
{"2.3e2\n", &float64Val, 2.3e2}, {"2.3e2\n", &float64Val, 2.3e2},
{"2.3p2\n", &float64Val, 2.3 * 4}, {"2.3p2\n", &float64Val, 2.3 * 4},
{"2.3p+2\n", &float64Val, 2.3 * 4}, {"2.3p+2\n", &float64Val, 2.3 * 4},
{"2.3p+66\n", &float64Val, 2.3 * (1 << 32) * (1 << 32) * 4}, {"2.3p+66\n", &float64Val, 2.3 * (1 << 66)},
{"2.3p-66\n", &float64Val, 2.3 / ((1 << 32) * (1 << 32) * 4)}, {"2.3p-66\n", &float64Val, 2.3 / (1 << 66)},
{"0x2.3p-66\n", &float64Val, float64(0x23) / (1 << 70)},
{"2_3.4_5\n", &float64Val, 23.45},
{"2.35\n", &stringVal, "2.35"}, {"2.35\n", &stringVal, "2.35"},
{"2345678\n", &bytesVal, []byte("2345678")}, {"2345678\n", &bytesVal, []byte("2345678")},
{"(3.4e1-2i)\n", &complex128Val, 3.4e1 - 2i}, {"(3.4e1-2i)\n", &complex128Val, 3.4e1 - 2i},
{"-3.45e1-3i\n", &complex64Val, complex64(-3.45e1 - 3i)}, {"-3.45e1-3i\n", &complex64Val, complex64(-3.45e1 - 3i)},
{"-.45e1-1e2i\n", &complex128Val, complex128(-.45e1 - 100i)}, {"-.45e1-1e2i\n", &complex128Val, complex128(-.45e1 - 100i)},
{"-.4_5e1-1E2i\n", &complex128Val, complex128(-.45e1 - 100i)},
{"0x1.0p1+0x1.0P2i\n", &complex128Val, complex128(2 + 4i)},
{"-0x1p1-0x1p2i\n", &complex128Val, complex128(-2 - 4i)},
{"-0x1ep-1-0x1p2i\n", &complex128Val, complex128(-15 - 4i)},
{"-0x1_Ep-1-0x1p0_2i\n", &complex128Val, complex128(-15 - 4i)},
{"hello\n", &stringVal, "hello"}, {"hello\n", &stringVal, "hello"},
// Carriage-return followed by newline. (We treat \r\n as \n always.) // Carriage-return followed by newline. (We treat \r\n as \n always.)
...@@ -207,8 +220,15 @@ var scanfTests = []ScanfTest{ ...@@ -207,8 +220,15 @@ var scanfTests = []ScanfTest{
{"%v", "TRUE\n", &boolVal, true}, {"%v", "TRUE\n", &boolVal, true},
{"%t", "false\n", &boolVal, false}, {"%t", "false\n", &boolVal, false},
{"%v", "-71\n", &intVal, -71}, {"%v", "-71\n", &intVal, -71},
{"%v", "-7_1\n", &intVal, -71},
{"%v", "0b111\n", &intVal, 7},
{"%v", "0b_1_1_1\n", &intVal, 7},
{"%v", "0377\n", &intVal, 0377}, {"%v", "0377\n", &intVal, 0377},
{"%v", "0_3_7_7\n", &intVal, 0377},
{"%v", "0o377\n", &intVal, 0377},
{"%v", "0o_3_7_7\n", &intVal, 0377},
{"%v", "0x44\n", &intVal, 0x44}, {"%v", "0x44\n", &intVal, 0x44},
{"%v", "0x_4_4\n", &intVal, 0x44},
{"%d", "72\n", &intVal, 72}, {"%d", "72\n", &intVal, 72},
{"%c", "a\n", &runeVal, 'a'}, {"%c", "a\n", &runeVal, 'a'},
{"%c", "\u5072\n", &runeVal, '\u5072'}, {"%c", "\u5072\n", &runeVal, '\u5072'},
...@@ -222,17 +242,31 @@ var scanfTests = []ScanfTest{ ...@@ -222,17 +242,31 @@ var scanfTests = []ScanfTest{
{"%x", "a75\n", &intVal, 0xa75}, {"%x", "a75\n", &intVal, 0xa75},
{"%v", "71\n", &uintVal, uint(71)}, {"%v", "71\n", &uintVal, uint(71)},
{"%d", "72\n", &uintVal, uint(72)}, {"%d", "72\n", &uintVal, uint(72)},
{"%d", "7_2\n", &uintVal, uint(7)}, // only %v takes underscores
{"%d", "73\n", &uint8Val, uint8(73)}, {"%d", "73\n", &uint8Val, uint8(73)},
{"%d", "74\n", &uint16Val, uint16(74)}, {"%d", "74\n", &uint16Val, uint16(74)},
{"%d", "75\n", &uint32Val, uint32(75)}, {"%d", "75\n", &uint32Val, uint32(75)},
{"%d", "76\n", &uint64Val, uint64(76)}, {"%d", "76\n", &uint64Val, uint64(76)},
{"%b", "1001001\n", &uintVal, uint(73)}, {"%b", "1001001\n", &uintVal, uint(73)},
{"%b", "100_1001\n", &uintVal, uint(4)},
{"%o", "075\n", &uintVal, uint(075)}, {"%o", "075\n", &uintVal, uint(075)},
{"%o", "07_5\n", &uintVal, uint(07)}, // only %v takes underscores
{"%x", "a75\n", &uintVal, uint(0xa75)}, {"%x", "a75\n", &uintVal, uint(0xa75)},
{"%x", "A75\n", &uintVal, uint(0xa75)}, {"%x", "A75\n", &uintVal, uint(0xa75)},
{"%x", "A7_5\n", &uintVal, uint(0xa7)}, // only %v takes underscores
{"%U", "U+1234\n", &intVal, int(0x1234)}, {"%U", "U+1234\n", &intVal, int(0x1234)},
{"%U", "U+4567\n", &uintVal, uint(0x4567)}, {"%U", "U+4567\n", &uintVal, uint(0x4567)},
{"%e", "2.3\n", &float64Val, 2.3},
{"%E", "2.3e1\n", &float32Val, float32(2.3e1)},
{"%f", "2.3e2\n", &float64Val, 2.3e2},
{"%g", "2.3p2\n", &float64Val, 2.3 * 4},
{"%G", "2.3p+2\n", &float64Val, 2.3 * 4},
{"%v", "2.3p+66\n", &float64Val, 2.3 * (1 << 66)},
{"%f", "2.3p-66\n", &float64Val, 2.3 / (1 << 66)},
{"%G", "0x2.3p-66\n", &float64Val, float64(0x23) / (1 << 70)},
{"%E", "2_3.4_5\n", &float64Val, 23.45},
// Strings // Strings
{"%s", "using-%s\n", &stringVal, "using-%s"}, {"%s", "using-%s\n", &stringVal, "using-%s"},
{"%x", "7573696e672d2578\n", &stringVal, "using-%x"}, {"%x", "7573696e672d2578\n", &stringVal, "using-%x"},
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment