From 36a81d5ec37904e44a9a2959c140687d558cd25f Mon Sep 17 00:00:00 2001 From: Russ Cox <rsc@golang.org> Date: Tue, 29 Jan 2019 20:59:33 -0500 Subject: [PATCH] strconv: accept underscores in ParseInt, ParseUint, ParseFloat This CL modifies ParseInt, ParseUint, and ParseFloat to accept digit-separating underscores in their arguments. For ParseInt and ParseUint, the underscores are only allowed when base == 0. See golang.org/design/19308-number-literals for background. For #28493. Change-Id: I057ca2539d89314643f591ba8144c3ea7126651c Reviewed-on: https://go-review.googlesource.com/c/160243 Run-TryBot: Russ Cox <rsc@golang.org> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Robert Griesemer <gri@golang.org> --- src/strconv/atof.go | 22 ++++++++++-- src/strconv/atof_test.go | 37 ++++++++++++++++++++ src/strconv/atoi.go | 70 ++++++++++++++++++++++++++++++++----- src/strconv/atoi_test.go | 75 ++++++++++++++++++++++++++++++++++++++++ 4 files changed, 193 insertions(+), 11 deletions(-) diff --git a/src/strconv/atof.go b/src/strconv/atof.go index 3ced3c7167..504b9613fb 100644 --- a/src/strconv/atof.go +++ b/src/strconv/atof.go @@ -83,6 +83,9 @@ func (b *decimal) set(s string) (ok bool) { sawdigits := false for ; i < len(s); i++ { switch { + case s[i] == '_': + // underscoreOK already called + continue case s[i] == '.': if sawdot { return @@ -135,7 +138,11 @@ func (b *decimal) set(s string) (ok bool) { return } e := 0 - for ; i < len(s) && '0' <= s[i] && s[i] <= '9'; i++ { + for ; i < len(s) && ('0' <= s[i] && s[i] <= '9' || s[i] == '_'); i++ { + if s[i] == '_' { + // underscoreOK already called + continue + } if e < 10000 { e = e*10 + int(s[i]) - '0' } @@ -187,6 +194,10 @@ func readFloat(s string) (mantissa uint64, exp int, neg, trunc, hex, ok bool) { dp := 0 for ; i < len(s); i++ { switch c := s[i]; true { + case c == '_': + // underscoreOK already called + continue + case c == '.': if sawdot { return @@ -258,7 +269,11 @@ func readFloat(s string) (mantissa uint64, exp int, neg, trunc, hex, ok bool) { return } e := 0 - for ; i < len(s) && '0' <= s[i] && s[i] <= '9'; i++ { + for ; i < len(s) && ('0' <= s[i] && s[i] <= '9' || s[i] == '_'); i++ { + if s[i] == '_' { + // underscoreOK already called + continue + } if e < 10000 { e = e*10 + int(s[i]) - '0' } @@ -640,6 +655,9 @@ func atof64(s string) (f float64, err error) { // away from the largest floating point number of the given size, // ParseFloat returns f = 卤Inf, err.Err = ErrRange. func ParseFloat(s string, bitSize int) (float64, error) { + if !underscoreOK(s) { + return 0, syntaxError(fnParseFloat, s) + } if bitSize == 32 { f, err := atof32(s) return float64(f), err diff --git a/src/strconv/atof_test.go b/src/strconv/atof_test.go index f67386a000..abe6c64466 100644 --- a/src/strconv/atof_test.go +++ b/src/strconv/atof_test.go @@ -47,6 +47,9 @@ var atoftests = []atofTest{ {"0x1p0", "1", nil}, {"0x1p1", "2", nil}, {"0x1p-1", "0.5", nil}, + {"0x1ep-1", "15", nil}, + {"-0x1ep-1", "-15", nil}, + {"-0x1_ep-1", "-15", nil}, {"0x1p-200", "6.223015277861142e-61", nil}, {"0x1p200", "1.6069380442589903e+60", nil}, {"0x1fFe2.p0", "131042", nil}, @@ -299,6 +302,40 @@ var atoftests = []atofTest{ // Round to even (up). {"1.00000000000000033306690738754696212708950042724609375", "1.0000000000000004", nil}, {"0x1.00000000000018p0", "1.0000000000000004", nil}, + + // Underscores. + {"1_23.50_0_0e+1_2", "1.235e+14", nil}, + {"-_123.5e+12", "0", ErrSyntax}, + {"+_123.5e+12", "0", ErrSyntax}, + {"_123.5e+12", "0", ErrSyntax}, + {"1__23.5e+12", "0", ErrSyntax}, + {"123_.5e+12", "0", ErrSyntax}, + {"123._5e+12", "0", ErrSyntax}, + {"123.5_e+12", "0", ErrSyntax}, + {"123.5__0e+12", "0", ErrSyntax}, + {"123.5e_+12", "0", ErrSyntax}, + {"123.5e+_12", "0", ErrSyntax}, + {"123.5e_-12", "0", ErrSyntax}, + {"123.5e-_12", "0", ErrSyntax}, + {"123.5e+1__2", "0", ErrSyntax}, + {"123.5e+12_", "0", ErrSyntax}, + + {"0x_1_2.3_4_5p+1_2", "74565", nil}, + {"-_0x12.345p+12", "0", ErrSyntax}, + {"+_0x12.345p+12", "0", ErrSyntax}, + {"_0x12.345p+12", "0", ErrSyntax}, + {"0x__12.345p+12", "0", ErrSyntax}, + {"0x1__2.345p+12", "0", ErrSyntax}, + {"0x12_.345p+12", "0", ErrSyntax}, + {"0x12._345p+12", "0", ErrSyntax}, + {"0x12.3__45p+12", "0", ErrSyntax}, + {"0x12.345_p+12", "0", ErrSyntax}, + {"0x12.345p_+12", "0", ErrSyntax}, + {"0x12.345p+_12", "0", ErrSyntax}, + {"0x12.345p_-12", "0", ErrSyntax}, + {"0x12.345p-_12", "0", ErrSyntax}, + {"0x12.345p+1__2", "0", ErrSyntax}, + {"0x12.345p+12_", "0", ErrSyntax}, } var atof32tests = []atofTest{ diff --git a/src/strconv/atoi.go b/src/strconv/atoi.go index 186c9b3f86..ecbc9f4bbe 100644 --- a/src/strconv/atoi.go +++ b/src/strconv/atoi.go @@ -58,10 +58,12 @@ const maxUint64 = 1<<64 - 1 func ParseUint(s string, base int, bitSize int) (uint64, error) { const fnParseUint = "ParseUint" - if len(s) == 0 { + if s == "" || !underscoreOK(s) { return 0, syntaxError(fnParseUint, s) } + base0 := base == 0 + s0 := s switch { case 2 <= base && base <= 36: @@ -70,7 +72,7 @@ func ParseUint(s string, base int, bitSize int) (uint64, error) { case base == 0: // Look for octal, hex prefix. switch { - case s[0] == '0' && len(s) > 1 && (s[1] == 'x' || s[1] == 'X'): + case s[0] == '0' && len(s) >= 3 && lower(s[1]) == 'x': if len(s) < 3 { return 0, syntaxError(fnParseUint, s0) } @@ -111,12 +113,13 @@ func ParseUint(s string, base int, bitSize int) (uint64, error) { for _, c := range []byte(s) { var d byte switch { + case c == '_' && base0: + // underscoreOK already called + continue case '0' <= c && c <= '9': d = c - '0' - case 'a' <= c && c <= 'z': - d = c - 'a' + 10 - case 'A' <= c && c <= 'Z': - d = c - 'A' + 10 + case 'a' <= lower(c) && lower(c) <= 'z': + d = lower(c) - 'a' + 10 default: return 0, syntaxError(fnParseUint, s0) } @@ -164,8 +167,7 @@ func ParseUint(s string, base int, bitSize int) (uint64, error) { func ParseInt(s string, base int, bitSize int) (i int64, err error) { const fnParseInt = "ParseInt" - // Empty string bad. - if len(s) == 0 { + if s == "" { return 0, syntaxError(fnParseInt, s) } @@ -236,10 +238,60 @@ func Atoi(s string) (int, error) { return n, nil } - // Slow path for invalid or big integers. + // Slow path for invalid, big, or underscored integers. i64, err := ParseInt(s, 10, 0) if nerr, ok := err.(*NumError); ok { nerr.Func = fnAtoi } return int(i64), err } + +// underscoreOK reports whether the underscores in s are allowed. +// Checking them in this one function lets all the parsers skip over them simply. +// Underscore must appear only between digits or between a base prefix and a digit. +func underscoreOK(s string) bool { + // saw tracks the last character (class) we saw: + // ^ for beginning of number, + // 0 for a digit or base prefix, + // _ for an underscore, + // ! for none of the above. + saw := '^' + i := 0 + + // Optional sign. + if len(s) >= 1 && (s[0] == '-' || s[0] == '+') { + s = s[1:] + } + + // Optional base prefix. + hex := false + if len(s) >= 2 && s[0] == '0' && (lower(s[1]) == 'b' || lower(s[1]) == 'o' || lower(s[1]) == 'x') { + i = 2 + saw = '0' // base prefix counts as a digit for "underscore as digit separator" + hex = lower(s[1]) == 'x' + } + + // Number proper. + for ; i < len(s); i++ { + // Digits are always okay. + if '0' <= s[i] && s[i] <= '9' || hex && 'a' <= lower(s[i]) && lower(s[i]) <= 'f' { + saw = '0' + continue + } + // Underscore must follow digit. + if s[i] == '_' { + if saw != '0' { + return false + } + saw = '_' + continue + } + // Underscore must also be followed by digit. + if saw == '_' { + return false + } + // Saw non-digit, non-underscore. + saw = '!' + } + return saw != '_' +} diff --git a/src/strconv/atoi_test.go b/src/strconv/atoi_test.go index e2f505a665..ec0542abf8 100644 --- a/src/strconv/atoi_test.go +++ b/src/strconv/atoi_test.go @@ -29,6 +29,10 @@ var parseUint64Tests = []parseUint64Test{ {"18446744073709551615", 1<<64 - 1, nil}, {"18446744073709551616", 1<<64 - 1, ErrRange}, {"18446744073709551620", 1<<64 - 1, ErrRange}, + {"1_2_3_4_5", 0, ErrSyntax}, // base=10 so no underscores allowed + {"_12345", 0, ErrSyntax}, + {"1__2345", 0, ErrSyntax}, + {"12345_", 0, ErrSyntax}, } type parseUint64BaseTest struct { @@ -61,6 +65,42 @@ var parseUint64BaseTests = []parseUint64BaseTest{ {"01777777777777777777778", 0, 0, ErrSyntax}, {"02000000000000000000000", 0, 1<<64 - 1, ErrRange}, {"0200000000000000000000", 0, 1 << 61, nil}, + + // underscores allowed with base == 0 only + {"1_2_3_4_5", 0, 12345, nil}, + {"_12345", 0, 0, ErrSyntax}, + {"1__2345", 0, 0, ErrSyntax}, + {"12345_", 0, 0, ErrSyntax}, + + {"1_2_3_4_5", 10, 0, ErrSyntax}, + {"_12345", 10, 0, ErrSyntax}, + {"1__2345", 10, 0, ErrSyntax}, + {"12345_", 10, 0, ErrSyntax}, + + {"0x_1_2_3_4_5", 0, 0x12345, nil}, + {"_0x12345", 0, 0, ErrSyntax}, + {"0x__12345", 0, 0, ErrSyntax}, + {"0x1__2345", 0, 0, ErrSyntax}, + {"0x1234__5", 0, 0, ErrSyntax}, + {"0x12345_", 0, 0, ErrSyntax}, + + {"1_2_3_4_5", 16, 0, ErrSyntax}, + {"_12345", 16, 0, ErrSyntax}, + {"1__2345", 16, 0, ErrSyntax}, + {"1234__5", 16, 0, ErrSyntax}, + {"12345_", 16, 0, ErrSyntax}, + + {"0_1_2_3_4_5", 0, 012345, nil}, + {"_012345", 0, 0, ErrSyntax}, + {"0__12345", 0, 0, ErrSyntax}, + {"01234__5", 0, 0, ErrSyntax}, + {"012345_", 0, 0, ErrSyntax}, + + {"0_1_2_3_4_5", 8, 0, ErrSyntax}, + {"_012345", 8, 0, ErrSyntax}, + {"0__12345", 8, 0, ErrSyntax}, + {"01234__5", 8, 0, ErrSyntax}, + {"012345_", 8, 0, ErrSyntax}, } type parseInt64Test struct { @@ -87,6 +127,11 @@ var parseInt64Tests = []parseInt64Test{ {"-9223372036854775808", -1 << 63, nil}, {"9223372036854775809", 1<<63 - 1, ErrRange}, {"-9223372036854775809", -1 << 63, ErrRange}, + {"-1_2_3_4_5", 0, ErrSyntax}, // base=10 so no underscores allowed + {"-_12345", 0, ErrSyntax}, + {"_12345", 0, ErrSyntax}, + {"1__2345", 0, ErrSyntax}, + {"12345_", 0, ErrSyntax}, } type parseInt64BaseTest struct { @@ -144,6 +189,26 @@ var parseInt64BaseTests = []parseInt64BaseTest{ {"10", 16, 16, nil}, {"-123456789abcdef", 16, -0x123456789abcdef, nil}, {"7fffffffffffffff", 16, 1<<63 - 1, nil}, + + // underscores + {"-0x_1_2_3_4_5", 0, -0x12345, nil}, + {"0x_1_2_3_4_5", 0, 0x12345, nil}, + {"-_0x12345", 0, 0, ErrSyntax}, + {"_-0x12345", 0, 0, ErrSyntax}, + {"_0x12345", 0, 0, ErrSyntax}, + {"0x__12345", 0, 0, ErrSyntax}, + {"0x1__2345", 0, 0, ErrSyntax}, + {"0x1234__5", 0, 0, ErrSyntax}, + {"0x12345_", 0, 0, ErrSyntax}, + + {"-0_1_2_3_4_5", 0, -012345, nil}, // octal + {"0_1_2_3_4_5", 0, 012345, nil}, // octal + {"-_012345", 0, 0, ErrSyntax}, + {"_-012345", 0, 0, ErrSyntax}, + {"_012345", 0, 0, ErrSyntax}, + {"0__12345", 0, 0, ErrSyntax}, + {"01234__5", 0, 0, ErrSyntax}, + {"012345_", 0, 0, ErrSyntax}, } type parseUint32Test struct { @@ -162,6 +227,11 @@ var parseUint32Tests = []parseUint32Test{ {"987654321", 987654321, nil}, {"4294967295", 1<<32 - 1, nil}, {"4294967296", 1<<32 - 1, ErrRange}, + {"1_2_3_4_5", 0, ErrSyntax}, // base=10 so no underscores allowed + {"_12345", 0, ErrSyntax}, + {"_12345", 0, ErrSyntax}, + {"1__2345", 0, ErrSyntax}, + {"12345_", 0, ErrSyntax}, } type parseInt32Test struct { @@ -190,6 +260,11 @@ var parseInt32Tests = []parseInt32Test{ {"-2147483648", -1 << 31, nil}, {"2147483649", 1<<31 - 1, ErrRange}, {"-2147483649", -1 << 31, ErrRange}, + {"-1_2_3_4_5", 0, ErrSyntax}, // base=10 so no underscores allowed + {"-_12345", 0, ErrSyntax}, + {"_12345", 0, ErrSyntax}, + {"1__2345", 0, ErrSyntax}, + {"12345_", 0, ErrSyntax}, } type numErrorTest struct { -- 2.30.9