Commit 34fb5855 authored by Robert Griesemer

text/scanner: don't liberally consume (invalid) floats or underbars

This is a follow-up on https://golang.org/cl/161199 which introduced
the new Go 2 number literals to text/scanner.

That change introduced a bug by allowing decimal and hexadecimal floats
to be consumed even if the scanner was not configured to accept floats.

This CL changes the code to not consume a radix dot '.' or exponent
unless the scanner is configured to accept floats.

This CL also introduces a new mode "AllowNumberbars" which controls
whether underbars '_' are permitted as digit separators in numbers
or not.

There is a possibility that we may need to refine text/scanner
further (e.g., the Float mode now includes hexadecimal floats
which it didn't recognize before). We're very early in the cycle,
so let's see how it goes.

RELNOTE=yes

Updates #12711.
Updates #19308.
Updates #28493.
Updates #29008.

Fixes #30320.

Change-Id: I6481d314f0384e09ef6803ffad38dc529b1e89a3
Reviewed-on: https://go-review.googlesource.com/c/163079
Reviewed-by: Ian Lance Taylor <iant@golang.org>
parent 153c0da8
...@@ -457,3 +457,4 @@ pkg syscall (freebsd-arm-cgo), type Stat_t struct, Nlink uint16 ...@@ -457,3 +457,4 @@ pkg syscall (freebsd-arm-cgo), type Stat_t struct, Nlink uint16
pkg syscall (freebsd-arm-cgo), type Stat_t struct, Rdev uint32 pkg syscall (freebsd-arm-cgo), type Stat_t struct, Rdev uint32
pkg syscall (freebsd-arm-cgo), type Statfs_t struct, Mntfromname [88]int8 pkg syscall (freebsd-arm-cgo), type Statfs_t struct, Mntfromname [88]int8
pkg syscall (freebsd-arm-cgo), type Statfs_t struct, Mntonname [88]int8 pkg syscall (freebsd-arm-cgo), type Statfs_t struct, Mntonname [88]int8
pkg text/scanner, const GoTokens = 1012
\ No newline at end of file
pkg text/scanner, const AllowNumberbars = 1024
pkg text/scanner, const AllowNumberbars ideal-int
pkg text/scanner, const GoTokens = 2036
...@@ -59,15 +59,16 @@ func (pos Position) String() string { ...@@ -59,15 +59,16 @@ func (pos Position) String() string {
// "foo" is scanned as the token sequence '"' Ident '"'. // "foo" is scanned as the token sequence '"' Ident '"'.
// //
const ( const (
ScanIdents = 1 << -Ident ScanIdents = 1 << -Ident
ScanInts = 1 << -Int ScanInts = 1 << -Int
ScanFloats = 1 << -Float // includes Ints ScanFloats = 1 << -Float // includes Ints and hexadecimal floats
ScanChars = 1 << -Char ScanChars = 1 << -Char
ScanStrings = 1 << -String ScanStrings = 1 << -String
ScanRawStrings = 1 << -RawString ScanRawStrings = 1 << -RawString
ScanComments = 1 << -Comment ScanComments = 1 << -Comment
SkipComments = 1 << -skipComment // if set with ScanComments, comments become white space SkipComments = 1 << -skipComment // if set with ScanComments, comments become white space
GoTokens = ScanIdents | ScanFloats | ScanChars | ScanStrings | ScanRawStrings | ScanComments | SkipComments AllowNumberbars = 1 << -allowNumberbars // if set, number literals may contain underbars as digit separators
GoTokens = ScanIdents | ScanFloats | ScanChars | ScanStrings | ScanRawStrings | ScanComments | SkipComments | AllowNumberbars
) )
// The result of Scan is one of these tokens or a Unicode character. // The result of Scan is one of these tokens or a Unicode character.
...@@ -80,7 +81,10 @@ const ( ...@@ -80,7 +81,10 @@ const (
String String
RawString RawString
Comment Comment
// internal use only
skipComment skipComment
allowNumberbars
) )
var tokenString = map[rune]string{ var tokenString = map[rune]string{
...@@ -359,7 +363,8 @@ func lower(ch rune) rune { return ('a' - 'A') | ch } // returns lower-case c ...@@ -359,7 +363,8 @@ func lower(ch rune) rune { return ('a' - 'A') | ch } // returns lower-case c
func isDecimal(ch rune) bool { return '0' <= ch && ch <= '9' } func isDecimal(ch rune) bool { return '0' <= ch && ch <= '9' }
func isHex(ch rune) bool { return '0' <= ch && ch <= '9' || 'a' <= lower(ch) && lower(ch) <= 'f' } func isHex(ch rune) bool { return '0' <= ch && ch <= '9' || 'a' <= lower(ch) && lower(ch) <= 'f' }
// digits accepts the sequence { digit | '_' } starting with ch0. // digits accepts the sequence { digit } (if AllowNumberbars is not set)
// or { digit | '_' } (if AllowNumberbars is set), starting with ch0.
// If base <= 10, digits accepts any decimal digit but records // If base <= 10, digits accepts any decimal digit but records
// the first invalid digit >= base in *invalid if *invalid == 0. // the first invalid digit >= base in *invalid if *invalid == 0.
// digits returns the first rune that is not part of the sequence // digits returns the first rune that is not part of the sequence
...@@ -369,7 +374,7 @@ func (s *Scanner) digits(ch0 rune, base int, invalid *rune) (ch rune, digsep int ...@@ -369,7 +374,7 @@ func (s *Scanner) digits(ch0 rune, base int, invalid *rune) (ch rune, digsep int
ch = ch0 ch = ch0
if base <= 10 { if base <= 10 {
max := rune('0' + base) max := rune('0' + base)
for isDecimal(ch) || ch == '_' { for isDecimal(ch) || ch == '_' && s.Mode&AllowNumberbars != 0 {
ds := 1 ds := 1
if ch == '_' { if ch == '_' {
ds = 2 ds = 2
...@@ -380,7 +385,7 @@ func (s *Scanner) digits(ch0 rune, base int, invalid *rune) (ch rune, digsep int ...@@ -380,7 +385,7 @@ func (s *Scanner) digits(ch0 rune, base int, invalid *rune) (ch rune, digsep int
ch = s.next() ch = s.next()
} }
} else { } else {
for isHex(ch) || ch == '_' { for isHex(ch) || ch == '_' && s.Mode&AllowNumberbars != 0 {
ds := 1 ds := 1
if ch == '_' { if ch == '_' {
ds = 2 ds = 2
...@@ -392,7 +397,7 @@ func (s *Scanner) digits(ch0 rune, base int, invalid *rune) (ch rune, digsep int ...@@ -392,7 +397,7 @@ func (s *Scanner) digits(ch0 rune, base int, invalid *rune) (ch rune, digsep int
return return
} }
func (s *Scanner) scanNumber(ch rune, integerPart bool) (rune, rune) { func (s *Scanner) scanNumber(ch rune, seenDot bool) (rune, rune) {
base := 10 // number base base := 10 // number base
prefix := rune(0) // one of 0 (decimal), '0' (0-octal), 'x', 'o', or 'b' prefix := rune(0) // one of 0 (decimal), '0' (0-octal), 'x', 'o', or 'b'
digsep := 0 // bit 0: digit present, bit 1: '_' present digsep := 0 // bit 0: digit present, bit 1: '_' present
...@@ -401,7 +406,7 @@ func (s *Scanner) scanNumber(ch rune, integerPart bool) (rune, rune) { ...@@ -401,7 +406,7 @@ func (s *Scanner) scanNumber(ch rune, integerPart bool) (rune, rune) {
// integer part // integer part
var tok rune var tok rune
var ds int var ds int
if integerPart { if !seenDot {
tok = Int tok = Int
if ch == '0' { if ch == '0' {
ch = s.next() ch = s.next()
...@@ -422,17 +427,18 @@ func (s *Scanner) scanNumber(ch rune, integerPart bool) (rune, rune) { ...@@ -422,17 +427,18 @@ func (s *Scanner) scanNumber(ch rune, integerPart bool) (rune, rune) {
} }
ch, ds = s.digits(ch, base, &invalid) ch, ds = s.digits(ch, base, &invalid)
digsep |= ds digsep |= ds
if ch == '.' && s.Mode&ScanFloats != 0 {
ch = s.next()
seenDot = true
}
} }
// fractional part // fractional part
if !integerPart || ch == '.' { if seenDot {
tok = Float tok = Float
if prefix == 'o' || prefix == 'b' { if prefix == 'o' || prefix == 'b' {
s.error("invalid radix point in " + litname(prefix)) s.error("invalid radix point in " + litname(prefix))
} }
if ch == '.' {
ch = s.next()
}
ch, ds = s.digits(ch, base, &invalid) ch, ds = s.digits(ch, base, &invalid)
digsep |= ds digsep |= ds
} }
...@@ -442,7 +448,7 @@ func (s *Scanner) scanNumber(ch rune, integerPart bool) (rune, rune) { ...@@ -442,7 +448,7 @@ func (s *Scanner) scanNumber(ch rune, integerPart bool) (rune, rune) {
} }
// exponent // exponent
if e := lower(ch); e == 'e' || e == 'p' { if e := lower(ch); (e == 'e' || e == 'p') && s.Mode&ScanFloats != 0 {
switch { switch {
case e == 'e' && prefix != 0 && prefix != '0': case e == 'e' && prefix != 0 && prefix != '0':
s.errorf("%q exponent requires decimal mantissa", ch) s.errorf("%q exponent requires decimal mantissa", ch)
...@@ -682,7 +688,7 @@ redo: ...@@ -682,7 +688,7 @@ redo:
} }
case isDecimal(ch): case isDecimal(ch):
if s.Mode&(ScanInts|ScanFloats) != 0 { if s.Mode&(ScanInts|ScanFloats) != 0 {
tok, ch = s.scanNumber(ch, true) tok, ch = s.scanNumber(ch, false)
} else { } else {
ch = s.next() ch = s.next()
} }
...@@ -705,7 +711,7 @@ redo: ...@@ -705,7 +711,7 @@ redo:
case '.': case '.':
ch = s.next() ch = s.next()
if isDecimal(ch) && s.Mode&ScanFloats != 0 { if isDecimal(ch) && s.Mode&ScanFloats != 0 {
tok, ch = s.scanNumber(ch, false) tok, ch = s.scanNumber(ch, true)
} }
case '/': case '/':
ch = s.next() ch = s.next()
......
...@@ -877,3 +877,40 @@ func TestNumbers(t *testing.T) { ...@@ -877,3 +877,40 @@ func TestNumbers(t *testing.T) {
} }
} }
} }
// TestIssue30320 checks which number tokens the scanner produces under
// different modes. With only ScanInts set, a '.' or an exponent must not be
// consumed as part of a number, and '_' is only accepted inside a number
// when AllowNumberbars is set as well. With ScanFloats set, decimal floats
// with fractions and exponents are scanned as single tokens.
func TestIssue30320(t *testing.T) {
	tests := []struct {
		in, want string
		mode     uint
	}{
		{"foo01.bar31.xx-0-1-1-0", "01 31 0 1 1 0", ScanInts},
		{"foo0/12/0/5.67", "0 12 0 5 67", ScanInts},
		{"xxx1e0yyy", "1 0", ScanInts},
		// Without AllowNumberbars, '_' terminates the number.
		{"1_2", "1 2", ScanInts},
		{"1_2", "1_2", ScanInts | AllowNumberbars},
		{"xxx1.0yyy2e3ee", "1 0 2 3", ScanInts},
		{"xxx1.0yyy2e3ee", "1.0 2e3", ScanFloats},
	}

	for _, tc := range tests {
		if got := extractInts(tc.in, tc.mode); got != tc.want {
			t.Errorf("%q: got %q; want %q", tc.in, got, tc.want)
		}
	}
}
// extractInts scans t with a Scanner whose Mode is set to mode and returns
// the text of every Int and Float token, in the order encountered, separated
// by single spaces. Tokens of any other kind are skipped; scanning stops at
// EOF.
func extractInts(t string, mode uint) (res string) {
	var sc Scanner
	sc.Init(strings.NewReader(t))
	sc.Mode = mode

	var out strings.Builder
	for tok := sc.Scan(); tok != EOF; tok = sc.Scan() {
		if tok != Int && tok != Float {
			continue
		}
		// Separate consecutive number tokens with one space.
		if out.Len() > 0 {
			out.WriteByte(' ')
		}
		out.WriteString(sc.TokenText())
	}
	return out.String()
}
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment