Commit 710417bc authored by Robert Griesemer

text/scanner: accept new Go2 number literals

This CL introduces text/scanner support for the new binary and octal integer
literals, hexadecimal floats, and digit separators for all number literals.
The new code closely mirrors the respective code for number literals in
cmd/compile/internal/syntax/scanner.go.

Uniformly use the term "invalid" rather than "illegal" in error messages
to match the respective error messages in the other scanners directly.

R=Go1.13

Updates #12711.
Updates #19308.
Updates #28493.
Updates #29008.

Change-Id: I2f291de13ba5afc0e530cd8326e6bf4c3858ebac
Reviewed-on: https://go-review.googlesource.com/c/161199
Run-TryBot: Robert Griesemer <gri@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
parent 33ac8544
...@@ -266,7 +266,7 @@ func (s *Scanner) next() rune { ...@@ -266,7 +266,7 @@ func (s *Scanner) next() rune {
s.srcPos += width s.srcPos += width
s.lastCharLen = width s.lastCharLen = width
s.column++ s.column++
s.error("illegal UTF-8 encoding") s.error("invalid UTF-8 encoding")
return ch return ch
} }
} }
...@@ -281,7 +281,7 @@ func (s *Scanner) next() rune { ...@@ -281,7 +281,7 @@ func (s *Scanner) next() rune {
switch ch { switch ch {
case 0: case 0:
// for compatibility with other tools // for compatibility with other tools
s.error("illegal character NUL") s.error("invalid character NUL")
case '\n': case '\n':
s.line++ s.line++
s.lastLineLen = s.column s.lastLineLen = s.column
...@@ -335,6 +335,10 @@ func (s *Scanner) error(msg string) { ...@@ -335,6 +335,10 @@ func (s *Scanner) error(msg string) {
fmt.Fprintf(os.Stderr, "%s: %s\n", pos, msg) fmt.Fprintf(os.Stderr, "%s: %s\n", pos, msg)
} }
func (s *Scanner) errorf(format string, args ...interface{}) {
s.error(fmt.Sprintf(format, args...))
}
func (s *Scanner) isIdentRune(ch rune, i int) bool { func (s *Scanner) isIdentRune(ch rune, i int) bool {
if s.IsIdentRune != nil { if s.IsIdentRune != nil {
return s.IsIdentRune(ch, i) return s.IsIdentRune(ch, i)
...@@ -351,95 +355,189 @@ func (s *Scanner) scanIdentifier() rune { ...@@ -351,95 +355,189 @@ func (s *Scanner) scanIdentifier() rune {
return ch return ch
} }
func digitVal(ch rune) int { func lower(ch rune) rune { return ('a' - 'A') | ch } // returns lower-case ch iff ch is ASCII letter
switch { func isDecimal(ch rune) bool { return '0' <= ch && ch <= '9' }
case '0' <= ch && ch <= '9': func isHex(ch rune) bool { return '0' <= ch && ch <= '9' || 'a' <= lower(ch) && lower(ch) <= 'f' }
return int(ch - '0')
case 'a' <= ch && ch <= 'f': // digits accepts the sequence { digit | '_' } starting with ch0.
return int(ch - 'a' + 10) // If base <= 10, digits accepts any decimal digit but records
case 'A' <= ch && ch <= 'F': // the first invalid digit >= base in *invalid if *invalid == 0.
return int(ch - 'A' + 10) // digits returns the first rune that is not part of the sequence
// anymore, and a bitset describing whether the sequence contained
// digits (bit 0 is set), or separators '_' (bit 1 is set).
func (s *Scanner) digits(ch0 rune, base int, invalid *rune) (ch rune, digsep int) {
ch = ch0
if base <= 10 {
max := rune('0' + base)
for isDecimal(ch) || ch == '_' {
ds := 1
if ch == '_' {
ds = 2
} else if ch >= max && *invalid == 0 {
*invalid = ch
}
digsep |= ds
ch = s.next()
} }
return 16 // larger than any legal digit val } else {
for isHex(ch) || ch == '_' {
ds := 1
if ch == '_' {
ds = 2
}
digsep |= ds
ch = s.next()
}
}
return
} }
func isDecimal(ch rune) bool { return '0' <= ch && ch <= '9' } func (s *Scanner) scanNumber(ch rune, integerPart bool) (rune, rune) {
base := 10 // number base
func (s *Scanner) scanMantissa(ch rune) rune { prefix := rune(0) // one of 0 (decimal), '0' (0-octal), 'x', 'o', or 'b'
for isDecimal(ch) { digsep := 0 // bit 0: digit present, bit 1: '_' present
invalid := rune(0) // invalid digit in literal, or 0
// integer part
var tok rune
var ds int
if integerPart {
tok = Int
if ch == '0' {
ch = s.next() ch = s.next()
switch lower(ch) {
case 'x':
ch = s.next()
base, prefix = 16, 'x'
case 'o':
ch = s.next()
base, prefix = 8, 'o'
case 'b':
ch = s.next()
base, prefix = 2, 'b'
default:
base, prefix = 8, '0'
digsep = 1 // leading 0
}
}
ch, ds = s.digits(ch, base, &invalid)
digsep |= ds
} }
return ch
}
func (s *Scanner) scanFraction(ch rune) rune { // fractional part
if !integerPart || ch == '.' {
tok = Float
if prefix == 'o' || prefix == 'b' {
s.error("invalid radix point in " + litname(prefix))
}
if ch == '.' { if ch == '.' {
ch = s.scanMantissa(s.next()) ch = s.next()
}
ch, ds = s.digits(ch, base, &invalid)
digsep |= ds
}
if digsep&1 == 0 {
s.error(litname(prefix) + " has no digits")
} }
return ch
}
func (s *Scanner) scanExponent(ch rune) rune { // exponent
if ch == 'e' || ch == 'E' { if e := lower(ch); e == 'e' || e == 'p' {
switch {
case e == 'e' && prefix != 0 && prefix != '0':
s.errorf("%q exponent requires decimal mantissa", ch)
case e == 'p' && prefix != 'x':
s.errorf("%q exponent requires hexadecimal mantissa", ch)
}
ch = s.next() ch = s.next()
if ch == '-' || ch == '+' { tok = Float
if ch == '+' || ch == '-' {
ch = s.next() ch = s.next()
} }
if !isDecimal(ch) { ch, ds = s.digits(ch, 10, nil)
s.error("illegal exponent") digsep |= ds
if ds&1 == 0 {
s.error("exponent has no digits")
} }
ch = s.scanMantissa(ch) } else if prefix == 'x' && tok == Float {
s.error("hexadecimal mantissa requires a 'p' exponent")
} }
return ch
if tok == Int && invalid != 0 {
s.errorf("invalid digit %q in %s", invalid, litname(prefix))
}
if digsep&2 != 0 {
s.tokEnd = s.srcPos - s.lastCharLen // make sure token text is terminated
if i := invalidSep(s.TokenText()); i >= 0 {
s.error("'_' must separate successive digits")
}
}
return tok, ch
} }
func (s *Scanner) scanNumber(ch rune) (rune, rune) { func litname(prefix rune) string {
// isDecimal(ch) switch prefix {
if ch == '0' { default:
// int or float return "decimal literal"
ch = s.next() case 'x':
if ch == 'x' || ch == 'X' { return "hexadecimal literal"
// hexadecimal int case 'o', '0':
ch = s.next() return "octal literal"
hasMantissa := false case 'b':
for digitVal(ch) < 16 { return "binary literal"
ch = s.next()
hasMantissa = true
} }
if !hasMantissa { }
s.error("illegal hexadecimal number")
// invalidSep returns the index of the first invalid separator in x, or -1.
func invalidSep(x string) int {
x1 := ' ' // prefix char, we only care if it's 'x'
d := '.' // digit, one of '_', '0' (a digit), or '.' (anything else)
i := 0
// a prefix counts as a digit
if len(x) >= 2 && x[0] == '0' {
x1 = lower(rune(x[1]))
if x1 == 'x' || x1 == 'o' || x1 == 'b' {
d = '0'
i = 2
} }
} else {
// octal int or float
has8or9 := false
for isDecimal(ch) {
if ch > '7' {
has8or9 = true
} }
ch = s.next()
// mantissa and exponent
for ; i < len(x); i++ {
p := d // previous digit
d = rune(x[i])
switch {
case d == '_':
if p != '0' {
return i
} }
if s.Mode&ScanFloats != 0 && (ch == '.' || ch == 'e' || ch == 'E') { case isDecimal(d) || x1 == 'x' && isHex(d):
// float d = '0'
ch = s.scanFraction(ch) default:
ch = s.scanExponent(ch) if p == '_' {
return Float, ch return i - 1
} }
// octal int d = '.'
if has8or9 {
s.error("illegal octal number")
} }
} }
return Int, ch if d == '_' {
return len(x) - 1
} }
// decimal int or float
ch = s.scanMantissa(ch) return -1
if s.Mode&ScanFloats != 0 && (ch == '.' || ch == 'e' || ch == 'E') { }
// float
ch = s.scanFraction(ch) func digitVal(ch rune) int {
ch = s.scanExponent(ch) switch {
return Float, ch case '0' <= ch && ch <= '9':
return int(ch - '0')
case 'a' <= lower(ch) && lower(ch) <= 'f':
return int(lower(ch) - 'a' + 10)
} }
return Int, ch return 16 // larger than any legal digit val
} }
func (s *Scanner) scanDigits(ch rune, base, n int) rune { func (s *Scanner) scanDigits(ch rune, base, n int) rune {
...@@ -448,7 +546,7 @@ func (s *Scanner) scanDigits(ch rune, base, n int) rune { ...@@ -448,7 +546,7 @@ func (s *Scanner) scanDigits(ch rune, base, n int) rune {
n-- n--
} }
if n > 0 { if n > 0 {
s.error("illegal char escape") s.error("invalid char escape")
} }
return ch return ch
} }
...@@ -468,7 +566,7 @@ func (s *Scanner) scanEscape(quote rune) rune { ...@@ -468,7 +566,7 @@ func (s *Scanner) scanEscape(quote rune) rune {
case 'U': case 'U':
ch = s.scanDigits(s.next(), 16, 8) ch = s.scanDigits(s.next(), 16, 8)
default: default:
s.error("illegal char escape") s.error("invalid char escape")
} }
return ch return ch
} }
...@@ -503,7 +601,7 @@ func (s *Scanner) scanRawString() { ...@@ -503,7 +601,7 @@ func (s *Scanner) scanRawString() {
func (s *Scanner) scanChar() { func (s *Scanner) scanChar() {
if s.scanString('\'') != 1 { if s.scanString('\'') != 1 {
s.error("illegal char literal") s.error("invalid char literal")
} }
} }
...@@ -584,7 +682,7 @@ redo: ...@@ -584,7 +682,7 @@ redo:
} }
case isDecimal(ch): case isDecimal(ch):
if s.Mode&(ScanInts|ScanFloats) != 0 { if s.Mode&(ScanInts|ScanFloats) != 0 {
tok, ch = s.scanNumber(ch) tok, ch = s.scanNumber(ch, true)
} else { } else {
ch = s.next() ch = s.next()
} }
...@@ -607,9 +705,7 @@ redo: ...@@ -607,9 +705,7 @@ redo:
case '.': case '.':
ch = s.next() ch = s.next()
if isDecimal(ch) && s.Mode&ScanFloats != 0 { if isDecimal(ch) && s.Mode&ScanFloats != 0 {
tok = Float tok, ch = s.scanNumber(ch, false)
ch = s.scanMantissa(ch)
ch = s.scanExponent(ch)
} }
case '/': case '/':
ch = s.next() ch = s.next()
......
This diff is collapsed.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment