Commit 710417bc authored by Robert Griesemer

text/scanner: accept new Go2 number literals

This CL introduces text/scanner support for the new binary and octal integer
literals, hexadecimal floats, and digit separators for all number literals.
The new code is closely mirroring the respective code for number literals in
cmd/compile/internal/syntax/scanner.go.

Uniformly use the term "invalid" rather than "illegal" in error messages
to match the respective error messages in the other scanners directly.

R=Go1.13

Updates #12711.
Updates #19308.
Updates #28493.
Updates #29008.

Change-Id: I2f291de13ba5afc0e530cd8326e6bf4c3858ebac
Reviewed-on: https://go-review.googlesource.com/c/161199
Run-TryBot: Robert Griesemer <gri@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
parent 33ac8544
......@@ -266,7 +266,7 @@ func (s *Scanner) next() rune {
s.srcPos += width
s.lastCharLen = width
s.column++
s.error("illegal UTF-8 encoding")
s.error("invalid UTF-8 encoding")
return ch
}
}
......@@ -281,7 +281,7 @@ func (s *Scanner) next() rune {
switch ch {
case 0:
// for compatibility with other tools
s.error("illegal character NUL")
s.error("invalid character NUL")
case '\n':
s.line++
s.lastLineLen = s.column
......@@ -335,6 +335,10 @@ func (s *Scanner) error(msg string) {
fmt.Fprintf(os.Stderr, "%s: %s\n", pos, msg)
}
// errorf builds an error message from format and args using fmt.Sprintf
// and reports it through s.error.
func (s *Scanner) errorf(format string, args ...interface{}) {
	msg := fmt.Sprintf(format, args...)
	s.error(msg)
}
func (s *Scanner) isIdentRune(ch rune, i int) bool {
if s.IsIdentRune != nil {
return s.IsIdentRune(ch, i)
......@@ -351,95 +355,189 @@ func (s *Scanner) scanIdentifier() rune {
return ch
}
func digitVal(ch rune) int {
switch {
case '0' <= ch && ch <= '9':
return int(ch - '0')
case 'a' <= ch && ch <= 'f':
return int(ch - 'a' + 10)
case 'A' <= ch && ch <= 'F':
return int(ch - 'A' + 10)
// lower sets the ASCII case bit ('a' - 'A') in ch. The result is the
// lower-case form of ch if and only if ch is an ASCII letter; for any
// other rune the bit is set all the same, so callers must only rely on
// the result when comparing against ASCII letters.
func lower(ch rune) rune {
	const caseBit = 'a' - 'A'
	return ch | caseBit
}
// isDecimal reports whether ch is one of the ASCII digits '0' through '9'.
func isDecimal(ch rune) bool {
	if ch < '0' {
		return false
	}
	return ch <= '9'
}
// isHex reports whether ch is an ASCII hexadecimal digit:
// '0'-'9', 'a'-'f', or 'A'-'F'.
func isHex(ch rune) bool {
	if '0' <= ch && ch <= '9' {
		return true
	}
	// Fold the case bit so a single range check covers both letter cases.
	folded := lower(ch)
	return 'a' <= folded && folded <= 'f'
}
// digits accepts the sequence { digit | '_' } starting with ch0.
// If base <= 10, digits accepts any decimal digit but records
// the first invalid digit >= base in *invalid if *invalid == 0.
// If base > 10, digits accepts any hexadecimal digit and does
// not consult invalid at all.
// digits returns the first rune that is not part of the sequence
// anymore, and a bitset describing whether the sequence contained
// digits (bit 0 is set), or separators '_' (bit 1 is set).
func (s *Scanner) digits(ch0 rune, base int, invalid *rune) (ch rune, digsep int) {
ch = ch0
if base <= 10 {
// max is the first rune past the largest valid digit for this base.
max := rune('0' + base)
for isDecimal(ch) || ch == '_' {
ds := 1 // assume a digit; switched to 2 below for a separator
if ch == '_' {
ds = 2
// invalid is only dereferenced when ch >= max. Inside this loop ch is
// a decimal digit, so with base 10 (max is one past '9') the test is
// never true and a nil invalid is never touched.
} else if ch >= max && *invalid == 0 {
*invalid = ch
}
digsep |= ds
ch = s.next()
}
} else {
for isHex(ch) || ch == '_' {
ds := 1 // assume a digit; switched to 2 below for a separator
if ch == '_' {
ds = 2
}
digsep |= ds
ch = s.next()
}
}
return 16 // larger than any legal digit val
return
}
func isDecimal(ch rune) bool { return '0' <= ch && ch <= '9' }
func (s *Scanner) scanNumber(ch rune, integerPart bool) (rune, rune) {
base := 10 // number base
prefix := rune(0) // one of 0 (decimal), '0' (0-octal), 'x', 'o', or 'b'
digsep := 0 // bit 0: digit present, bit 1: '_' present
invalid := rune(0) // invalid digit in literal, or 0
// integer part
var tok rune
var ds int
if integerPart {
tok = Int
if ch == '0' {
ch = s.next()
switch lower(ch) {
case 'x':
ch = s.next()
base, prefix = 16, 'x'
case 'o':
ch = s.next()
base, prefix = 8, 'o'
case 'b':
ch = s.next()
base, prefix = 2, 'b'
default:
base, prefix = 8, '0'
digsep = 1 // leading 0
}
}
ch, ds = s.digits(ch, base, &invalid)
digsep |= ds
}
func (s *Scanner) scanMantissa(ch rune) rune {
for isDecimal(ch) {
ch = s.next()
// fractional part
if !integerPart || ch == '.' {
tok = Float
if prefix == 'o' || prefix == 'b' {
s.error("invalid radix point in " + litname(prefix))
}
if ch == '.' {
ch = s.next()
}
ch, ds = s.digits(ch, base, &invalid)
digsep |= ds
}
return ch
}
func (s *Scanner) scanFraction(ch rune) rune {
if ch == '.' {
ch = s.scanMantissa(s.next())
if digsep&1 == 0 {
s.error(litname(prefix) + " has no digits")
}
return ch
}
func (s *Scanner) scanExponent(ch rune) rune {
if ch == 'e' || ch == 'E' {
// exponent
if e := lower(ch); e == 'e' || e == 'p' {
switch {
case e == 'e' && prefix != 0 && prefix != '0':
s.errorf("%q exponent requires decimal mantissa", ch)
case e == 'p' && prefix != 'x':
s.errorf("%q exponent requires hexadecimal mantissa", ch)
}
ch = s.next()
if ch == '-' || ch == '+' {
tok = Float
if ch == '+' || ch == '-' {
ch = s.next()
}
if !isDecimal(ch) {
s.error("illegal exponent")
ch, ds = s.digits(ch, 10, nil)
digsep |= ds
if ds&1 == 0 {
s.error("exponent has no digits")
}
ch = s.scanMantissa(ch)
} else if prefix == 'x' && tok == Float {
s.error("hexadecimal mantissa requires a 'p' exponent")
}
return ch
if tok == Int && invalid != 0 {
s.errorf("invalid digit %q in %s", invalid, litname(prefix))
}
if digsep&2 != 0 {
s.tokEnd = s.srcPos - s.lastCharLen // make sure token text is terminated
if i := invalidSep(s.TokenText()); i >= 0 {
s.error("'_' must separate successive digits")
}
}
return tok, ch
}
func (s *Scanner) scanNumber(ch rune) (rune, rune) {
// isDecimal(ch)
if ch == '0' {
// int or float
ch = s.next()
if ch == 'x' || ch == 'X' {
// hexadecimal int
ch = s.next()
hasMantissa := false
for digitVal(ch) < 16 {
ch = s.next()
hasMantissa = true
}
if !hasMantissa {
s.error("illegal hexadecimal number")
}
} else {
// octal int or float
has8or9 := false
for isDecimal(ch) {
if ch > '7' {
has8or9 = true
}
ch = s.next()
}
if s.Mode&ScanFloats != 0 && (ch == '.' || ch == 'e' || ch == 'E') {
// float
ch = s.scanFraction(ch)
ch = s.scanExponent(ch)
return Float, ch
// litname returns the human-readable name of the literal kind selected by
// prefix, for use in error messages: 'x' is hexadecimal, 'o' and '0' are
// octal, 'b' is binary, and every other value is reported as decimal.
func litname(prefix rune) string {
	if prefix == 'x' {
		return "hexadecimal literal"
	}
	if prefix == 'o' || prefix == '0' {
		return "octal literal"
	}
	if prefix == 'b' {
		return "binary literal"
	}
	return "decimal literal"
}
// invalidSep returns the index of the first invalid separator in x, or -1.
// A separator '_' is invalid unless the character before it is a digit
// (a 0x/0o/0b prefix counts as a digit) and the character after it is a
// digit as well. Hexadecimal letters count as digits only when x has an
// 'x' prefix.
func invalidSep(x string) int {
x1 := ' ' // prefix char, we only care if it's 'x'
d := '.' // digit, one of '_', '0' (a digit), or '.' (anything else)
i := 0
// a prefix counts as a digit
if len(x) >= 2 && x[0] == '0' {
x1 = lower(rune(x[1]))
if x1 == 'x' || x1 == 'o' || x1 == 'b' {
d = '0'
i = 2
}
}
// mantissa and exponent
for ; i < len(x); i++ {
p := d // previous digit
d = rune(x[i])
switch {
case d == '_':
// a separator must directly follow a digit (or a base prefix)
if p != '0' {
return i
}
// octal int
if has8or9 {
s.error("illegal octal number")
case isDecimal(d) || x1 == 'x' && isHex(d):
d = '0'
default:
// a non-digit right after a separator: report the separator's index
if p == '_' {
return i - 1
}
d = '.'
}
return Int, ch
}
// decimal int or float
ch = s.scanMantissa(ch)
if s.Mode&ScanFloats != 0 && (ch == '.' || ch == 'e' || ch == 'E') {
// float
ch = s.scanFraction(ch)
ch = s.scanExponent(ch)
return Float, ch
// a separator at the very end of x has no digit after it
if d == '_' {
return len(x) - 1
}
return -1
}
// digitVal returns the numeric value (0 through 15) of the hexadecimal
// digit ch, accepting both upper- and lower-case letters, or 16 if ch is
// not a hexadecimal digit.
func digitVal(ch rune) int {
switch {
case '0' <= ch && ch <= '9':
return int(ch - '0')
// lower folds 'A'-'F' onto 'a'-'f' so one case handles both letter cases
case 'a' <= lower(ch) && lower(ch) <= 'f':
return int(lower(ch) - 'a' + 10)
}
return Int, ch
return 16 // larger than any legal digit val
}
func (s *Scanner) scanDigits(ch rune, base, n int) rune {
......@@ -448,7 +546,7 @@ func (s *Scanner) scanDigits(ch rune, base, n int) rune {
n--
}
if n > 0 {
s.error("illegal char escape")
s.error("invalid char escape")
}
return ch
}
......@@ -468,7 +566,7 @@ func (s *Scanner) scanEscape(quote rune) rune {
case 'U':
ch = s.scanDigits(s.next(), 16, 8)
default:
s.error("illegal char escape")
s.error("invalid char escape")
}
return ch
}
......@@ -503,7 +601,7 @@ func (s *Scanner) scanRawString() {
func (s *Scanner) scanChar() {
if s.scanString('\'') != 1 {
s.error("illegal char literal")
s.error("invalid char literal")
}
}
......@@ -584,7 +682,7 @@ redo:
}
case isDecimal(ch):
if s.Mode&(ScanInts|ScanFloats) != 0 {
tok, ch = s.scanNumber(ch)
tok, ch = s.scanNumber(ch, true)
} else {
ch = s.next()
}
......@@ -607,9 +705,7 @@ redo:
case '.':
ch = s.next()
if isDecimal(ch) && s.Mode&ScanFloats != 0 {
tok = Float
ch = s.scanMantissa(ch)
ch = s.scanExponent(ch)
tok, ch = s.scanNumber(ch, false)
}
case '/':
ch = s.next()
......
This diff is collapsed.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment