Commit 6e615a57 authored by Rob Pike

scan: permit base prefixes 0nnn and 0xnn when scanning

signed or unsigned integers using %v or the formatless scanner.
That is, Sscan("0x11", &i) or Sscanf("0x11", "%v", &i) will now
set i to 17.   If a format other than %v is presented, the behavior
is as before.

Fixes #1469.

R=rsc
CC=golang-dev
https://golang.org/cl/4131042
parent c14c4e55
...@@ -139,6 +139,10 @@ ...@@ -139,6 +139,10 @@
%e %E %f %F %g %G are all equivalent and scan any floating point or complex value %e %E %f %F %g %G are all equivalent and scan any floating point or complex value
%s and %v on strings scan a space-delimited token %s and %v on strings scan a space-delimited token
The familiar base-setting prefixes 0 (octal) and 0x
(hexadecimal) are accepted when scanning integers without a
format or with the %v verb.
Width is interpreted in the input text (%5s means at most Width is interpreted in the input text (%5s means at most
five runes of input will be read to scan a string) but there five runes of input will be read to scan a string) but there
is no syntax for scanning with a precision (no %5.2f, just is no syntax for scanning with a precision (no %5.2f, just
......
...@@ -388,14 +388,12 @@ func (s *ss) consume(ok string, accept bool) bool { ...@@ -388,14 +388,12 @@ func (s *ss) consume(ok string, accept bool) bool {
if rune == EOF { if rune == EOF {
return false return false
} }
for i := 0; i < len(ok); i++ { if strings.IndexRune(ok, rune) >= 0 {
if int(ok[i]) == rune { if accept {
if accept { s.buf.WriteRune(rune)
s.buf.WriteRune(rune) s.wid++
s.wid++
}
return true
} }
return true
} }
if rune != EOF && accept { if rune != EOF && accept {
s.UngetRune() s.UngetRune()
...@@ -403,6 +401,15 @@ func (s *ss) consume(ok string, accept bool) bool { ...@@ -403,6 +401,15 @@ func (s *ss) consume(ok string, accept bool) bool {
return false return false
} }
// peek looks at the next rune of input and reports whether it occurs in ok.
// Any rune that was read is pushed back, so the input position does not move.
func (s *ss) peek(ok string) bool {
	next := s.getRune()
	if next != EOF {
		// Only a real rune is pushed back; at EOF nothing is ungot.
		s.UngetRune()
	}
	idx := strings.IndexRune(ok, next)
	return idx >= 0
}
// accept checks the next rune in the input. If it's a byte (sic) in the string, it puts it in the // accept checks the next rune in the input. If it's a byte (sic) in the string, it puts it in the
// buffer and returns true. Otherwise it returns false. // buffer and returns true. Otherwise it returns false.
func (s *ss) accept(ok string) bool { func (s *ss) accept(ok string) bool {
...@@ -476,8 +483,8 @@ func (s *ss) getBase(verb int) (base int, digits string) { ...@@ -476,8 +483,8 @@ func (s *ss) getBase(verb int) (base int, digits string) {
} }
// scanNumber returns the numerical string with specified digits starting here. // scanNumber returns the numerical string with specified digits starting here.
func (s *ss) scanNumber(digits string) string { func (s *ss) scanNumber(digits string, haveDigits bool) string {
if !s.accept(digits) { if !haveDigits && !s.accept(digits) {
s.errorString("expected integer") s.errorString("expected integer")
} }
for s.accept(digits) { for s.accept(digits) {
...@@ -496,22 +503,44 @@ func (s *ss) scanRune(bitSize int) int64 { ...@@ -496,22 +503,44 @@ func (s *ss) scanRune(bitSize int) int64 {
return rune return rune
} }
// scanBasePrefix inspects the start of an integer for a base prefix and
// returns the base to parse with, the set of digits valid in that base, and
// whether a leading zero was found (and therefore already stored in the
// token buffer). It is called only if the verb is %v.
//
// The outcomes are:
//   - no leading '0': base 10, nothing consumed, found is false
//   - leading '0' not followed by x/X: base 8, found is true
//   - leading "0x" or "0X": base 16, found is true
func (s *ss) scanBasePrefix() (base int, digits string, found bool) {
	if !s.peek("0") {
		return 10, decimalDigits, false
	}
	// Keep the zero: it is a digit of the number and goes into the token buffer.
	s.accept("0")
	if !s.peek("xX") {
		// A bare leading zero selects octal.
		return 8, octalDigits, true
	}
	// Drop the x/X marker without storing it, so the token holds digits only.
	s.consume("xX", false)
	return 16, hexadecimalDigits, true
}
// scanInt returns the value of the integer represented by the next // scanInt returns the value of the integer represented by the next
// token, checking for overflow. Any error is stored in s.err. // token, checking for overflow. Any error is stored in s.err.
func (s *ss) scanInt(verb int, bitSize int) int64 { func (s *ss) scanInt(verb int, bitSize int) int64 {
if verb == 'c' { if verb == 'c' {
return s.scanRune(bitSize) return s.scanRune(bitSize)
} }
base, digits := s.getBase(verb)
s.skipSpace(false) s.skipSpace(false)
base, digits := s.getBase(verb)
haveDigits := false
if verb == 'U' { if verb == 'U' {
if !s.consume("U", false) || !s.consume("+", false) { if !s.consume("U", false) || !s.consume("+", false) {
s.errorString("bad unicode format ") s.errorString("bad unicode format ")
} }
} else { } else {
s.accept(sign) // If there's a sign, it will be left in the token buffer. s.accept(sign) // If there's a sign, it will be left in the token buffer.
if verb == 'v' {
base, digits, haveDigits = s.scanBasePrefix()
}
} }
tok := s.scanNumber(digits) tok := s.scanNumber(digits, haveDigits)
i, err := strconv.Btoi64(tok, base) i, err := strconv.Btoi64(tok, base)
if err != nil { if err != nil {
s.error(err) s.error(err)
...@@ -530,14 +559,17 @@ func (s *ss) scanUint(verb int, bitSize int) uint64 { ...@@ -530,14 +559,17 @@ func (s *ss) scanUint(verb int, bitSize int) uint64 {
if verb == 'c' { if verb == 'c' {
return uint64(s.scanRune(bitSize)) return uint64(s.scanRune(bitSize))
} }
base, digits := s.getBase(verb)
s.skipSpace(false) s.skipSpace(false)
base, digits := s.getBase(verb)
haveDigits := false
if verb == 'U' { if verb == 'U' {
if !s.consume("U", false) || !s.consume("+", false) { if !s.consume("U", false) || !s.consume("+", false) {
s.errorString("bad unicode format ") s.errorString("bad unicode format ")
} }
} else if verb == 'v' {
base, digits, haveDigits = s.scanBasePrefix()
} }
tok := s.scanNumber(digits) tok := s.scanNumber(digits, haveDigits)
i, err := strconv.Btoui64(tok, base) i, err := strconv.Btoui64(tok, base)
if err != nil { if err != nil {
s.error(err) s.error(err)
......
...@@ -129,10 +129,20 @@ func newReader(s string) *myStringReader { ...@@ -129,10 +129,20 @@ func newReader(s string) *myStringReader {
} }
var scanTests = []ScanTest{ var scanTests = []ScanTest{
// Numbers // Basic types
{"T\n", &boolVal, true}, // boolean test vals toggle to be sure they are written {"T\n", &boolVal, true}, // boolean test vals toggle to be sure they are written
{"F\n", &boolVal, false}, // restored to zero value {"F\n", &boolVal, false}, // restored to zero value
{"21\n", &intVal, 21}, {"21\n", &intVal, 21},
{"0\n", &intVal, 0},
{"000\n", &intVal, 0},
{"0x10\n", &intVal, 0x10},
{"-0x10\n", &intVal, -0x10},
{"0377\n", &intVal, 0377},
{"-0377\n", &intVal, -0377},
{"0\n", &uintVal, uint(0)},
{"000\n", &uintVal, uint(0)},
{"0x10\n", &uintVal, uint(0x10)},
{"0377\n", &uintVal, uint(0377)},
{"22\n", &int8Val, int8(22)}, {"22\n", &int8Val, int8(22)},
{"23\n", &int16Val, int16(23)}, {"23\n", &int16Val, int16(23)},
{"24\n", &int32Val, int32(24)}, {"24\n", &int32Val, int32(24)},
...@@ -201,6 +211,8 @@ var scanfTests = []ScanfTest{ ...@@ -201,6 +211,8 @@ var scanfTests = []ScanfTest{
{"%v", "TRUE\n", &boolVal, true}, {"%v", "TRUE\n", &boolVal, true},
{"%t", "false\n", &boolVal, false}, {"%t", "false\n", &boolVal, false},
{"%v", "-71\n", &intVal, -71}, {"%v", "-71\n", &intVal, -71},
{"%v", "0377\n", &intVal, 0377},
{"%v", "0x44\n", &intVal, 0x44},
{"%d", "72\n", &intVal, 72}, {"%d", "72\n", &intVal, 72},
{"%c", "a\n", &intVal, 'a'}, {"%c", "a\n", &intVal, 'a'},
{"%c", "\u5072\n", &intVal, 0x5072}, {"%c", "\u5072\n", &intVal, 0x5072},
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment