Commit 81bfbe93 authored by Roger Peppe, committed by Rob Pike

fmt: allow recursive calls to Fscan etc.

Add a new Read method to ScanState so that it
satisfies the io.Reader interface; rename
GetRune and UngetRune to ReadRune and UnreadRune.
Make sure ReadRune does not read past width restrictions;
remove now-unnecessary Width method from ScanState.
Also make the documentation a little clearer as to
how ReadRune and UnreadRune are used.

R=r, r2
CC=golang-dev
https://golang.org/cl/4240056
parent 324cc3d0
...@@ -164,13 +164,15 @@ ...@@ -164,13 +164,15 @@
All arguments to be scanned must be either pointers to basic All arguments to be scanned must be either pointers to basic
types or implementations of the Scanner interface. types or implementations of the Scanner interface.
Note: Fscan etc. can read one character (rune) past the Note: Fscan etc. can read one character (rune) past the input
input they return, which means that a loop calling a scan they return, which means that a loop calling a scan routine
routine may skip some of the input. This is usually a may skip some of the input. This is usually a problem only
problem only when there is no space between input values. when there is no space between input values. If the reader
However, if the reader provided to Fscan implements UnreadRune, provided to Fscan implements ReadRune, that method will be used
to read characters. If the reader also implements UnreadRune,
that method will be used to save the character and successive that method will be used to save the character and successive
calls will not lose data. To attach an UnreadRune method calls will not lose data. To attach ReadRune and UnreadRune
to a reader without that capability, use bufio.NewReader. methods to a reader without that capability, use
bufio.NewReader.
*/ */
package fmt package fmt
...@@ -28,19 +28,24 @@ type runeUnreader interface { ...@@ -28,19 +28,24 @@ type runeUnreader interface {
// Scanners may do rune-at-a-time scanning or ask the ScanState // Scanners may do rune-at-a-time scanning or ask the ScanState
// to discover the next space-delimited token. // to discover the next space-delimited token.
type ScanState interface { type ScanState interface {
// GetRune reads the next rune (Unicode code point) from the input. // ReadRune reads the next rune (Unicode code point) from the input.
// If invoked during Scanln, Fscanln, or Sscanln, GetRune() will // If invoked during Scanln, Fscanln, or Sscanln, ReadRune() will
// return EOF after returning the first '\n'. // return EOF after returning the first '\n' or when reading beyond
GetRune() (rune int, err os.Error) // the specified width.
// UngetRune causes the next call to GetRune to return the same rune. ReadRune() (rune int, size int, err os.Error)
UngetRune() // UnreadRune causes the next call to ReadRune to return the same rune.
// Width returns the value of the width option and whether it has been set. UnreadRune() os.Error
// The unit is Unicode code points.
Width() (wid int, ok bool)
// Token returns the next space-delimited token from the input. If // Token returns the next space-delimited token from the input. If
// a width has been specified, the returned token will be no longer // a width has been specified, the returned token will be no longer
// than the width. // than the width.
Token() (token string, err os.Error) Token() (token string, err os.Error)
// Width returns the value of the width option and whether it has been set.
// The unit is Unicode code points.
Width() (wid int, ok bool)
// Because ReadRune is implemented by the interface, Read should never be
// called by the scanning routines and a valid implementation of
// ScanState may choose always to return an error from Read.
Read(buf []byte) (n int, err os.Error)
} }
// Scanner is implemented by any value that has a Scan method, which scans // Scanner is implemented by any value that has a Scan method, which scans
...@@ -133,59 +138,61 @@ const EOF = -1 ...@@ -133,59 +138,61 @@ const EOF = -1
// ss is the internal implementation of ScanState. // ss is the internal implementation of ScanState.
type ss struct { type ss struct {
rr io.RuneReader // where to read input rr io.RuneReader // where to read input
buf bytes.Buffer // token accumulator buf bytes.Buffer // token accumulator
nlIsSpace bool // whether newline counts as white space nlIsSpace bool // whether newline counts as white space
nlIsEnd bool // whether newline terminates scan nlIsEnd bool // whether newline terminates scan
peekRune int // one-rune lookahead peekRune int // one-rune lookahead
prevRune int // last rune returned by GetRune prevRune int // last rune returned by ReadRune
atEOF bool // already read EOF atEOF bool // already read EOF
maxWid int // max width of field, in runes maxWid int // max width of field, in runes
widPresent bool // width was specified wid int // width consumed so far; used in accept()
wid int // width consumed so far; used in accept()
} }
func (s *ss) GetRune() (rune int, err os.Error) { // The Read method is only in ScanState so that ScanState
// satisfies io.Reader. It will never be called when used as
// intended, so there is no need to make it actually work.
func (s *ss) Read(buf []byte) (n int, err os.Error) {
return 0, os.ErrorString("ScanState's Read should not be called. Use ReadRune")
}
func (s *ss) ReadRune() (rune int, size int, err os.Error) {
if s.peekRune >= 0 { if s.peekRune >= 0 {
s.wid++
rune = s.peekRune rune = s.peekRune
size = utf8.RuneLen(rune)
s.prevRune = rune s.prevRune = rune
s.peekRune = -1 s.peekRune = -1
return return
} }
if s.nlIsEnd && s.prevRune == '\n' { if s.atEOF || s.nlIsEnd && s.prevRune == '\n' || s.wid >= s.maxWid {
rune = EOF
err = os.EOF err = os.EOF
return return
} }
rune, _, err = s.rr.ReadRune()
rune, size, err = s.rr.ReadRune()
if err == nil { if err == nil {
s.wid++
s.prevRune = rune s.prevRune = rune
} else if err == os.EOF {
s.atEOF = true
} }
return return
} }
func (s *ss) Width() (wid int, ok bool) { func (s *ss) Width() (wid int, ok bool) {
return s.maxWid, s.widPresent if s.maxWid == hugeWid {
return 0, false
}
return s.maxWid, true
} }
// The public method returns an error; this private one panics. // The public method returns an error; this private one panics.
// If getRune reaches EOF, the return value is EOF (-1). // If getRune reaches EOF, the return value is EOF (-1).
func (s *ss) getRune() (rune int) { func (s *ss) getRune() (rune int) {
if s.atEOF { rune, _, err := s.ReadRune()
return EOF if err != nil {
}
if s.peekRune >= 0 {
rune = s.peekRune
s.prevRune = rune
s.peekRune = -1
return
}
rune, _, err := s.rr.ReadRune()
if err == nil {
s.prevRune = rune
} else if err != nil {
if err == os.EOF { if err == os.EOF {
s.atEOF = true
return EOF return EOF
} }
s.error(err) s.error(err)
...@@ -193,35 +200,25 @@ func (s *ss) getRune() (rune int) { ...@@ -193,35 +200,25 @@ func (s *ss) getRune() (rune int) {
return return
} }
// mustGetRune turns os.EOF into a panic(io.ErrUnexpectedEOF). // mustReadRune turns os.EOF into a panic(io.ErrUnexpectedEOF).
// It is called in cases such as string scanning where an EOF is a // It is called in cases such as string scanning where an EOF is a
// syntax error. // syntax error.
func (s *ss) mustGetRune() (rune int) { func (s *ss) mustReadRune() (rune int) {
if s.atEOF { rune = s.getRune()
if rune == EOF {
s.error(io.ErrUnexpectedEOF) s.error(io.ErrUnexpectedEOF)
} }
if s.peekRune >= 0 {
rune = s.peekRune
s.peekRune = -1
return
}
rune, _, err := s.rr.ReadRune()
if err != nil {
if err == os.EOF {
err = io.ErrUnexpectedEOF
}
s.error(err)
}
return return
} }
func (s *ss) UnreadRune() os.Error {
func (s *ss) UngetRune() {
if u, ok := s.rr.(runeUnreader); ok { if u, ok := s.rr.(runeUnreader); ok {
u.UnreadRune() u.UnreadRune()
} else { } else {
s.peekRune = s.prevRune s.peekRune = s.prevRune
} }
s.wid--
return nil
} }
func (s *ss) error(err os.Error) { func (s *ss) error(err os.Error) {
...@@ -320,8 +317,7 @@ func newScanState(r io.Reader, nlIsSpace, nlIsEnd bool) *ss { ...@@ -320,8 +317,7 @@ func newScanState(r io.Reader, nlIsSpace, nlIsEnd bool) *ss {
s.prevRune = -1 s.prevRune = -1
s.peekRune = -1 s.peekRune = -1
s.atEOF = false s.atEOF = false
s.maxWid = 0 s.maxWid = hugeWid
s.widPresent = false
return s return s
} }
...@@ -354,7 +350,7 @@ func (s *ss) skipSpace(stopAtNewline bool) { ...@@ -354,7 +350,7 @@ func (s *ss) skipSpace(stopAtNewline bool) {
return return
} }
if !unicode.IsSpace(rune) { if !unicode.IsSpace(rune) {
s.UngetRune() s.UnreadRune()
break break
} }
} }
...@@ -366,13 +362,13 @@ func (s *ss) skipSpace(stopAtNewline bool) { ...@@ -366,13 +362,13 @@ func (s *ss) skipSpace(stopAtNewline bool) {
func (s *ss) token() string { func (s *ss) token() string {
s.skipSpace(false) s.skipSpace(false)
// read until white space or newline // read until white space or newline
for nrunes := 0; !s.widPresent || nrunes < s.maxWid; nrunes++ { for {
rune := s.getRune() rune := s.getRune()
if rune == EOF { if rune == EOF {
break break
} }
if unicode.IsSpace(rune) { if unicode.IsSpace(rune) {
s.UngetRune() s.UnreadRune()
break break
} }
s.buf.WriteRune(rune) s.buf.WriteRune(rune)
...@@ -391,9 +387,6 @@ var boolError = os.ErrorString("syntax error scanning boolean") ...@@ -391,9 +387,6 @@ var boolError = os.ErrorString("syntax error scanning boolean")
// consume reads the next rune in the input and reports whether it is in the ok string. // consume reads the next rune in the input and reports whether it is in the ok string.
// If accept is true, it puts the character into the input token. // If accept is true, it puts the character into the input token.
func (s *ss) consume(ok string, accept bool) bool { func (s *ss) consume(ok string, accept bool) bool {
if s.wid >= s.maxWid {
return false
}
rune := s.getRune() rune := s.getRune()
if rune == EOF { if rune == EOF {
return false return false
...@@ -401,12 +394,11 @@ func (s *ss) consume(ok string, accept bool) bool { ...@@ -401,12 +394,11 @@ func (s *ss) consume(ok string, accept bool) bool {
if strings.IndexRune(ok, rune) >= 0 { if strings.IndexRune(ok, rune) >= 0 {
if accept { if accept {
s.buf.WriteRune(rune) s.buf.WriteRune(rune)
s.wid++
} }
return true return true
} }
if rune != EOF && accept { if rune != EOF && accept {
s.UngetRune() s.UnreadRune()
} }
return false return false
} }
...@@ -415,7 +407,7 @@ func (s *ss) consume(ok string, accept bool) bool { ...@@ -415,7 +407,7 @@ func (s *ss) consume(ok string, accept bool) bool {
func (s *ss) peek(ok string) bool { func (s *ss) peek(ok string) bool {
rune := s.getRune() rune := s.getRune()
if rune != EOF { if rune != EOF {
s.UngetRune() s.UnreadRune()
} }
return strings.IndexRune(ok, rune) >= 0 return strings.IndexRune(ok, rune) >= 0
} }
...@@ -443,7 +435,7 @@ func (s *ss) scanBool(verb int) bool { ...@@ -443,7 +435,7 @@ func (s *ss) scanBool(verb int) bool {
return false return false
} }
// Syntax-checking a boolean is annoying. We're not fastidious about case. // Syntax-checking a boolean is annoying. We're not fastidious about case.
switch s.mustGetRune() { switch s.mustReadRune() {
case '0': case '0':
return false return false
case '1': case '1':
...@@ -504,7 +496,7 @@ func (s *ss) scanNumber(digits string, haveDigits bool) string { ...@@ -504,7 +496,7 @@ func (s *ss) scanNumber(digits string, haveDigits bool) string {
// scanRune returns the next rune value in the input. // scanRune returns the next rune value in the input.
func (s *ss) scanRune(bitSize int) int64 { func (s *ss) scanRune(bitSize int) int64 {
rune := int64(s.mustGetRune()) rune := int64(s.mustReadRune())
n := uint(bitSize) n := uint(bitSize)
x := (rune << (64 - n)) >> (64 - n) x := (rune << (64 - n)) >> (64 - n)
if x != rune { if x != rune {
...@@ -720,12 +712,12 @@ func (s *ss) convertString(verb int) (str string) { ...@@ -720,12 +712,12 @@ func (s *ss) convertString(verb int) (str string) {
// quotedString returns the double- or back-quoted string represented by the next input characters. // quotedString returns the double- or back-quoted string represented by the next input characters.
func (s *ss) quotedString() string { func (s *ss) quotedString() string {
quote := s.mustGetRune() quote := s.mustReadRune()
switch quote { switch quote {
case '`': case '`':
// Back-quoted: Anything goes until EOF or back quote. // Back-quoted: Anything goes until EOF or back quote.
for { for {
rune := s.mustGetRune() rune := s.mustReadRune()
if rune == quote { if rune == quote {
break break
} }
...@@ -736,13 +728,13 @@ func (s *ss) quotedString() string { ...@@ -736,13 +728,13 @@ func (s *ss) quotedString() string {
// Double-quoted: Include the quotes and let strconv.Unquote do the backslash escapes. // Double-quoted: Include the quotes and let strconv.Unquote do the backslash escapes.
s.buf.WriteRune(quote) s.buf.WriteRune(quote)
for { for {
rune := s.mustGetRune() rune := s.mustReadRune()
s.buf.WriteRune(rune) s.buf.WriteRune(rune)
if rune == '\\' { if rune == '\\' {
// In a legal backslash escape, no matter how long, only the character // In a legal backslash escape, no matter how long, only the character
// immediately after the escape can itself be a backslash or quote. // immediately after the escape can itself be a backslash or quote.
// Thus we only need to protect the first character after the backslash. // Thus we only need to protect the first character after the backslash.
rune := s.mustGetRune() rune := s.mustReadRune()
s.buf.WriteRune(rune) s.buf.WriteRune(rune)
} else if rune == '"' { } else if rune == '"' {
break break
...@@ -781,10 +773,10 @@ func (s *ss) hexByte() (b byte, ok bool) { ...@@ -781,10 +773,10 @@ func (s *ss) hexByte() (b byte, ok bool) {
return return
} }
if unicode.IsSpace(rune1) { if unicode.IsSpace(rune1) {
s.UngetRune() s.UnreadRune()
return return
} }
rune2 := s.mustGetRune() rune2 := s.mustReadRune()
return byte(s.hexDigit(rune1)<<4 | s.hexDigit(rune2)), true return byte(s.hexDigit(rune1)<<4 | s.hexDigit(rune2)), true
} }
...@@ -806,6 +798,8 @@ func (s *ss) hexString() string { ...@@ -806,6 +798,8 @@ func (s *ss) hexString() string {
const floatVerbs = "beEfFgGv" const floatVerbs = "beEfFgGv"
const hugeWid = 1 << 30
// scanOne scans a single value, deriving the scanner from the type of the argument. // scanOne scans a single value, deriving the scanner from the type of the argument.
func (s *ss) scanOne(verb int, field interface{}) { func (s *ss) scanOne(verb int, field interface{}) {
s.buf.Reset() s.buf.Reset()
...@@ -821,10 +815,6 @@ func (s *ss) scanOne(verb int, field interface{}) { ...@@ -821,10 +815,6 @@ func (s *ss) scanOne(verb int, field interface{}) {
} }
return return
} }
if !s.widPresent {
s.maxWid = 1 << 30 // Huge
}
s.wid = 0
switch v := field.(type) { switch v := field.(type) {
case *bool: case *bool:
*v = s.scanBool(verb) *v = s.scanBool(verb)
...@@ -925,7 +915,6 @@ func errorHandler(errp *os.Error) { ...@@ -925,7 +915,6 @@ func errorHandler(errp *os.Error) {
} }
// doScan does the real work for scanning without a format string. // doScan does the real work for scanning without a format string.
// At the moment, it handles only pointers to basic types.
func (s *ss) doScan(a []interface{}) (numProcessed int, err os.Error) { func (s *ss) doScan(a []interface{}) (numProcessed int, err os.Error) {
defer errorHandler(&err) defer errorHandler(&err)
for _, field := range a { for _, field := range a {
...@@ -986,9 +975,9 @@ func (s *ss) advance(format string) (i int) { ...@@ -986,9 +975,9 @@ func (s *ss) advance(format string) (i int) {
s.skipSpace(true) s.skipSpace(true)
continue continue
} }
inputc := s.mustGetRune() inputc := s.mustReadRune()
if fmtc != inputc { if fmtc != inputc {
s.UngetRune() s.UnreadRune()
return -1 return -1
} }
i += w i += w
...@@ -1020,7 +1009,12 @@ func (s *ss) doScanf(format string, a []interface{}) (numProcessed int, err os.E ...@@ -1020,7 +1009,12 @@ func (s *ss) doScanf(format string, a []interface{}) (numProcessed int, err os.E
i++ // % is one byte i++ // % is one byte
// do we have 20 (width)? // do we have 20 (width)?
s.maxWid, s.widPresent, i = parsenum(format, i, end) var widPresent bool
s.maxWid, widPresent, i = parsenum(format, i, end)
if !widPresent {
s.maxWid = hugeWid
}
s.wid = 0
c, w := utf8.DecodeRuneInString(format[i:]) c, w := utf8.DecodeRuneInString(format[i:])
i += w i += w
...@@ -1033,6 +1027,7 @@ func (s *ss) doScanf(format string, a []interface{}) (numProcessed int, err os.E ...@@ -1033,6 +1027,7 @@ func (s *ss) doScanf(format string, a []interface{}) (numProcessed int, err os.E
s.scanOne(c, field) s.scanOne(c, field)
numProcessed++ numProcessed++
s.maxWid = hugeWid
} }
if numProcessed < len(a) { if numProcessed < len(a) {
s.errorString("too many operands") s.errorString("too many operands")
......
...@@ -87,21 +87,7 @@ type FloatTest struct { ...@@ -87,21 +87,7 @@ type FloatTest struct {
type Xs string type Xs string
func (x *Xs) Scan(state ScanState, verb int) os.Error { func (x *Xs) Scan(state ScanState, verb int) os.Error {
var tok string tok, err := state.Token()
var c int
var err os.Error
wid, present := state.Width()
if !present {
tok, err = state.Token()
} else {
for i := 0; i < wid; i++ {
c, err = state.GetRune()
if err != nil {
break
}
tok += string(c)
}
}
if err != nil { if err != nil {
return err return err
} }
...@@ -114,6 +100,26 @@ func (x *Xs) Scan(state ScanState, verb int) os.Error { ...@@ -114,6 +100,26 @@ func (x *Xs) Scan(state ScanState, verb int) os.Error {
var xVal Xs var xVal Xs
// IntString accepts an integer followed immediately by a string.
// It tests the embedding of a scan within a scan: its Scan method
// fills both fields by calling Fscan on the ScanState it is handed.
type IntString struct {
// i receives the integer, which is scanned first.
i int
// s receives the string, which is scanned after the integer.
s string
}
// Scan fills in an IntString by running two nested scans on state:
// first the integer field, then the string field. The nested scans
// are ordinary Fscan calls that use the ScanState as their reader.
// The verb is ignored. The first error encountered is returned; if
// the integer scan fails, the string scan is not attempted.
func (s *IntString) Scan(state ScanState, verb int) os.Error {
	_, err := Fscan(state, &s.i)
	if err == nil {
		// Only go on to the string once the integer has been read.
		_, err = Fscan(state, &s.s)
	}
	return err
}
var intStringVal IntString
// myStringReader implements Read but not ReadRune, allowing us to test our readRune wrapper // myStringReader implements Read but not ReadRune, allowing us to test our readRune wrapper
// type that creates something that can read runes given only Read(). // type that creates something that can read runes given only Read().
type myStringReader struct { type myStringReader struct {
...@@ -200,8 +206,9 @@ var scanTests = []ScanTest{ ...@@ -200,8 +206,9 @@ var scanTests = []ScanTest{
{"114\n", &renamedStringVal, renamedString("114")}, {"114\n", &renamedStringVal, renamedString("114")},
{"115\n", &renamedBytesVal, renamedBytes([]byte("115"))}, {"115\n", &renamedBytesVal, renamedBytes([]byte("115"))},
// Custom scanner. // Custom scanners.
{" vvv ", &xVal, Xs("vvv")}, {" vvv ", &xVal, Xs("vvv")},
{" 1234hello", &intStringVal, IntString{1234, "hello"}},
// Fixed bugs // Fixed bugs
{"2147483648\n", &int64Val, int64(2147483648)}, // was: integer overflow {"2147483648\n", &int64Val, int64(2147483648)}, // was: integer overflow
...@@ -308,6 +315,7 @@ var f float64 ...@@ -308,6 +315,7 @@ var f float64
var s, t string var s, t string
var c complex128 var c complex128
var x, y Xs var x, y Xs
var z IntString
var multiTests = []ScanfMultiTest{ var multiTests = []ScanfMultiTest{
{"", "", nil, nil, ""}, {"", "", nil, nil, ""},
...@@ -321,8 +329,9 @@ var multiTests = []ScanfMultiTest{ ...@@ -321,8 +329,9 @@ var multiTests = []ScanfMultiTest{
{"%d%s", "123abc", args(&i, &s), args(123, "abc"), ""}, {"%d%s", "123abc", args(&i, &s), args(123, "abc"), ""},
{"%c%c%c", "2\u50c2X", args(&i, &j, &k), args('2', '\u50c2', 'X'), ""}, {"%c%c%c", "2\u50c2X", args(&i, &j, &k), args('2', '\u50c2', 'X'), ""},
// Custom scanner. // Custom scanners.
{"%2e%f", "eefffff", args(&x, &y), args(Xs("ee"), Xs("fffff")), ""}, {"%2e%f", "eefffff", args(&x, &y), args(Xs("ee"), Xs("fffff")), ""},
{"%4v%s", "12abcd", args(&z, &s), args(IntString{12, "ab"}, "cd"), ""},
// Errors // Errors
{"%t", "23 18", args(&i), nil, "bad verb"}, {"%t", "23 18", args(&i), nil, "bad verb"},
...@@ -345,7 +354,11 @@ func testScan(name string, t *testing.T, scan func(r io.Reader, a ...interface{} ...@@ -345,7 +354,11 @@ func testScan(name string, t *testing.T, scan func(r io.Reader, a ...interface{}
} }
n, err := scan(r, test.in) n, err := scan(r, test.in)
if err != nil { if err != nil {
t.Errorf("%s got error scanning %q: %s", name, test.text, err) m := ""
if n > 0 {
m = Sprintf(" (%d fields ok)", n)
}
t.Errorf("%s got error scanning %q: %s%s", name, test.text, err, m)
continue continue
} }
if n != 1 { if n != 1 {
...@@ -681,7 +694,7 @@ type TwoLines string ...@@ -681,7 +694,7 @@ type TwoLines string
func (t *TwoLines) Scan(state ScanState, verb int) os.Error { func (t *TwoLines) Scan(state ScanState, verb int) os.Error {
chars := make([]int, 0, 100) chars := make([]int, 0, 100)
for nlCount := 0; nlCount < 2; { for nlCount := 0; nlCount < 2; {
c, err := state.GetRune() c, _, err := state.ReadRune()
if err != nil { if err != nil {
return err return err
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment