Commit 6ee79973 authored by Rob Pike

fmt.Fscan: use UnreadRune to preserve data across calls.

Fixes #1055.
(About as well as it can be fixed without changing the API.)

R=rsc
CC=golang-dev
https://golang.org/cl/2156047
parent 23bd214a
...@@ -129,9 +129,13 @@ ...@@ -129,9 +129,13 @@
All arguments to be scanned must be either pointers to basic All arguments to be scanned must be either pointers to basic
types or implementations of the Scanner interface. types or implementations of the Scanner interface.
Note: Fscan etc. can read one character past the input Note: Fscan etc. can read one character (rune) past the
they return, which means that a loop calling a scan routine input they return, which means that a loop calling a scan
may skip some of the input. This is usually a problem only routine may skip some of the input. This is usually a
when there is no space between input values. problem only when there is no space between input values.
However, if the reader provided to Fscan implements UnreadRune,
that method will be used to save the character and successive
calls will not lose data. To attach an UnreadRune method
to a reader without that capability, use bufio.NewReader.
*/ */
package fmt package fmt
...@@ -22,6 +22,14 @@ type readRuner interface { ...@@ -22,6 +22,14 @@ type readRuner interface {
ReadRune() (rune int, size int, err os.Error) ReadRune() (rune int, size int, err os.Error)
} }
// unreadRuner is the interface to something that can unread runes.
// UngetRune type-asserts the scanner's reader against this interface:
// a reader that satisfies it is backed up by calling its own UnreadRune,
// so the pushed-back rune remains available to later scan calls.
// If the object provided to Scan does not satisfy this interface,
// a local buffer will be used to back up the input, but its contents
// will be lost when Scan returns.
type unreadRuner interface {
// UnreadRune is presumably expected to push back the most recently
// read rune, as bufio.Reader's method does (confirm for other readers).
// Note that UngetRune currently discards the returned error.
UnreadRune() os.Error
}
// ScanState represents the scanner state passed to custom scanners. // ScanState represents the scanner state passed to custom scanners.
// Scanners may do rune-at-a-time scanning or ask the ScanState // Scanners may do rune-at-a-time scanning or ask the ScanState
// to discover the next space-delimited token. // to discover the next space-delimited token.
...@@ -29,7 +37,7 @@ type ScanState interface { ...@@ -29,7 +37,7 @@ type ScanState interface {
// GetRune reads the next rune (Unicode code point) from the input. // GetRune reads the next rune (Unicode code point) from the input.
GetRune() (rune int, err os.Error) GetRune() (rune int, err os.Error)
// UngetRune causes the next call to GetRune to return the rune. // UngetRune causes the next call to GetRune to return the rune.
UngetRune(rune int) UngetRune()
// Width returns the value of the width option and whether it has been set. // Width returns the value of the width option and whether it has been set.
// The unit is Unicode code points. // The unit is Unicode code points.
Width() (wid int, ok bool) Width() (wid int, ok bool)
...@@ -133,6 +141,7 @@ type ss struct { ...@@ -133,6 +141,7 @@ type ss struct {
buf bytes.Buffer // token accumulator buf bytes.Buffer // token accumulator
nlIsSpace bool // whether newline counts as white space nlIsSpace bool // whether newline counts as white space
peekRune int // one-rune lookahead peekRune int // one-rune lookahead
prevRune int // last rune returned by GetRune
atEOF bool // already read EOF atEOF bool // already read EOF
maxWid int // max width of field, in runes maxWid int // max width of field, in runes
widPresent bool // width was specified widPresent bool // width was specified
...@@ -142,10 +151,14 @@ type ss struct { ...@@ -142,10 +151,14 @@ type ss struct {
func (s *ss) GetRune() (rune int, err os.Error) { func (s *ss) GetRune() (rune int, err os.Error) {
if s.peekRune >= 0 { if s.peekRune >= 0 {
rune = s.peekRune rune = s.peekRune
s.prevRune = rune
s.peekRune = -1 s.peekRune = -1
return return
} }
rune, _, err = s.rr.ReadRune() rune, _, err = s.rr.ReadRune()
if err == nil {
s.prevRune = rune
}
return return
} }
...@@ -161,11 +174,14 @@ func (s *ss) getRune() (rune int) { ...@@ -161,11 +174,14 @@ func (s *ss) getRune() (rune int) {
} }
if s.peekRune >= 0 { if s.peekRune >= 0 {
rune = s.peekRune rune = s.peekRune
s.prevRune = rune
s.peekRune = -1 s.peekRune = -1
return return
} }
rune, _, err := s.rr.ReadRune() rune, _, err := s.rr.ReadRune()
if err != nil { if err == nil {
s.prevRune = rune
} else if err != nil {
if err == os.EOF { if err == os.EOF {
s.atEOF = true s.atEOF = true
return EOF return EOF
...@@ -198,8 +214,12 @@ func (s *ss) mustGetRune() (rune int) { ...@@ -198,8 +214,12 @@ func (s *ss) mustGetRune() (rune int) {
} }
func (s *ss) UngetRune(rune int) { func (s *ss) UngetRune() {
s.peekRune = rune if u, ok := s.rr.(unreadRuner); ok {
u.UnreadRune()
} else {
s.peekRune = s.prevRune
}
} }
func (s *ss) error(err os.Error) { func (s *ss) error(err os.Error) {
...@@ -334,7 +354,7 @@ func (s *ss) skipSpace(stopAtNewline bool) { ...@@ -334,7 +354,7 @@ func (s *ss) skipSpace(stopAtNewline bool) {
return return
} }
if !unicode.IsSpace(rune) { if !unicode.IsSpace(rune) {
s.UngetRune(rune) s.UngetRune()
break break
} }
} }
...@@ -352,7 +372,7 @@ func (s *ss) token() string { ...@@ -352,7 +372,7 @@ func (s *ss) token() string {
break break
} }
if unicode.IsSpace(rune) { if unicode.IsSpace(rune) {
s.UngetRune(rune) s.UngetRune()
break break
} }
s.buf.WriteRune(rune) s.buf.WriteRune(rune)
...@@ -386,7 +406,7 @@ func (s *ss) accept(ok string) bool { ...@@ -386,7 +406,7 @@ func (s *ss) accept(ok string) bool {
} }
} }
if rune != EOF { if rune != EOF {
s.UngetRune(rune) s.UngetRune()
} }
return false return false
} }
...@@ -681,7 +701,7 @@ func (s *ss) hexByte() (b byte, ok bool) { ...@@ -681,7 +701,7 @@ func (s *ss) hexByte() (b byte, ok bool) {
return return
} }
if unicode.IsSpace(rune1) { if unicode.IsSpace(rune1) {
s.UngetRune(rune1) s.UngetRune()
return return
} }
rune2 := s.mustGetRune() rune2 := s.mustGetRune()
...@@ -892,7 +912,7 @@ func (s *ss) advance(format string) (i int) { ...@@ -892,7 +912,7 @@ func (s *ss) advance(format string) (i int) {
} }
inputc := s.mustGetRune() inputc := s.mustGetRune()
if fmtc != inputc { if fmtc != inputc {
s.UngetRune(inputc) s.UngetRune()
return -1 return -1
} }
i += w i += w
......
...@@ -5,6 +5,7 @@ ...@@ -5,6 +5,7 @@
package fmt_test package fmt_test
import ( import (
"bufio"
. "fmt" . "fmt"
"io" "io"
"os" "os"
...@@ -582,3 +583,24 @@ func TestEOF(t *testing.T) { ...@@ -582,3 +583,24 @@ func TestEOF(t *testing.T) {
t.Error("expected one EOF, got", ec.eofCount) t.Error("expected one EOF, got", ec.eofCount)
} }
} }
// TestUnreadRuneWithBufio checks that back-to-back Fscanf calls sharing a
// single bufio.Reader consume every rune of the input: the integer scan
// must not swallow the multi-byte rune that immediately follows the digits,
// so the subsequent string scan still sees it.
func TestUnreadRuneWithBufio(t *testing.T) {
	var (
		num  int
		text string
	)
	// No space separates the number from the text, which is the case in
	// which a scan that reads one rune too far would drop input.
	input := bufio.NewReader(strings.NewReader("123αb"))

	// First scan: the leading decimal digits.
	if count, scanErr := Fscanf(input, "%d", &num); !(count == 1 && scanErr == nil) {
		t.Errorf("reading int expected one item, no errors; got %d %q", count, scanErr)
	}
	if num != 123 {
		t.Errorf("expected 123; got %d", num)
	}

	// Second scan: whatever the first scan left behind in the same reader.
	if count, scanErr := Fscanf(input, "%s", &text); !(count == 1 && scanErr == nil) {
		t.Errorf("reading string expected one item, no errors; got %d %q", count, scanErr)
	}
	if text != "αb" {
		t.Errorf("expected αb; got %q", text)
	}
}
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment