Commit 855986d5 authored by Robert Griesemer

go/scanner: comply with spec changes (do not allow NUL chars)

and complain about illegal UTF-8 code sequences

R=rsc
CC=golang-dev
https://golang.org/cl/209043
parent 401062f7
...@@ -48,12 +48,17 @@ func (S *Scanner) next() { ...@@ -48,12 +48,17 @@ func (S *Scanner) next() {
S.pos.Column++ S.pos.Column++
r, w := int(S.src[S.offset]), 1 r, w := int(S.src[S.offset]), 1
switch { switch {
case r == 0:
S.error(S.pos, "illegal character NUL")
case r == '\n': case r == '\n':
S.pos.Line++ S.pos.Line++
S.pos.Column = 0 S.pos.Column = 0
case r >= 0x80: case r >= 0x80:
// not ASCII // not ASCII
r, w = utf8.DecodeRune(S.src[S.offset:]) r, w = utf8.DecodeRune(S.src[S.offset:])
if r == utf8.RuneError && w == 1 {
S.error(S.pos, "illegal UTF-8 encoding")
}
} }
S.offset += w S.offset += w
S.ch = r S.ch = r
......
...@@ -551,7 +551,7 @@ func (h *errorCollector) Error(pos token.Position, msg string) { ...@@ -551,7 +551,7 @@ func (h *errorCollector) Error(pos token.Position, msg string) {
} }
func checkError(t *testing.T, src string, tok token.Token, err string) { func checkError(t *testing.T, src string, tok token.Token, pos int, err string) {
var s Scanner var s Scanner
var h errorCollector var h errorCollector
s.Init("", strings.Bytes(src), &h, ScanComments) s.Init("", strings.Bytes(src), &h, ScanComments)
...@@ -573,8 +573,8 @@ func checkError(t *testing.T, src string, tok token.Token, err string) { ...@@ -573,8 +573,8 @@ func checkError(t *testing.T, src string, tok token.Token, err string) {
if h.msg != err { if h.msg != err {
t.Errorf("%q: got msg %q, expected %q", src, h.msg, err) t.Errorf("%q: got msg %q, expected %q", src, h.msg, err)
} }
if h.pos.Offset != 0 { if h.pos.Offset != pos {
t.Errorf("%q: got offset %d, expected 0", src, h.pos.Offset) t.Errorf("%q: got offset %d, expected %d", src, h.pos.Offset, pos)
} }
} }
...@@ -582,27 +582,30 @@ func checkError(t *testing.T, src string, tok token.Token, err string) { ...@@ -582,27 +582,30 @@ func checkError(t *testing.T, src string, tok token.Token, err string) {
type srcerr struct { type srcerr struct {
src string src string
tok token.Token tok token.Token
pos int
err string err string
} }
var errors = []srcerr{ var errors = []srcerr{
srcerr{"\"\"", token.STRING, ""}, srcerr{"\"\"", token.STRING, 0, ""},
srcerr{"\"", token.STRING, "string not terminated"}, srcerr{"\"", token.STRING, 0, "string not terminated"},
srcerr{"/**/", token.COMMENT, ""}, srcerr{"/**/", token.COMMENT, 0, ""},
srcerr{"/*", token.COMMENT, "comment not terminated"}, srcerr{"/*", token.COMMENT, 0, "comment not terminated"},
srcerr{"//\n", token.COMMENT, ""}, srcerr{"//\n", token.COMMENT, 0, ""},
srcerr{"//", token.COMMENT, "comment not terminated"}, srcerr{"//", token.COMMENT, 0, "comment not terminated"},
srcerr{"077", token.INT, ""}, srcerr{"077", token.INT, 0, ""},
srcerr{"078.", token.FLOAT, ""}, srcerr{"078.", token.FLOAT, 0, ""},
srcerr{"07801234567.", token.FLOAT, ""}, srcerr{"07801234567.", token.FLOAT, 0, ""},
srcerr{"078e0", token.FLOAT, ""}, srcerr{"078e0", token.FLOAT, 0, ""},
srcerr{"078", token.INT, "illegal octal number"}, srcerr{"078", token.INT, 0, "illegal octal number"},
srcerr{"07800000009", token.INT, "illegal octal number"}, srcerr{"07800000009", token.INT, 0, "illegal octal number"},
srcerr{"\"abc\x00def\"", token.STRING, 4, "illegal character NUL"},
srcerr{"\"abc\x80def\"", token.STRING, 4, "illegal UTF-8 encoding"},
} }
func TestScanErrors(t *testing.T) { func TestScanErrors(t *testing.T) {
for _, e := range errors { for _, e := range errors {
checkError(t, e.src, e.tok, e.err) checkError(t, e.src, e.tok, e.pos, e.err)
} }
} }
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment