Commit e64d3377 authored by Robert Griesemer's avatar Robert Griesemer

scanner: treat line comments like in Go

- don't consume '\n' as part of line comment
(otherwise grammars where '\n' are tokens won't
see them after a line comment)

- permit line comments to end in EOF

R=r
CC=golang-dev
https://golang.org/cl/4277089
parent e008757d
......@@ -331,7 +331,7 @@ func (s *Scanner) error(msg string) {
s.Error(s, msg)
return
}
fmt.Fprintf(os.Stderr, "%s: %s", s.Position, msg)
fmt.Fprintf(os.Stderr, "%s: %s\n", s.Position, msg)
}
......@@ -503,41 +503,32 @@ func (s *Scanner) scanChar() {
}
func (s *Scanner) scanLineComment() {
ch := s.next() // read character after "//"
for ch != '\n' {
if ch < 0 {
s.error("comment not terminated")
return
func (s *Scanner) scanComment(ch int) int {
// ch == '/' || ch == '*'
if ch == '/' {
// line comment
ch = s.next() // read character after "//"
for ch != '\n' && ch >= 0 {
ch = s.next()
}
ch = s.next()
return ch
}
}
func (s *Scanner) scanGeneralComment() {
ch := s.next() // read character after "/*"
// general comment
ch = s.next() // read character after "/*"
for {
if ch < 0 {
s.error("comment not terminated")
return
break
}
ch0 := ch
ch = s.next()
if ch0 == '*' && ch == '/' {
ch = s.next()
break
}
}
}
func (s *Scanner) scanComment(ch int) {
// ch == '/' || ch == '*'
if ch == '/' {
s.scanLineComment()
return
}
s.scanGeneralComment()
return ch
}
......@@ -619,13 +610,11 @@ redo:
if (ch == '/' || ch == '*') && s.Mode&ScanComments != 0 {
if s.Mode&SkipComments != 0 {
s.tokPos = -1 // don't collect token text
s.scanComment(ch)
ch = s.next()
ch = s.scanComment(ch)
goto redo
}
s.scanComment(ch)
ch = s.scanComment(ch)
tok = Comment
ch = s.next()
}
case '`':
if s.Mode&ScanRawStrings != 0 {
......
......@@ -77,15 +77,15 @@ type token struct {
var f100 = "ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff"
var tokenList = []token{
{Comment, "// line comments\n"},
{Comment, "//\n"},
{Comment, "////\n"},
{Comment, "// comment\n"},
{Comment, "// /* comment */\n"},
{Comment, "// // comment //\n"},
{Comment, "//" + f100 + "\n"},
{Comment, "// general comments\n"},
{Comment, "// line comments"},
{Comment, "//"},
{Comment, "////"},
{Comment, "// comment"},
{Comment, "// /* comment */"},
{Comment, "// // comment //"},
{Comment, "//" + f100},
{Comment, "// general comments"},
{Comment, "/**/"},
{Comment, "/***/"},
{Comment, "/* comment */"},
......@@ -94,7 +94,7 @@ var tokenList = []token{
{Comment, "/*\n comment\n*/"},
{Comment, "/*" + f100 + "*/"},
{Comment, "// identifiers\n"},
{Comment, "// identifiers"},
{Ident, "a"},
{Ident, "a0"},
{Ident, "foobar"},
......@@ -116,21 +116,21 @@ var tokenList = []token{
{Ident, "bar9876"},
{Ident, f100},
{Comment, "// decimal ints\n"},
{Comment, "// decimal ints"},
{Int, "0"},
{Int, "1"},
{Int, "9"},
{Int, "42"},
{Int, "1234567890"},
{Comment, "// octal ints\n"},
{Comment, "// octal ints"},
{Int, "00"},
{Int, "01"},
{Int, "07"},
{Int, "042"},
{Int, "01234567"},
{Comment, "// hexadecimal ints\n"},
{Comment, "// hexadecimal ints"},
{Int, "0x0"},
{Int, "0x1"},
{Int, "0xf"},
......@@ -144,7 +144,7 @@ var tokenList = []token{
{Int, "0X123456789abcDEF"},
{Int, "0X" + f100},
{Comment, "// floats\n"},
{Comment, "// floats"},
{Float, "0."},
{Float, "1."},
{Float, "42."},
......@@ -174,7 +174,7 @@ var tokenList = []token{
{Float, "42E+10"},
{Float, "01234567890E-10"},
{Comment, "// chars\n"},
{Comment, "// chars"},
{Char, `' '`},
{Char, `'a'`},
{Char, `'本'`},
......@@ -195,7 +195,7 @@ var tokenList = []token{
{Char, `'\U00000000'`},
{Char, `'\U0000ffAB'`},
{Comment, "// strings\n"},
{Comment, "// strings"},
{String, `" "`},
{String, `"a"`},
{String, `"本"`},
......@@ -217,13 +217,13 @@ var tokenList = []token{
{String, `"\U0000ffAB"`},
{String, `"` + f100 + `"`},
{Comment, "// raw strings\n"},
{Comment, "// raw strings"},
{String, "``"},
{String, "`\\`"},
{String, "`" + "\n\n/* foobar */\n\n" + "`"},
{String, "`" + f100 + "`"},
{Comment, "// individual characters\n"},
{Comment, "// individual characters"},
// NUL character is not allowed
{'\x01', "\x01"},
{' ' - 1, string(' ' - 1)},
......@@ -276,7 +276,7 @@ func countNewlines(s string) int {
func testScan(t *testing.T, mode uint) {
s := new(Scanner).Init(makeSource(" \t%s\t\n\r"))
s := new(Scanner).Init(makeSource(" \t%s\n"))
s.Mode = mode
tok := s.Scan()
line := 1
......@@ -287,7 +287,7 @@ func testScan(t *testing.T, mode uint) {
}
line += countNewlines(k.text) + 1 // each token is on a new line
}
checkTok(t, s, line, tok, -1, "")
checkTok(t, s, line, tok, EOF, "")
}
......@@ -317,6 +317,10 @@ func TestPosition(t *testing.T) {
pos.Line += countNewlines(k.text) + 1 // each token is on a new line
s.Scan()
}
// make sure there were no token-internal errors reported by scanner
if s.ErrorCount != 0 {
t.Errorf("%d errors", s.ErrorCount)
}
}
......@@ -336,6 +340,9 @@ func TestScanZeroMode(t *testing.T) {
if tok != EOF {
t.Fatalf("tok = %s, want EOF", TokenString(tok))
}
if s.ErrorCount != 0 {
t.Errorf("%d errors", s.ErrorCount)
}
}
......@@ -350,6 +357,9 @@ func testScanSelectedMode(t *testing.T, mode uint, class int) {
}
tok = s.Scan()
}
if s.ErrorCount != 0 {
t.Errorf("%d errors", s.ErrorCount)
}
}
......@@ -367,7 +377,7 @@ func TestScanSelectedMask(t *testing.T) {
func TestScanNext(t *testing.T) {
s := new(Scanner).Init(bytes.NewBufferString("if a == bcd /* comment */ {\n\ta += c\n}"))
s := new(Scanner).Init(bytes.NewBufferString("if a == bcd /* comment */ {\n\ta += c\n} // line comment ending in eof"))
checkTok(t, s, 1, s.Scan(), Ident, "if")
checkTok(t, s, 1, s.Scan(), Ident, "a")
checkTok(t, s, 1, s.Scan(), '=', "=")
......@@ -382,6 +392,9 @@ func TestScanNext(t *testing.T) {
checkTok(t, s, 2, s.Scan(), Ident, "c")
checkTok(t, s, 3, s.Scan(), '}', "}")
checkTok(t, s, 3, s.Scan(), -1, "")
if s.ErrorCount != 0 {
t.Errorf("%d errors", s.ErrorCount)
}
}
......@@ -441,7 +454,6 @@ func TestError(t *testing.T) {
testError(t, `"\'"`, "illegal char escape", String)
testError(t, `"abc`, "literal not terminated", String)
testError(t, "`abc", "literal not terminated", String)
testError(t, `//`, "comment not terminated", EOF)
testError(t, `/*/`, "comment not terminated", EOF)
testError(t, `"abc`+"\x00"+`def"`, "illegal character NUL", String)
testError(t, `"abc`+"\xff"+`def"`, "illegal UTF-8 encoding", String)
......@@ -493,6 +505,9 @@ func TestPos(t *testing.T) {
for i := 10; i > 0; i-- {
checkScanPos(t, s, 1, 2, 1, EOF)
}
if s.ErrorCount != 0 {
t.Errorf("%d errors", s.ErrorCount)
}
// corner case: source with only a single character
s = new(Scanner).Init(bytes.NewBufferString("本"))
......@@ -502,6 +517,9 @@ func TestPos(t *testing.T) {
for i := 10; i > 0; i-- {
checkScanPos(t, s, 3, 1, 2, EOF)
}
if s.ErrorCount != 0 {
t.Errorf("%d errors", s.ErrorCount)
}
// positions after calling Next
s = new(Scanner).Init(bytes.NewBufferString(" foo६४ \n\n本語\n"))
......@@ -524,6 +542,9 @@ func TestPos(t *testing.T) {
for i := 10; i > 0; i-- {
checkScanPos(t, s, 22, 4, 1, EOF)
}
if s.ErrorCount != 0 {
t.Errorf("%d errors", s.ErrorCount)
}
// positions after calling Scan
s = new(Scanner).Init(bytes.NewBufferString("abc\n本語\n\nx"))
......@@ -543,4 +564,7 @@ func TestPos(t *testing.T) {
for i := 10; i > 0; i-- {
checkScanPos(t, s, 13, 4, 2, EOF)
}
if s.ErrorCount != 0 {
t.Errorf("%d errors", s.ErrorCount)
}
}
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment