Commit 7b9a6d8d authored by Robert Griesemer

go/scanner: strip carriage returns from comments

Also:
- cleaned up and simplified TestScan
- added tests for comments containing carriage returns

Fixes #3647.

R=rsc
CC=golang-dev
https://golang.org/cl/6225047
parent f7277dac
...@@ -157,11 +157,15 @@ func (s *Scanner) interpretLineComment(text []byte) { ...@@ -157,11 +157,15 @@ func (s *Scanner) interpretLineComment(text []byte) {
func (s *Scanner) scanComment() string { func (s *Scanner) scanComment() string {
// initial '/' already consumed; s.ch == '/' || s.ch == '*' // initial '/' already consumed; s.ch == '/' || s.ch == '*'
offs := s.offset - 1 // position of initial '/' offs := s.offset - 1 // position of initial '/'
hasCR := false
if s.ch == '/' { if s.ch == '/' {
//-style comment //-style comment
s.next() s.next()
for s.ch != '\n' && s.ch >= 0 { for s.ch != '\n' && s.ch >= 0 {
if s.ch == '\r' {
hasCR = true
}
s.next() s.next()
} }
if offs == s.lineOffset { if offs == s.lineOffset {
...@@ -175,6 +179,9 @@ func (s *Scanner) scanComment() string { ...@@ -175,6 +179,9 @@ func (s *Scanner) scanComment() string {
s.next() s.next()
for s.ch >= 0 { for s.ch >= 0 {
ch := s.ch ch := s.ch
if ch == '\r' {
hasCR = true
}
s.next() s.next()
if ch == '*' && s.ch == '/' { if ch == '*' && s.ch == '/' {
s.next() s.next()
...@@ -185,7 +192,12 @@ func (s *Scanner) scanComment() string { ...@@ -185,7 +192,12 @@ func (s *Scanner) scanComment() string {
s.error(offs, "comment not terminated") s.error(offs, "comment not terminated")
exit: exit:
return string(s.src[offs:s.offset]) lit := s.src[offs:s.offset]
if hasCR {
lit = stripCR(lit)
}
return string(lit)
} }
func (s *Scanner) findLineEnd() bool { func (s *Scanner) findLineEnd() bool {
...@@ -527,6 +539,8 @@ func (s *Scanner) switch4(tok0, tok1 token.Token, ch2 rune, tok2, tok3 token.Tok ...@@ -527,6 +539,8 @@ func (s *Scanner) switch4(tok0, tok1 token.Token, ch2 rune, tok2, tok3 token.Tok
// token.IMAG, token.CHAR, token.STRING) or token.COMMENT, the literal string // token.IMAG, token.CHAR, token.STRING) or token.COMMENT, the literal string
// has the corresponding value. // has the corresponding value.
// //
// If the returned token is a keyword, the literal string is the keyword.
//
// If the returned token is token.SEMICOLON, the corresponding // If the returned token is token.SEMICOLON, the corresponding
// literal string is ";" if the semicolon was present in the source, // literal string is ";" if the semicolon was present in the source,
// and "\n" if the semicolon was inserted because of a newline or // and "\n" if the semicolon was inserted because of a newline or
......
...@@ -43,6 +43,8 @@ var tokens = [...]elt{ ...@@ -43,6 +43,8 @@ var tokens = [...]elt{
// Special tokens // Special tokens
{token.COMMENT, "/* a comment */", special}, {token.COMMENT, "/* a comment */", special},
{token.COMMENT, "// a comment \n", special}, {token.COMMENT, "// a comment \n", special},
{token.COMMENT, "/*\r*/", special},
{token.COMMENT, "//\r\n", special},
// Identifiers and basic type literals // Identifiers and basic type literals
{token.IDENT, "foobar", literal}, {token.IDENT, "foobar", literal},
...@@ -214,8 +216,6 @@ func checkPos(t *testing.T, lit string, p token.Pos, expected token.Position) { ...@@ -214,8 +216,6 @@ func checkPos(t *testing.T, lit string, p token.Pos, expected token.Position) {
// Verify that calling Scan() provides the correct results. // Verify that calling Scan() provides the correct results.
func TestScan(t *testing.T) { func TestScan(t *testing.T) {
// make source
src_linecount := newlineCount(string(source))
whitespace_linecount := newlineCount(whitespace) whitespace_linecount := newlineCount(whitespace)
// error handler // error handler
...@@ -226,59 +226,81 @@ func TestScan(t *testing.T) { ...@@ -226,59 +226,81 @@ func TestScan(t *testing.T) {
// verify scan // verify scan
var s Scanner var s Scanner
s.Init(fset.AddFile("", fset.Base(), len(source)), source, eh, ScanComments|dontInsertSemis) s.Init(fset.AddFile("", fset.Base(), len(source)), source, eh, ScanComments|dontInsertSemis)
index := 0
// epos is the expected position // set up expected position
epos := token.Position{ epos := token.Position{
Filename: "", Filename: "",
Offset: 0, Offset: 0,
Line: 1, Line: 1,
Column: 1, Column: 1,
} }
index := 0
for { for {
pos, tok, lit := s.Scan() pos, tok, lit := s.Scan()
if lit == "" {
// no literal value for non-literal tokens // check position
lit = tok.String() if tok == token.EOF {
// correction for EOF
epos.Line = newlineCount(string(source))
epos.Column = 2
} }
checkPos(t, lit, pos, epos)
// check token
e := elt{token.EOF, "", special} e := elt{token.EOF, "", special}
if index < len(tokens) { if index < len(tokens) {
e = tokens[index] e = tokens[index]
index++
} }
if tok == token.EOF {
lit = "<EOF>"
epos.Line = src_linecount
epos.Column = 2
}
checkPos(t, lit, pos, epos)
if tok != e.tok { if tok != e.tok {
t.Errorf("bad token for %q: got %s, expected %s", lit, tok, e.tok) t.Errorf("bad token for %q: got %s, expected %s", lit, tok, e.tok)
} }
if e.tok.IsLiteral() {
// no CRs in raw string literals // check token class
elit := e.lit
if elit[0] == '`' {
elit = string(stripCR([]byte(elit)))
epos.Offset += len(e.lit) - len(lit) // correct position
}
if lit != elit {
t.Errorf("bad literal for %q: got %q, expected %q", lit, lit, elit)
}
}
if tokenclass(tok) != e.class { if tokenclass(tok) != e.class {
t.Errorf("bad class for %q: got %d, expected %d", lit, tokenclass(tok), e.class) t.Errorf("bad class for %q: got %d, expected %d", lit, tokenclass(tok), e.class)
} }
epos.Offset += len(lit) + len(whitespace)
epos.Line += newlineCount(lit) + whitespace_linecount // check literal
if tok == token.COMMENT && lit[1] == '/' { elit := ""
// correct for unaccounted '/n' in //-style comment switch e.tok {
epos.Offset++ case token.COMMENT:
epos.Line++ // no CRs in comments
elit = string(stripCR([]byte(e.lit)))
//-style comment literal doesn't contain newline
if elit[1] == '/' {
elit = elit[0 : len(elit)-1]
}
case token.IDENT:
elit = e.lit
case token.SEMICOLON:
elit = ";"
default:
if e.tok.IsLiteral() {
// no CRs in raw string literals
elit = e.lit
if elit[0] == '`' {
elit = string(stripCR([]byte(elit)))
}
} else if e.tok.IsKeyword() {
elit = e.lit
}
}
if lit != elit {
t.Errorf("bad literal for %q: got %q, expected %q", lit, lit, elit)
} }
index++
if tok == token.EOF { if tok == token.EOF {
break break
} }
// update position
epos.Offset += len(e.lit) + len(whitespace)
epos.Line += newlineCount(e.lit) + whitespace_linecount
} }
if s.ErrorCount != 0 { if s.ErrorCount != 0 {
t.Errorf("found %d errors", s.ErrorCount) t.Errorf("found %d errors", s.ErrorCount)
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment