From a70caf4463f1b41709a73a56724119dd23cccdda Mon Sep 17 00:00:00 2001 From: Robert Griesemer <gri@golang.org> Date: Thu, 10 Dec 2009 15:31:02 -0800 Subject: [PATCH] implemented InsertSemis mode for go/scanner R=rsc https://golang.org/cl/175047 --- src/pkg/go/scanner/scanner.go | 96 ++++++++++++++--- src/pkg/go/scanner/scanner_test.go | 160 ++++++++++++++++++++++++++++- 2 files changed, 236 insertions(+), 20 deletions(-) diff --git a/src/pkg/go/scanner/scanner.go b/src/pkg/go/scanner/scanner.go index 177fe0f19a..386cdb0e9f 100644 --- a/src/pkg/go/scanner/scanner.go +++ b/src/pkg/go/scanner/scanner.go @@ -29,9 +29,11 @@ type Scanner struct { mode uint; // scanning mode // scanning state - pos token.Position; // previous reading position (position before ch) - offset int; // current reading offset (position after ch) - ch int; // one char look-ahead + pos token.Position; // previous reading position (position before ch) + offset int; // current reading offset (position after ch) + ch int; // one char look-ahead + insertSemi bool; // insert a semicolon before next newline + pendingComment token.Position; // valid if pendingComment.Line > 0 // public state - ok to modify ErrorCount int; // number of errors encountered @@ -69,6 +71,7 @@ func (S *Scanner) next() { const ( ScanComments = 1 << iota; // return comments as COMMENT tokens AllowIllegalChars; // do not report an error for illegal chars + InsertSemis; // automatically insert semicolons ) @@ -420,6 +423,8 @@ func (S *Scanner) switch4(tok0, tok1 token.Token, ch2 int, tok2, tok3 token.Toke } +var semicolon = []byte{';'} + // Scan scans the next token and returns the token position pos, // the token tok, and the literal text lit corresponding to the // token. The source end is indicated by token.EOF. @@ -432,40 +437,63 @@ func (S *Scanner) switch4(tok0, tok1 token.Token, ch2 int, tok2, tok3 token.Toke // of the error handler, if there was one installed. // func (S *Scanner) Scan() (pos token.Position, tok token.Token, lit []byte) { -scan_again: + if S.pendingComment.Line > 0 { + // "consume" pending comment + S.pos = S.pendingComment; + S.offset = S.pos.Offset + 1; + S.ch = '/'; + S.pendingComment.Line = 0; + } + +scanAgain: // skip white space - for S.ch == ' ' || S.ch == '\t' || S.ch == '\n' || S.ch == '\r' { + for S.ch == ' ' || S.ch == '\t' || S.ch == '\n' && !S.insertSemi || S.ch == '\r' { S.next() } // current token start + insertSemi := false; pos, tok = S.pos, token.ILLEGAL; // determine token value switch ch := S.ch; { case isLetter(ch): - tok = S.scanIdentifier() + tok = S.scanIdentifier(); + switch tok { + case token.IDENT, token.BREAK, token.CONTINUE, token.FALLTHROUGH, token.RETURN: + insertSemi = true + default: + insertSemi = false + } case digitVal(ch) < 10: - tok = S.scanNumber(false) + insertSemi = true; + tok = S.scanNumber(false); default: S.next(); // always make progress switch ch { case -1: tok = token.EOF + case '\n': + S.insertSemi = false; + return pos, token.SEMICOLON, semicolon; case '"': + insertSemi = true; tok = token.STRING; S.scanString(pos); case '\'': + insertSemi = true; tok = token.CHAR; S.scanChar(pos); case '`': + insertSemi = true; tok = token.STRING; S.scanRawString(pos); case ':': tok = S.switch2(token.COLON, token.DEFINE) case '.': if digitVal(S.ch) < 10 { - tok = S.scanNumber(true) + insertSemi = true; + tok = S.scanNumber(true); } else if S.ch == '.' { S.next(); if S.ch == '.' { @@ -482,27 +510,57 @@ scan_again: case '(': tok = token.LPAREN case ')': - tok = token.RPAREN + insertSemi = true; + tok = token.RPAREN; case '[': tok = token.LBRACK case ']': - tok = token.RBRACK + insertSemi = true; + tok = token.RBRACK; case '{': tok = token.LBRACE case '}': - tok = token.RBRACE + insertSemi = true; + tok = token.RBRACE; case '+': - tok = S.switch3(token.ADD, token.ADD_ASSIGN, '+', token.INC) + tok = S.switch3(token.ADD, token.ADD_ASSIGN, '+', token.INC); + if tok == token.INC { + insertSemi = true + } case '-': - tok = S.switch3(token.SUB, token.SUB_ASSIGN, '-', token.DEC) + tok = S.switch3(token.SUB, token.SUB_ASSIGN, '-', token.DEC); + if tok == token.DEC { + insertSemi = true + } case '*': tok = S.switch2(token.MUL, token.MUL_ASSIGN) case '/': if S.ch == '/' || S.ch == '*' { - S.scanComment(pos); - tok = token.COMMENT; - if S.mode&ScanComments == 0 { - goto scan_again + // comment + newline := false; + if S.insertSemi { + if S.ch == '/' { + // a line comment acts like a newline + newline = true + } else { + // a general comment may act like a newline + S.scanComment(pos); + newline = pos.Line < S.pos.Line; + } + } else { + S.scanComment(pos) + } + if newline { + // insert a semicolon and retain pending comment + S.insertSemi = false; + S.pendingComment = pos; + return pos, token.SEMICOLON, semicolon; + } else if S.mode&ScanComments == 0 { + // skip comment + goto scanAgain + } else { + insertSemi = S.insertSemi; // preserve insertSemi info + tok = token.COMMENT; } } else { tok = S.switch2(token.QUO, token.QUO_ASSIGN) @@ -537,9 +595,13 @@ scan_again: if S.mode&AllowIllegalChars == 0 { S.error(pos, "illegal character "+charString(ch)) } + insertSemi = S.insertSemi; // preserve insertSemi info } } + if S.mode&InsertSemis != 0 { + S.insertSemi = insertSemi + } return pos, tok, S.src[pos.Offset:S.pos.Offset]; } diff --git a/src/pkg/go/scanner/scanner_test.go b/src/pkg/go/scanner/scanner_test.go index c133289268..ddaaab27fd 100644 --- a/src/pkg/go/scanner/scanner_test.go +++ b/src/pkg/go/scanner/scanner_test.go @@ -225,13 +225,13 @@ func TestScan(t *testing.T) { } checkPos(t, lit, pos, epos); if tok != e.tok { - t.Errorf("bad token for %s: got %s, expected %s", lit, tok.String(), e.tok.String()) + t.Errorf("bad token for %q: got %s, expected %s", lit, tok.String(), e.tok.String()) } if e.tok.IsLiteral() && lit != e.lit { - t.Errorf("bad literal for %s: got %s, expected %s", lit, lit, e.lit) + t.Errorf("bad literal for %q: got %q, expected %q", lit, lit, e.lit) } if tokenclass(tok) != e.class { - t.Errorf("bad class for %s: got %d, expected %d", lit, tokenclass(tok), e.class) + t.Errorf("bad class for %q: got %d, expected %d", lit, tokenclass(tok), e.class) } epos.Offset += len(lit) + len(whitespace); epos.Line += NewlineCount(lit) + whitespace_linecount; @@ -249,6 +249,160 @@ func TestScan(t *testing.T) { } +func getTok(_ token.Position, tok token.Token, _ []byte) token.Token { + return tok +} + + +func checkSemi(t *testing.T, line string, mode uint) { + var S Scanner; + S.Init("TestSemis", strings.Bytes(line), nil, mode); + pos, tok, lit := S.Scan(); + for tok != token.EOF { + if tok == token.ILLEGAL { + // next token must be a semicolon + offs := pos.Offset + 1; + pos, tok, lit = S.Scan(); + if tok == token.SEMICOLON { + if pos.Offset != offs { + t.Errorf("bad offset for %q: got %d, expected %d", line, pos.Offset, offs) + } + if string(lit) != ";" { + t.Errorf(`bad literal for %q: got %q, expected ";"`, line, lit) + } + } else { + t.Errorf("bad token for %q: got %s, expected ;", line, tok.String()) + } + } else if tok == token.SEMICOLON { + t.Errorf("bad token for %q: got ;, expected no ;", line) + } + pos, tok, lit = S.Scan(); + } +} + + +var lines = []string{ + // the $ character indicates where a semicolon is expected + "", + "foo$\n", + "123$\n", + "1.2$\n", + "'x'$\n", + `"x"` + "$\n", + "`x`$\n", + + "+\n", + "-\n", + "*\n", + "/\n", + "%\n", + + "&\n", + "|\n", + "^\n", + "<<\n", + ">>\n", + "&^\n", + + "+=\n", + "-=\n", + "*=\n", + "/=\n", + "%=\n", + + "&=\n", + "|=\n", + "^=\n", + "<<=\n", + ">>=\n", + "&^=\n", + + "&&\n", + "||\n", + "<-\n", + "++$\n", + "--$\n", + + "==\n", + "<\n", + ">\n", + "=\n", + "!\n", + + "!=\n", + "<=\n", + ">=\n", + ":=\n", + "...\n", + + "(\n", + "[\n", + "{\n", + ",\n", + ".\n", + + ")$\n", + "]$\n", + "}$\n", + "$;\n", + ":\n", + + "break$\n", + "case\n", + "chan\n", + "const\n", + "continue$\n", + + "default\n", + "defer\n", + "else\n", + "fallthrough$\n", + "for\n", + + "func\n", + "go\n", + "goto\n", + "if\n", + "import\n", + + "interface\n", + "map\n", + "package\n", + "range\n", + "return$\n", + + "select\n", + "struct\n", + "switch\n", + "type\n", + "var\n", + + "foo$//comment\n", + "foo$/*comment*/\n", + "foo$/*\n*/", + "foo $// comment\n", + "foo $/*comment*/\n", + "foo $/*\n*/", + + // TODO(gri): These need to insert the semicolon *before* the + // first comment which requires arbitrary far look- + // ahead. Only relevant for gofmt placement of + // comments. + "foo /*comment*/ $\n", + "foo /*0*/ /*1*/ $/*2*/\n", +} + + +func TestSemis(t *testing.T) { + for _, line := range lines { + checkSemi(t, line, AllowIllegalChars|InsertSemis) + } + for _, line := range lines { + checkSemi(t, line, AllowIllegalChars|InsertSemis|ScanComments) + } +} + + type seg struct { srcline string; // a line of source text filename string; // filename for current token -- 2.30.9