Commit d169268a authored by Robert Griesemer

scanner cleanup - getting it ready to be used as a library

- removed unneeded code that accumulated over time
- changed src from string to []byte (perhaps should be io.Read
but that has some other disadvantages)
- simplified interface

R=r
OCL=25615
CL=25615
parent da34bea9
...@@ -30,13 +30,12 @@ type Flags struct { ...@@ -30,13 +30,12 @@ type Flags struct {
Sixg bool; Sixg bool;
Deps bool; Deps bool;
Columns bool; Columns bool;
Testmode bool;
} }
type errorHandler struct { type errorHandler struct {
filename string; filename string;
src string; src []byte;
nerrors int; nerrors int;
nwarnings int; nwarnings int;
errpos int; errpos int;
...@@ -44,7 +43,7 @@ type errorHandler struct { ...@@ -44,7 +43,7 @@ type errorHandler struct {
} }
func (h *errorHandler) Init(filename, src string, columns bool) { func (h *errorHandler) Init(filename string, src []byte, columns bool) {
h.filename = filename; h.filename = filename;
h.src = src; h.src = src;
h.nerrors = 0; h.nerrors = 0;
...@@ -71,7 +70,7 @@ func (h *errorHandler) LineCol(pos int) (line, col int) { ...@@ -71,7 +70,7 @@ func (h *errorHandler) LineCol(pos int) (line, col int) {
} }
} }
return line, utf8.RuneCountInString(src, lpos, pos - lpos); return line, utf8.RuneCount(src[lpos : pos]);
} }
...@@ -128,10 +127,10 @@ func Compile(src_file string, flags *Flags) (*AST.Program, int) { ...@@ -128,10 +127,10 @@ func Compile(src_file string, flags *Flags) (*AST.Program, int) {
err.Init(src_file, src, flags.Columns); err.Init(src_file, src, flags.Columns);
var scanner Scanner.Scanner; var scanner Scanner.Scanner;
scanner.Init(&err, src, true, flags.Testmode); scanner.Init(src, &err, true);
var parser Parser.Parser; var parser Parser.Parser;
parser.Open(flags.Verbose, flags.Sixg, flags.Deps, &scanner); parser.Open(&scanner, err, flags.Verbose, flags.Sixg, flags.Deps);
prog := parser.ParseProgram(); prog := parser.ParseProgram();
......
...@@ -14,13 +14,20 @@ import ( ...@@ -14,13 +14,20 @@ import (
) )
type ErrorHandler interface {
Error(pos int, msg string);
Warning(pos int, msg string);
}
type Parser struct { type Parser struct {
scanner *Scanner.Scanner;
err ErrorHandler;
// Tracing/debugging // Tracing/debugging
trace, sixg, deps bool; trace, sixg, deps bool;
indent uint; indent uint;
// Scanner
scanner *Scanner.Scanner;
comments *vector.Vector; comments *vector.Vector;
// Scanner.Token // Scanner.Token
...@@ -90,7 +97,10 @@ func un/*trace*/(P *Parser) { ...@@ -90,7 +97,10 @@ func un/*trace*/(P *Parser) {
func (P *Parser) next0() { func (P *Parser) next0() {
P.pos, P.tok, P.val = P.scanner.Scan(); // TODO make P.val a []byte
var val []byte;
P.pos, P.tok, val = P.scanner.Scan();
P.val = string(val);
P.opt_semi = false; P.opt_semi = false;
if P.trace { if P.trace {
...@@ -118,13 +128,15 @@ func (P *Parser) next() { ...@@ -118,13 +128,15 @@ func (P *Parser) next() {
} }
func (P *Parser) Open(trace, sixg, deps bool, scanner *Scanner.Scanner) { func (P *Parser) Open(scanner *Scanner.Scanner, err ErrorHandler, trace, sixg, deps bool) {
P.scanner = scanner;
P.err = err;
P.trace = trace; P.trace = trace;
P.sixg = sixg; P.sixg = sixg;
P.deps = deps; P.deps = deps;
P.indent = 0; P.indent = 0;
P.scanner = scanner;
P.comments = vector.New(0); P.comments = vector.New(0);
P.next(); P.next();
...@@ -133,7 +145,7 @@ func (P *Parser) Open(trace, sixg, deps bool, scanner *Scanner.Scanner) { ...@@ -133,7 +145,7 @@ func (P *Parser) Open(trace, sixg, deps bool, scanner *Scanner.Scanner) {
func (P *Parser) error(pos int, msg string) { func (P *Parser) error(pos int, msg string) {
P.scanner.Error(pos, msg); P.err.Error(pos, msg);
} }
......
...@@ -37,10 +37,10 @@ const ( ...@@ -37,10 +37,10 @@ const (
Obj_file_ext = ".7"; Obj_file_ext = ".7";
) )
func readfile(filename string) (string, *OS.Error) { func readfile(filename string) ([]byte, *OS.Error) {
fd, err := OS.Open(filename, OS.O_RDONLY, 0); fd, err := OS.Open(filename, OS.O_RDONLY, 0);
if err != nil { if err != nil {
return "", err; return []byte(), err;
} }
var buf [1<<20]byte; var buf [1<<20]byte;
n, err1 := IO.Readn(fd, buf); n, err1 := IO.Readn(fd, buf);
...@@ -48,7 +48,7 @@ func readfile(filename string) (string, *OS.Error) { ...@@ -48,7 +48,7 @@ func readfile(filename string) (string, *OS.Error) {
if err1 == IO.ErrEOF { if err1 == IO.ErrEOF {
err1 = nil; err1 = nil;
} }
return string(buf[0:n]), err1; return buf[0:n], err1;
} }
func writefile(name, data string) *OS.Error { func writefile(name, data string) *OS.Error {
...@@ -61,17 +61,17 @@ func writefile(name, data string) *OS.Error { ...@@ -61,17 +61,17 @@ func writefile(name, data string) *OS.Error {
return err1; return err1;
} }
func ReadObjectFile(filename string) (string, bool) { func ReadObjectFile(filename string) ([]byte, bool) {
data, err := readfile(filename + Obj_file_ext); data, err := readfile(filename + Obj_file_ext);
magic := MAGIC_obj_file; // TODO remove once len(constant) works magic := MAGIC_obj_file; // TODO remove once len(constant) works
if err == nil && len(data) >= len(magic) && data[0 : len(magic)] == magic { if err == nil && len(data) >= len(magic) && string(data[0 : len(magic)]) == magic {
return data, true; return data, true;
} }
return "", false; return []byte(), false;
} }
func ReadSourceFile(name string) (string, bool) { func ReadSourceFile(name string) ([]byte, bool) {
name = Utils.TrimExt(name, Src_file_ext) + Src_file_ext; name = Utils.TrimExt(name, Src_file_ext) + Src_file_ext;
data, err := readfile(name); data, err := readfile(name);
return data, err == nil; return data, err == nil;
......
...@@ -24,7 +24,6 @@ func init() { ...@@ -24,7 +24,6 @@ func init() {
flag.BoolVar(&flags.Sixg, "6g", true, "6g compatibility mode"); flag.BoolVar(&flags.Sixg, "6g", true, "6g compatibility mode");
flag.BoolVar(&flags.Deps, "d", false, "print dependency information only"); flag.BoolVar(&flags.Deps, "d", false, "print dependency information only");
flag.BoolVar(&flags.Columns, "columns", Platform.USER == "gri", "print column info in error messages"); flag.BoolVar(&flags.Columns, "columns", Platform.USER == "gri", "print column info in error messages");
flag.BoolVar(&flags.Testmode, "t", false, "test mode: interprets /* ERROR */ and /* SYNC */ comments");
} }
...@@ -52,12 +51,9 @@ func main() { ...@@ -52,12 +51,9 @@ func main() {
} else { } else {
prog, nerrors := Compilation.Compile(src_file, &flags); prog, nerrors := Compilation.Compile(src_file, &flags);
if nerrors > 0 { if nerrors > 0 {
if flags.Testmode {
return; // TODO we shouldn't need this
}
sys.Exit(1); sys.Exit(1);
} }
if !*silent && !flags.Testmode { if !*silent {
Printer.Print(os.Stdout, *html, prog); Printer.Print(os.Stdout, *html, prog);
} }
} }
......
...@@ -2,23 +2,22 @@ ...@@ -2,23 +2,22 @@
// Use of this source code is governed by a BSD-style // Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file. // license that can be found in the LICENSE file.
package Scanner package scanner
import ( import (
"utf8"; "utf8";
"unicode"; "unicode";
"utils"; "strconv";
) )
const ( const (
ILLEGAL = iota; ILLEGAL = iota;
EOF;
IDENT;
INT; INT;
FLOAT; FLOAT;
STRING; STRING;
EOF; IDENT;
COMMENT; COMMENT;
ADD; ADD;
...@@ -52,28 +51,28 @@ const ( ...@@ -52,28 +51,28 @@ const (
DEC; DEC;
EQL; EQL;
NEQ;
LSS; LSS;
LEQ;
GTR; GTR;
GEQ;
ASSIGN; ASSIGN;
DEFINE;
NOT; NOT;
NEQ;
LEQ;
GEQ;
DEFINE;
ELLIPSIS; ELLIPSIS;
LPAREN; LPAREN;
RPAREN;
LBRACK; LBRACK;
RBRACK;
LBRACE; LBRACE;
RBRACE;
COMMA; COMMA;
PERIOD;
RPAREN;
RBRACK;
RBRACE;
SEMICOLON; SEMICOLON;
COLON; COLON;
PERIOD;
// keywords // keywords
keywords_beg; keywords_beg;
...@@ -113,13 +112,12 @@ const ( ...@@ -113,13 +112,12 @@ const (
func TokenString(tok int) string { func TokenString(tok int) string {
switch tok { switch tok {
case ILLEGAL: return "ILLEGAL"; case ILLEGAL: return "ILLEGAL";
case EOF: return "EOF";
case IDENT: return "IDENT";
case INT: return "INT"; case INT: return "INT";
case FLOAT: return "FLOAT"; case FLOAT: return "FLOAT";
case STRING: return "STRING"; case STRING: return "STRING";
case EOF: return "EOF"; case IDENT: return "IDENT";
case COMMENT: return "COMMENT"; case COMMENT: return "COMMENT";
case ADD: return "+"; case ADD: return "+";
...@@ -153,28 +151,28 @@ func TokenString(tok int) string { ...@@ -153,28 +151,28 @@ func TokenString(tok int) string {
case DEC: return "--"; case DEC: return "--";
case EQL: return "=="; case EQL: return "==";
case NEQ: return "!=";
case LSS: return "<"; case LSS: return "<";
case LEQ: return "<=";
case GTR: return ">"; case GTR: return ">";
case GEQ: return ">=";
case ASSIGN: return "="; case ASSIGN: return "=";
case DEFINE: return ":=";
case NOT: return "!"; case NOT: return "!";
case NEQ: return "!=";
case LEQ: return "<=";
case GEQ: return ">=";
case DEFINE: return ":=";
case ELLIPSIS: return "..."; case ELLIPSIS: return "...";
case LPAREN: return "("; case LPAREN: return "(";
case RPAREN: return ")";
case LBRACK: return "["; case LBRACK: return "[";
case RBRACK: return "]";
case LBRACE: return "{"; case LBRACE: return "{";
case RBRACE: return "}";
case COMMA: return ","; case COMMA: return ",";
case PERIOD: return ".";
case RPAREN: return ")";
case RBRACK: return "]";
case RBRACE: return "}";
case SEMICOLON: return ";"; case SEMICOLON: return ";";
case COLON: return ":"; case COLON: return ":";
case PERIOD: return ".";
case BREAK: return "break"; case BREAK: return "break";
case CASE: return "case"; case CASE: return "case";
...@@ -207,7 +205,7 @@ func TokenString(tok int) string { ...@@ -207,7 +205,7 @@ func TokenString(tok int) string {
case VAR: return "var"; case VAR: return "var";
} }
return "token(" + Utils.IntToString(tok, 10) + ")"; return "token(" + strconv.Itoa(tok) + ")";
} }
...@@ -258,6 +256,7 @@ func is_letter(ch int) bool { ...@@ -258,6 +256,7 @@ func is_letter(ch int) bool {
func digit_val(ch int) int { func digit_val(ch int) int {
// TODO: spec permits other Unicode digits as well
if '0' <= ch && ch <= '9' { if '0' <= ch && ch <= '9' {
return ch - '0'; return ch - '0';
} }
...@@ -273,25 +272,19 @@ func digit_val(ch int) int { ...@@ -273,25 +272,19 @@ func digit_val(ch int) int {
type ErrorHandler interface { type ErrorHandler interface {
Error(pos int, msg string); Error(pos int, msg string);
Warning(pos int, msg string);
} }
type Scanner struct { type Scanner struct {
// setup // setup
src []byte; // source
err ErrorHandler; err ErrorHandler;
src string; // source
scan_comments bool; scan_comments bool;
// scanning // scanning
pos int; // current reading position pos int; // current reading position
ch int; // one char look-ahead ch int; // one char look-ahead
chpos int; // position of ch chpos int; // position of ch
linepos int; // position of beginning of line
// testmode
testmode bool;
testpos int;
} }
...@@ -303,7 +296,7 @@ func (S *Scanner) next() { ...@@ -303,7 +296,7 @@ func (S *Scanner) next() {
r, w := int(S.src[S.pos]), 1; r, w := int(S.src[S.pos]), 1;
if r >= 0x80 { if r >= 0x80 {
// not ascii // not ascii
r, w = utf8.DecodeRuneInString(S.src, S.pos); r, w = utf8.DecodeRune(S.src[S.pos : len(S.src)]);
} }
S.ch = r; S.ch = r;
S.chpos = S.pos; S.chpos = S.pos;
...@@ -315,38 +308,16 @@ func (S *Scanner) next() { ...@@ -315,38 +308,16 @@ func (S *Scanner) next() {
} }
func (S *Scanner) Error(pos int, msg string) { func (S *Scanner) error(pos int, msg string) {
// check for expected errors (test mode)
if S.testpos < 0 || pos == S.testpos {
// test mode:
// S.testpos < 0: // follow-up errors are expected and ignored
// S.testpos == 0: // an error is expected at S.testpos and ignored
S.testpos = -1;
return;
}
S.err.Error(pos, msg); S.err.Error(pos, msg);
} }
func (S *Scanner) expectNoErrors() { func (S *Scanner) Init(src []byte, err ErrorHandler, scan_comments bool) {
// set the next expected error position to one after eof
// (the eof position is a legal error position!)
S.testpos = len(S.src) + 1;
}
func (S *Scanner) Init(err ErrorHandler, src string, scan_comments, testmode bool) {
S.err = err;
S.src = src; S.src = src;
S.err = err;
S.scan_comments = scan_comments; S.scan_comments = scan_comments;
S.next();
S.pos = 0;
S.linepos = 0;
S.testmode = testmode;
S.expectNoErrors(); // S.src must be set
S.next(); // S.expectNoErrrors() must be called before
} }
...@@ -363,13 +334,13 @@ func charString(ch int) string { ...@@ -363,13 +334,13 @@ func charString(ch int) string {
case '\\': s = `\\`; case '\\': s = `\\`;
case '\'': s = `\'`; case '\'': s = `\'`;
} }
return "'" + s + "' (U+" + Utils.IntToString(ch, 16) + ")"; return "'" + s + "' (U+" + strconv.Itob(ch, 16) + ")";
} }
func (S *Scanner) expect(ch int) { func (S *Scanner) expect(ch int) {
if S.ch != ch { if S.ch != ch {
S.Error(S.chpos, "expected " + charString(ch) + ", found " + charString(S.ch)); S.error(S.chpos, "expected " + charString(ch) + ", found " + charString(S.ch));
} }
S.next(); // make always progress S.next(); // make always progress
} }
...@@ -393,7 +364,7 @@ func (S *Scanner) skipWhitespace() { ...@@ -393,7 +364,7 @@ func (S *Scanner) skipWhitespace() {
} }
func (S *Scanner) scanComment() string { func (S *Scanner) scanComment() []byte {
// first '/' already consumed // first '/' already consumed
pos := S.chpos - 1; pos := S.chpos - 1;
...@@ -422,37 +393,14 @@ func (S *Scanner) scanComment() string { ...@@ -422,37 +393,14 @@ func (S *Scanner) scanComment() string {
} }
} }
S.Error(pos, "comment not terminated"); S.error(pos, "comment not terminated");
exit: exit:
comment := S.src[pos : S.chpos]; return S.src[pos : S.chpos];
if S.testmode {
// interpret ERROR and SYNC comments
oldpos := -1;
switch {
case len(comment) >= 8 && comment[3 : 8] == "ERROR" :
// an error is expected at the next token position
oldpos = S.testpos;
S.skipWhitespace();
S.testpos = S.chpos;
case len(comment) >= 7 && comment[3 : 7] == "SYNC" :
// scanning/parsing synchronized again - no (follow-up) errors expected
oldpos = S.testpos;
S.expectNoErrors();
}
if 0 <= oldpos && oldpos <= len(S.src) {
// the previous error was not found
S.Error(oldpos, "ERROR not found"); // TODO this should call ErrorMsg
}
}
return comment;
} }
func (S *Scanner) scanIdentifier() (tok int, val string) { func (S *Scanner) scanIdentifier() (tok int, val []byte) {
pos := S.chpos; pos := S.chpos;
for is_letter(S.ch) || digit_val(S.ch) < 10 { for is_letter(S.ch) || digit_val(S.ch) < 10 {
S.next(); S.next();
...@@ -460,7 +408,7 @@ func (S *Scanner) scanIdentifier() (tok int, val string) { ...@@ -460,7 +408,7 @@ func (S *Scanner) scanIdentifier() (tok int, val string) {
val = S.src[pos : S.chpos]; val = S.src[pos : S.chpos];
var present bool; var present bool;
tok, present = keywords[val]; tok, present = keywords[string(val)];
if !present { if !present {
tok = IDENT; tok = IDENT;
} }
...@@ -476,7 +424,7 @@ func (S *Scanner) scanMantissa(base int) { ...@@ -476,7 +424,7 @@ func (S *Scanner) scanMantissa(base int) {
} }
func (S *Scanner) scanNumber(seen_decimal_point bool) (tok int, val string) { func (S *Scanner) scanNumber(seen_decimal_point bool) (tok int, val []byte) {
pos := S.chpos; pos := S.chpos;
tok = INT; tok = INT;
...@@ -540,50 +488,33 @@ func (S *Scanner) scanDigits(n int, base int) { ...@@ -540,50 +488,33 @@ func (S *Scanner) scanDigits(n int, base int) {
n--; n--;
} }
if n > 0 { if n > 0 {
S.Error(S.chpos, "illegal char escape"); S.error(S.chpos, "illegal char escape");
} }
} }
func (S *Scanner) scanEscape(quote int) string { func (S *Scanner) scanEscape(quote int) {
// TODO: fix this routine
ch := S.ch; ch := S.ch;
pos := S.chpos; pos := S.chpos;
S.next(); S.next();
switch ch { switch ch {
case 'a', 'b', 'f', 'n', 'r', 't', 'v', '\\': case 'a', 'b', 'f', 'n', 'r', 't', 'v', '\\', quote:
return string(ch); // nothing to do
case '0', '1', '2', '3', '4', '5', '6', '7': case '0', '1', '2', '3', '4', '5', '6', '7':
S.scanDigits(3 - 1, 8); // 1 char already read S.scanDigits(3 - 1, 8); // 1 char read already
return ""; // TODO fix this
case 'x': case 'x':
S.scanDigits(2, 16); S.scanDigits(2, 16);
return ""; // TODO fix this
case 'u': case 'u':
S.scanDigits(4, 16); S.scanDigits(4, 16);
return ""; // TODO fix this
case 'U': case 'U':
S.scanDigits(8, 16); S.scanDigits(8, 16);
return ""; // TODO fix this
default: default:
// check for quote outside the switch for better generated code (eventually) S.error(pos, "illegal char escape");
if ch == quote {
return string(quote);
} }
S.Error(pos, "illegal char escape");
}
return ""; // TODO fix this
} }
func (S *Scanner) scanChar() string { func (S *Scanner) scanChar() []byte {
// '\'' already consumed // '\'' already consumed
pos := S.chpos - 1; pos := S.chpos - 1;
...@@ -598,7 +529,7 @@ func (S *Scanner) scanChar() string { ...@@ -598,7 +529,7 @@ func (S *Scanner) scanChar() string {
} }
func (S *Scanner) scanString() string { func (S *Scanner) scanString() []byte {
// '"' already consumed // '"' already consumed
pos := S.chpos - 1; pos := S.chpos - 1;
...@@ -606,7 +537,7 @@ func (S *Scanner) scanString() string { ...@@ -606,7 +537,7 @@ func (S *Scanner) scanString() string {
ch := S.ch; ch := S.ch;
S.next(); S.next();
if ch == '\n' || ch < 0 { if ch == '\n' || ch < 0 {
S.Error(pos, "string not terminated"); S.error(pos, "string not terminated");
break; break;
} }
if ch == '\\' { if ch == '\\' {
...@@ -619,7 +550,7 @@ func (S *Scanner) scanString() string { ...@@ -619,7 +550,7 @@ func (S *Scanner) scanString() string {
} }
func (S *Scanner) scanRawString() string { func (S *Scanner) scanRawString() []byte {
// '`' already consumed // '`' already consumed
pos := S.chpos - 1; pos := S.chpos - 1;
...@@ -627,7 +558,7 @@ func (S *Scanner) scanRawString() string { ...@@ -627,7 +558,7 @@ func (S *Scanner) scanRawString() string {
ch := S.ch; ch := S.ch;
S.next(); S.next();
if ch == '\n' || ch < 0 { if ch == '\n' || ch < 0 {
S.Error(pos, "string not terminated"); S.error(pos, "string not terminated");
break; break;
} }
} }
...@@ -676,7 +607,7 @@ func (S *Scanner) select4(tok0, tok1, ch2, tok2, tok3 int) int { ...@@ -676,7 +607,7 @@ func (S *Scanner) select4(tok0, tok1, ch2, tok2, tok3 int) int {
} }
func (S *Scanner) Scan() (pos, tok int, val string) { func (S *Scanner) Scan() (pos, tok int, val []byte) {
loop: loop:
S.skipWhitespace(); S.skipWhitespace();
...@@ -689,7 +620,7 @@ loop: ...@@ -689,7 +620,7 @@ loop:
S.next(); // always make progress S.next(); // always make progress
switch ch { switch ch {
case -1: tok = EOF; case -1: tok = EOF;
case '\n': tok, val = COMMENT, "\n"; case '\n': tok, val = COMMENT, []byte('\n');
case '"': tok, val = STRING, S.scanString(); case '"': tok, val = STRING, S.scanString();
case '\'': tok, val = INT, S.scanChar(); case '\'': tok, val = INT, S.scanChar();
case '`': tok, val = STRING, S.scanRawString(); case '`': tok, val = STRING, S.scanRawString();
...@@ -741,7 +672,7 @@ loop: ...@@ -741,7 +672,7 @@ loop:
case '&': tok = S.select3(AND, AND_ASSIGN, '&', LAND); case '&': tok = S.select3(AND, AND_ASSIGN, '&', LAND);
case '|': tok = S.select3(OR, OR_ASSIGN, '|', LOR); case '|': tok = S.select3(OR, OR_ASSIGN, '|', LOR);
default: default:
S.Error(pos, "illegal character " + charString(ch)); S.error(pos, "illegal character " + charString(ch));
tok = ILLEGAL; tok = ILLEGAL;
} }
} }
......
...@@ -119,16 +119,6 @@ runtests() { ...@@ -119,16 +119,6 @@ runtests() {
} }
# run selftest1 always
$CMD -t selftest1.go > $TMP1
if [ $? != 0 ]; then
cat $TMP1
echo "Error (selftest1): $CMD -t selftest1.go"
exit 1
fi
count selftest1.go
# run over all .go files # run over all .go files
runtests $* runtests $*
cleanup cleanup
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment