Commit c8683ff7 authored by Robert Griesemer, committed by Matthew Dempsky

cmd/compile/internal/syntax: fast Go syntax trees, initial commit.

Syntax tree nodes, scanner, parser, basic printers.

Builds syntax trees for entire Go std lib at a rate of ~1.8M lines/s
in warmed up state (MacMini, 2.3 GHz Intel Core i7, 8GB RAM):

$ go test -run StdLib -fast
parsed 1074617 lines (2832 files) in 579.66364ms (1853863 lines/s)
allocated 282.212Mb (486.854Mb/s)
PASS

Change-Id: Ie26d9a7bf4e5ff07457aedfcc9b89f0eba72ae3f
Reviewed-on: https://go-review.googlesource.com/27195
Run-TryBot: Matthew Dempsky <mdempsky@google.com>
Reviewed-by: Robert Griesemer <gri@golang.org>
parent 3b967be4
...@@ -18,6 +18,10 @@ go src=.. ...@@ -18,6 +18,10 @@ go src=..
asm asm
testdata testdata
+ +
compile
internal
syntax
parser.go
doc doc
main.go main.go
pkg.go pkg.go
......
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// This file implements printing of syntax tree structures.
package syntax
import (
"fmt"
"io"
"reflect"
"unicode"
"unicode/utf8"
)
// Fdump dumps the structure of the syntax tree rooted at n to w.
// It is intended for debugging purposes; no specific output format
// is guaranteed.
//
// Write errors reported by w are returned as err. Any other panic
// raised while dumping is propagated to the caller unchanged.
func Fdump(w io.Writer, n Node) (err error) {
	p := dumper{
		output: w,
		ptrmap: make(map[Node]int),
		last:   '\n', // force printing of line number on first line
	}

	defer func() {
		if e := recover(); e != nil {
			le, ok := e.(localError)
			if !ok {
				// Not one of our own print errors: re-raise the original
				// panic value instead of masking it with a type-assertion
				// failure.
				panic(e)
			}
			err = le.err
		}
	}()

	if n == nil {
		p.printf("nil\n")
		return
	}
	p.dump(reflect.ValueOf(n), n)
	p.printf("\n")

	return
}
// dumper holds the state of a single Fdump run. It implements io.Writer
// (see Write) so that formatted output passes through a filter that adds
// line numbers and indentation before reaching output.
type dumper struct {
output io.Writer // destination of the dump
ptrmap map[Node]int // node -> dump line number
indent int // current indentation level
last byte // last byte processed by Write
line int // current line number
}
// indentBytes is written once per indentation level at the start of a line.
var indentBytes = []byte(". ")
// Write forwards data to p.output, inserting a line number followed by
// the current indentation in front of the first byte of every non-empty
// line. It returns the number of bytes of data that were written (the
// inserted prefixes are not counted) and the first error encountered.
func (p *dumper) Write(data []byte) (n int, err error) {
	var wrote int
	for pos, c := range data {
		// invariant: data[0:n] has been written
		switch {
		case c == '\n':
			// flush the pending text up to and including the newline
			wrote, err = p.output.Write(data[n : pos+1])
			n += wrote
			if err != nil {
				return
			}
		case p.last == '\n':
			// first byte of a new line: emit line number and indentation
			p.line++
			if _, err = fmt.Fprintf(p.output, "%6d ", p.line); err != nil {
				return
			}
			for level := 0; level < p.indent; level++ {
				if _, err = p.output.Write(indentBytes); err != nil {
					return
				}
			}
		}
		p.last = c
	}

	// flush whatever follows the last newline
	if n < len(data) {
		wrote, err = p.output.Write(data[n:])
		n += wrote
	}
	return
}
// localError wraps locally caught errors so we can distinguish
// them from genuine panics which we don't want to return as errors.
// printf panics with a localError when writing fails; the deferred
// function in Fdump recovers it and returns the wrapped err.
type localError struct {
err error
}
// printf formats its arguments through the dumper's own Write method.
// A write failure is not returned; it is raised as a localError panic
// so that callers need not check errors after every print.
func (p *dumper) printf(format string, args ...interface{}) {
	_, err := fmt.Fprintf(p, format, args...)
	if err == nil {
		return
	}
	panic(localError{err})
}
// dump prints the contents of x.
// If x is the reflect.Value of a struct s, where &s
// implements Node, then &s should be passed for n.
// This is meant to permit printing of the unexported span
// and comments information of the embedded node; that
// printing is currently disabled (see the commented-out
// code below), so a non-nil n only affects line breaks.
//
// Interfaces and pointers are followed, slices and structs are
// printed element by element (exported fields only), and all
// other values are printed with %v (strings with %q).
func (p *dumper) dump(x reflect.Value, n Node) {
	switch x.Kind() {
	case reflect.Interface:
		if x.IsNil() {
			p.printf("nil")
			return
		}
		p.dump(x.Elem(), nil)

	case reflect.Ptr:
		if x.IsNil() {
			p.printf("nil")
			return
		}

		// special cases for identifiers w/o attached comments (common case)
		if name, ok := x.Interface().(*Name); ok {
			// The identifier text is data, not a format string:
			// print it through an explicit verb.
			p.printf("%s", name.Value)
			return
		}

		p.printf("*")
		// Fields may share type expressions, and declarations
		// may share the same group - use ptrmap to keep track
		// of nodes that have been printed already.
		if ptr, ok := x.Interface().(Node); ok {
			if line, exists := p.ptrmap[ptr]; exists {
				p.printf("(Node @ %d)", line)
				return
			}
			p.ptrmap[ptr] = p.line
			n = ptr
		}
		p.dump(x.Elem(), n)

	case reflect.Slice:
		if x.IsNil() {
			p.printf("nil")
			return
		}
		p.printf("%s (%d entries) {", x.Type(), x.Len())
		if x.Len() > 0 {
			p.indent++
			p.printf("\n")
			for i, count := 0, x.Len(); i < count; i++ {
				p.printf("%d: ", i)
				p.dump(x.Index(i), nil)
				p.printf("\n")
			}
			p.indent--
		}
		p.printf("}")

	case reflect.Struct:
		typ := x.Type()

		// if span, ok := x.Interface().(lexical.Span); ok {
		// 	p.printf("%s", &span)
		// 	return
		// }

		p.printf("%s {", typ)
		p.indent++

		// first reports whether the line break after the opening
		// brace is still outstanding.
		first := true
		if n != nil {
			p.printf("\n")
			first = false
			// p.printf("Span: %s\n", n.Span())
			// if c := *n.Comments(); c != nil {
			// 	p.printf("Comments: ")
			// 	p.dump(reflect.ValueOf(c), nil) // a Comment is not a Node
			// 	p.printf("\n")
			// }
		}

		for i, count := 0, typ.NumField(); i < count; i++ {
			// Exclude non-exported fields because their
			// values cannot be accessed via reflection.
			if name := typ.Field(i).Name; isExported(name) {
				if first {
					p.printf("\n")
					first = false
				}
				p.printf("%s: ", name)
				p.dump(x.Field(i), nil)
				p.printf("\n")
			}
		}

		p.indent--
		p.printf("}")

	default:
		switch v := x.Interface().(type) {
		case string:
			// print strings in quotes
			p.printf("%q", v)
		default:
			p.printf("%v", v)
		}
	}
}
// isExported reports whether name starts with an upper-case letter.
// The empty string is not exported.
func isExported(name string) bool {
	first, size := utf8.DecodeRuneInString(name)
	if size == 0 {
		// empty name: there is no first rune to inspect
		return false
	}
	return unicode.IsUpper(first)
}
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package syntax
import (
"os"
"testing"
)
// TestDump parses the file named by the -src flag and dumps its syntax
// tree to standard output. It is skipped in short mode. Both parse and
// dump failures fail the test.
func TestDump(t *testing.T) {
	if testing.Short() {
		t.Skip("skipping test in short mode")
	}

	ast, err := ReadFile(*src, nil, 0)
	if err != nil {
		t.Fatal(err)
	}
	// Fdump reports write errors; do not drop them silently.
	if err := Fdump(os.Stdout, ast); err != nil {
		t.Fatal(err)
	}
}
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package syntax
// ----------------------------------------------------------------------------
// Nodes
// Node is the interface implemented by syntax tree nodes. Its only
// method is unexported, so it can only be implemented inside this package.
type Node interface {
aNode()
}
// node holds the information shared by syntax tree nodes. It is embedded,
// directly or through decl, expr, or stmt, in the node types declared below.
type node struct {
doc *Comment // nil means no comment(s) attached
pos uint32 // copied from the parser's pos by init
line uint32 // copied from the parser's line by init
}
// aNode makes *node, and pointers to structs embedding node, satisfy Node.
func (*node) aNode() {}
// init records the parser's current pos and line in n,
// converting both values to uint32.
func (n *node) init(p *parser) {
	n.pos, n.line = uint32(p.pos), uint32(p.line)
}
// ----------------------------------------------------------------------------
// Files
// File is the root node of the syntax tree of a source file.
type File struct {
PkgName *Name
DeclList []Decl
Pragmas []Pragma
Lines int // reported as the file's line count by TestStdLib
node
}
// Pragma pairs a line number with a piece of text.
// NOTE(review): presumably a //go: directive and the line it appeared on - confirm against the parser.
type Pragma struct {
Line int
Text string
}
// ----------------------------------------------------------------------------
// Declarations
type (
// Decl is implemented by all declaration nodes.
Decl interface {
Node
aDecl()
}
// ImportDecl holds an import path and its optional local package name.
ImportDecl struct {
LocalPkgName *Name // including "."; nil means no rename present
Path *BasicLit
Group *Group // nil means not part of a group
decl
}
// ConstDecl holds a list of constant names with an optional type and optional values.
ConstDecl struct {
NameList []*Name
Type Expr // nil means no type
Values Expr // nil means no values
Group *Group // nil means not part of a group
decl
}
// TypeDecl holds a type name and the type it is declared as.
TypeDecl struct {
Name *Name
Type Expr
Group *Group // nil means not part of a group
decl
}
// VarDecl holds a list of variable names with an optional type and optional values.
VarDecl struct {
NameList []*Name
Type Expr // nil means no type
Values Expr // nil means no values
Group *Group // nil means not part of a group
decl
}
// FuncDecl holds a function or method declaration.
FuncDecl struct {
Attr map[string]bool // go:attr map
Recv *Field // nil means regular function
Name *Name
Type *FuncType
Body []Stmt // nil means no body (forward declaration)
decl
}
)
// decl is embedded in every declaration node; it supplies the node
// information and, via aDecl, membership in the Decl interface.
type decl struct{ node }
// aDecl makes pointers to structs embedding decl satisfy Decl.
func (*decl) aDecl() {}
// All declarations belonging to the same group point to the same Group node.
// Group membership is therefore tested by comparing the Group pointers of
// two declarations; a Group carries no other information.
type Group struct {
dummy int // not empty so we are guaranteed different Group instances
}
// ----------------------------------------------------------------------------
// Expressions
// The comment in front of each expression type sketches the source form
// the node stands for, written in terms of the node's field names.
type (
// Expr is implemented by all expression nodes (including type expressions).
Expr interface {
Node
aExpr()
}
// Value
Name struct {
Value string
expr
}
// Value
BasicLit struct {
Value string
Kind LitKind
expr
}
// Type { ElemList[0], ElemList[1], ... }
CompositeLit struct {
Type Expr // nil means no literal type
ElemList []Expr
NKeys int // number of elements with keys
expr
}
// Key: Value
KeyValueExpr struct {
Key, Value Expr
expr
}
// func Type { Body }
FuncLit struct {
Type *FuncType
Body []Stmt
expr
}
// (X)
ParenExpr struct {
X Expr
expr
}
// X.Sel
SelectorExpr struct {
X Expr
Sel *Name
expr
}
// X[Index]
IndexExpr struct {
X Expr
Index Expr
expr
}
// X[Index[0] : Index[1] : Index[2]]
SliceExpr struct {
X Expr
Index [3]Expr
expr
}
// X.(Type)
AssertExpr struct {
X Expr
// TODO(gri) consider using Name{"..."} instead of nil (permits attaching of comments)
Type Expr
expr
}
// Operation holds a unary or binary operation: Op applied to X, or to X and Y.
Operation struct {
Op Operator
X, Y Expr // Y == nil means unary expression
expr
}
// Fun(ArgList[0], ArgList[1], ...)
CallExpr struct {
Fun Expr
ArgList []Expr
HasDots bool // last argument is followed by ...
expr
}
// ElemList[0], ElemList[1], ...
ListExpr struct {
ElemList []Expr
expr
}
// [Len]Elem
ArrayType struct {
// TODO(gri) consider using Name{"..."} instead of nil (permits attaching of comments)
Len Expr // nil means Len is ...
Elem Expr
expr
}
// []Elem
SliceType struct {
Elem Expr
expr
}
// ...Elem
DotsType struct {
Elem Expr
expr
}
// struct { FieldList[0] TagList[0]; FieldList[1] TagList[1]; ... }
StructType struct {
FieldList []*Field
TagList []*BasicLit // i >= len(TagList) || TagList[i] == nil means no tag for field i
expr
}
// Name Type
// Type
Field struct {
Name *Name // nil means anonymous field/parameter (structs/parameters), or embedded interface (interfaces)
Type Expr // field names declared in a list share the same Type (identical pointers)
node
}
// interface { MethodList[0]; MethodList[1]; ... }
InterfaceType struct {
MethodList []*Field
expr
}
// FuncType holds the parameter and result lists of a function signature.
FuncType struct {
ParamList []*Field
ResultList []*Field
expr
}
// map[Key]Value
MapType struct {
Key Expr
Value Expr
expr
}
// chan Elem
// <-chan Elem
// chan<- Elem
ChanType struct {
Dir ChanDir // 0 means no direction
Elem Expr
expr
}
)
// expr is embedded in every expression node; it supplies the node
// information and, via aExpr, membership in the Expr interface.
type expr struct{ node }
// aExpr makes pointers to structs embedding expr satisfy Expr.
func (*expr) aExpr() {}
// ChanDir is the direction of a channel type (see ChanType.Dir).
// The zero value means that no direction was specified.
type ChanDir uint
const (
_ ChanDir = iota // 0 is reserved for "no direction"
SendOnly // == 1
RecvOnly // == 2
)
// ----------------------------------------------------------------------------
// Statements
type (
// Stmt is implemented by all statement nodes.
Stmt interface {
Node
aStmt()
}
// SimpleStmt is implemented by the statement nodes that embed simpleStmt.
// It is the type of the Init and Post fields of the statements below.
SimpleStmt interface {
Stmt
aSimpleStmt()
}
// EmptyStmt is a statement without any fields.
EmptyStmt struct {
simpleStmt
}
// LabeledStmt pairs a Label with the statement Stmt.
LabeledStmt struct {
Label *Name
Stmt Stmt
stmt
}
// BlockStmt holds a list of statements.
BlockStmt struct {
Body []Stmt
stmt
}
// ExprStmt holds an expression used as a statement.
ExprStmt struct {
X Expr
simpleStmt
}
// SendStmt holds a channel send.
SendStmt struct {
Chan, Value Expr // Chan <- Value
simpleStmt
}
// DeclStmt holds a list of declarations used as a statement.
DeclStmt struct {
DeclList []Decl
stmt
}
// AssignStmt holds an assignment, optionally combined with an operation.
AssignStmt struct {
Op Operator // 0 means no operation
Lhs, Rhs Expr // Rhs == ImplicitOne means Lhs++ (Op == Add) or Lhs-- (Op == Sub)
simpleStmt
}
// BranchStmt holds a branch keyword and its Label.
BranchStmt struct {
Tok token // Break, Continue, Fallthrough, or Goto
Label *Name
stmt
}
// CallStmt holds a go or defer statement together with the call it applies to.
CallStmt struct {
Tok token // Go or Defer
Call *CallExpr
stmt
}
// ReturnStmt holds a return statement and its optional results.
ReturnStmt struct {
Results Expr // nil means no explicit return values
stmt
}
// IfStmt holds an if statement.
IfStmt struct {
Init SimpleStmt
Cond Expr
Then []Stmt
Else Stmt // either *IfStmt or *BlockStmt
stmt
}
// ForStmt holds a for statement.
ForStmt struct {
Init SimpleStmt // incl. *RangeClause
Cond Expr
Post SimpleStmt
Body []Stmt
stmt
}
// SwitchStmt holds a switch statement; Body holds its case clauses.
SwitchStmt struct {
Init SimpleStmt
Tag Expr
Body []*CaseClause
stmt
}
// SelectStmt holds a select statement; Body holds its communication clauses.
SelectStmt struct {
Body []*CommClause
stmt
}
)
type (
// RangeClause holds a range clause. It embeds simpleStmt and can
// therefore be stored in ForStmt.Init.
RangeClause struct {
Lhs Expr // nil means no Lhs = or Lhs :=
Def bool // means :=
X Expr // range X
simpleStmt
}
// TypeSwitchGuard holds the guard of a type switch. It embeds expr,
// so it is an expression node.
TypeSwitchGuard struct {
// TODO(gri) consider using Name{"..."} instead of nil (permits attaching of comments)
Lhs *Name // nil means no Lhs :=
X Expr // X.(type)
expr
}
// CaseClause holds one clause of a SwitchStmt.
CaseClause struct {
Cases Expr // nil means default clause
Body []Stmt
node
}
// CommClause holds one clause of a SelectStmt.
CommClause struct {
Comm SimpleStmt // send or receive stmt; nil means default clause
Body []Stmt
node
}
)
// stmt is embedded in every statement node; it supplies the node
// information and, via aStmt, membership in the Stmt interface.
type stmt struct{ node }
// aStmt is declared on the value receiver, unlike aNode, aDecl and aExpr,
// which use pointer receivers. aNode still requires a pointer, so only
// pointers to statement structs satisfy Stmt.
func (stmt) aStmt() {}
// simpleStmt is embedded in the statement nodes that are also simple statements.
type simpleStmt struct {
stmt
}
// aSimpleStmt marks structs embedding simpleStmt as SimpleStmt candidates.
func (simpleStmt) aSimpleStmt() {}
// ----------------------------------------------------------------------------
// Comments
// CommentKind is the kind of a Comment.
// NOTE(review): the names suggest the comment's position relative to the node it is attached to - confirm.
type CommentKind uint
const (
Above CommentKind = iota // == 0
Below // == 1
Left // == 2
Right // == 3
)
// Comment is one element of a singly linked list of comments.
// A node refers to its list through the doc field of the node struct.
type Comment struct {
Kind CommentKind
Text string
Next *Comment // next comment in the list
}
This diff is collapsed.
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package syntax
import (
"bytes"
"flag"
"fmt"
"io/ioutil"
"path/filepath"
"runtime"
"strings"
"sync"
"testing"
"time"
)
// Command-line flags shared by the tests of this package.

// fast makes walkDirs run its action for all files of a directory concurrently.
var fast = flag.Bool("fast", false, "parse package files in parallel")
// src names the file parsed by TestParse, TestDump and TestPrint.
var src = flag.String("src", "parser.go", "source file to parse")
// verify makes TestStdLib call verifyPrint on every parsed file.
var verify = flag.Bool("verify", false, "verify idempotent printing")
// TestParse checks that the file named by the -src flag parses without error.
func TestParse(t *testing.T) {
	if _, err := ReadFile(*src, nil, 0); err != nil {
		t.Fatal(err)
	}
}
// TestStdLib parses every .go file found below runtime.GOROOT() (see
// walkDirs for the directories that are skipped) and reports the number
// of lines and files parsed, the elapsed time, and the memory allocated.
// With -verify each file is additionally checked by verifyPrint, and with
// -fast the files of a directory are parsed concurrently.
// The test is skipped in short mode.
func TestStdLib(t *testing.T) {
	if testing.Short() {
		t.Skip("skipping test in short mode")
	}

	var m1 runtime.MemStats
	runtime.ReadMemStats(&m1)
	start := time.Now()

	type parseResult struct {
		filename string
		lines    int
	}

	results := make(chan parseResult)
	go func() {
		// Always close the channel when the producer exits so that the
		// collector loop below cannot block forever.
		defer close(results)

		for _, dir := range []string{
			runtime.GOROOT(),
			//"/Users/gri/src",
		} {
			walkDirs(t, dir, func(filename string) {
				if debug {
					fmt.Printf("parsing %s\n", filename)
				}
				ast, err := ReadFile(filename, nil, 0)
				if err != nil {
					// This callback does not run on the test goroutine,
					// so t.Fatal (FailNow) must not be used here.
					t.Error(err)
					return
				}
				if *verify {
					verifyPrint(filename, ast)
				}
				results <- parseResult{filename, ast.Lines}
			})
		}
	}()

	var count, lines int
	for res := range results {
		count++
		lines += res.lines
		if testing.Verbose() {
			fmt.Printf("%5d %s (%d lines)\n", count, res.filename, res.lines)
		}
	}

	dt := time.Since(start)
	var m2 runtime.MemStats
	runtime.ReadMemStats(&m2)
	dm := float64(m2.TotalAlloc-m1.TotalAlloc) / 1e6

	fmt.Printf("parsed %d lines (%d files) in %v (%d lines/s)\n", lines, count, dt, int64(float64(lines)/dt.Seconds()))
	fmt.Printf("allocated %.3fMb (%.3fMb/s)\n", dm, dm/dt.Seconds())
}
// walkDirs calls action for every regular file in dir whose name ends in
// ".go" and then recurses into the subdirectories of dir. Directories
// named "testdata" and directories whose joined path contains "go/test"
// are not visited. If the -fast flag is set, the actions for the files of
// one directory run concurrently and walkDirs waits for all of them before
// descending. A directory that cannot be read is reported through t.Error
// and skipped.
func walkDirs(t *testing.T, dir string, action func(string)) {
	entries, err := ioutil.ReadDir(dir)
	if err != nil {
		t.Error(err)
		return
	}

	var files, dirs []string
	for _, entry := range entries {
		switch {
		case entry.Mode().IsRegular():
			if strings.HasSuffix(entry.Name(), ".go") {
				files = append(files, filepath.Join(dir, entry.Name()))
			}
		case entry.IsDir() && entry.Name() != "testdata":
			if sub := filepath.Join(dir, entry.Name()); !strings.Contains(sub, "go/test") {
				dirs = append(dirs, sub)
			}
		}
	}

	if !*fast {
		for _, filename := range files {
			action(filename)
		}
	} else {
		var pending sync.WaitGroup
		pending.Add(len(files))
		for _, filename := range files {
			go func(name string) {
				defer pending.Done()
				action(name)
			}(filename)
		}
		pending.Wait()
	}

	for _, sub := range dirs {
		walkDirs(t, sub, action)
	}
}
// verifyPrint checks that printing is idempotent for ast1: it prints
// ast1, parses the printed text again, prints the resulting tree, and
// compares the two printed texts. If they differ, both are written to
// standard output and verifyPrint panics with "not equal". Print and
// parse errors cause a panic with the respective error.
func verifyPrint(filename string, ast1 *File) {
	// render prints ast into a fresh buffer and returns its contents.
	render := func(ast *File) []byte {
		var buf bytes.Buffer
		if _, err := Fprint(&buf, ast, true); err != nil {
			panic(err)
		}
		return buf.Bytes()
	}

	out1 := render(ast1)

	ast2, err := ReadBytes(out1, nil, 0)
	if err != nil {
		panic(err)
	}

	out2 := render(ast2)

	if bytes.Equal(out1, out2) {
		return
	}

	for _, out := range [][]byte{out1, out2} {
		fmt.Printf("--- %s ---\n", filename)
		fmt.Printf("%s\n", out)
		fmt.Println()
	}
	panic("not equal")
}
This diff is collapsed.
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package syntax
import (
"fmt"
"os"
"testing"
)
// TestPrint parses the file named by the -src flag and prints the
// resulting syntax tree to standard output. It is skipped in short mode.
// Both parse and print failures fail the test.
func TestPrint(t *testing.T) {
	if testing.Short() {
		t.Skip("skipping test in short mode")
	}

	ast, err := ReadFile(*src, nil, 0)
	if err != nil {
		t.Fatal(err)
	}
	// Fprint reports write errors; do not drop them silently.
	if _, err := Fprint(os.Stdout, ast, true); err != nil {
		t.Fatal(err)
	}
	fmt.Println()
}
This diff is collapsed.
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package syntax
import (
"fmt"
"os"
"testing"
)
// TestScanner scans parser.go and prints one line per token to standard
// output: line number and token, followed by the literal for names and by
// operator and precedence for operators. It performs no checks beyond
// opening the file and is skipped in short mode.
func TestScanner(t *testing.T) {
	if testing.Short() {
		t.Skip("skipping test in short mode")
	}

	f, err := os.Open("parser.go")
	if err != nil {
		t.Fatal(err)
	}
	defer f.Close()

	var s scanner
	s.init(f, nil)
	for s.next(); s.tok != _EOF; s.next() {
		switch s.tok {
		case _Name:
			fmt.Println(s.line, s.tok, "=>", s.lit)
		case _Operator:
			fmt.Println(s.line, s.tok, "=>", s.op, s.prec)
		default:
			fmt.Println(s.line, s.tok)
		}
	}
}
// TestTokens builds a source text with one sampleTokens entry per line,
// surrounded by varying amounts of white space and followed by comments,
// and checks that the scanner reports for each line the expected token,
// literal or operator/precedence, and line number. For tokens after
// which the test expects an automatically inserted semicolon, that
// semicolon is checked as well.
func TestTokens(t *testing.T) {
	// make source
	var buf []byte
	for i, s := range sampleTokens {
		buf = append(buf, "\t\t\t\t"[:i&3]...) // leading indentation
		buf = append(buf, s.src...)            // token
		// trailing spaces: 0 to 7 of them; appended one at a time so
		// that no string literal of sufficient length is required
		for j := i & 7; j > 0; j-- {
			buf = append(buf, ' ')
		}
		buf = append(buf, "/* foo */ // bar\n"...) // comments
	}

	// scan source
	var got scanner
	got.init(&bytesReader{buf}, nil)
	got.next()
	for i, want := range sampleTokens {
		nlsemi := false

		if got.line != i+1 {
			t.Errorf("got line %d; want %d", got.line, i+1)
		}

		if got.tok != want.tok {
			t.Errorf("got tok = %s; want %s", got.tok, want.tok)
			continue
		}

		switch want.tok {
		case _Name, _Literal:
			if got.lit != want.src {
				t.Errorf("got lit = %q; want %q", got.lit, want.src)
				continue
			}
			nlsemi = true

		case _Operator, _AssignOp, _IncOp:
			if got.op != want.op {
				t.Errorf("got op = %s; want %s", got.op, want.op)
				continue
			}
			if got.prec != want.prec {
				// precedences are ints: report them with %d
				t.Errorf("got prec = %d; want %d", got.prec, want.prec)
				continue
			}
			nlsemi = want.tok == _IncOp

		case _Rparen, _Rbrack, _Rbrace, _Break, _Continue, _Fallthrough, _Return:
			nlsemi = true
		}

		if nlsemi {
			got.next()
			if got.tok != _Semi {
				t.Errorf("got tok = %s; want ;", got.tok)
				continue
			}
		}

		got.next()
	}

	if got.tok != _EOF {
		t.Errorf("got %q; want _EOF", got.tok)
	}
}
// sampleTokens is the table driving TestTokens. Each entry gives the
// expected token and the source text producing it; TestTokens compares
// op and prec only for _Operator, _AssignOp and _IncOp entries.
var sampleTokens = [...]struct {
tok token // expected token
src string // source text of the token
op Operator // expected operator (checked for operator tokens only)
prec int // expected precedence (checked for operator tokens only)
}{
// name samples
{_Name, "x", 0, 0},
{_Name, "X123", 0, 0},
{_Name, "foo", 0, 0},
{_Name, "Foo123", 0, 0},
{_Name, "foo_bar", 0, 0},
{_Name, "_", 0, 0},
{_Name, "_foobar", 0, 0},
{_Name, "a۰۱۸", 0, 0},
{_Name, "foo६४", 0, 0},
{_Name, "bar9876", 0, 0},
{_Name, "ŝ", 0, 0},
{_Name, "ŝfoo", 0, 0},
// literal samples
{_Literal, "0", 0, 0},
{_Literal, "1", 0, 0},
{_Literal, "12345", 0, 0},
{_Literal, "123456789012345678890123456789012345678890", 0, 0},
{_Literal, "01234567", 0, 0},
{_Literal, "0x0", 0, 0},
{_Literal, "0xcafebabe", 0, 0},
{_Literal, "0.", 0, 0},
{_Literal, "0.e0", 0, 0},
{_Literal, "0.e-1", 0, 0},
{_Literal, "0.e+123", 0, 0},
{_Literal, ".0", 0, 0},
{_Literal, ".0E00", 0, 0},
{_Literal, ".0E-0123", 0, 0},
{_Literal, ".0E+12345678901234567890", 0, 0},
{_Literal, ".45e1", 0, 0},
{_Literal, "3.14159265", 0, 0},
{_Literal, "1e0", 0, 0},
{_Literal, "1e+100", 0, 0},
{_Literal, "1e-100", 0, 0},
{_Literal, "2.71828e-1000", 0, 0},
{_Literal, "0i", 0, 0},
{_Literal, "1i", 0, 0},
{_Literal, "012345678901234567889i", 0, 0},
{_Literal, "123456789012345678890i", 0, 0},
{_Literal, "0.i", 0, 0},
{_Literal, ".0i", 0, 0},
{_Literal, "3.14159265i", 0, 0},
{_Literal, "1e0i", 0, 0},
{_Literal, "1e+100i", 0, 0},
{_Literal, "1e-100i", 0, 0},
{_Literal, "2.71828e-1000i", 0, 0},
{_Literal, "'a'", 0, 0},
{_Literal, "'\\000'", 0, 0},
{_Literal, "'\\xFF'", 0, 0},
{_Literal, "'\\uff16'", 0, 0},
{_Literal, "'\\U0000ff16'", 0, 0},
{_Literal, "`foobar`", 0, 0},
{_Literal, "`foo\tbar`", 0, 0},
{_Literal, "`\r`", 0, 0},
// operators
{_Operator, "||", OrOr, precOrOr},
{_Operator, "&&", AndAnd, precAndAnd},
{_Operator, "==", Eql, precCmp},
{_Operator, "!=", Neq, precCmp},
{_Operator, "<", Lss, precCmp},
{_Operator, "<=", Leq, precCmp},
{_Operator, ">", Gtr, precCmp},
{_Operator, ">=", Geq, precCmp},
{_Operator, "+", Add, precAdd},
{_Operator, "-", Sub, precAdd},
{_Operator, "|", Or, precAdd},
{_Operator, "^", Xor, precAdd},
{_Star, "*", Mul, precMul},
{_Operator, "/", Div, precMul},
{_Operator, "%", Rem, precMul},
{_Operator, "&", And, precMul},
{_Operator, "&^", AndNot, precMul},
{_Operator, "<<", Shl, precMul},
{_Operator, ">>", Shr, precMul},
// assignment operations
{_AssignOp, "+=", Add, precAdd},
{_AssignOp, "-=", Sub, precAdd},
{_AssignOp, "|=", Or, precAdd},
{_AssignOp, "^=", Xor, precAdd},
{_AssignOp, "*=", Mul, precMul},
{_AssignOp, "/=", Div, precMul},
{_AssignOp, "%=", Rem, precMul},
{_AssignOp, "&=", And, precMul},
{_AssignOp, "&^=", AndNot, precMul},
{_AssignOp, "<<=", Shl, precMul},
{_AssignOp, ">>=", Shr, precMul},
// other operations
{_IncOp, "++", Add, precAdd},
{_IncOp, "--", Sub, precAdd},
{_Assign, "=", 0, 0},
{_Define, ":=", 0, 0},
{_Arrow, "<-", 0, 0},
// delimiters
{_Lparen, "(", 0, 0},
{_Lbrack, "[", 0, 0},
{_Lbrace, "{", 0, 0},
{_Rparen, ")", 0, 0},
{_Rbrack, "]", 0, 0},
{_Rbrace, "}", 0, 0},
{_Comma, ",", 0, 0},
{_Semi, ";", 0, 0},
{_Colon, ":", 0, 0},
{_Dot, ".", 0, 0},
{_DotDotDot, "...", 0, 0},
// keywords
{_Break, "break", 0, 0},
{_Case, "case", 0, 0},
{_Chan, "chan", 0, 0},
{_Const, "const", 0, 0},
{_Continue, "continue", 0, 0},
{_Default, "default", 0, 0},
{_Defer, "defer", 0, 0},
{_Else, "else", 0, 0},
{_Fallthrough, "fallthrough", 0, 0},
{_For, "for", 0, 0},
{_Func, "func", 0, 0},
{_Go, "go", 0, 0},
{_Goto, "goto", 0, 0},
{_If, "if", 0, 0},
{_Import, "import", 0, 0},
{_Interface, "interface", 0, 0},
{_Map, "map", 0, 0},
{_Package, "package", 0, 0},
{_Range, "range", 0, 0},
{_Return, "return", 0, 0},
{_Select, "select", 0, 0},
{_Struct, "struct", 0, 0},
{_Switch, "switch", 0, 0},
{_Type, "type", 0, 0},
{_Var, "var", 0, 0},
}
// TestScanErrors scans a series of faulty sources and checks, for each
// of them, that the scanner reports exactly one error and that the
// error carries the expected message, position, and line.
func TestScanErrors(t *testing.T) {
	tests := []struct {
		src, msg  string
		pos, line int
	}{
		// Note: Positions for lexical errors are the earliest position
		// where the error is apparent, not the beginning of the respective
		// token.

		// rune-level errors
		{"fo\x00o", "invalid NUL character", 2, 1},
		{"foo\n\ufeff bar", "invalid BOM in the middle of the file", 4, 2},
		{"foo\n\n\xff ", "invalid UTF-8 encoding", 5, 3},

		// token-level errors
		{"x + ~y", "bitwise complement operator is ^", 4, 1},
		{"foo$bar = 0", "invalid rune '$'", 3, 1},
		{"const x = 0xyz", "malformed hex constant", 12, 1},
		{"0123456789", "malformed octal constant", 10, 1},
		{"0123456789. /* foobar", "comment not terminated", 12, 1},   // valid float constant
		{"0123456789e0 /*\nfoobar", "comment not terminated", 13, 1}, // valid float constant
		{"var a, b = 08, 07\n", "malformed octal constant", 13, 1},
		{"(x + 1.0e+x)", "malformed floating-point constant exponent", 10, 1},

		{`''`, "empty character literal", 1, 1},
		{"'\n", "newline in character literal", 1, 1},
		{`'\`, "missing '", 2, 1},
		{`'\'`, "missing '", 3, 1},
		{`'\x`, "missing '", 3, 1},
		{`'\x'`, "escape sequence incomplete", 3, 1},
		{`'\y'`, "unknown escape sequence", 2, 1},
		{`'\x0'`, "escape sequence incomplete", 4, 1},
		{`'\00'`, "escape sequence incomplete", 4, 1},
		{`'\377' /*`, "comment not terminated", 7, 1}, // valid octal escape
		{`'\378`, "illegal character U+0038 '8' in escape sequence", 4, 1},
		{`'\400'`, "octal escape value > 255: 256", 5, 1},
		{`'xx`, "missing '", 2, 1},

		{"\"\n", "newline in string", 1, 1},
		{`"`, "string not terminated", 0, 1},
		{`"foo`, "string not terminated", 0, 1},
		{"`", "string not terminated", 0, 1},
		{"`foo", "string not terminated", 0, 1},
		{"/*/", "comment not terminated", 0, 1},
		{"/*\n\nfoo", "comment not terminated", 0, 1},
		{"/*\n\nfoo", "comment not terminated", 0, 1},
		{`"\`, "string not terminated", 0, 1},
		{`"\"`, "string not terminated", 0, 1},
		{`"\x`, "string not terminated", 0, 1},
		{`"\x"`, "escape sequence incomplete", 3, 1},
		{`"\y"`, "unknown escape sequence", 2, 1},
		{`"\x0"`, "escape sequence incomplete", 4, 1},
		{`"\00"`, "escape sequence incomplete", 4, 1},
		{`"\377" /*`, "comment not terminated", 7, 1}, // valid octal escape
		{`"\378"`, "illegal character U+0038 '8' in escape sequence", 4, 1},
		{`"\400"`, "octal escape value > 255: 256", 5, 1},

		{`s := "foo\z"`, "unknown escape sequence", 10, 1},
		{`s := "foo\z00\nbar"`, "unknown escape sequence", 10, 1},
		{`"\x`, "string not terminated", 0, 1},
		{`"\x"`, "escape sequence incomplete", 3, 1},
		{`var s string = "\x"`, "escape sequence incomplete", 18, 1},
		{`return "\Uffffffff"`, "escape sequence is invalid Unicode code point", 18, 1},

		// former problem cases
		{"package p\n\n\xef", "invalid UTF-8 encoding", 11, 3},
	}

	for _, test := range tests {
		var s scanner
		nerrors := 0
		s.init(&bytesReader{[]byte(test.src)}, func(pos, line int, msg string) {
			nerrors++
			switch {
			case nerrors == 1:
				// only the first error is compared against the table
				if msg != test.msg {
					t.Errorf("%q: got msg = %q; want %q", test.src, msg, test.msg)
				}
				if pos != test.pos {
					t.Errorf("%q: got pos = %d; want %d", test.src, pos, test.pos)
				}
				if line != test.line {
					t.Errorf("%q: got line = %d; want %d", test.src, line, test.line)
				}
			case nerrors > 1:
				t.Errorf("%q: got unexpected %q at pos = %d, line = %d", test.src, msg, pos, line)
			}
		})

		// consume the entire source
		for s.next(); s.tok != _EOF; s.next() {
		}

		if nerrors == 0 {
			t.Errorf("%q: got no error; want %q", test.src, test.msg)
		}
	}
}
This diff is collapsed.
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package syntax
import (
"fmt"
"io"
"os"
)
// Mode is the type of the mode argument of ReadFile, ReadBytes and Read.
// None of these functions inspects the value in the code shown here.
type Mode uint
// ErrorHandler is called by the scanner and parser to report an error,
// giving its position, line, and message. If no handler is passed to
// Read, syntax errors are reported through Read's error result instead.
type ErrorHandler func(pos, line int, msg string)
// TODO(gri) These need a lot more work.
// ReadFile opens the named file and parses its contents with Read.
// The file is closed before ReadFile returns. If the file cannot be
// opened, the result is nil and the error from os.Open.
func ReadFile(filename string, errh ErrorHandler, mode Mode) (*File, error) {
	f, openErr := os.Open(filename)
	if openErr != nil {
		return nil, openErr
	}
	defer f.Close()

	return Read(f, errh, mode)
}
// bytesReader is a minimal io.Reader over a byte slice.
type bytesReader struct {
	data []byte // bytes not yet read
}

// Read copies as much of the remaining data as fits into p and advances
// past it. Once all data has been consumed it returns 0, io.EOF.
func (r *bytesReader) Read(p []byte) (int, error) {
	if len(r.data) == 0 {
		return 0, io.EOF
	}
	copied := copy(p, r.data)
	r.data = r.data[copied:]
	return copied, nil
}
// ReadBytes parses the source text held in src by wrapping it in a
// bytesReader and handing it to Read.
func ReadBytes(src []byte, errh ErrorHandler, mode Mode) (*File, error) {
	reader := &bytesReader{data: src}
	return Read(reader, errh, mode)
}
// Read parses the source text provided by src and returns the resulting
// syntax tree. If errh is nil and syntax errors were found, the result
// is nil and an error stating the number of errors. If errh is not nil,
// the tree is returned with a nil error regardless of the error count;
// errors are then expected to have been reported through errh.
// The mode argument is not used.
func Read(src io.Reader, errh ErrorHandler, mode Mode) (*File, error) {
	var p parser
	p.init(src, errh)
	p.next()
	tree := p.file()

	if errh != nil || p.nerrors <= 0 {
		return tree, nil
	}
	return nil, fmt.Errorf("%d syntax errors", p.nerrors)
}
// Write is a placeholder: it is not implemented yet and always panics
// with "unimplemented", whatever its arguments are.
func Write(w io.Writer, n *File) error {
panic("unimplemented")
}
This diff is collapsed.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment