Commit bff2c207 authored by Robert Griesemer

go/scanner: Make Init take a *token.File instead of a *token.FileSet.

Until now, each scan of a file added a new file to the file set.
With this change, a file can be re-scanned using the same *token.File
without changing the file set. Eventually this will enable the re-use of
cached source code in godoc (for the full-text index). At the moment,
source files are read over and over again from disk.

This is the first step in that direction.

R=r, rsc, r2
CC=golang-dev
https://golang.org/cl/4001041
parent b814cca3
...@@ -239,7 +239,9 @@ func lineSelection(text []byte) Selection { ...@@ -239,7 +239,9 @@ func lineSelection(text []byte) Selection {
// //
func commentSelection(src []byte) Selection { func commentSelection(src []byte) Selection {
var s scanner.Scanner var s scanner.Scanner
file := s.Init(token.NewFileSet(), "", src, nil, scanner.ScanComments+scanner.InsertSemis) fset := token.NewFileSet()
file := fset.AddFile("", fset.Base(), len(src))
s.Init(file, src, nil, scanner.ScanComments+scanner.InsertSemis)
return func() (seg []int) { return func() (seg []int) {
for { for {
pos, tok, lit := s.Scan() pos, tok, lit := s.Scan()
......
...@@ -817,7 +817,8 @@ func (x *Index) LookupWord(w string) (match *LookupResult, alt *AltWords) { ...@@ -817,7 +817,8 @@ func (x *Index) LookupWord(w string) (match *LookupResult, alt *AltWords) {
func isIdentifier(s string) bool { func isIdentifier(s string) bool {
var S scanner.Scanner var S scanner.Scanner
S.Init(token.NewFileSet(), "", []byte(s), nil, 0) fset := token.NewFileSet()
S.Init(fset.AddFile("", fset.Base(), len(s)), []byte(s), nil, 0)
if _, tok, _ := S.Scan(); tok == token.IDENT { if _, tok, _ := S.Scan(); tok == token.IDENT {
_, tok, _ := S.Scan() _, tok, _ := S.Scan()
return tok == token.EOF return tok == token.EOF
......
...@@ -156,7 +156,8 @@ func (p *ebnfParser) parse(fset *token.FileSet, out io.Writer, src []byte) { ...@@ -156,7 +156,8 @@ func (p *ebnfParser) parse(fset *token.FileSet, out io.Writer, src []byte) {
// initialize ebnfParser // initialize ebnfParser
p.out = out p.out = out
p.src = src p.src = src
p.file = p.scanner.Init(fset, "", src, p, 0) p.file = fset.AddFile("", fset.Base(), len(src))
p.scanner.Init(p.file, src, p, 0)
p.next() // initializes pos, tok, lit p.next() // initializes pos, tok, lit
// process source // process source
......
...@@ -177,7 +177,7 @@ func (p *parser) parse(fset *token.FileSet, filename string, src []byte) Grammar ...@@ -177,7 +177,7 @@ func (p *parser) parse(fset *token.FileSet, filename string, src []byte) Grammar
// initialize parser // initialize parser
p.fset = fset p.fset = fset
p.ErrorVector.Reset() p.ErrorVector.Reset()
p.scanner.Init(fset, filename, src, p, 0) p.scanner.Init(fset.AddFile(filename, fset.Base(), len(src)), src, p, 0)
p.next() // initializes pos, tok, lit p.next() // initializes pos, tok, lit
grammar := make(Grammar) grammar := make(Grammar)
......
...@@ -42,8 +42,9 @@ func (p *parser) next() { ...@@ -42,8 +42,9 @@ func (p *parser) next() {
func (p *parser) init(fset *token.FileSet, filename string, src []byte) { func (p *parser) init(fset *token.FileSet, filename string, src []byte) {
p.ErrorVector.Reset() p.ErrorVector.Reset()
p.file = p.scanner.Init(fset, filename, src, p, scanner.AllowIllegalChars) // return '@' as token.ILLEGAL w/o error message p.file = fset.AddFile(filename, fset.Base(), len(src))
p.next() // initializes pos, tok, lit p.scanner.Init(p.file, src, p, scanner.AllowIllegalChars) // return '@' as token.ILLEGAL w/o error message
p.next() // initializes pos, tok, lit
p.packs = make(map[string]string) p.packs = make(map[string]string)
p.rules = make(map[string]expr) p.rules = make(map[string]expr)
} }
......
...@@ -69,8 +69,9 @@ func scannerMode(mode uint) uint { ...@@ -69,8 +69,9 @@ func scannerMode(mode uint) uint {
} }
func (p *parser) init(fset *token.FileSet, filename string, src []byte, mode uint) { func (p *parser) init(file *token.File, src []byte, mode uint) {
p.file = p.scanner.Init(fset, filename, src, p, scannerMode(mode)) p.file = file
p.scanner.Init(p.file, src, p, scannerMode(mode))
p.mode = mode p.mode = mode
p.trace = mode&Trace != 0 // for convenience (p.trace is used frequently) p.trace = mode&Trace != 0 // for convenience (p.trace is used frequently)
p.next() p.next()
......
...@@ -96,24 +96,28 @@ const ( ...@@ -96,24 +96,28 @@ const (
InsertSemis // automatically insert semicolons InsertSemis // automatically insert semicolons
) )
// TODO(gri) Would it be better to simply provide *token.File to Init // Init prepares the scanner S to tokenize the text src by setting the
// instead of fset, and filename, and then return the file? // scanner at the beginning of src. The scanner uses the file set file
// It could cause an error/panic if the provided file.Size() // for position information and it adds line information for each line.
// doesn't match len(src). // It is ok to re-use the same file when re-scanning the same file as
// line information which is already present is ignored. Init causes a
// Init prepares the scanner S to tokenize the text src. It sets the // panic if the file size does not match the src size.
// scanner at the beginning of the source text, adds a new file with
// the given filename to the file set fset, and returns that file.
// //
// Calls to Scan will use the error handler err if they encounter a // Calls to Scan will use the error handler err if they encounter a
// syntax error and err is not nil. Also, for each error encountered, // syntax error and err is not nil. Also, for each error encountered,
// the Scanner field ErrorCount is incremented by one. The mode parameter // the Scanner field ErrorCount is incremented by one. The mode parameter
// determines how comments, illegal characters, and semicolons are handled. // determines how comments, illegal characters, and semicolons are handled.
// //
func (S *Scanner) Init(fset *token.FileSet, filename string, src []byte, err ErrorHandler, mode uint) *token.File { // Note that Init may call err if there is an error in the first character
// of the file.
//
func (S *Scanner) Init(file *token.File, src []byte, err ErrorHandler, mode uint) {
// Explicitly initialize all fields since a scanner may be reused. // Explicitly initialize all fields since a scanner may be reused.
S.file = fset.AddFile(filename, fset.Base(), len(src)) if file.Size() != len(src) {
S.dir, _ = path.Split(filename) panic("file size does not match src len")
}
S.file = file
S.dir, _ = path.Split(file.Name())
S.src = src S.src = src
S.err = err S.err = err
S.mode = mode S.mode = mode
...@@ -126,8 +130,6 @@ func (S *Scanner) Init(fset *token.FileSet, filename string, src []byte, err Err ...@@ -126,8 +130,6 @@ func (S *Scanner) Init(fset *token.FileSet, filename string, src []byte, err Err
S.ErrorCount = 0 S.ErrorCount = 0
S.next() S.next()
return S.file
} }
......
...@@ -228,7 +228,7 @@ func TestScan(t *testing.T) { ...@@ -228,7 +228,7 @@ func TestScan(t *testing.T) {
// verify scan // verify scan
var s Scanner var s Scanner
s.Init(fset, "", []byte(src), &testErrorHandler{t}, ScanComments) s.Init(fset.AddFile("", fset.Base(), len(src)), []byte(src), &testErrorHandler{t}, ScanComments)
index := 0 index := 0
epos := token.Position{"", 0, 1, 1} // expected position epos := token.Position{"", 0, 1, 1} // expected position
for { for {
...@@ -273,7 +273,8 @@ func TestScan(t *testing.T) { ...@@ -273,7 +273,8 @@ func TestScan(t *testing.T) {
func checkSemi(t *testing.T, line string, mode uint) { func checkSemi(t *testing.T, line string, mode uint) {
var S Scanner var S Scanner
file := S.Init(fset, "TestSemis", []byte(line), nil, mode) file := fset.AddFile("TestSemis", fset.Base(), len(line))
S.Init(file, []byte(line), nil, mode)
pos, tok, lit := S.Scan() pos, tok, lit := S.Scan()
for tok != token.EOF { for tok != token.EOF {
if tok == token.ILLEGAL { if tok == token.ILLEGAL {
...@@ -476,7 +477,8 @@ func TestLineComments(t *testing.T) { ...@@ -476,7 +477,8 @@ func TestLineComments(t *testing.T) {
// verify scan // verify scan
var S Scanner var S Scanner
file := S.Init(fset, "dir/TestLineComments", []byte(src), nil, 0) file := fset.AddFile("dir/TestLineComments", fset.Base(), len(src))
S.Init(file, []byte(src), nil, 0)
for _, s := range segments { for _, s := range segments {
p, _, lit := S.Scan() p, _, lit := S.Scan()
pos := file.Position(p) pos := file.Position(p)
...@@ -495,7 +497,8 @@ func TestInit(t *testing.T) { ...@@ -495,7 +497,8 @@ func TestInit(t *testing.T) {
// 1st init // 1st init
src1 := "if true { }" src1 := "if true { }"
f1 := s.Init(fset, "", []byte(src1), nil, 0) f1 := fset.AddFile("src1", fset.Base(), len(src1))
s.Init(f1, []byte(src1), nil, 0)
if f1.Size() != len(src1) { if f1.Size() != len(src1) {
t.Errorf("bad file size: got %d, expected %d", f1.Size(), len(src1)) t.Errorf("bad file size: got %d, expected %d", f1.Size(), len(src1))
} }
...@@ -508,7 +511,8 @@ func TestInit(t *testing.T) { ...@@ -508,7 +511,8 @@ func TestInit(t *testing.T) {
// 2nd init // 2nd init
src2 := "go true { ]" src2 := "go true { ]"
f2 := s.Init(fset, "", []byte(src2), nil, 0) f2 := fset.AddFile("src2", fset.Base(), len(src2))
s.Init(f2, []byte(src2), nil, 0)
if f2.Size() != len(src2) { if f2.Size() != len(src2) {
t.Errorf("bad file size: got %d, expected %d", f2.Size(), len(src2)) t.Errorf("bad file size: got %d, expected %d", f2.Size(), len(src2))
} }
...@@ -527,7 +531,8 @@ func TestIllegalChars(t *testing.T) { ...@@ -527,7 +531,8 @@ func TestIllegalChars(t *testing.T) {
var s Scanner var s Scanner
const src = "*?*$*@*" const src = "*?*$*@*"
file := s.Init(fset, "", []byte(src), &testErrorHandler{t}, AllowIllegalChars) file := fset.AddFile("", fset.Base(), len(src))
s.Init(file, []byte(src), &testErrorHandler{t}, AllowIllegalChars)
for offs, ch := range src { for offs, ch := range src {
pos, tok, lit := s.Scan() pos, tok, lit := s.Scan()
if poffs := file.Offset(pos); poffs != offs { if poffs := file.Offset(pos); poffs != offs {
...@@ -556,7 +561,7 @@ func TestStdErrorHander(t *testing.T) { ...@@ -556,7 +561,7 @@ func TestStdErrorHander(t *testing.T) {
v := new(ErrorVector) v := new(ErrorVector)
var s Scanner var s Scanner
s.Init(fset, "File1", []byte(src), v, 0) s.Init(fset.AddFile("File1", fset.Base(), len(src)), []byte(src), v, 0)
for { for {
if _, tok, _ := s.Scan(); tok == token.EOF { if _, tok, _ := s.Scan(); tok == token.EOF {
break break
...@@ -604,7 +609,7 @@ func (h *errorCollector) Error(pos token.Position, msg string) { ...@@ -604,7 +609,7 @@ func (h *errorCollector) Error(pos token.Position, msg string) {
func checkError(t *testing.T, src string, tok token.Token, pos int, err string) { func checkError(t *testing.T, src string, tok token.Token, pos int, err string) {
var s Scanner var s Scanner
var h errorCollector var h errorCollector
s.Init(fset, "", []byte(src), &h, ScanComments) s.Init(fset.AddFile("", fset.Base(), len(src)), []byte(src), &h, ScanComments)
_, tok0, _ := s.Scan() _, tok0, _ := s.Scan()
_, tok1, _ := s.Scan() _, tok1, _ := s.Scan()
if tok0 != tok { if tok0 != tok {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment