Commit 39a1465d authored by Robert Griesemer

go/parser: fix memory leak by making a copy of token literals

The scanner returns slices into the original source
for token values. If those slices make it into
the AST and from there into other long-lived data
structures (e.g. godoc search), references to the
original source are unintentionally kept alive.

For the current godoc and source tree, this change reduces
memory consumption after indexing and before GC by ~92MB
or almost 30%, and by ~10MB after GC (or about 6%).

R=rsc
CC=golang-dev
https://golang.org/cl/4273072
parent 52b95970
...@@ -47,9 +47,9 @@ type parser struct { ...@@ -47,9 +47,9 @@ type parser struct {
lineComment *ast.CommentGroup // last line comment lineComment *ast.CommentGroup // last line comment
// Next token // Next token
pos token.Pos // token position pos token.Pos // token position
tok token.Token // one token look-ahead tok token.Token // one token look-ahead
lit []byte // token literal lit_ []byte // token literal (slice into original source, don't hold on to it)
// Non-syntactic parser control // Non-syntactic parser control
exprLev int // < 0: in control clause, >= 0: in expression exprLev int // < 0: in control clause, >= 0: in expression
...@@ -95,6 +95,15 @@ func (p *parser) init(fset *token.FileSet, filename string, src []byte, mode uin ...@@ -95,6 +95,15 @@ func (p *parser) init(fset *token.FileSet, filename string, src []byte, mode uin
} }
// lit returns a freshly allocated copy of the current token literal.
// p.lit_ is a slice into the original source, so storing it directly in
// the AST would keep the whole source reachable through that reference.
func (p *parser) lit() []byte {
	dup := make([]byte, 0, len(p.lit_))
	return append(dup, p.lit_...)
}
// ---------------------------------------------------------------------------- // ----------------------------------------------------------------------------
// Scoping support // Scoping support
...@@ -235,7 +244,7 @@ func (p *parser) next0() { ...@@ -235,7 +244,7 @@ func (p *parser) next0() {
s := p.tok.String() s := p.tok.String()
switch { switch {
case p.tok.IsLiteral(): case p.tok.IsLiteral():
p.printTrace(s, string(p.lit)) p.printTrace(s, string(p.lit_))
case p.tok.IsOperator(), p.tok.IsKeyword(): case p.tok.IsOperator(), p.tok.IsKeyword():
p.printTrace("\"" + s + "\"") p.printTrace("\"" + s + "\"")
default: default:
...@@ -243,7 +252,7 @@ func (p *parser) next0() { ...@@ -243,7 +252,7 @@ func (p *parser) next0() {
} }
} }
p.pos, p.tok, p.lit = p.scanner.Scan() p.pos, p.tok, p.lit_ = p.scanner.Scan()
} }
// Consume a comment and return it and the line on which it ends. // Consume a comment and return it and the line on which it ends.
...@@ -251,15 +260,15 @@ func (p *parser) consumeComment() (comment *ast.Comment, endline int) { ...@@ -251,15 +260,15 @@ func (p *parser) consumeComment() (comment *ast.Comment, endline int) {
// /*-style comments may end on a different line than where they start. // /*-style comments may end on a different line than where they start.
// Scan the comment for '\n' chars and adjust endline accordingly. // Scan the comment for '\n' chars and adjust endline accordingly.
endline = p.file.Line(p.pos) endline = p.file.Line(p.pos)
if p.lit[1] == '*' { if p.lit_[1] == '*' {
for _, b := range p.lit { for _, b := range p.lit_ {
if b == '\n' { if b == '\n' {
endline++ endline++
} }
} }
} }
comment = &ast.Comment{p.pos, p.lit} comment = &ast.Comment{p.pos, p.lit()}
p.next0() p.next0()
return return
...@@ -349,12 +358,12 @@ func (p *parser) errorExpected(pos token.Pos, msg string) { ...@@ -349,12 +358,12 @@ func (p *parser) errorExpected(pos token.Pos, msg string) {
if pos == p.pos { if pos == p.pos {
// the error happened at the current position; // the error happened at the current position;
// make the error message more specific // make the error message more specific
if p.tok == token.SEMICOLON && p.lit[0] == '\n' { if p.tok == token.SEMICOLON && p.lit_[0] == '\n' {
msg += ", found newline" msg += ", found newline"
} else { } else {
msg += ", found '" + p.tok.String() + "'" msg += ", found '" + p.tok.String() + "'"
if p.tok.IsLiteral() { if p.tok.IsLiteral() {
msg += " " + string(p.lit) msg += " " + string(p.lit_)
} }
} }
} }
...@@ -386,7 +395,7 @@ func (p *parser) parseIdent() *ast.Ident { ...@@ -386,7 +395,7 @@ func (p *parser) parseIdent() *ast.Ident {
pos := p.pos pos := p.pos
name := "_" name := "_"
if p.tok == token.IDENT { if p.tok == token.IDENT {
name = string(p.lit) name = string(p.lit_)
p.next() p.next()
} else { } else {
p.expect(token.IDENT) // use expect() error handling p.expect(token.IDENT) // use expect() error handling
...@@ -525,7 +534,7 @@ func (p *parser) parseFieldDecl() *ast.Field { ...@@ -525,7 +534,7 @@ func (p *parser) parseFieldDecl() *ast.Field {
// optional tag // optional tag
var tag *ast.BasicLit var tag *ast.BasicLit
if p.tok == token.STRING { if p.tok == token.STRING {
tag = &ast.BasicLit{p.pos, p.tok, p.lit} tag = &ast.BasicLit{p.pos, p.tok, p.lit()}
p.next() p.next()
} }
...@@ -943,7 +952,7 @@ func (p *parser) parseOperand() ast.Expr { ...@@ -943,7 +952,7 @@ func (p *parser) parseOperand() ast.Expr {
return ident return ident
case token.INT, token.FLOAT, token.IMAG, token.CHAR, token.STRING: case token.INT, token.FLOAT, token.IMAG, token.CHAR, token.STRING:
x := &ast.BasicLit{p.pos, p.tok, p.lit} x := &ast.BasicLit{p.pos, p.tok, p.lit()}
p.next() p.next()
return x return x
...@@ -1857,7 +1866,7 @@ func parseImportSpec(p *parser, doc *ast.CommentGroup, _ int) ast.Spec { ...@@ -1857,7 +1866,7 @@ func parseImportSpec(p *parser, doc *ast.CommentGroup, _ int) ast.Spec {
var path *ast.BasicLit var path *ast.BasicLit
if p.tok == token.STRING { if p.tok == token.STRING {
path = &ast.BasicLit{p.pos, p.tok, p.lit} path = &ast.BasicLit{p.pos, p.tok, p.lit()}
p.next() p.next()
} else { } else {
p.expect(token.STRING) // use expect() error handling p.expect(token.STRING) // use expect() error handling
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment