Commit 24a088d2 authored by Rob Pike

text/template: efficient reporting of line numbers

Instead of scanning the text to count newlines, which is n², keep track as we go
and store the line number in the token.

benchmark                 old ns/op      new ns/op     delta
BenchmarkParseLarge-4     1589721293     38783310      -97.56%

Fixes #17851

Change-Id: I231225c61e667535e2ce55cd2facea6d279cc59d
Reviewed-on: https://go-review.googlesource.com/33234
Run-TryBot: Rob Pike <r@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
parent bb00a8d9
......@@ -1152,7 +1152,7 @@ func TestUnterminatedStringError(t *testing.T) {
t.Fatal("expected error")
}
str := err.Error()
if !strings.Contains(str, "X:3: unexpected unterminated raw quoted strin") {
if !strings.Contains(str, "X:3: unexpected unterminated raw quoted string") {
t.Fatalf("unexpected error: %s", str)
}
}
......
......@@ -16,6 +16,7 @@ type item struct {
typ itemType // The type of this item.
pos Pos // The starting position, in bytes, of this item in the input string.
val string // The value of this item.
line int // The line number at the start of this item.
}
func (i item) String() string {
......@@ -116,6 +117,7 @@ type lexer struct {
lastPos Pos // position of most recent item returned by nextItem
items chan item // channel of scanned items
parenDepth int // nesting depth of ( ) exprs
line int // 1+number of newlines seen
}
// next returns the next rune in the input.
......@@ -127,6 +129,9 @@ func (l *lexer) next() rune {
r, w := utf8.DecodeRuneInString(l.input[l.pos:])
l.width = Pos(w)
l.pos += l.width
if r == '\n' {
l.line++
}
return r
}
......@@ -140,11 +145,20 @@ func (l *lexer) peek() rune {
// backup undoes the most recent call to next by moving the position back
// by the width of the rune that call consumed. It is only valid once per
// call of next.
func (l *lexer) backup() {
	l.pos -= l.width
	if l.width != 1 {
		// A newline is a single byte, so any other width cannot be one.
		return
	}
	// Keep the newline count in step with the position we just restored.
	if l.input[l.pos] == '\n' {
		l.line--
	}
}
// emit passes an item back to the client.
// It sends an item of type t whose value is l.input[l.start:l.pos] on
// l.items, then moves l.start up to l.pos so the next item begins there.
//
// NOTE(review): this listing is a flattened diff, so two sends appear below.
// The three-field send is presumably the line being removed and the
// four-field send (which adds l.line) its replacement; confirm against the
// real file before reading them as two consecutive sends.
func (l *lexer) emit(t itemType) {
l.items <- item{t, l.start, l.input[l.start:l.pos]}
l.items <- item{t, l.start, l.input[l.start:l.pos], l.line}
// Some items contain text internally. If so, count their newlines.
// The count is added after the send, so the item carries the value l.line
// held before this item's own newlines were added here.
// NOTE(review): next also increments l.line for every '\n' it consumes; if
// any of the item types below is scanned through next (lexRawQuote calls
// next in its loop), its newlines may be counted twice — verify.
switch t {
case itemText, itemRawString, itemLeftDelim, itemRightDelim:
l.line += strings.Count(l.input[l.start:l.pos], "\n")
}
l.start = l.pos
}
......@@ -169,17 +183,10 @@ func (l *lexer) acceptRun(valid string) {
l.backup()
}
// lineNumber returns the 1-based line of the input at l.lastPos, the
// position of the previous item handed out by nextItem. Counting from that
// position means peek cannot cause a newline to be counted twice.
func (l *lexer) lineNumber() int {
	consumed := l.input[:l.lastPos]
	newlines := strings.Count(consumed, "\n")
	return newlines + 1
}
// errorf returns an error token and terminates the scan by passing
// back a nil pointer that will be the next state, terminating l.nextItem.
// The token sent on l.items has type itemError, position l.start, and the
// formatted message as its value.
//
// NOTE(review): this listing is a flattened diff, so two sends appear below.
// The three-field send is presumably the line being removed and the
// four-field send (which adds l.line) its replacement; confirm against the
// real file before reading them as two consecutive sends.
func (l *lexer) errorf(format string, args ...interface{}) stateFn {
l.items <- item{itemError, l.start, fmt.Sprintf(format, args...)}
l.items <- item{itemError, l.start, fmt.Sprintf(format, args...), l.line}
return nil
}
......@@ -212,6 +219,7 @@ func lex(name, input, left, right string) *lexer {
leftDelim: left,
rightDelim: right,
items: make(chan item),
line: 1,
}
go l.run()
return l
......@@ -602,10 +610,14 @@ Loop:
// lexRawQuote scans a raw quoted string.
func lexRawQuote(l *lexer) stateFn {
startLine := l.line
Loop:
for {
switch l.next() {
case eof:
// Restore line number to location of opening quote.
// We will error out so it's ok just to overwrite the field.
l.line = startLine
return l.errorf("unterminated raw quoted string")
case '`':
break Loop
......
This diff is collapsed.
......@@ -157,7 +157,7 @@ func (t *Tree) ErrorContext(n Node) (location, context string) {
// errorf formats the error and terminates processing.
// It clears t.Root, prefixes the message with
// "template: <ParseName>:<line>: ", and panics with the resulting error.
//
// NOTE(review): this listing is a flattened diff, so two assignments to
// format appear below. The one calling t.lex.lineNumber() is presumably the
// line being removed and the one reading t.token[0].line its replacement;
// confirm against the real file, since running both would apply the prefix
// twice.
func (t *Tree) errorf(format string, args ...interface{}) {
t.Root = nil
format = fmt.Sprintf("template: %s:%d: %s", t.ParseName, t.lex.lineNumber(), format)
format = fmt.Sprintf("template: %s:%d: %s", t.ParseName, t.token[0].line, format)
// The prefix is folded into the format string itself; args are applied
// only by the Errorf call below.
panic(fmt.Errorf(format, args...))
}
......@@ -376,15 +376,17 @@ func (t *Tree) action() (n Node) {
return t.withControl()
}
t.backup()
token := t.peek()
// Do not pop variables; they persist until "end".
return t.newAction(t.peek().pos, t.lex.lineNumber(), t.pipeline("command"))
return t.newAction(token.pos, token.line, t.pipeline("command"))
}
// Pipeline:
// declarations? command ('|' command)*
func (t *Tree) pipeline(context string) (pipe *PipeNode) {
var decl []*VariableNode
pos := t.peekNonSpace().pos
token := t.peekNonSpace()
pos := token.pos
// Are there declarations?
for {
if v := t.peekNonSpace(); v.typ == itemVariable {
......@@ -413,7 +415,7 @@ func (t *Tree) pipeline(context string) (pipe *PipeNode) {
}
break
}
pipe = t.newPipeline(pos, t.lex.lineNumber(), decl)
pipe = t.newPipeline(pos, token.line, decl)
for {
switch token := t.nextNonSpace(); token.typ {
case itemRightDelim, itemRightParen:
......@@ -450,7 +452,6 @@ func (t *Tree) checkPipeline(pipe *PipeNode, context string) {
func (t *Tree) parseControl(allowElseIf bool, context string) (pos Pos, line int, pipe *PipeNode, list, elseList *ListNode) {
defer t.popVars(len(t.vars))
line = t.lex.lineNumber()
pipe = t.pipeline(context)
var next Node
list, next = t.itemList()
......@@ -479,7 +480,7 @@ func (t *Tree) parseControl(allowElseIf bool, context string) (pos Pos, line int
t.errorf("expected end; found %s", next)
}
}
return pipe.Position(), line, pipe, list, elseList
return pipe.Position(), pipe.Line, pipe, list, elseList
}
// If:
......@@ -521,9 +522,10 @@ func (t *Tree) elseControl() Node {
peek := t.peekNonSpace()
if peek.typ == itemIf {
// We see "{{else if ... " but in effect rewrite it to {{else}}{{if ... ".
return t.newElse(peek.pos, t.lex.lineNumber())
return t.newElse(peek.pos, peek.line)
}
return t.newElse(t.expect(itemRightDelim, "else").pos, t.lex.lineNumber())
token := t.expect(itemRightDelim, "else")
return t.newElse(token.pos, token.line)
}
// Block:
......@@ -550,7 +552,7 @@ func (t *Tree) blockControl() Node {
block.add()
block.stopParse()
return t.newTemplate(token.pos, t.lex.lineNumber(), name, pipe)
return t.newTemplate(token.pos, token.line, name, pipe)
}
// Template:
......@@ -567,7 +569,7 @@ func (t *Tree) templateControl() Node {
// Do not pop variables; they persist until "end".
pipe = t.pipeline(context)
}
return t.newTemplate(token.pos, t.lex.lineNumber(), name, pipe)
return t.newTemplate(token.pos, token.line, name, pipe)
}
func (t *Tree) parseTemplateName(token item, context string) (name string) {
......
......@@ -484,3 +484,37 @@ func TestBlock(t *testing.T) {
t.Errorf("inner template = %q, want %q", g, w)
}
}
// TestLineNum checks that the parser records the correct line number on
// each action node and on that action's pipeline. The input is count
// identical one-line actions, so the action on line k must report line k.
func TestLineNum(t *testing.T) {
	const count = 100
	text := strings.Repeat("{{printf 1234}}\n", count)
	tree, err := New("bench").Parse(text, "", "", make(map[string]*Tree), builtins)
	if err != nil {
		t.Fatal(err)
	}
	// Check the line numbers. Each line is an action containing a pipeline,
	// followed by text. That's two nodes per line.
	nodes := tree.Root.Nodes
	// Without this check the loop below would pass vacuously if the parser
	// produced too few nodes.
	if len(nodes) != 2*count {
		t.Fatalf("got %d nodes, want %d", len(nodes), 2*count)
	}
	for i := 0; i < len(nodes); i += 2 {
		line := 1 + i/2
		// Action first. Use a checked assertion so that an unexpected node
		// type is reported as a test failure rather than a panic.
		action, ok := nodes[i].(*ActionNode)
		if !ok {
			t.Fatalf("line %d: node is %T, want *ActionNode", line, nodes[i])
		}
		if action.Line != line {
			t.Fatalf("line %d: action is line %d", line, action.Line)
		}
		pipe := action.Pipe
		if pipe.Line != line {
			t.Fatalf("line %d: pipe is line %d", line, pipe.Line)
		}
	}
}
// BenchmarkParseLarge measures the cost of parsing a template built from
// 10000 copies of a one-line action. Any parse error aborts the benchmark.
func BenchmarkParseLarge(b *testing.B) {
	input := strings.Repeat("{{1234}}\n", 10000)
	for remaining := b.N; remaining > 0; remaining-- {
		if _, err := New("bench").Parse(input, "", "", make(map[string]*Tree), builtins); err != nil {
			b.Fatal(err)
		}
	}
}
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment