Commit 034fa90d authored by Nigel Tao

exp/html: add some tokenizer and parser benchmarks.

$GOROOT/src/pkg/exp/html/testdata/go1.html is an execution of the
$GOROOT/doc/go1.html template by godoc.

Sample numbers on my linux,amd64 desktop:
BenchmarkParser	     500	   4699198 ns/op	  16.63 MB/s
--- BENCH: BenchmarkParser
        parse_test.go:409: 1 iterations, 14653 mallocs per iteration
        parse_test.go:409: 100 iterations, 14651 mallocs per iteration
        parse_test.go:409: 500 iterations, 14651 mallocs per iteration
BenchmarkRawLevelTokenizer	    2000	    904957 ns/op	  86.37 MB/s
--- BENCH: BenchmarkRawLevelTokenizer
        token_test.go:657: 1 iterations, 28 mallocs per iteration
        token_test.go:657: 100 iterations, 28 mallocs per iteration
        token_test.go:657: 2000 iterations, 28 mallocs per iteration
BenchmarkLowLevelTokenizer	    2000	   1134300 ns/op	  68.91 MB/s
--- BENCH: BenchmarkLowLevelTokenizer
        token_test.go:657: 1 iterations, 41 mallocs per iteration
        token_test.go:657: 100 iterations, 41 mallocs per iteration
        token_test.go:657: 2000 iterations, 41 mallocs per iteration
BenchmarkHighLevelTokenizer	    1000	   2096179 ns/op	  37.29 MB/s
--- BENCH: BenchmarkHighLevelTokenizer
        token_test.go:657: 1 iterations, 6616 mallocs per iteration
        token_test.go:657: 100 iterations, 6616 mallocs per iteration
        token_test.go:657: 1000 iterations, 6616 mallocs per iteration

R=rsc
CC=andybalholm, golang-dev, r
https://golang.org/cl/6257067
parent 397b6873
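
A note on reading the sample numbers above: the benchmarks call b.SetBytes with the
length of testdata/go1.html, so the MB/s column is input bytes divided by wall-clock
time per iteration; 16.63 MB/s at 4699198 ns/op works out to roughly 78 KB of input,
i.e. the size of go1.html. The "mallocs per iteration" lines come from the b.Logf
calls in the diffs below, which take the difference in runtime.MemStats.Mallocs
across the benchmark loop and divide by b.N.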
--- a/src/pkg/exp/html/parse_test.go
+++ b/src/pkg/exp/html/parse_test.go
@@ -11,8 +11,10 @@ import (
 	"flag"
 	"fmt"
 	"io"
+	"io/ioutil"
 	"os"
 	"path/filepath"
+	"runtime"
 	"sort"
 	"strings"
 	"testing"
@@ -386,3 +388,23 @@ var renderTestBlacklist = map[string]bool{
 	// A <plaintext> element can't have anything after it in HTML.
 	`<table><plaintext><td>`: true,
 }
+
+func BenchmarkParser(b *testing.B) {
+	buf, err := ioutil.ReadFile("testdata/go1.html")
+	if err != nil {
+		b.Fatalf("could not read testdata/go1.html: %v", err)
+	}
+	b.SetBytes(int64(len(buf)))
+	runtime.GC()
+	var ms runtime.MemStats
+	runtime.ReadMemStats(&ms)
+	mallocs := ms.Mallocs
+	b.ResetTimer()
+	for i := 0; i < b.N; i++ {
+		Parse(bytes.NewBuffer(buf))
+	}
+	b.StopTimer()
+	runtime.ReadMemStats(&ms)
+	mallocs = ms.Mallocs - mallocs
+	b.Logf("%d iterations, %d mallocs per iteration\n", b.N, int(mallocs)/b.N)
+}
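
For context, here is a minimal sketch (not part of this CL) of the Parse call that
BenchmarkParser drives in a loop. The standalone-program form, the import path and
the input HTML are assumptions; the benchmark itself lives inside package html and
reads testdata/go1.html.

package main

import (
	"log"
	"os"
	"strings"

	"exp/html" // in-tree path at the time; the package later became golang.org/x/net/html
)

func main() {
	// Parse builds a full document tree (html, head, body) even from a fragment.
	doc, err := html.Parse(strings.NewReader("<p>Hello, <b>HTML</b>.</p>"))
	if err != nil {
		log.Fatal(err)
	}
	// Render the tree back to HTML so the result is visible.
	if err := html.Render(os.Stdout, doc); err != nil {
		log.Fatal(err)
	}
}

BenchmarkParser does the same with the ~78 KB go1.html input and discards the
resulting tree, so the ~14,650 mallocs per iteration are presumably dominated by
building the node tree.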
(The diff for src/pkg/exp/html/testdata/go1.html is collapsed.)
--- a/src/pkg/exp/html/token_test.go
+++ b/src/pkg/exp/html/token_test.go
@@ -7,6 +7,8 @@ package html
 import (
 	"bytes"
 	"io"
+	"io/ioutil"
+	"runtime"
 	"strings"
 	"testing"
 )
@@ -589,3 +591,66 @@ loop:
 		t.Errorf("TestBufAPI: want %q got %q", u, v)
 	}
 }
+
+const (
+	rawLevel = iota
+	lowLevel
+	highLevel
+)
+
+func benchmarkTokenizer(b *testing.B, level int) {
+	buf, err := ioutil.ReadFile("testdata/go1.html")
+	if err != nil {
+		b.Fatalf("could not read testdata/go1.html: %v", err)
+	}
+	b.SetBytes(int64(len(buf)))
+	runtime.GC()
+	var ms runtime.MemStats
+	runtime.ReadMemStats(&ms)
+	mallocs := ms.Mallocs
+	b.ResetTimer()
+	for i := 0; i < b.N; i++ {
+		z := NewTokenizer(bytes.NewBuffer(buf))
+		for {
+			tt := z.Next()
+			if tt == ErrorToken {
+				if err := z.Err(); err != nil && err != io.EOF {
+					b.Fatalf("tokenizer error: %v", err)
+				}
+				break
+			}
+			switch level {
+			case rawLevel:
+				// Calling z.Raw just returns the raw bytes of the token. It does
+				// not unescape &lt; to <, or lower-case tag names and attribute keys.
+				z.Raw()
+			case lowLevel:
+				// Calling z.Text, z.TagName and z.TagAttr returns []byte values
+				// whose contents may change on the next call to z.Next.
+				switch tt {
+				case TextToken, CommentToken, DoctypeToken:
+					z.Text()
+				case StartTagToken, SelfClosingTagToken:
+					_, more := z.TagName()
+					for more {
+						_, _, more = z.TagAttr()
+					}
+				case EndTagToken:
+					z.TagName()
+				}
+			case highLevel:
+				// Calling z.Token converts []byte values to strings whose validity
+				// extends beyond the next call to z.Next.
+				z.Token()
+			}
+		}
+	}
+	b.StopTimer()
+	runtime.ReadMemStats(&ms)
+	mallocs = ms.Mallocs - mallocs
+	b.Logf("%d iterations, %d mallocs per iteration\n", b.N, int(mallocs)/b.N)
+}
+
+func BenchmarkRawLevelTokenizer(b *testing.B) { benchmarkTokenizer(b, rawLevel) }
+func BenchmarkLowLevelTokenizer(b *testing.B) { benchmarkTokenizer(b, lowLevel) }
+func BenchmarkHighLevelTokenizer(b *testing.B) { benchmarkTokenizer(b, highLevel) }
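
For comparison, here is a minimal sketch (not part of this CL) of the low-level and
high-level tokenizer APIs that benchmarkTokenizer exercises. The standalone-program
form, the import path and the input string are assumptions.

package main

import (
	"fmt"
	"strings"

	"exp/html" // in-tree path at the time; the package later became golang.org/x/net/html
)

func main() {
	const page = `<a href="/doc/go1.html">Go 1</a>`

	// Low level: TagName and TagAttr return []byte slices that are only valid
	// until the next call to Next, so iterating this way avoids most per-token
	// allocations.
	z := html.NewTokenizer(strings.NewReader(page))
	for {
		tt := z.Next()
		if tt == html.ErrorToken {
			break // z.Err() is io.EOF at the end of well-formed input
		}
		if tt == html.StartTagToken {
			name, more := z.TagName()
			fmt.Printf("start tag %s\n", name)
			for more {
				var key, val []byte
				key, val, more = z.TagAttr()
				fmt.Printf("  attr %s=%q\n", key, val)
			}
		}
	}

	// High level: Token copies tag names, attributes and text into new strings,
	// which is more convenient but allocates on every call.
	z = html.NewTokenizer(strings.NewReader(page))
	for z.Next() != html.ErrorToken {
		fmt.Println(z.Token())
	}
}

The difference between reusing the tokenizer's internal buffers and copying into
strings is what the sample numbers show: 28 and 41 mallocs per iteration at the raw
and low levels versus 6616 at the high level. The benchmarks themselves would
typically be run with go test -bench=. in the package directory.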