Commit 034fa90d authored by Nigel Tao

exp/html: add some tokenizer and parser benchmarks.

$GOROOT/src/pkg/exp/html/testdata/go1.html is an execution of the
$GOROOT/doc/go1.html template by godoc.

Sample numbers on my linux,amd64 desktop:
BenchmarkParser	     500	   4699198 ns/op	  16.63 MB/s
--- BENCH: BenchmarkParser
        parse_test.go:409: 1 iterations, 14653 mallocs per iteration
        parse_test.go:409: 100 iterations, 14651 mallocs per iteration
        parse_test.go:409: 500 iterations, 14651 mallocs per iteration
BenchmarkRawLevelTokenizer	    2000	    904957 ns/op	  86.37 MB/s
--- BENCH: BenchmarkRawLevelTokenizer
        token_test.go:657: 1 iterations, 28 mallocs per iteration
        token_test.go:657: 100 iterations, 28 mallocs per iteration
        token_test.go:657: 2000 iterations, 28 mallocs per iteration
BenchmarkLowLevelTokenizer	    2000	   1134300 ns/op	  68.91 MB/s
--- BENCH: BenchmarkLowLevelTokenizer
        token_test.go:657: 1 iterations, 41 mallocs per iteration
        token_test.go:657: 100 iterations, 41 mallocs per iteration
        token_test.go:657: 2000 iterations, 41 mallocs per iteration
BenchmarkHighLevelTokenizer	    1000	   2096179 ns/op	  37.29 MB/s
--- BENCH: BenchmarkHighLevelTokenizer
        token_test.go:657: 1 iterations, 6616 mallocs per iteration
        token_test.go:657: 100 iterations, 6616 mallocs per iteration
        token_test.go:657: 1000 iterations, 6616 mallocs per iteration

R=rsc
CC=andybalholm, golang-dev, r
https://golang.org/cl/6257067
parent 397b6873
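
A note on reading the sample numbers above: the benchmarks call b.SetBytes with the
length of testdata/go1.html, so the MB/s column is input bytes divided by wall-clock
time per iteration; 16.63 MB/s at 4699198 ns/op works out to roughly 78 KB of input,
i.e. the size of go1.html. The "mallocs per iteration" lines come from the b.Logf
calls in the diffs below, which take the difference in runtime.MemStats.Mallocs
across the benchmark loop and divide by b.N.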
--- a/src/pkg/exp/html/parse_test.go
+++ b/src/pkg/exp/html/parse_test.go
@@ -11,8 +11,10 @@ import (
 	"flag"
 	"fmt"
 	"io"
+	"io/ioutil"
 	"os"
 	"path/filepath"
+	"runtime"
 	"sort"
 	"strings"
 	"testing"
@@ -386,3 +388,23 @@ var renderTestBlacklist = map[string]bool{
 	// A <plaintext> element can't have anything after it in HTML.
 	`<table><plaintext><td>`: true,
 }
+
+func BenchmarkParser(b *testing.B) {
+	buf, err := ioutil.ReadFile("testdata/go1.html")
+	if err != nil {
+		b.Fatalf("could not read testdata/go1.html: %v", err)
+	}
+	b.SetBytes(int64(len(buf)))
+	runtime.GC()
+	var ms runtime.MemStats
+	runtime.ReadMemStats(&ms)
+	mallocs := ms.Mallocs
+	b.ResetTimer()
+	for i := 0; i < b.N; i++ {
+		Parse(bytes.NewBuffer(buf))
+	}
+	b.StopTimer()
+	runtime.ReadMemStats(&ms)
+	mallocs = ms.Mallocs - mallocs
+	b.Logf("%d iterations, %d mallocs per iteration\n", b.N, int(mallocs)/b.N)
+}
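
For context, here is a minimal sketch (not part of this CL) of the Parse call that
BenchmarkParser drives in a loop. The standalone-program form, the import path and
the input HTML are assumptions; the benchmark itself lives inside package html and
reads testdata/go1.html.

package main

import (
	"log"
	"os"
	"strings"

	"exp/html" // in-tree path at the time; the package later became golang.org/x/net/html
)

func main() {
	// Parse builds a full document tree (html, head, body) even from a fragment.
	doc, err := html.Parse(strings.NewReader("<p>Hello, <b>HTML</b>.</p>"))
	if err != nil {
		log.Fatal(err)
	}
	// Render the tree back to HTML so the result is visible.
	if err := html.Render(os.Stdout, doc); err != nil {
		log.Fatal(err)
	}
}

BenchmarkParser does the same with the ~78 KB go1.html input and discards the
resulting tree, so the ~14,650 mallocs per iteration are presumably dominated by
building the node tree.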
(The diff for src/pkg/exp/html/testdata/go1.html is collapsed.)
--- a/src/pkg/exp/html/token_test.go
+++ b/src/pkg/exp/html/token_test.go
@@ -7,6 +7,8 @@ package html
 import (
 	"bytes"
 	"io"
+	"io/ioutil"
+	"runtime"
 	"strings"
 	"testing"
 )
@@ -589,3 +591,66 @@ loop:
 		t.Errorf("TestBufAPI: want %q got %q", u, v)
 	}
 }
+
+const (
+	rawLevel = iota
+	lowLevel
+	highLevel
+)
+
+func benchmarkTokenizer(b *testing.B, level int) {
+	buf, err := ioutil.ReadFile("testdata/go1.html")
+	if err != nil {
+		b.Fatalf("could not read testdata/go1.html: %v", err)
+	}
+	b.SetBytes(int64(len(buf)))
+	runtime.GC()
+	var ms runtime.MemStats
+	runtime.ReadMemStats(&ms)
+	mallocs := ms.Mallocs
+	b.ResetTimer()
+	for i := 0; i < b.N; i++ {
+		z := NewTokenizer(bytes.NewBuffer(buf))
+		for {
+			tt := z.Next()
+			if tt == ErrorToken {
+				if err := z.Err(); err != nil && err != io.EOF {
+					b.Fatalf("tokenizer error: %v", err)
+				}
+				break
+			}
+			switch level {
+			case rawLevel:
+				// Calling z.Raw just returns the raw bytes of the token. It does
+				// not unescape &lt; to <, or lower-case tag names and attribute keys.
+				z.Raw()
+			case lowLevel:
+				// Calling z.Text, z.TagName and z.TagAttr returns []byte values
+				// whose contents may change on the next call to z.Next.
+				switch tt {
+				case TextToken, CommentToken, DoctypeToken:
+					z.Text()
+				case StartTagToken, SelfClosingTagToken:
+					_, more := z.TagName()
+					for more {
+						_, _, more = z.TagAttr()
+					}
+				case EndTagToken:
+					z.TagName()
+				}
+			case highLevel:
+				// Calling z.Token converts []byte values to strings whose validity
+				// extends beyond the next call to z.Next.
+				z.Token()
+			}
+		}
+	}
+	b.StopTimer()
+	runtime.ReadMemStats(&ms)
+	mallocs = ms.Mallocs - mallocs
+	b.Logf("%d iterations, %d mallocs per iteration\n", b.N, int(mallocs)/b.N)
+}
+
+func BenchmarkRawLevelTokenizer(b *testing.B) { benchmarkTokenizer(b, rawLevel) }
+func BenchmarkLowLevelTokenizer(b *testing.B) { benchmarkTokenizer(b, lowLevel) }
+func BenchmarkHighLevelTokenizer(b *testing.B) { benchmarkTokenizer(b, highLevel) }
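
For comparison, here is a minimal sketch (not part of this CL) of the low-level and
high-level tokenizer APIs that benchmarkTokenizer exercises. The standalone-program
form, the import path and the input string are assumptions.

package main

import (
	"fmt"
	"strings"

	"exp/html" // in-tree path at the time; the package later became golang.org/x/net/html
)

func main() {
	const page = `<a href="/doc/go1.html">Go 1</a>`

	// Low level: TagName and TagAttr return []byte slices that are only valid
	// until the next call to Next, so iterating this way avoids most per-token
	// allocations.
	z := html.NewTokenizer(strings.NewReader(page))
	for {
		tt := z.Next()
		if tt == html.ErrorToken {
			break // z.Err() is io.EOF at the end of well-formed input
		}
		if tt == html.StartTagToken {
			name, more := z.TagName()
			fmt.Printf("start tag %s\n", name)
			for more {
				var key, val []byte
				key, val, more = z.TagAttr()
				fmt.Printf("  attr %s=%q\n", key, val)
			}
		}
	}

	// High level: Token copies tag names, attributes and text into new strings,
	// which is more convenient but allocates on every call.
	z = html.NewTokenizer(strings.NewReader(page))
	for z.Next() != html.ErrorToken {
		fmt.Println(z.Token())
	}
}

The difference between reusing the tokenizer's internal buffers and copying into
strings is what the sample numbers show: 28 and 41 mallocs per iteration at the raw
and low levels versus 6616 at the high level. The benchmarks themselves would
typically be run with go test -bench=. in the package directory.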