Commit fc5107c2 authored by Brad Fitzpatrick

go/doc: compile regexps lazily

Compile go/doc's 4 regexps lazily, on demand.

Also, add a test for the one that had no test coverage.

This reduces init-time CPU as well as heap by ~20KB when they're not
used, which seems to be common enough. As an example, cmd/doc only
seems to use 1 of them. (as noted by temporary print statements)

Updates #26775

Change-Id: I85df89b836327a53fb8e1ace3f92480374270368
Reviewed-on: https://go-review.googlesource.com/127875
Run-TryBot: Brad Fitzpatrick <bradfitz@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
parent c544e0fb
...@@ -204,7 +204,7 @@ var pkgDeps = map[string][]string{ ...@@ -204,7 +204,7 @@ var pkgDeps = map[string][]string{
// Go parser. // Go parser.
"go/ast": {"L4", "OS", "go/scanner", "go/token"}, "go/ast": {"L4", "OS", "go/scanner", "go/token"},
"go/doc": {"L4", "go/ast", "go/token", "regexp", "text/template"}, "go/doc": {"L4", "OS", "go/ast", "go/token", "regexp", "text/template"},
"go/parser": {"L4", "OS", "go/ast", "go/scanner", "go/token"}, "go/parser": {"L4", "OS", "go/ast", "go/scanner", "go/token"},
"go/printer": {"L4", "OS", "go/ast", "go/scanner", "go/token", "text/tabwriter"}, "go/printer": {"L4", "OS", "go/ast", "go/scanner", "go/token", "text/tabwriter"},
"go/scanner": {"L4", "OS", "go/token"}, "go/scanner": {"L4", "OS", "go/token"},
......
...@@ -8,7 +8,6 @@ package doc ...@@ -8,7 +8,6 @@ package doc
import ( import (
"io" "io"
"regexp"
"strings" "strings"
"text/template" // for HTMLEscape "text/template" // for HTMLEscape
"unicode" "unicode"
...@@ -63,7 +62,7 @@ const ( ...@@ -63,7 +62,7 @@ const (
urlRx = protoPart + `://` + hostPart + pathPart urlRx = protoPart + `://` + hostPart + pathPart
) )
var matchRx = regexp.MustCompile(`(` + urlRx + `)|(` + identRx + `)`) var matchRx = newLazyRE(`(` + urlRx + `)|(` + identRx + `)`)
var ( var (
html_a = []byte(`<a href="`) html_a = []byte(`<a href="`)
...@@ -276,7 +275,7 @@ type block struct { ...@@ -276,7 +275,7 @@ type block struct {
lines []string lines []string
} }
var nonAlphaNumRx = regexp.MustCompile(`[^a-zA-Z0-9]`) var nonAlphaNumRx = newLazyRE(`[^a-zA-Z0-9]`)
func anchorID(line string) string { func anchorID(line string) string {
// Add a "hdr-" prefix to avoid conflicting with IDs used for package symbols. // Add a "hdr-" prefix to avoid conflicting with IDs used for package symbols.
......
...@@ -144,3 +144,12 @@ func Test(t *testing.T) { ...@@ -144,3 +144,12 @@ func Test(t *testing.T) {
test(t, AllDecls) test(t, AllDecls)
test(t, AllMethods) test(t, AllMethods)
} }
// TestAnchorID checks anchorID against a heading that contains spaces,
// a digit, and an ampersand, comparing the result with a fixed expected ID.
func TestAnchorID(t *testing.T) {
	const (
		in   = "Important Things 2 Know & Stuff"
		want = "hdr-Important_Things_2_Know___Stuff"
	)
	if got := anchorID(in); got != want {
		t.Errorf("anchorID(%q) = %q; want %q", in, got, want)
	}
}
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package doc
import (
"os"
"regexp"
"strings"
"sync"
)
// lazyRE is a regular expression whose compilation is postponed until one
// of its methods is first called.
type lazyRE struct {
	str  string         // pattern source; reset to "" once compiled
	once sync.Once      // ensures build runs at most once
	rx   *regexp.Regexp // compiled pattern, populated by build
}

// re returns the compiled regexp, compiling it on the first call.
func (lr *lazyRE) re() *regexp.Regexp {
	lr.once.Do(lr.build)
	return lr.rx
}

// build compiles the stored pattern. It panics (via regexp.MustCompile)
// if the pattern is invalid.
func (lr *lazyRE) build() {
	compiled := regexp.MustCompile(lr.str)
	// The pattern text is only read here, so drop it after compiling.
	lr.rx, lr.str = compiled, ""
}

// FindStringSubmatchIndex forwards to (*regexp.Regexp).FindStringSubmatchIndex
// on the lazily compiled regexp.
func (lr *lazyRE) FindStringSubmatchIndex(s string) []int {
	rx := lr.re()
	return rx.FindStringSubmatchIndex(s)
}

// ReplaceAllString forwards to (*regexp.Regexp).ReplaceAllString
// on the lazily compiled regexp.
func (lr *lazyRE) ReplaceAllString(src, repl string) string {
	rx := lr.re()
	return rx.ReplaceAllString(src, repl)
}

// MatchString forwards to (*regexp.Regexp).MatchString
// on the lazily compiled regexp.
func (lr *lazyRE) MatchString(s string) bool {
	rx := lr.re()
	return rx.MatchString(s)
}
// inTest reports whether the running binary looks like a test binary:
// os.Args[0], with any ".exe" suffix removed, ends in ".test".
var inTest = func() bool {
	if len(os.Args) == 0 {
		return false
	}
	exe := strings.TrimSuffix(os.Args[0], ".exe")
	return strings.HasSuffix(exe, ".test")
}()
// newLazyRE returns a lazyRE for the pattern str. Outside of tests the
// pattern is left uncompiled until first use; when inTest is set it is
// compiled immediately.
func newLazyRE(str string) *lazyRE {
	lazy := new(lazyRE)
	lazy.str = str
	if !inTest {
		return lazy
	}
	// In tests, always compile the regexps early.
	lazy.re()
	return lazy
}
...@@ -7,7 +7,6 @@ package doc ...@@ -7,7 +7,6 @@ package doc
import ( import (
"go/ast" "go/ast"
"go/token" "go/token"
"regexp"
"sort" "sort"
"strconv" "strconv"
) )
...@@ -426,8 +425,8 @@ func (r *reader) readFunc(fun *ast.FuncDecl) { ...@@ -426,8 +425,8 @@ func (r *reader) readFunc(fun *ast.FuncDecl) {
var ( var (
noteMarker = `([A-Z][A-Z]+)\(([^)]+)\):?` // MARKER(uid), MARKER at least 2 chars, uid at least 1 char noteMarker = `([A-Z][A-Z]+)\(([^)]+)\):?` // MARKER(uid), MARKER at least 2 chars, uid at least 1 char
noteMarkerRx = regexp.MustCompile(`^[ \t]*` + noteMarker) // MARKER(uid) at text start noteMarkerRx = newLazyRE(`^[ \t]*` + noteMarker) // MARKER(uid) at text start
noteCommentRx = regexp.MustCompile(`^/[/*][ \t]*` + noteMarker) // MARKER(uid) at comment start noteCommentRx = newLazyRE(`^/[/*][ \t]*` + noteMarker) // MARKER(uid) at comment start
) )
// readNote collects a single note from a sequence of comments. // readNote collects a single note from a sequence of comments.
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment