Commit f40cb19a authored by Daniel Martí's avatar Daniel Martí

internal/lazyregexp: add a lazy Regexp package

This was implemented as part of go/doc, but it's going to be useful in
other packages. In particular, many packages under cmd/go like web and
vcs make somewhat heavy use of global regexes, which add a non-trivial
amount of init work to the cmd/go program.

A lazy wrapper around regexp.Regexp will make it trivial to get rid of
the extra cost with a trivial refactor, so make it possible for other
packages in the repository to make use of it. While naming the package,
give the members better names, such as lazyregexp.New and
lazyregexp.Regexp.

We're also considering adding some form of a lazy API to the public
regexp package, so this internal package will allow us to get some
initial experience across std and cmd.

For #29382.

Change-Id: I30b0e72871d5267c309786f95f4cb15c68b2393d
Reviewed-on: https://go-review.googlesource.com/c/164040
Run-TryBot: Daniel Martí <mvdan@mvdan.cc>
Reviewed-by: default avatarBrad Fitzpatrick <bradfitz@golang.org>
Reviewed-by: default avatarBryan C. Mills <bcmills@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
parent 0d8c3637
...@@ -192,10 +192,11 @@ var pkgDeps = map[string][]string{ ...@@ -192,10 +192,11 @@ var pkgDeps = map[string][]string{
"runtime/trace": {"L0", "context", "fmt"}, "runtime/trace": {"L0", "context", "fmt"},
"text/tabwriter": {"L2"}, "text/tabwriter": {"L2"},
"testing": {"L2", "flag", "fmt", "internal/race", "os", "runtime/debug", "runtime/pprof", "runtime/trace", "time"}, "testing": {"L2", "flag", "fmt", "internal/race", "os", "runtime/debug", "runtime/pprof", "runtime/trace", "time"},
"testing/iotest": {"L2", "log"}, "testing/iotest": {"L2", "log"},
"testing/quick": {"L2", "flag", "fmt", "reflect", "time"}, "testing/quick": {"L2", "flag", "fmt", "reflect", "time"},
"internal/testenv": {"L2", "OS", "flag", "testing", "syscall"}, "internal/testenv": {"L2", "OS", "flag", "testing", "syscall"},
"internal/lazyregexp": {"L2", "OS", "regexp"},
// L4 is defined as L3+fmt+log+time, because in general once // L4 is defined as L3+fmt+log+time, because in general once
// you're using L3 packages, use of fmt, log, or time is not a big deal. // you're using L3 packages, use of fmt, log, or time is not a big deal.
...@@ -208,7 +209,7 @@ var pkgDeps = map[string][]string{ ...@@ -208,7 +209,7 @@ var pkgDeps = map[string][]string{
// Go parser. // Go parser.
"go/ast": {"L4", "OS", "go/scanner", "go/token"}, "go/ast": {"L4", "OS", "go/scanner", "go/token"},
"go/doc": {"L4", "OS", "go/ast", "go/token", "regexp", "text/template"}, "go/doc": {"L4", "OS", "go/ast", "go/token", "regexp", "internal/lazyregexp", "text/template"},
"go/parser": {"L4", "OS", "go/ast", "go/scanner", "go/token"}, "go/parser": {"L4", "OS", "go/ast", "go/scanner", "go/token"},
"go/printer": {"L4", "OS", "go/ast", "go/scanner", "go/token", "text/tabwriter"}, "go/printer": {"L4", "OS", "go/ast", "go/scanner", "go/token", "text/tabwriter"},
"go/scanner": {"L4", "OS", "go/token"}, "go/scanner": {"L4", "OS", "go/token"},
......
...@@ -8,6 +8,7 @@ package doc ...@@ -8,6 +8,7 @@ package doc
import ( import (
"bytes" "bytes"
"internal/lazyregexp"
"io" "io"
"strings" "strings"
"text/template" // for HTMLEscape "text/template" // for HTMLEscape
...@@ -69,7 +70,7 @@ const ( ...@@ -69,7 +70,7 @@ const (
urlRx = protoPart + `://` + hostPart + pathPart urlRx = protoPart + `://` + hostPart + pathPart
) )
var matchRx = newLazyRE(`(` + urlRx + `)|(` + identRx + `)`) var matchRx = lazyregexp.New(`(` + urlRx + `)|(` + identRx + `)`)
var ( var (
html_a = []byte(`<a href="`) html_a = []byte(`<a href="`)
...@@ -273,7 +274,7 @@ type block struct { ...@@ -273,7 +274,7 @@ type block struct {
lines []string lines []string
} }
var nonAlphaNumRx = newLazyRE(`[^a-zA-Z0-9]`) var nonAlphaNumRx = lazyregexp.New(`[^a-zA-Z0-9]`)
func anchorID(line string) string { func anchorID(line string) string {
// Add a "hdr-" prefix to avoid conflicting with IDs used for package symbols. // Add a "hdr-" prefix to avoid conflicting with IDs used for package symbols.
......
...@@ -7,6 +7,7 @@ package doc ...@@ -7,6 +7,7 @@ package doc
import ( import (
"go/ast" "go/ast"
"go/token" "go/token"
"internal/lazyregexp"
"sort" "sort"
"strconv" "strconv"
) )
...@@ -439,9 +440,9 @@ func (r *reader) readFunc(fun *ast.FuncDecl) { ...@@ -439,9 +440,9 @@ func (r *reader) readFunc(fun *ast.FuncDecl) {
} }
var ( var (
noteMarker = `([A-Z][A-Z]+)\(([^)]+)\):?` // MARKER(uid), MARKER at least 2 chars, uid at least 1 char noteMarker = `([A-Z][A-Z]+)\(([^)]+)\):?` // MARKER(uid), MARKER at least 2 chars, uid at least 1 char
noteMarkerRx = newLazyRE(`^[ \t]*` + noteMarker) // MARKER(uid) at text start noteMarkerRx = lazyregexp.New(`^[ \t]*` + noteMarker) // MARKER(uid) at text start
noteCommentRx = newLazyRE(`^/[/*][ \t]*` + noteMarker) // MARKER(uid) at comment start noteCommentRx = lazyregexp.New(`^/[/*][ \t]*` + noteMarker) // MARKER(uid) at comment start
) )
// readNote collects a single note from a sequence of comments. // readNote collects a single note from a sequence of comments.
......
...@@ -2,7 +2,7 @@ ...@@ -2,7 +2,7 @@
// Use of this source code is governed by a BSD-style // Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file. // license that can be found in the LICENSE file.
package doc package lazyregexp
import ( import (
"os" "os"
...@@ -11,38 +11,38 @@ import ( ...@@ -11,38 +11,38 @@ import (
"sync" "sync"
) )
type lazyRE struct { type Regexp struct {
str string str string
once sync.Once once sync.Once
rx *regexp.Regexp rx *regexp.Regexp
} }
func (r *lazyRE) re() *regexp.Regexp { func (r *Regexp) re() *regexp.Regexp {
r.once.Do(r.build) r.once.Do(r.build)
return r.rx return r.rx
} }
func (r *lazyRE) build() { func (r *Regexp) build() {
r.rx = regexp.MustCompile(r.str) r.rx = regexp.MustCompile(r.str)
r.str = "" r.str = ""
} }
func (r *lazyRE) FindStringSubmatchIndex(s string) []int { func (r *Regexp) FindStringSubmatchIndex(s string) []int {
return r.re().FindStringSubmatchIndex(s) return r.re().FindStringSubmatchIndex(s)
} }
func (r *lazyRE) ReplaceAllString(src, repl string) string { func (r *Regexp) ReplaceAllString(src, repl string) string {
return r.re().ReplaceAllString(src, repl) return r.re().ReplaceAllString(src, repl)
} }
func (r *lazyRE) MatchString(s string) bool { func (r *Regexp) MatchString(s string) bool {
return r.re().MatchString(s) return r.re().MatchString(s)
} }
var inTest = len(os.Args) > 0 && strings.HasSuffix(strings.TrimSuffix(os.Args[0], ".exe"), ".test") var inTest = len(os.Args) > 0 && strings.HasSuffix(strings.TrimSuffix(os.Args[0], ".exe"), ".test")
func newLazyRE(str string) *lazyRE { func New(str string) *Regexp {
lr := &lazyRE{str: str} lr := &Regexp{str: str}
if inTest { if inTest {
// In tests, always compile the regexps early. // In tests, always compile the regexps early.
lr.re() lr.re()
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment