Commit 7d16e44d authored by Daniel Martí

cmd/compile: reduce the regexp work in rulegen

As correctly pointed out by Giovanni Bajo, doing a single regexp pass
should be much faster than doing hundreds per architecture. We can then
use a map to keep track of what ops are handled in each file. And the
amount of saved work is evident:

	name     old time/op         new time/op         delta
	Rulegen          2.48s ± 1%          2.02s ± 1%  -18.44%  (p=0.008 n=5+5)

	name     old user-time/op    new user-time/op    delta
	Rulegen          10.9s ± 1%           8.9s ± 0%  -18.27%  (p=0.008 n=5+5)

	name     old sys-time/op     new sys-time/op     delta
	Rulegen          209ms ±28%          236ms ±18%     ~     (p=0.310 n=5+5)

	name     old peak-RSS-bytes  new peak-RSS-bytes  delta
	Rulegen          178MB ± 3%          176MB ± 3%     ~     (p=0.548 n=5+5)

The speed-up is so large that we don't need to parallelize it anymore;
the numbers above were measured with the goroutines already removed.
Adding them back in doesn't improve performance noticeably at all:

	name     old time/op         new time/op         delta
	Rulegen          2.02s ± 1%          2.01s ± 1%   ~     (p=0.421 n=5+5)

	name     old user-time/op    new user-time/op    delta
	Rulegen          8.90s ± 0%          8.96s ± 1%   ~     (p=0.095 n=5+5)

While at it, remove an unused method.

Change-Id: I328b56e63b64a9ab48147e67e7d5a385c795ec54
Reviewed-on: https://go-review.googlesource.com/c/go/+/195739
Run-TryBot: Daniel Martí <mvdan@mvdan.cc>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
parent ec4e8517
...@@ -392,34 +392,35 @@ func genOp() { ...@@ -392,34 +392,35 @@ func genOp() {
// Check that the arch genfile handles all the arch-specific opcodes. // Check that the arch genfile handles all the arch-specific opcodes.
// This is very much a hack, but it is better than nothing. // This is very much a hack, but it is better than nothing.
var wg sync.WaitGroup //
// Do a single regexp pass to record all ops being handled in a map, and
// then compare that with the ops list. This is much faster than one
// regexp pass per opcode.
for _, a := range archs { for _, a := range archs {
if a.genfile == "" { if a.genfile == "" {
continue continue
} }
a := a pattern := fmt.Sprintf(`\Wssa\.Op%s([a-zA-Z0-9_]+)\W`, a.name)
wg.Add(1) rxOp, err := regexp.Compile(pattern)
go func() { if err != nil {
src, err := ioutil.ReadFile(a.genfile) log.Fatalf("bad opcode regexp %s: %v", pattern, err)
if err != nil { }
log.Fatalf("can't read %s: %v", a.genfile, err)
}
for _, v := range a.ops { src, err := ioutil.ReadFile(a.genfile)
pattern := fmt.Sprintf(`\Wssa\.Op%s%s\W`, a.name, v.name) if err != nil {
match, err := regexp.Match(pattern, src) log.Fatalf("can't read %s: %v", a.genfile, err)
if err != nil { }
log.Fatalf("bad opcode regexp %s: %v", pattern, err) seen := make(map[string]bool, len(a.ops))
} for _, m := range rxOp.FindAllSubmatch(src, -1) {
if !match { seen[string(m[1])] = true
log.Fatalf("Op%s%s has no code generation in %s", a.name, v.name, a.genfile) }
} for _, op := range a.ops {
if !seen[op.name] {
log.Fatalf("Op%s%s has no code generation in %s", a.name, op.name, a.genfile)
} }
wg.Done() }
}()
} }
wg.Wait()
} }
// Name returns the name of the architecture for use in Op* and Block* enumerations. // Name returns the name of the architecture for use in Op* and Block* enumerations.
......
...@@ -636,7 +636,6 @@ type bodyBase struct { ...@@ -636,7 +636,6 @@ type bodyBase struct {
canFail bool canFail bool
} }
func (w *bodyBase) body() []Statement { return w.list }
func (w *bodyBase) add(nodes ...Statement) { func (w *bodyBase) add(nodes ...Statement) {
w.list = append(w.list, nodes...) w.list = append(w.list, nodes...)
for _, node := range nodes { for _, node := range nodes {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment