Commit 5c359d80 authored by Josh Bleecher Snyder's avatar Josh Bleecher Snyder

cmd/compile: add Prog cache to Progs

The existing bulk/cached Prog allocator, Ctxt.NewProg, is not concurrency-safe.
This CL moves Prog allocation to its clients, the compiler and the assembler.

The assembler is so fast and generates so few Progs that it does not need
optimization of Prog allocation. I could not generate measureable changes.
And even if I could, the assembly is a miniscule portion of build times.

The compiler already has a natural place to manage Prog allocation;
this CL migrates the Prog cache there.
It will be made concurrency-safe in a later CL by
partitioning the Prog cache into chunks and assigning each chunk
to a different goroutine to manage.

This CL does cause a performance degradation when the compiler
is invoked with the -S flag (to dump assembly).
However, such usage is rare and almost always done manually.
The one instance I know of in a test is TestAssembly
in cmd/compile/internal/gc, and I did not detect
a measurable performance impact there.

Passes toolstash-check -all.
Minor compiler performance impact.

Updates #15756

Performance impact from just this CL:

name        old time/op     new time/op     delta
Template        213ms ± 4%      213ms ± 4%    ~     (p=0.571 n=49+49)
Unicode        89.1ms ± 3%     89.4ms ± 3%    ~     (p=0.388 n=47+48)
GoTypes         581ms ± 2%      584ms ± 3%  +0.56%  (p=0.019 n=47+48)
SSA             6.48s ± 2%      6.53s ± 2%  +0.84%  (p=0.000 n=47+49)
Flate           128ms ± 4%      128ms ± 4%    ~     (p=0.832 n=49+49)
GoParser        152ms ± 3%      152ms ± 3%    ~     (p=0.815 n=48+47)
Reflect         371ms ± 4%      371ms ± 3%    ~     (p=0.617 n=50+47)
Tar             112ms ± 4%      112ms ± 3%    ~     (p=0.724 n=49+49)
XML             208ms ± 3%      208ms ± 4%    ~     (p=0.678 n=49+50)
[Geo mean]      284ms           285ms       +0.18%

name        old user-ns/op  new user-ns/op  delta
Template         251M ± 7%       252M ±11%    ~     (p=0.704 n=49+50)
Unicode          107M ± 7%       108M ± 5%  +1.25%  (p=0.036 n=50+49)
GoTypes          738M ± 3%       740M ± 3%    ~     (p=0.305 n=49+48)
SSA             8.83G ± 2%      8.86G ± 4%    ~     (p=0.098 n=47+50)
Flate            146M ± 6%       147M ± 3%    ~     (p=0.584 n=48+41)
GoParser         178M ± 6%       179M ± 5%  +0.93%  (p=0.036 n=49+48)
Reflect          441M ± 4%       446M ± 7%    ~     (p=0.218 n=44+49)
Tar              126M ± 5%       126M ± 5%    ~     (p=0.766 n=48+49)
XML              245M ± 5%       244M ± 4%    ~     (p=0.359 n=50+50)
[Geo mean]       341M            342M       +0.51%

Performance impact from this CL combined with its parent:

name        old time/op     new time/op     delta
Template        213ms ± 3%      214ms ± 4%    ~     (p=0.685 n=47+50)
Unicode        89.8ms ± 6%     90.5ms ± 6%    ~     (p=0.055 n=50+50)
GoTypes         584ms ± 3%      585ms ± 2%    ~     (p=0.710 n=49+47)
SSA             6.50s ± 2%      6.53s ± 2%  +0.39%  (p=0.011 n=46+50)
Flate           128ms ± 3%      128ms ± 4%    ~     (p=0.855 n=47+49)
GoParser        152ms ± 3%      152ms ± 3%    ~     (p=0.666 n=49+49)
Reflect         371ms ± 3%      372ms ± 3%    ~     (p=0.298 n=48+48)
Tar             112ms ± 5%      113ms ± 3%    ~     (p=0.107 n=49+49)
XML             208ms ± 3%      208ms ± 2%    ~     (p=0.881 n=50+49)
[Geo mean]      285ms           285ms       +0.26%

name        old user-ns/op  new user-ns/op  delta
Template         254M ± 9%       252M ± 8%    ~     (p=0.290 n=49+50)
Unicode          106M ± 6%       108M ± 7%  +1.44%  (p=0.034 n=50+50)
GoTypes          741M ± 4%       743M ± 4%    ~     (p=0.992 n=50+49)
SSA             8.86G ± 2%      8.83G ± 3%    ~     (p=0.158 n=47+49)
Flate            147M ± 4%       148M ± 5%    ~     (p=0.832 n=50+49)
GoParser         179M ± 5%       178M ± 5%    ~     (p=0.370 n=48+50)
Reflect          441M ± 6%       445M ± 7%    ~     (p=0.246 n=45+47)
Tar              126M ± 6%       126M ± 6%    ~     (p=0.815 n=49+50)
XML              244M ± 3%       245M ± 4%    ~     (p=0.190 n=50+50)
[Geo mean]       342M            342M       +0.17%

Change-Id: I020f1c079d495fbe2e15ccb51e1ea2cc1b5a1855
Reviewed-on: https://go-review.googlesource.com/39634
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: default avatarKeith Randall <khr@golang.org>
parent 52d8d7b9
......@@ -179,7 +179,7 @@ Diff:
t.Errorf(format, args...)
ok = false
}
obj.FlushplistNoFree(ctxt, pList)
obj.Flushplist(ctxt, pList, nil)
for p := top; p != nil; p = p.Link {
if p.As == obj.ATEXT {
......@@ -283,7 +283,7 @@ func testErrors(t *testing.T, goarch, file string) {
errBuf.WriteString(s)
}
pList.Firstpc, ok = parser.Parse()
obj.Flushplist(ctxt, pList)
obj.Flushplist(ctxt, pList, nil)
if ok && !failed {
t.Errorf("asm: %s had no errors", goarch)
}
......
......@@ -69,7 +69,7 @@ func main() {
break
}
// reports errors to parser.Errorf
obj.Flushplist(ctxt, pList)
obj.Flushplist(ctxt, pList, nil)
}
if ok {
obj.WriteObjFile(ctxt, buf)
......
......@@ -35,22 +35,33 @@ import (
"cmd/internal/src"
)
var sharedProgArray *[10000]obj.Prog // *T instead of T to work around issue 19839
func init() {
sharedProgArray = new([10000]obj.Prog)
}
// Progs accumulates Progs for a function and converts them into machine code.
type Progs struct {
Text *obj.Prog // ATEXT Prog for this function
next *obj.Prog // next Prog
pc int64 // virtual PC; count of Progs
pos src.XPos // position to use for new Progs
curfn *Node // fn these Progs are for
Text *obj.Prog // ATEXT Prog for this function
next *obj.Prog // next Prog
pc int64 // virtual PC; count of Progs
pos src.XPos // position to use for new Progs
curfn *Node // fn these Progs are for
progcache []obj.Prog // local progcache
cacheidx int // first free element of progcache
}
// newProgs returns a new Progs for fn.
func newProgs(fn *Node) *Progs {
pp := new(Progs)
if Ctxt.CanReuseProgs() {
pp.progcache = sharedProgArray[:]
}
pp.curfn = fn
// prime the pump
pp.next = Ctxt.NewProg()
pp.next = pp.NewProg()
pp.clearp(pp.next)
pp.pos = fn.Pos
......@@ -58,18 +69,41 @@ func newProgs(fn *Node) *Progs {
return pp
}
func (pp *Progs) NewProg() *obj.Prog {
if pp.cacheidx < len(pp.progcache) {
p := &pp.progcache[pp.cacheidx]
p.Ctxt = Ctxt
pp.cacheidx++
return p
}
p := new(obj.Prog)
p.Ctxt = Ctxt
return p
}
// Flush converts from pp to machine code.
func (pp *Progs) Flush() {
plist := &obj.Plist{Firstpc: pp.Text, Curfn: pp.curfn}
obj.Flushplist(Ctxt, plist)
// Clear pp to enable GC and avoid abuse.
obj.Flushplist(Ctxt, plist, pp.NewProg)
}
// Free clears pp and any associated resources.
func (pp *Progs) Free() {
if Ctxt.CanReuseProgs() {
// Clear progs to enable GC and avoid abuse.
s := pp.progcache[:pp.cacheidx]
for i := range s {
s[i] = obj.Prog{}
}
}
// Clear pp to avoid abuse.
*pp = Progs{}
}
// Prog adds a Prog with instruction As to pp.
func (pp *Progs) Prog(as obj.As) *obj.Prog {
p := pp.next
pp.next = Ctxt.NewProg()
pp.next = pp.NewProg()
pp.clearp(pp.next)
p.Link = pp.next
......@@ -90,7 +124,7 @@ func (pp *Progs) clearp(p *obj.Prog) {
}
func (pp *Progs) Appendpp(p *obj.Prog, as obj.As, ftype obj.AddrType, freg int16, foffset int64, ttype obj.AddrType, treg int16, toffset int64) *obj.Prog {
q := Ctxt.NewProg()
q := pp.NewProg()
pp.clearp(q)
q.As = as
q.Pos = p.Pos
......
......@@ -301,11 +301,12 @@ func compile(fn *Node) {
pp := newProgs(fn)
genssa(ssafn, pp)
fieldtrack(pp.Text.From.Sym, fn.Func.FieldTrack)
if pp.Text.To.Offset >= 1<<31 {
if pp.Text.To.Offset < 1<<31 {
pp.Flush()
} else {
largeStackFrames = append(largeStackFrames, fn.Pos)
return
}
pp.Flush()
pp.Free()
}
func debuginfo(fnsym *obj.LSym, curfn interface{}) []*dwarf.Var {
......
......@@ -749,10 +749,6 @@ type Link struct {
// state for writing objects
Text []*LSym
Data []*LSym
// Cache of Progs
allocIdx int
progs [10000]Prog
}
func (ctxt *Link) Diag(format string, args ...interface{}) {
......
......@@ -19,13 +19,7 @@ type Plist struct {
// It is used to provide access to cached/bulk-allocated Progs to the assemblers.
type ProgAlloc func() *Prog
func Flushplist(ctxt *Link, plist *Plist) {
flushplist(ctxt, plist, !ctxt.Debugasm)
}
func FlushplistNoFree(ctxt *Link, plist *Plist) {
flushplist(ctxt, plist, false)
}
func flushplist(ctxt *Link, plist *Plist, freeProgs bool) {
func Flushplist(ctxt *Link, plist *Plist, newprog ProgAlloc) {
// Build list of symbols, and assign instructions to lists.
var curtext *LSym
var etext *Prog
......@@ -101,7 +95,9 @@ func flushplist(ctxt *Link, plist *Plist, freeProgs bool) {
etext = p
}
newprog := ProgAlloc(ctxt.NewProg)
if newprog == nil {
newprog = ctxt.NewProg
}
// Add reference to Go arguments for C or assembly functions without them.
for _, s := range text {
......@@ -135,16 +131,10 @@ func flushplist(ctxt *Link, plist *Plist, freeProgs bool) {
ctxt.Arch.Assemble(ctxt, s, newprog)
linkpcln(ctxt, s)
makeFuncDebugEntry(ctxt, plist.Curfn, s)
if freeProgs {
s.Text = nil
}
}
// Add to running list in ctxt.
ctxt.Text = append(ctxt.Text, text...)
if freeProgs {
ctxt.freeProgs()
}
}
func (ctxt *Link) Globl(s *LSym, size int64, flag int) {
......
......@@ -163,22 +163,13 @@ func (p *Prog) String() string {
}
func (ctxt *Link) NewProg() *Prog {
var p *Prog
if i := ctxt.allocIdx; i < len(ctxt.progs) {
p = &ctxt.progs[i]
ctxt.allocIdx = i + 1
} else {
p = new(Prog) // should be the only call to this; all others should use ctxt.NewProg
}
p := new(Prog)
p.Ctxt = ctxt
return p
}
func (ctxt *Link) freeProgs() {
s := ctxt.progs[:ctxt.allocIdx]
for i := range s {
s[i] = Prog{}
}
ctxt.allocIdx = 0
func (ctxt *Link) CanReuseProgs() bool {
return !ctxt.Debugasm
}
func (ctxt *Link) Dconv(a *Addr) string {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment