Commit 5aea5979 authored by Than McIntosh's avatar Than McIntosh

[dev.link] cmd/link/internal/loader: do more bulk allocation

Change the loader to do more bulk allocation when making slices of
small objects (sym.Reloc, etc) as part of creating and populating
sym.Symbols in loader.LoadFull(). This replaces a large number of
small allocations with a smaller number of large allocations,
improving performace. Compilebench numbers (linker portion) for this
change:

name                      old time/op       new time/op       delta
LinkCompiler                    1.71s ±11%        1.57s ± 9%   -8.35%  (p=0.000 n=19+20)
LinkWithoutDebugCompiler        1.19s ±14%        1.10s ±13%   -7.93%  (p=0.000 n=20+19)

name                      old user-time/op  new user-time/op  delta
LinkCompiler                    1.86s ±15%        1.34s ±10%  -28.02%  (p=0.000 n=20+20)
LinkWithoutDebugCompiler        1.05s ±14%        0.95s ± 9%   -9.17%  (p=0.000 n=19+20)

Hyperkube from kubernetes doesn't show any significant benefit (which
seems a little surprising).

Change-Id: Ide97f78532fb60b08bb6e4cfa097e9058f7ea8ab
Reviewed-on: https://go-review.googlesource.com/c/go/+/203457
Run-TryBot: Than McIntosh <thanm@google.com>
Reviewed-by: default avatarCherry Zhang <cherryyz@google.com>
Reviewed-by: default avatarJeremy Faller <jeremy@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
parent e961b26c
...@@ -117,6 +117,8 @@ type Loader struct { ...@@ -117,6 +117,8 @@ type Loader struct {
// field tracking is enabled. Reachparent[K] contains the index of // field tracking is enabled. Reachparent[K] contains the index of
// the symbol that triggered the marking of symbol K as live. // the symbol that triggered the marking of symbol K as live.
Reachparent []Sym Reachparent []Sym
relocBatch []sym.Reloc // for bulk allocation of relocations
} }
func NewLoader() *Loader { func NewLoader() *Loader {
...@@ -229,7 +231,7 @@ func (l *Loader) IsExternal(i Sym) bool { ...@@ -229,7 +231,7 @@ func (l *Loader) IsExternal(i Sym) bool {
return l.extStart != 0 && i >= l.extStart return l.extStart != 0 && i >= l.extStart
} }
// Ensure Syms slice als enough space. // Ensure Syms slice has enough space.
func (l *Loader) growSyms(i int) { func (l *Loader) growSyms(i int) {
n := len(l.Syms) n := len(l.Syms)
if n > i { if n > i {
...@@ -735,10 +737,15 @@ func preprocess(arch *sys.Arch, s *sym.Symbol) { ...@@ -735,10 +737,15 @@ func preprocess(arch *sys.Arch, s *sym.Symbol) {
func (l *Loader) LoadFull(arch *sys.Arch, syms *sym.Symbols) { func (l *Loader) LoadFull(arch *sys.Arch, syms *sym.Symbols) {
// create all Symbols first. // create all Symbols first.
l.growSyms(l.NSym()) l.growSyms(l.NSym())
nr := 0 // total number of sym.Reloc's we'll need
for _, o := range l.objs[1:] { for _, o := range l.objs[1:] {
loadObjSyms(l, syms, o.r) nr += loadObjSyms(l, syms, o.r)
} }
// allocate a single large slab of relocations for all live symbols
l.relocBatch = make([]sym.Reloc, nr)
// external symbols // external symbols
for i := l.extStart; i <= l.max; i++ { for i := l.extStart; i <= l.max; i++ {
if s := l.Syms[i]; s != nil { if s := l.Syms[i]; s != nil {
...@@ -811,14 +818,19 @@ func (l *Loader) addNewSym(i Sym, syms *sym.Symbols, name string, ver int, unit ...@@ -811,14 +818,19 @@ func (l *Loader) addNewSym(i Sym, syms *sym.Symbols, name string, ver int, unit
return s return s
} }
func loadObjSyms(l *Loader, syms *sym.Symbols, r *oReader) { // loadObjSyms creates sym.Symbol objects for the live Syms in the
// object corresponding to object reader "r". Return value is the
// number of sym.Reloc entries required for all the new symbols.
func loadObjSyms(l *Loader, syms *sym.Symbols, r *oReader) int {
istart := l.startIndex(r) istart := l.startIndex(r)
nr := 0
for i, n := 0, r.NSym()+r.NNonpkgdef(); i < n; i++ { for i, n := 0, r.NSym()+r.NNonpkgdef(); i < n; i++ {
// If it's been previously loaded in host object loading, we don't need to do it again. // If it's been previously loaded in host object loading, we don't need to do it again.
if s := l.Syms[istart+Sym(i)]; s != nil { if s := l.Syms[istart+Sym(i)]; s != nil {
// Mark symbol as reachable as it wasn't marked as such before. // Mark symbol as reachable as it wasn't marked as such before.
s.Attr.Set(sym.AttrReachable, l.Reachable.Has(istart+Sym(i))) s.Attr.Set(sym.AttrReachable, l.Reachable.Has(istart+Sym(i)))
nr += r.NReloc(i)
continue continue
} }
osym := goobj2.Sym{} osym := goobj2.Sym{}
...@@ -848,7 +860,28 @@ func loadObjSyms(l *Loader, syms *sym.Symbols, r *oReader) { ...@@ -848,7 +860,28 @@ func loadObjSyms(l *Loader, syms *sym.Symbols, r *oReader) {
s := l.addNewSym(istart+Sym(i), syms, name, ver, r.unit, t) s := l.addNewSym(istart+Sym(i), syms, name, ver, r.unit, t)
s.Attr.Set(sym.AttrReachable, l.Reachable.Has(istart+Sym(i))) s.Attr.Set(sym.AttrReachable, l.Reachable.Has(istart+Sym(i)))
nr += r.NReloc(i)
} }
return nr
}
// funcInfoSym records the sym.Symbol for a function, along with a copy
// of the corresponding goobj2.Sym and the index of its FuncInfo aux sym.
// We use this to delay populating FuncInfo until we can batch-allocate
// slices for their sub-objects.
type funcInfoSym struct {
s *sym.Symbol // sym.Symbol for a live function
osym goobj2.Sym // object file symbol data for that function
isym int // global symbol index of FuncInfo aux sym for func
}
// funcAllocInfo records totals/counts for all functions in an objfile;
// used to help with bulk allocation of sym.Symbol sub-objects.
type funcAllocInfo struct {
symPtr uint32 // number of *sym.Symbol's needed in file slices
inlCall uint32 // number of sym.InlinedCall's needed in inltree slices
pcData uint32 // number of sym.Pcdata's needed in pdata slices
fdOff uint32 // number of int64's needed in all Funcdataoff slices
} }
// LoadSymbol loads a single symbol by name. // LoadSymbol loads a single symbol by name.
...@@ -884,6 +917,9 @@ func loadObjFull(l *Loader, r *oReader) { ...@@ -884,6 +917,9 @@ func loadObjFull(l *Loader, r *oReader) {
return l.Syms[i] return l.Syms[i]
} }
funcs := []funcInfoSym{}
fdsyms := []*sym.Symbol{}
var funcAllocCounts funcAllocInfo
pcdataBase := r.PcdataBase() pcdataBase := r.PcdataBase()
rslice := []Reloc{} rslice := []Reloc{}
for i, n := 0, r.NSym()+r.NNonpkgdef(); i < n; i++ { for i, n := 0, r.NSym()+r.NNonpkgdef(); i < n; i++ {
...@@ -930,7 +966,9 @@ func loadObjFull(l *Loader, r *oReader) { ...@@ -930,7 +966,9 @@ func loadObjFull(l *Loader, r *oReader) {
// Relocs // Relocs
relocs := l.relocs(r, i) relocs := l.relocs(r, i)
rslice = relocs.ReadAll(rslice) rslice = relocs.ReadAll(rslice)
s.R = make([]sym.Reloc, relocs.Count) batch := l.relocBatch
s.R = batch[:relocs.Count:relocs.Count]
l.relocBatch = batch[relocs.Count:]
for j := range s.R { for j := range s.R {
r := rslice[j] r := rslice[j]
rs := r.Sym rs := r.Sym
...@@ -974,12 +1012,7 @@ func loadObjFull(l *Loader, r *oReader) { ...@@ -974,12 +1012,7 @@ func loadObjFull(l *Loader, r *oReader) {
s.Gotype = typ s.Gotype = typ
} }
case goobj2.AuxFuncdata: case goobj2.AuxFuncdata:
pc := s.FuncInfo fdsyms = append(fdsyms, resolveSymRef(a.Sym))
if pc == nil {
pc = &sym.FuncInfo{Funcdata: make([]*sym.Symbol, 0, 4)}
s.FuncInfo = pc
}
pc.Funcdata = append(pc.Funcdata, resolveSymRef(a.Sym))
case goobj2.AuxFuncInfo: case goobj2.AuxFuncInfo:
if a.Sym.PkgIdx != goobj2.PkgIdxSelf { if a.Sym.PkgIdx != goobj2.PkgIdxSelf {
panic("funcinfo symbol not defined in current package") panic("funcinfo symbol not defined in current package")
...@@ -1014,10 +1047,44 @@ func loadObjFull(l *Loader, r *oReader) { ...@@ -1014,10 +1047,44 @@ func loadObjFull(l *Loader, r *oReader) {
continue continue
} }
// FuncInfo
if isym == -1 { if isym == -1 {
continue continue
} }
// Record function sym and associated info for additional
// processing in the loop below.
fwis := funcInfoSym{s: s, isym: isym, osym: osym}
funcs = append(funcs, fwis)
// Read the goobj2.FuncInfo for this text symbol so that we can
// collect allocation counts. We'll read it again in the loop
// below.
b := r.Data(isym)
info := goobj2.FuncInfo{}
info.Read(b)
funcAllocCounts.symPtr += uint32(len(info.File))
funcAllocCounts.pcData += uint32(len(info.Pcdata))
funcAllocCounts.inlCall += uint32(len(info.InlTree))
funcAllocCounts.fdOff += uint32(len(info.Funcdataoff))
}
// At this point we can do batch allocation of the sym.FuncInfo's,
// along with the slices of sub-objects they use.
fiBatch := make([]sym.FuncInfo, len(funcs))
inlCallBatch := make([]sym.InlinedCall, funcAllocCounts.inlCall)
symPtrBatch := make([]*sym.Symbol, funcAllocCounts.symPtr)
pcDataBatch := make([]sym.Pcdata, funcAllocCounts.pcData)
fdOffBatch := make([]int64, funcAllocCounts.fdOff)
// Populate FuncInfo contents for func symbols.
for fi := 0; fi < len(funcs); fi++ {
s := funcs[fi].s
isym := funcs[fi].isym
osym := funcs[fi].osym
s.FuncInfo = &fiBatch[0]
fiBatch = fiBatch[1:]
b := r.Data(isym) b := r.Data(isym)
info := goobj2.FuncInfo{} info := goobj2.FuncInfo{}
info.Read(b) info.Read(b)
...@@ -1035,18 +1102,34 @@ func loadObjFull(l *Loader, r *oReader) { ...@@ -1035,18 +1102,34 @@ func loadObjFull(l *Loader, r *oReader) {
s.Attr |= sym.AttrTopFrame s.Attr |= sym.AttrTopFrame
} }
info.Pcdata = append(info.Pcdata, info.PcdataEnd) // for the ease of knowing where it ends
pc := s.FuncInfo pc := s.FuncInfo
if pc == nil {
pc = &sym.FuncInfo{} if len(info.Funcdataoff) != 0 {
s.FuncInfo = pc nfd := len(info.Funcdataoff)
pc.Funcdata = fdsyms[:nfd:nfd]
fdsyms = fdsyms[nfd:]
} }
info.Pcdata = append(info.Pcdata, info.PcdataEnd) // for the ease of knowing where it ends
pc.Args = int32(info.Args) pc.Args = int32(info.Args)
pc.Locals = int32(info.Locals) pc.Locals = int32(info.Locals)
pc.Pcdata = make([]sym.Pcdata, len(info.Pcdata)-1) // -1 as we appended one above
pc.Funcdataoff = make([]int64, len(info.Funcdataoff)) npc := len(info.Pcdata) - 1 // -1 as we appended one above
pc.File = make([]*sym.Symbol, len(info.File)) pc.Pcdata = pcDataBatch[:npc:npc]
pc.InlTree = make([]sym.InlinedCall, len(info.InlTree)) pcDataBatch = pcDataBatch[npc:]
nfd := len(info.Funcdataoff)
pc.Funcdataoff = fdOffBatch[:nfd:nfd]
fdOffBatch = fdOffBatch[nfd:]
nsp := len(info.File)
pc.File = symPtrBatch[:nsp:nsp]
symPtrBatch = symPtrBatch[nsp:]
nic := len(info.InlTree)
pc.InlTree = inlCallBatch[:nic:nic]
inlCallBatch = inlCallBatch[nic:]
pc.Pcsp.P = r.BytesAt(pcdataBase+info.Pcsp, int(info.Pcfile-info.Pcsp)) pc.Pcsp.P = r.BytesAt(pcdataBase+info.Pcsp, int(info.Pcfile-info.Pcsp))
pc.Pcfile.P = r.BytesAt(pcdataBase+info.Pcfile, int(info.Pcline-info.Pcfile)) pc.Pcfile.P = r.BytesAt(pcdataBase+info.Pcfile, int(info.Pcline-info.Pcfile))
pc.Pcline.P = r.BytesAt(pcdataBase+info.Pcline, int(info.Pcinline-info.Pcline)) pc.Pcline.P = r.BytesAt(pcdataBase+info.Pcline, int(info.Pcinline-info.Pcline))
...@@ -1071,6 +1154,7 @@ func loadObjFull(l *Loader, r *oReader) { ...@@ -1071,6 +1154,7 @@ func loadObjFull(l *Loader, r *oReader) {
} }
} }
dupok := osym.Dupok()
if !dupok { if !dupok {
if s.Attr.OnList() { if s.Attr.OnList() {
log.Fatalf("symbol %s listed multiple times", s.Name) log.Fatalf("symbol %s listed multiple times", s.Name)
...@@ -1078,7 +1162,7 @@ func loadObjFull(l *Loader, r *oReader) { ...@@ -1078,7 +1162,7 @@ func loadObjFull(l *Loader, r *oReader) {
s.Attr.Set(sym.AttrOnList, true) s.Attr.Set(sym.AttrOnList, true)
lib.Textp = append(lib.Textp, s) lib.Textp = append(lib.Textp, s)
} else { } else {
// there may ba a dup in another package // there may be a dup in another package
// put into a temp list and add to text later // put into a temp list and add to text later
lib.DupTextSyms = append(lib.DupTextSyms, s) lib.DupTextSyms = append(lib.DupTextSyms, s)
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment