Commit 9f95c9db authored by Austin Clements's avatar Austin Clements

cmd/compile, cmd/internal/obj: record register maps in binary

This adds FUNCDATA and PCDATA that records the register maps much like
the existing live arguments maps and live locals maps. The register
map is indexed independently from the argument and locals maps since
changes in register liveness tend not to correlate with changes to
argument and local liveness.

This is the final CL toward adding safe-points everywhere. The
following CLs will optimize liveness analysis to bring down the cost.
The effect of this CL is:

name        old time/op       new time/op       delta
Template          195ms ± 2%        197ms ± 1%    ~     (p=0.136 n=9+9)
Unicode          98.4ms ± 2%       99.7ms ± 1%  +1.39%  (p=0.004 n=10+10)
GoTypes           685ms ± 1%        700ms ± 1%  +2.06%  (p=0.000 n=9+9)
Compiler          3.28s ± 1%        3.34s ± 0%  +1.71%  (p=0.000 n=9+8)
SSA               7.79s ± 1%        7.91s ± 1%  +1.55%  (p=0.000 n=10+9)
Flate             133ms ± 2%        133ms ± 2%    ~     (p=0.190 n=10+10)
GoParser          161ms ± 2%        164ms ± 3%  +1.83%  (p=0.015 n=10+10)
Reflect           450ms ± 1%        457ms ± 1%  +1.62%  (p=0.000 n=10+10)
Tar               183ms ± 2%        185ms ± 1%  +0.91%  (p=0.008 n=9+10)
XML               234ms ± 1%        238ms ± 1%  +1.60%  (p=0.000 n=9+9)
[Geo mean]        411ms             417ms       +1.40%

name        old exe-bytes     new exe-bytes     delta
HelloSize         1.47M ± 0%        1.51M ± 0%  +2.79%  (p=0.000 n=10+10)

Compared to just before "cmd/internal/obj: consolidate emitting entry
stack map", the cumulative effect of adding stack maps everywhere and
register maps is:

name        old time/op       new time/op       delta
Template          185ms ± 2%        197ms ± 1%   +6.42%  (p=0.000 n=10+9)
Unicode          96.3ms ± 3%       99.7ms ± 1%   +3.60%  (p=0.000 n=10+10)
GoTypes           658ms ± 0%        700ms ± 1%   +6.37%  (p=0.000 n=10+9)
Compiler          3.14s ± 1%        3.34s ± 0%   +6.53%  (p=0.000 n=9+8)
SSA               7.41s ± 2%        7.91s ± 1%   +6.71%  (p=0.000 n=9+9)
Flate             126ms ± 1%        133ms ± 2%   +6.15%  (p=0.000 n=10+10)
GoParser          153ms ± 1%        164ms ± 3%   +6.89%  (p=0.000 n=10+10)
Reflect           437ms ± 1%        457ms ± 1%   +4.59%  (p=0.000 n=10+10)
Tar               178ms ± 1%        185ms ± 1%   +4.18%  (p=0.000 n=10+10)
XML               223ms ± 1%        238ms ± 1%   +6.39%  (p=0.000 n=10+9)
[Geo mean]        394ms             417ms        +5.78%

name        old alloc/op      new alloc/op      delta
Template         34.5MB ± 0%       38.0MB ± 0%  +10.19%  (p=0.000 n=10+10)
Unicode          29.3MB ± 0%       30.3MB ± 0%   +3.56%  (p=0.000 n=8+9)
GoTypes           113MB ± 0%        125MB ± 0%  +10.89%  (p=0.000 n=10+10)
Compiler          510MB ± 0%        575MB ± 0%  +12.79%  (p=0.000 n=10+10)
SSA              1.46GB ± 0%       1.64GB ± 0%  +12.40%  (p=0.000 n=10+10)
Flate            23.9MB ± 0%       25.9MB ± 0%   +8.56%  (p=0.000 n=10+10)
GoParser         28.0MB ± 0%       30.8MB ± 0%  +10.08%  (p=0.000 n=10+10)
Reflect          77.6MB ± 0%       84.3MB ± 0%   +8.63%  (p=0.000 n=10+10)
Tar              34.1MB ± 0%       37.0MB ± 0%   +8.44%  (p=0.000 n=10+10)
XML              42.7MB ± 0%       47.2MB ± 0%  +10.75%  (p=0.000 n=10+10)
[Geo mean]       76.0MB            83.3MB        +9.60%

name        old allocs/op     new allocs/op     delta
Template           321k ± 0%         337k ± 0%   +4.98%  (p=0.000 n=10+10)
Unicode            337k ± 0%         340k ± 0%   +1.04%  (p=0.000 n=10+9)
GoTypes           1.13M ± 0%        1.18M ± 0%   +4.85%  (p=0.000 n=10+10)
Compiler          4.67M ± 0%        4.96M ± 0%   +6.25%  (p=0.000 n=10+10)
SSA               11.7M ± 0%        12.3M ± 0%   +5.69%  (p=0.000 n=10+10)
Flate              216k ± 0%         226k ± 0%   +4.52%  (p=0.000 n=10+9)
GoParser           271k ± 0%         283k ± 0%   +4.52%  (p=0.000 n=10+10)
Reflect            927k ± 0%         972k ± 0%   +4.78%  (p=0.000 n=10+10)
Tar                318k ± 0%         333k ± 0%   +4.56%  (p=0.000 n=10+10)
XML                376k ± 0%         395k ± 0%   +5.04%  (p=0.000 n=10+10)
[Geo mean]         730k              764k        +4.61%

name        old exe-bytes     new exe-bytes     delta
HelloSize         1.46M ± 0%        1.51M ± 0%   +3.66%  (p=0.000 n=10+10)

For #24543.

Change-Id: I91e003dc64151916b384274884bf02a2d6862547
Reviewed-on: https://go-review.googlesource.com/109353
Run-TryBot: Austin Clements <austin@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: default avatarKeith Randall <khr@golang.org>
parent 61158c16
...@@ -117,6 +117,21 @@ func (bv bvec) Next(i int32) int32 { ...@@ -117,6 +117,21 @@ func (bv bvec) Next(i int32) int32 {
return i return i
} }
// Len returns the minimum number of bits required to represent bv.
// The result is 0 if no bits are set in bv.
func (bv bvec) Len() int32 {
for wi := len(bv.b) - 1; wi >= 0; wi-- {
if w := bv.b[wi]; w != 0 {
for i := wordBits - 1; i >= 0; i-- {
if w>>uint(i) != 0 {
return int32(wi)*wordBits + int32(i) + 1
}
}
}
}
return 0
}
func (bv bvec) IsEmpty() bool { func (bv bvec) IsEmpty() bool {
for _, x := range bv.b { for _, x := range bv.b {
if x != 0 { if x != 0 {
......
...@@ -116,6 +116,14 @@ func (pp *Progs) Prog(as obj.As) *obj.Prog { ...@@ -116,6 +116,14 @@ func (pp *Progs) Prog(as obj.As) *obj.Prog {
Addrconst(&p.From, objabi.PCDATA_StackMapIndex) Addrconst(&p.From, objabi.PCDATA_StackMapIndex)
Addrconst(&p.To, int64(idx)) Addrconst(&p.To, int64(idx))
} }
if pp.nextLive.regMapIndex != pp.prevLive.regMapIndex {
// Emit register map index change.
idx := pp.nextLive.regMapIndex
pp.prevLive.regMapIndex = idx
p := pp.Prog(obj.APCDATA)
Addrconst(&p.From, objabi.PCDATA_RegMapIndex)
Addrconst(&p.To, int64(idx))
}
p := pp.next p := pp.next
pp.next = pp.NewProg() pp.next = pp.NewProg()
...@@ -189,6 +197,12 @@ func (pp *Progs) settext(fn *Node) { ...@@ -189,6 +197,12 @@ func (pp *Progs) settext(fn *Node) {
p.To.Type = obj.TYPE_MEM p.To.Type = obj.TYPE_MEM
p.To.Name = obj.NAME_EXTERN p.To.Name = obj.NAME_EXTERN
p.To.Sym = &fn.Func.lsym.Func.GCLocals p.To.Sym = &fn.Func.lsym.Func.GCLocals
p = pp.Prog(obj.AFUNCDATA)
Addrconst(&p.From, objabi.FUNCDATA_RegPointerMaps)
p.To.Type = obj.TYPE_MEM
p.To.Name = obj.NAME_EXTERN
p.To.Sym = &fn.Func.lsym.Func.GCRegs
} }
func (f *Func) initLSym() { func (f *Func) initLSym() {
......
...@@ -292,7 +292,7 @@ func addGCLocals() { ...@@ -292,7 +292,7 @@ func addGCLocals() {
if s.Func == nil { if s.Func == nil {
continue continue
} }
for _, gcsym := range []*obj.LSym{&s.Func.GCArgs, &s.Func.GCLocals} { for _, gcsym := range []*obj.LSym{&s.Func.GCArgs, &s.Func.GCLocals, &s.Func.GCRegs} {
if seen[gcsym.Name] { if seen[gcsym.Name] {
continue continue
} }
......
...@@ -594,6 +594,20 @@ func onebitwalktype1(t *types.Type, off int64, bv bvec) { ...@@ -594,6 +594,20 @@ func onebitwalktype1(t *types.Type, off int64, bv bvec) {
} }
} }
// usedRegs returns the maximum width of the live register map.
func (lv *Liveness) usedRegs() int32 {
var any liveRegMask
for _, live := range lv.regMaps {
any |= live
}
i := int32(0)
for any != 0 {
any >>= 1
i++
}
return i
}
// Generates live pointer value maps for arguments and local variables. The // Generates live pointer value maps for arguments and local variables. The
// this argument and the in arguments are always assumed live. The vars // this argument and the in arguments are always assumed live. The vars
// argument is a slice of *Nodes. // argument is a slice of *Nodes.
...@@ -1615,7 +1629,7 @@ func (lv *Liveness) printDebug() { ...@@ -1615,7 +1629,7 @@ func (lv *Liveness) printDebug() {
// first word dumped is the total number of bitmaps. The second word is the // first word dumped is the total number of bitmaps. The second word is the
// length of the bitmaps. All bitmaps are assumed to be of equal length. The // length of the bitmaps. All bitmaps are assumed to be of equal length. The
// remaining bytes are the raw bitmaps. // remaining bytes are the raw bitmaps.
func (lv *Liveness) emit(argssym, livesym *obj.LSym) { func (lv *Liveness) emit(argssym, livesym, regssym *obj.LSym) {
// Size args bitmaps to be just large enough to hold the largest pointer. // Size args bitmaps to be just large enough to hold the largest pointer.
// First, find the largest Xoffset node we care about. // First, find the largest Xoffset node we care about.
// (Nodes without pointers aren't in lv.vars; see livenessShouldTrack.) // (Nodes without pointers aren't in lv.vars; see livenessShouldTrack.)
...@@ -1643,7 +1657,6 @@ func (lv *Liveness) emit(argssym, livesym *obj.LSym) { ...@@ -1643,7 +1657,6 @@ func (lv *Liveness) emit(argssym, livesym *obj.LSym) {
// This would require shifting all bitmaps. // This would require shifting all bitmaps.
maxLocals := lv.stkptrsize maxLocals := lv.stkptrsize
// TODO(austin): Emit a register map.
args := bvalloc(int32(maxArgs / int64(Widthptr))) args := bvalloc(int32(maxArgs / int64(Widthptr)))
aoff := duint32(argssym, 0, uint32(len(lv.stackMaps))) // number of bitmaps aoff := duint32(argssym, 0, uint32(len(lv.stackMaps))) // number of bitmaps
aoff = duint32(argssym, aoff, uint32(args.n)) // number of bits in each bitmap aoff = duint32(argssym, aoff, uint32(args.n)) // number of bits in each bitmap
...@@ -1662,6 +1675,22 @@ func (lv *Liveness) emit(argssym, livesym *obj.LSym) { ...@@ -1662,6 +1675,22 @@ func (lv *Liveness) emit(argssym, livesym *obj.LSym) {
loff = dbvec(livesym, loff, locals) loff = dbvec(livesym, loff, locals)
} }
regs := bvalloc(lv.usedRegs())
roff := duint32(regssym, 0, uint32(len(lv.regMaps))) // number of bitmaps
roff = duint32(regssym, roff, uint32(regs.n)) // number of bits in each bitmap
if regs.n > 32 {
// Our uint32 conversion below won't work.
Fatalf("GP registers overflow uint32")
}
if regs.n > 0 {
for _, live := range lv.regMaps {
regs.Clear()
regs.b[0] = uint32(live)
roff = dbvec(regssym, roff, regs)
}
}
// Give these LSyms content-addressable names, // Give these LSyms content-addressable names,
// so that they can be de-duplicated. // so that they can be de-duplicated.
// This provides significant binary size savings. // This provides significant binary size savings.
...@@ -1669,6 +1698,7 @@ func (lv *Liveness) emit(argssym, livesym *obj.LSym) { ...@@ -1669,6 +1698,7 @@ func (lv *Liveness) emit(argssym, livesym *obj.LSym) {
// they are tracked separately from ctxt.hash. // they are tracked separately from ctxt.hash.
argssym.Name = fmt.Sprintf("gclocals·%x", md5.Sum(argssym.P)) argssym.Name = fmt.Sprintf("gclocals·%x", md5.Sum(argssym.P))
livesym.Name = fmt.Sprintf("gclocals·%x", md5.Sum(livesym.P)) livesym.Name = fmt.Sprintf("gclocals·%x", md5.Sum(livesym.P))
regssym.Name = fmt.Sprintf("gclocals·%x", md5.Sum(regssym.P))
} }
// Entry pointer for liveness analysis. Solves for the liveness of // Entry pointer for liveness analysis. Solves for the liveness of
...@@ -1692,7 +1722,7 @@ func liveness(e *ssafn, f *ssa.Func) LivenessMap { ...@@ -1692,7 +1722,7 @@ func liveness(e *ssafn, f *ssa.Func) LivenessMap {
// Emit the live pointer map data structures // Emit the live pointer map data structures
if ls := e.curfn.Func.lsym; ls != nil { if ls := e.curfn.Func.lsym; ls != nil {
lv.emit(&ls.Func.GCArgs, &ls.Func.GCLocals) lv.emit(&ls.Func.GCArgs, &ls.Func.GCLocals, &ls.Func.GCRegs)
} }
return lv.livenessMap return lv.livenessMap
} }
...@@ -404,6 +404,7 @@ type FuncInfo struct { ...@@ -404,6 +404,7 @@ type FuncInfo struct {
GCArgs LSym GCArgs LSym
GCLocals LSym GCLocals LSym
GCRegs LSym
} }
// Attribute is a set of symbol attributes. // Attribute is a set of symbol attributes.
......
...@@ -159,6 +159,9 @@ func (ctxt *Link) InitTextSym(s *LSym, flag int) { ...@@ -159,6 +159,9 @@ func (ctxt *Link) InitTextSym(s *LSym, flag int) {
gclocals := &s.Func.GCLocals gclocals := &s.Func.GCLocals
gclocals.Set(AttrDuplicateOK, true) gclocals.Set(AttrDuplicateOK, true)
gclocals.Type = objabi.SRODATA gclocals.Type = objabi.SRODATA
gcregs := &s.Func.GCRegs
gcregs.Set(AttrDuplicateOK, true)
gcregs.Type = objabi.SRODATA
} }
func (ctxt *Link) Globl(s *LSym, size int64, flag int) { func (ctxt *Link) Globl(s *LSym, size int64, flag int) {
...@@ -203,5 +206,14 @@ func (ctxt *Link) EmitEntryLiveness(s *LSym, p *Prog, newprog ProgAlloc) *Prog { ...@@ -203,5 +206,14 @@ func (ctxt *Link) EmitEntryLiveness(s *LSym, p *Prog, newprog ProgAlloc) *Prog {
pcdata.To.Type = TYPE_CONST pcdata.To.Type = TYPE_CONST
pcdata.To.Offset = -1 // pcdata starts at -1 at function entry pcdata.To.Offset = -1 // pcdata starts at -1 at function entry
// Same, with register map.
pcdata = Appendp(pcdata, newprog)
pcdata.Pos = s.Func.Text.Pos
pcdata.As = APCDATA
pcdata.From.Type = TYPE_CONST
pcdata.From.Offset = objabi.PCDATA_RegMapIndex
pcdata.To.Type = TYPE_CONST
pcdata.To.Offset = -1
return pcdata return pcdata
} }
...@@ -13,9 +13,11 @@ package objabi ...@@ -13,9 +13,11 @@ package objabi
const ( const (
PCDATA_StackMapIndex = 0 PCDATA_StackMapIndex = 0
PCDATA_InlTreeIndex = 1 PCDATA_InlTreeIndex = 1
PCDATA_RegMapIndex = 2
FUNCDATA_ArgsPointerMaps = 0 FUNCDATA_ArgsPointerMaps = 0
FUNCDATA_LocalsPointerMaps = 1 FUNCDATA_LocalsPointerMaps = 1
FUNCDATA_InlTree = 2 FUNCDATA_InlTree = 2
FUNCDATA_RegPointerMaps = 3
// ArgsSizeUnknown is set in Func.argsize to mark all functions // ArgsSizeUnknown is set in Func.argsize to mark all functions
// whose argument size is unknown (C vararg functions, and // whose argument size is unknown (C vararg functions, and
......
...@@ -10,10 +10,12 @@ ...@@ -10,10 +10,12 @@
#define PCDATA_StackMapIndex 0 #define PCDATA_StackMapIndex 0
#define PCDATA_InlTreeIndex 1 #define PCDATA_InlTreeIndex 1
#define PCDATA_RegMapIndex 2
#define FUNCDATA_ArgsPointerMaps 0 /* garbage collector blocks */ #define FUNCDATA_ArgsPointerMaps 0 /* garbage collector blocks */
#define FUNCDATA_LocalsPointerMaps 1 #define FUNCDATA_LocalsPointerMaps 1
#define FUNCDATA_InlTree 2 #define FUNCDATA_InlTree 2
#define FUNCDATA_RegPointerMaps 3
// Pseudo-assembly statements. // Pseudo-assembly statements.
......
...@@ -343,9 +343,11 @@ func (f *Func) funcInfo() funcInfo { ...@@ -343,9 +343,11 @@ func (f *Func) funcInfo() funcInfo {
const ( const (
_PCDATA_StackMapIndex = 0 _PCDATA_StackMapIndex = 0
_PCDATA_InlTreeIndex = 1 _PCDATA_InlTreeIndex = 1
_PCDATA_RegMapIndex = 2
_FUNCDATA_ArgsPointerMaps = 0 _FUNCDATA_ArgsPointerMaps = 0
_FUNCDATA_LocalsPointerMaps = 1 _FUNCDATA_LocalsPointerMaps = 1
_FUNCDATA_InlTree = 2 _FUNCDATA_InlTree = 2
_FUNCDATA_RegPointerMaps = 3
_ArgsSizeUnknown = -0x80000000 _ArgsSizeUnknown = -0x80000000
) )
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment