Commit 3c36b8be authored by Austin Clements

cmd/compile: incrementally compact liveness maps

The per-Value slice of liveness maps is currently one of the largest
sources of allocation in the compiler. On cmd/compile/internal/ssa,
it's 5% of overall allocation, or 75MB in total. Enabling liveness
maps everywhere significantly increased this allocation footprint,
which in turn slowed down the compiler.

Improve this by compacting the liveness maps after every block is
processed. There are typically very few distinct liveness maps, so
compacting the maps after every block, rather than at the end of the
function, can significantly reduce these allocations.

Passes toolstash -cmp.

name        old time/op       new time/op       delta
Template          198ms ± 2%        196ms ± 1%  -1.11%  (p=0.008 n=9+10)
Unicode           100ms ± 1%         99ms ± 1%  -0.94%  (p=0.015 n=8+9)
GoTypes           703ms ± 2%        695ms ± 1%  -1.15%  (p=0.000 n=10+10)
Compiler          3.38s ± 3%        3.33s ± 0%  -1.66%  (p=0.000 n=10+9)
SSA               7.96s ± 1%        7.93s ± 1%    ~ 	(p=0.113 n=9+10)
Flate             134ms ± 1%        132ms ± 1%  -1.30%  (p=0.000 n=8+10)
GoParser          165ms ± 2%        163ms ± 1%  -1.32%  (p=0.013 n=9+10)
Reflect           462ms ± 2%        459ms ± 0%  -0.65%  (p=0.036 n=9+8)
Tar               188ms ± 2%        186ms ± 1%    ~     (p=0.173 n=8+10)
XML               243ms ± 7%        239ms ± 1%    ~     (p=0.684 n=10+10)
[Geo mean]        421ms             416ms       -1.10%

name        old alloc/op      new alloc/op      delta
Template         38.0MB ± 0%       36.5MB ± 0%  -3.98%  (p=0.000 n=10+10)
Unicode          30.3MB ± 0%       29.6MB ± 0%  -2.21% 	(p=0.000 n=10+10)
GoTypes           125MB ± 0%        120MB ± 0%  -4.51% 	(p=0.000 n=10+9)
Compiler          575MB ± 0%        546MB ± 0%  -5.06% 	(p=0.000 n=10+10)
SSA              1.64GB ± 0%       1.55GB ± 0%  -4.97% 	(p=0.000 n=10+10)
Flate            25.9MB ± 0%       25.0MB ± 0%  -3.41% 	(p=0.000 n=10+10)
GoParser         30.7MB ± 0%       29.5MB ± 0%  -3.97% 	(p=0.000 n=10+10)
Reflect          84.1MB ± 0%       81.9MB ± 0%  -2.64% 	(p=0.000 n=10+10)
Tar              37.0MB ± 0%       35.8MB ± 0%  -3.27% 	(p=0.000 n=10+9)
XML              47.2MB ± 0%       45.0MB ± 0%  -4.57% 	(p=0.000 n=10+10)
[Geo mean]       83.2MB            79.9MB       -3.86%

name        old allocs/op     new allocs/op     delta
Template           337k ± 0%         337k ± 0%  -0.06%  (p=0.000 n=10+10)
Unicode            340k ± 0%         340k ± 0%  -0.01% 	(p=0.014 n=10+10)
GoTypes           1.18M ± 0%        1.18M ± 0%  -0.04% 	(p=0.000 n=10+10)
Compiler          4.97M ± 0%        4.97M ± 0%  -0.03% 	(p=0.000 n=10+10)
SSA               12.3M ± 0%        12.3M ± 0%  -0.01% 	(p=0.000 n=10+10)
Flate              226k ± 0%         225k ± 0%  -0.09% 	(p=0.000 n=10+10)
GoParser           283k ± 0%         283k ± 0%  -0.06% 	(p=0.000 n=10+9)
Reflect            972k ± 0%         971k ± 0%  -0.04% 	(p=0.000 n=10+8)
Tar                333k ± 0%         332k ± 0%  -0.05% 	(p=0.000 n=10+9)
XML                395k ± 0%         395k ± 0%  -0.04% 	(p=0.000 n=10+10)
[Geo mean]         764k              764k       -0.04%

Updates #24543.

Change-Id: I6fdc46e4ddb6a8eea95d38242345205eb8397f0b
Reviewed-on: https://go-review.googlesource.com/110177
Run-TryBot: Austin Clements <austin@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
parent 3c4aaf8a
...@@ -267,7 +267,7 @@ func (m *bvecSet) grow() { ...@@ -267,7 +267,7 @@ func (m *bvecSet) grow() {
m.index = newIndex m.index = newIndex
} }
// add adds bv to the set and returns its index in m.uniq. // add adds bv to the set and returns its index in m.extractUniqe.
// The caller must not modify bv after this. // The caller must not modify bv after this.
func (m *bvecSet) add(bv bvec) int { func (m *bvecSet) add(bv bvec) int {
if len(m.uniq)*4 >= len(m.index) { if len(m.uniq)*4 >= len(m.index) {
...@@ -296,3 +296,9 @@ func (m *bvecSet) add(bv bvec) int { ...@@ -296,3 +296,9 @@ func (m *bvecSet) add(bv bvec) int {
} }
} }
} }
// extractUniqe returns the slice of unique bit vectors in m, as
// indexed by the result of bvecSet.add.
//
// The returned slice is m.uniq itself, not a copy.
func (m *bvecSet) extractUniqe() []bvec {
return m.uniq
}
...@@ -117,8 +117,11 @@ type Liveness struct { ...@@ -117,8 +117,11 @@ type Liveness struct {
// unsafePoints bit i is set if Value ID i is not a safe point. // unsafePoints bit i is set if Value ID i is not a safe point.
unsafePoints bvec unsafePoints bvec
// An array with a bit vector for each safe point tracking live variables. // An array with a bit vector for each safe point in the
// Indexed sequentially by safe points in Block and Value order. // current Block during Liveness.epilogue. Indexed in Value
// order for that block. Additionally, for the entry block
// livevars[0] is the entry bitmap. Liveness.compact moves
// these to stackMaps and regMaps.
livevars []varRegVec livevars []varRegVec
// livenessMap maps from safe points (i.e., CALLs) to their // livenessMap maps from safe points (i.e., CALLs) to their
...@@ -127,7 +130,9 @@ type Liveness struct { ...@@ -127,7 +130,9 @@ type Liveness struct {
// TODO(austin): Now that we have liveness at almost every PC, // TODO(austin): Now that we have liveness at almost every PC,
// should this be a dense structure? // should this be a dense structure?
livenessMap LivenessMap livenessMap LivenessMap
stackMapSet bvecSet
stackMaps []bvec stackMaps []bvec
regMapSet map[liveRegMask]int
regMaps []liveRegMask regMaps []liveRegMask
cache progeffectscache cache progeffectscache
...@@ -491,6 +496,9 @@ func newliveness(fn *Node, f *ssa.Func, vars []*Node, idx map[*Node]int32, stkpt ...@@ -491,6 +496,9 @@ func newliveness(fn *Node, f *ssa.Func, vars []*Node, idx map[*Node]int32, stkpt
idx: idx, idx: idx,
stkptrsize: stkptrsize, stkptrsize: stkptrsize,
be: make([]BlockEffects, f.NumBlocks()), be: make([]BlockEffects, f.NumBlocks()),
livenessMap: LivenessMap{make(map[*ssa.Value]LivenessIndex)},
regMapSet: make(map[liveRegMask]int),
} }
nblocks := int32(len(f.Blocks)) nblocks := int32(len(f.Blocks))
...@@ -975,6 +983,13 @@ func (lv *Liveness) epilogue() { ...@@ -975,6 +983,13 @@ func (lv *Liveness) epilogue() {
} }
} }
// We must analyze the entry block first. The runtime assumes
// the function entry map is index 0. Conveniently, layout
// already ensured that the entry block is first.
if lv.f.Entry != lv.f.Blocks[0] {
lv.f.Fatalf("entry block must be first")
}
{ {
// Reserve an entry for function entry. // Reserve an entry for function entry.
live := bvalloc(nvars) live := bvalloc(nvars)
...@@ -1040,11 +1055,6 @@ func (lv *Liveness) epilogue() { ...@@ -1040,11 +1055,6 @@ func (lv *Liveness) epilogue() {
// walk backward, construct maps at each safe point // walk backward, construct maps at each safe point
index := int32(len(lv.livevars) - 1) index := int32(len(lv.livevars) - 1)
if index < 0 {
// the first block we encounter should have the ATEXT so
// at no point should pos ever be less than zero.
Fatalf("livenessepilogue")
}
liveout.Copy(be.liveout) liveout.Copy(be.liveout)
for i := len(b.Values) - 1; i >= 0; i-- { for i := len(b.Values) - 1; i >= 0; i-- {
...@@ -1097,13 +1107,20 @@ func (lv *Liveness) epilogue() { ...@@ -1097,13 +1107,20 @@ func (lv *Liveness) epilogue() {
index++ index++
} }
} }
// The liveness maps for this block are now complete. Compact them.
lv.compact(b)
} }
// Done compacting. Throw out the stack map set.
lv.stackMaps = lv.stackMapSet.extractUniqe()
lv.stackMapSet = bvecSet{}
// Useful sanity check: on entry to the function, // Useful sanity check: on entry to the function,
// the only things that can possibly be live are the // the only things that can possibly be live are the
// input parameters. // input parameters.
for j, n := range lv.vars { for j, n := range lv.vars {
if n.Class() != PPARAM && lv.livevars[0].vars.Get(int32(j)) { if n.Class() != PPARAM && lv.stackMaps[0].Get(int32(j)) {
Fatalf("internal error: %v %L recorded as live on entry", lv.fn.Func.Nname, n) Fatalf("internal error: %v %L recorded as live on entry", lv.fn.Func.Nname, n)
} }
} }
...@@ -1111,7 +1128,7 @@ func (lv *Liveness) epilogue() { ...@@ -1111,7 +1128,7 @@ func (lv *Liveness) epilogue() {
// The context register, if any, comes from a // The context register, if any, comes from a
// LoweredGetClosurePtr operation first thing in the function, // LoweredGetClosurePtr operation first thing in the function,
// so it doesn't appear live at entry. // so it doesn't appear live at entry.
if regs := lv.livevars[0].regs; regs != 0 { if regs := lv.regMaps[0]; regs != 0 {
lv.printDebug() lv.printDebug()
lv.f.Fatalf("internal error: %v register %s recorded as live on entry", lv.fn.Func.Nname, regs.niceString(lv.f.Config)) lv.f.Fatalf("internal error: %v register %s recorded as live on entry", lv.fn.Func.Nname, regs.niceString(lv.f.Config))
} }
...@@ -1292,8 +1309,10 @@ func (lv *Liveness) avarinitanyall(b *ssa.Block, any, all bvec) { ...@@ -1292,8 +1309,10 @@ func (lv *Liveness) avarinitanyall(b *ssa.Block, any, all bvec) {
} }
} }
// Compact liveness information by coalescing identical per-call-site bitmaps. // Compact coalesces identical bitmaps from lv.livevars into the sets
// The merging only happens for a single function, not across the entire binary. // lv.stackMapSet and lv.regMaps.
//
// Compact clears lv.livevars.
// //
// There are actually two lists of bitmaps, one list for the local variables and one // There are actually two lists of bitmaps, one list for the local variables and one
// list for the function arguments. Both lists are indexed by the same PCDATA // list for the function arguments. Both lists are indexed by the same PCDATA
...@@ -1306,47 +1325,34 @@ func (lv *Liveness) avarinitanyall(b *ssa.Block, any, all bvec) { ...@@ -1306,47 +1325,34 @@ func (lv *Liveness) avarinitanyall(b *ssa.Block, any, all bvec) {
// is actually a net loss: we save about 50k of argument bitmaps but the new // is actually a net loss: we save about 50k of argument bitmaps but the new
// PCDATA tables cost about 100k. So for now we keep using a single index for // PCDATA tables cost about 100k. So for now we keep using a single index for
// both bitmap lists. // both bitmap lists.
func (lv *Liveness) compact() { func (lv *Liveness) compact(b *ssa.Block) {
// Compact livevars. add := func(live varRegVec) LivenessIndex {
// remap[i] = the index in lv.stackMaps of for bitmap lv.livevars[i]. // Deduplicate the stack map.
remap := make([]int, len(lv.livevars)) stackIndex := lv.stackMapSet.add(live.vars)
set := newBvecSet(len(lv.livevars)) // Deduplicate the register map.
for i, live := range lv.livevars { regIndex, ok := lv.regMapSet[live.regs]
remap[i] = set.add(live.vars)
}
lv.stackMaps = set.uniq
// Compact register maps.
remapRegs := make([]int, len(lv.livevars))
regMaps := make(map[liveRegMask]int)
for i, live := range lv.livevars {
idx, ok := regMaps[live.regs]
if !ok { if !ok {
idx = len(regMaps) regIndex = len(lv.regMapSet)
regMaps[live.regs] = idx lv.regMapSet[live.regs] = regIndex
lv.regMaps = append(lv.regMaps, live.regs) lv.regMaps = append(lv.regMaps, live.regs)
} }
remapRegs[i] = idx return LivenessIndex{stackIndex, regIndex}
} }
pos := 0
// Clear lv.livevars to allow GC of duplicate maps and to if b == lv.f.Entry {
// prevent accidental use. // Handle entry stack map.
lv.livevars = nil add(lv.livevars[0])
pos++
// Record compacted stack map indexes for each value. }
// These will later become PCDATA instructions. for _, v := range b.Values {
lv.showlive(nil, lv.stackMaps[0]) if lv.issafepoint(v) {
pos := 1 lv.livenessMap.m[v] = add(lv.livevars[pos])
lv.livenessMap = LivenessMap{make(map[*ssa.Value]LivenessIndex)} pos++
for _, b := range lv.f.Blocks {
for _, v := range b.Values {
if lv.issafepoint(v) {
lv.showlive(v, lv.stackMaps[remap[pos]])
lv.livenessMap.m[v] = LivenessIndex{remap[pos], remapRegs[pos]}
pos++
}
} }
} }
// Reset livevars.
lv.livevars = lv.livevars[:0]
} }
func (lv *Liveness) showlive(v *ssa.Value, live bvec) { func (lv *Liveness) showlive(v *ssa.Value, live bvec) {
...@@ -1647,8 +1653,13 @@ func liveness(e *ssafn, f *ssa.Func) LivenessMap { ...@@ -1647,8 +1653,13 @@ func liveness(e *ssafn, f *ssa.Func) LivenessMap {
lv.prologue() lv.prologue()
lv.solve() lv.solve()
lv.epilogue() lv.epilogue()
lv.compact()
lv.clobber() lv.clobber()
if debuglive > 0 {
lv.showlive(nil, lv.stackMaps[0])
for val, idx := range lv.livenessMap.m {
lv.showlive(val, lv.stackMaps[idx.stackMapIndex])
}
}
if debuglive >= 2 { if debuglive >= 2 {
lv.printDebug() lv.printDebug()
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment