Commit 3c36b8be authored by Austin Clements's avatar Austin Clements

cmd/compile: incrementally compact liveness maps

The per-Value slice of liveness maps is currently one of the largest
sources of allocation in the compiler. On cmd/compile/internal/ssa,
it's 5% of overall allocation, or 75MB in total. Enabling liveness
maps everywhere significantly increased this allocation footprint,
which in turn slowed down the compiler.

Improve this by compacting the liveness maps after every block is
processed. There are typically very few distinct liveness maps, so
compacting the maps after every block, rather than at the end of the
function, can significantly reduce these allocations.

Passes toolstash -cmp.

name        old time/op       new time/op       delta
Template          198ms ± 2%        196ms ± 1%  -1.11%  (p=0.008 n=9+10)
Unicode           100ms ± 1%         99ms ± 1%  -0.94%  (p=0.015 n=8+9)
GoTypes           703ms ± 2%        695ms ± 1%  -1.15%  (p=0.000 n=10+10)
Compiler          3.38s ± 3%        3.33s ± 0%  -1.66%  (p=0.000 n=10+9)
SSA               7.96s ± 1%        7.93s ± 1%    ~ 	(p=0.113 n=9+10)
Flate             134ms ± 1%        132ms ± 1%  -1.30%  (p=0.000 n=8+10)
GoParser          165ms ± 2%        163ms ± 1%  -1.32%  (p=0.013 n=9+10)
Reflect           462ms ± 2%        459ms ± 0%  -0.65%  (p=0.036 n=9+8)
Tar               188ms ± 2%        186ms ± 1%    ~     (p=0.173 n=8+10)
XML               243ms ± 7%        239ms ± 1%    ~     (p=0.684 n=10+10)
[Geo mean]        421ms             416ms       -1.10%

name        old alloc/op      new alloc/op      delta
Template         38.0MB ± 0%       36.5MB ± 0%  -3.98%  (p=0.000 n=10+10)
Unicode          30.3MB ± 0%       29.6MB ± 0%  -2.21% 	(p=0.000 n=10+10)
GoTypes           125MB ± 0%        120MB ± 0%  -4.51% 	(p=0.000 n=10+9)
Compiler          575MB ± 0%        546MB ± 0%  -5.06% 	(p=0.000 n=10+10)
SSA              1.64GB ± 0%       1.55GB ± 0%  -4.97% 	(p=0.000 n=10+10)
Flate            25.9MB ± 0%       25.0MB ± 0%  -3.41% 	(p=0.000 n=10+10)
GoParser         30.7MB ± 0%       29.5MB ± 0%  -3.97% 	(p=0.000 n=10+10)
Reflect          84.1MB ± 0%       81.9MB ± 0%  -2.64% 	(p=0.000 n=10+10)
Tar              37.0MB ± 0%       35.8MB ± 0%  -3.27% 	(p=0.000 n=10+9)
XML              47.2MB ± 0%       45.0MB ± 0%  -4.57% 	(p=0.000 n=10+10)
[Geo mean]       83.2MB            79.9MB       -3.86%

name        old allocs/op     new allocs/op     delta
Template           337k ± 0%         337k ± 0%  -0.06%  (p=0.000 n=10+10)
Unicode            340k ± 0%         340k ± 0%  -0.01% 	(p=0.014 n=10+10)
GoTypes           1.18M ± 0%        1.18M ± 0%  -0.04% 	(p=0.000 n=10+10)
Compiler          4.97M ± 0%        4.97M ± 0%  -0.03% 	(p=0.000 n=10+10)
SSA               12.3M ± 0%        12.3M ± 0%  -0.01% 	(p=0.000 n=10+10)
Flate              226k ± 0%         225k ± 0%  -0.09% 	(p=0.000 n=10+10)
GoParser           283k ± 0%         283k ± 0%  -0.06% 	(p=0.000 n=10+9)
Reflect            972k ± 0%         971k ± 0%  -0.04% 	(p=0.000 n=10+8)
Tar                333k ± 0%         332k ± 0%  -0.05% 	(p=0.000 n=10+9)
XML                395k ± 0%         395k ± 0%  -0.04% 	(p=0.000 n=10+10)
[Geo mean]         764k              764k       -0.04%

Updates #24543.

Change-Id: I6fdc46e4ddb6a8eea95d38242345205eb8397f0b
Reviewed-on: https://go-review.googlesource.com/110177
Run-TryBot: Austin Clements <austin@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: default avatarKeith Randall <khr@golang.org>
parent 3c4aaf8a
......@@ -267,7 +267,7 @@ func (m *bvecSet) grow() {
m.index = newIndex
}
// add adds bv to the set and returns its index in m.uniq.
// add adds bv to the set and returns its index in m.extractUniqe.
// The caller must not modify bv after this.
func (m *bvecSet) add(bv bvec) int {
if len(m.uniq)*4 >= len(m.index) {
......@@ -296,3 +296,9 @@ func (m *bvecSet) add(bv bvec) int {
}
}
}
// extractUniqe returns this slice of unique bit vectors in m, as
// indexed by the result of bvecSet.add.
func (m *bvecSet) extractUniqe() []bvec {
return m.uniq
}
......@@ -117,8 +117,11 @@ type Liveness struct {
// unsafePoints bit i is set if Value ID i is not a safe point.
unsafePoints bvec
// An array with a bit vector for each safe point tracking live variables.
// Indexed sequentially by safe points in Block and Value order.
// An array with a bit vector for each safe point in the
// current Block during Liveness.epilogue. Indexed in Value
// order for that block. Additionally, for the entry block
// livevars[0] is the entry bitmap. Liveness.compact moves
// these to stackMaps and regMaps.
livevars []varRegVec
// livenessMap maps from safe points (i.e., CALLs) to their
......@@ -127,7 +130,9 @@ type Liveness struct {
// TODO(austin): Now that we have liveness at almost every PC,
// should this be a dense structure?
livenessMap LivenessMap
stackMapSet bvecSet
stackMaps []bvec
regMapSet map[liveRegMask]int
regMaps []liveRegMask
cache progeffectscache
......@@ -491,6 +496,9 @@ func newliveness(fn *Node, f *ssa.Func, vars []*Node, idx map[*Node]int32, stkpt
idx: idx,
stkptrsize: stkptrsize,
be: make([]BlockEffects, f.NumBlocks()),
livenessMap: LivenessMap{make(map[*ssa.Value]LivenessIndex)},
regMapSet: make(map[liveRegMask]int),
}
nblocks := int32(len(f.Blocks))
......@@ -975,6 +983,13 @@ func (lv *Liveness) epilogue() {
}
}
// We must analyze the entry block first. The runtime assumes
// the function entry map is index 0. Conveniently, layout
// already ensured that the entry block is first.
if lv.f.Entry != lv.f.Blocks[0] {
lv.f.Fatalf("entry block must be first")
}
{
// Reserve an entry for function entry.
live := bvalloc(nvars)
......@@ -1040,11 +1055,6 @@ func (lv *Liveness) epilogue() {
// walk backward, construct maps at each safe point
index := int32(len(lv.livevars) - 1)
if index < 0 {
// the first block we encounter should have the ATEXT so
// at no point should pos ever be less than zero.
Fatalf("livenessepilogue")
}
liveout.Copy(be.liveout)
for i := len(b.Values) - 1; i >= 0; i-- {
......@@ -1097,13 +1107,20 @@ func (lv *Liveness) epilogue() {
index++
}
}
// The liveness maps for this block are now complete. Compact them.
lv.compact(b)
}
// Done compacting. Throw out the stack map set.
lv.stackMaps = lv.stackMapSet.extractUniqe()
lv.stackMapSet = bvecSet{}
// Useful sanity check: on entry to the function,
// the only things that can possibly be live are the
// input parameters.
for j, n := range lv.vars {
if n.Class() != PPARAM && lv.livevars[0].vars.Get(int32(j)) {
if n.Class() != PPARAM && lv.stackMaps[0].Get(int32(j)) {
Fatalf("internal error: %v %L recorded as live on entry", lv.fn.Func.Nname, n)
}
}
......@@ -1111,7 +1128,7 @@ func (lv *Liveness) epilogue() {
// The context register, if any, comes from a
// LoweredGetClosurePtr operation first thing in the function,
// so it doesn't appear live at entry.
if regs := lv.livevars[0].regs; regs != 0 {
if regs := lv.regMaps[0]; regs != 0 {
lv.printDebug()
lv.f.Fatalf("internal error: %v register %s recorded as live on entry", lv.fn.Func.Nname, regs.niceString(lv.f.Config))
}
......@@ -1292,8 +1309,10 @@ func (lv *Liveness) avarinitanyall(b *ssa.Block, any, all bvec) {
}
}
// Compact liveness information by coalescing identical per-call-site bitmaps.
// The merging only happens for a single function, not across the entire binary.
// Compact coalesces identical bitmaps from lv.livevars into the sets
// lv.stackMapSet and lv.regMaps.
//
// Compact clears lv.livevars.
//
// There are actually two lists of bitmaps, one list for the local variables and one
// list for the function arguments. Both lists are indexed by the same PCDATA
......@@ -1306,47 +1325,34 @@ func (lv *Liveness) avarinitanyall(b *ssa.Block, any, all bvec) {
// is actually a net loss: we save about 50k of argument bitmaps but the new
// PCDATA tables cost about 100k. So for now we keep using a single index for
// both bitmap lists.
func (lv *Liveness) compact() {
// Compact livevars.
// remap[i] = the index in lv.stackMaps of for bitmap lv.livevars[i].
remap := make([]int, len(lv.livevars))
set := newBvecSet(len(lv.livevars))
for i, live := range lv.livevars {
remap[i] = set.add(live.vars)
}
lv.stackMaps = set.uniq
// Compact register maps.
remapRegs := make([]int, len(lv.livevars))
regMaps := make(map[liveRegMask]int)
for i, live := range lv.livevars {
idx, ok := regMaps[live.regs]
func (lv *Liveness) compact(b *ssa.Block) {
add := func(live varRegVec) LivenessIndex {
// Deduplicate the stack map.
stackIndex := lv.stackMapSet.add(live.vars)
// Deduplicate the register map.
regIndex, ok := lv.regMapSet[live.regs]
if !ok {
idx = len(regMaps)
regMaps[live.regs] = idx
regIndex = len(lv.regMapSet)
lv.regMapSet[live.regs] = regIndex
lv.regMaps = append(lv.regMaps, live.regs)
}
remapRegs[i] = idx
return LivenessIndex{stackIndex, regIndex}
}
// Clear lv.livevars to allow GC of duplicate maps and to
// prevent accidental use.
lv.livevars = nil
// Record compacted stack map indexes for each value.
// These will later become PCDATA instructions.
lv.showlive(nil, lv.stackMaps[0])
pos := 1
lv.livenessMap = LivenessMap{make(map[*ssa.Value]LivenessIndex)}
for _, b := range lv.f.Blocks {
for _, v := range b.Values {
if lv.issafepoint(v) {
lv.showlive(v, lv.stackMaps[remap[pos]])
lv.livenessMap.m[v] = LivenessIndex{remap[pos], remapRegs[pos]}
pos++
}
pos := 0
if b == lv.f.Entry {
// Handle entry stack map.
add(lv.livevars[0])
pos++
}
for _, v := range b.Values {
if lv.issafepoint(v) {
lv.livenessMap.m[v] = add(lv.livevars[pos])
pos++
}
}
// Reset livevars.
lv.livevars = lv.livevars[:0]
}
func (lv *Liveness) showlive(v *ssa.Value, live bvec) {
......@@ -1647,8 +1653,13 @@ func liveness(e *ssafn, f *ssa.Func) LivenessMap {
lv.prologue()
lv.solve()
lv.epilogue()
lv.compact()
lv.clobber()
if debuglive > 0 {
lv.showlive(nil, lv.stackMaps[0])
for val, idx := range lv.livenessMap.m {
lv.showlive(val, lv.stackMaps[idx.stackMapIndex])
}
}
if debuglive >= 2 {
lv.printDebug()
}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment