Commit c4ef597c authored by Cherry Zhang

cmd/compile: redo writebarrier pass

SSA's writebarrier pass requires that WB store ops always be at the
end of a block. If we move write barrier insertion into SSA and
emit normal Store ops when building SSA, this requirement becomes
impractical -- it would create too many blocks for all the Store
ops.

Redo SSA's writebarrier pass to explicitly order values in store
order, so it no longer needs this requirement.
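
For illustration (this snippet is not from the CL), a heap pointer
assignment like the one below is the kind of store that becomes a WB
store op (OpStoreWB) and is later expanded by this pass:

    package p

    type T struct{ p *int }

    // The pointer store t.p = x is compiled with a write barrier.
    func set(t *T, x *int) {
        t.p = x
    }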

Updates #17583.
Fixes #19067.

Change-Id: I66e817e526affb7e13517d4245905300a90b7170
Reviewed-on: https://go-review.googlesource.com/36834
Run-TryBot: Cherry Zhang <cherryyz@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: David Chase <drchase@google.com>
parent 98061fa5
@@ -3438,14 +3438,6 @@ func (s *state) insertWBmove(t *Type, left, right *ssa.Value, rightIsVolatile bo
}
val.Aux = &ssa.ExternSymbol{Typ: Types[TUINTPTR], Sym: Linksym(typenamesym(t))}
s.vars[&memVar] = val
// WB ops will be expanded to branches at writebarrier phase.
// To make it easy, we put WB ops at the end of a block, so
// that it does not need to split a block into two parts when
// expanding WB ops.
b := s.f.NewBlock(ssa.BlockPlain)
s.endBlock().AddEdgeTo(b)
s.startBlock(b)
}
// insertWBstore inserts the assignment *left = right including a write barrier.
@@ -3466,14 +3458,6 @@ func (s *state) insertWBstore(t *Type, left, right *ssa.Value, skip skipMask) {
}
s.storeTypeScalars(t, left, right, skip)
s.storeTypePtrsWB(t, left, right)
// WB ops will be expanded to branches at writebarrier phase.
// To make it easy, we put WB ops at the end of a block, so
// that it does not need to split a block into two parts when
// expanding WB ops.
b := s.f.NewBlock(ssa.BlockPlain)
s.endBlock().AddEdgeTo(b)
s.startBlock(b)
}
// do *left = right for all scalar (non-pointer) parts of t.
@@ -227,163 +227,3 @@ func nilcheckelim2(f *Func) {
// more unnecessary nil checks. Would fix test/nilptr3_ssa.go:157.
}
}
// storeOrder orders values with respect to stores. That is,
// if v transitively depends on store s, v is ordered after s,
// otherwise v is ordered before s.
// Specifically, values are ordered like
// store1
// NilCheck that depends on store1
// other values that depend on store1
// store2
// NilCheck that depends on store2
// other values that depend on store2
// ...
// The order of non-store and non-NilCheck values is undefined
// (not necessarily dependency order). This should be cheaper
// than a full scheduling as done in schedule.go.
// Note that simple dependency order won't work: there is no
// dependency between NilChecks and values like IsNonNil.
// Auxiliary data structures are passed in as arguments, so
// that they can be allocated in the caller and be reused.
// This function takes care of resetting them.
func storeOrder(values []*Value, sset *sparseSet, storeNumber []int32) []*Value {
// find all stores
var stores []*Value // members of values that are store values
hasNilCheck := false
sset.clear() // sset is the set of stores that are used in other values
for _, v := range values {
if v.Type.IsMemory() {
stores = append(stores, v)
if v.Op == OpInitMem || v.Op == OpPhi {
continue
}
a := v.Args[len(v.Args)-1]
if v.Op == OpSelect1 {
a = a.Args[len(a.Args)-1]
}
sset.add(a.ID) // record that a is used
}
if v.Op == OpNilCheck {
hasNilCheck = true
}
}
if len(stores) == 0 || !hasNilCheck {
// there is no store or no nil check, so the order does not matter
return values
}
f := stores[0].Block.Func
// find last store, which is the one that is not used by other stores
var last *Value
for _, v := range stores {
if !sset.contains(v.ID) {
if last != nil {
f.Fatalf("two stores live simutaneously: %v and %v", v, last)
}
last = v
}
}
// We assign a store number to each value. Store number is the
// index of the latest store that this value transitively depends on.
// The i-th store in the current block gets store number 3*i. A nil
// check that depends on the i-th store gets store number 3*i+1.
// Other values that depend on the i-th store get store number 3*i+2.
// Special case: 0 -- unassigned, 1 or 2 -- the latest store it depends
// on is in the previous block (or no store at all, e.g. value is Const).
// First we assign the number to all stores by walking back the store chain,
// then assign the number to other values in DFS order.
count := make([]int32, 3*(len(stores)+1))
sset.clear() // reuse sparse set to ensure that a value is pushed to stack only once
for n, w := len(stores), last; n > 0; n-- {
storeNumber[w.ID] = int32(3 * n)
count[3*n]++
sset.add(w.ID)
if w.Op == OpInitMem || w.Op == OpPhi {
if n != 1 {
f.Fatalf("store order is wrong: there are stores before %v", w)
}
break
}
if w.Op == OpSelect1 {
w = w.Args[0]
}
w = w.Args[len(w.Args)-1]
}
var stack []*Value
for _, v := range values {
if sset.contains(v.ID) {
// in sset means v is a store, or already pushed to stack, or already assigned a store number
continue
}
stack = append(stack, v)
sset.add(v.ID)
for len(stack) > 0 {
w := stack[len(stack)-1]
if storeNumber[w.ID] != 0 {
stack = stack[:len(stack)-1]
continue
}
if w.Op == OpPhi {
// Phi value doesn't depend on a store in the current block.
// Do this early to avoid dependency cycle.
storeNumber[w.ID] = 2
count[2]++
stack = stack[:len(stack)-1]
continue
}
max := int32(0) // latest store dependency
argsdone := true
for _, a := range w.Args {
if a.Block != w.Block {
continue
}
if !sset.contains(a.ID) {
stack = append(stack, a)
sset.add(a.ID)
argsdone = false
continue
}
if storeNumber[a.ID]/3 > max {
max = storeNumber[a.ID] / 3
}
}
if !argsdone {
continue
}
n := 3*max + 2
if w.Op == OpNilCheck {
n = 3*max + 1
}
storeNumber[w.ID] = n
count[n]++
stack = stack[:len(stack)-1]
}
}
// convert count to prefix sum of counts: count'[i] = sum_{j<=i} count[j]
for i := range count {
if i == 0 {
continue
}
count[i] += count[i-1]
}
if count[len(count)-1] != int32(len(values)) {
f.Fatalf("storeOrder: value is missing, total count = %d, values = %v", count[len(count)-1], values)
}
// place values in count-indexed bins, which are in the desired store order
order := make([]*Value, len(values))
for _, v := range values {
s := storeNumber[v.ID]
order[count[s-1]] = v
count[s-1]++
}
return order
}
@@ -277,3 +277,167 @@ func schedule(f *Func) {
f.scheduled = true
}
// storeOrder orders values with respect to stores. That is,
// if v transitively depends on store s, v is ordered after s,
// otherwise v is ordered before s.
// Specifically, values are ordered like
// store1
// NilCheck that depends on store1
// other values that depend on store1
// store2
// NilCheck that depends on store2
// other values that depend on store2
// ...
// The order of non-store and non-NilCheck values is undefined
// (not necessarily dependency order). This should be cheaper
// than a full scheduling as done above.
// Note that simple dependency order won't work: there is no
// dependency between NilChecks and values like IsNonNil.
// Auxiliary data structures are passed in as arguments, so
// that they can be allocated in the caller and be reused.
// This function takes care of resetting them.
func storeOrder(values []*Value, sset *sparseSet, storeNumber []int32) []*Value {
if len(values) == 0 {
return values
}
f := values[0].Block.Func
// find all stores
var stores []*Value // members of values that are store values
hasNilCheck := false
sset.clear() // sset is the set of stores that are used in other values
for _, v := range values {
if v.Type.IsMemory() {
stores = append(stores, v)
if v.Op == OpInitMem || v.Op == OpPhi {
continue
}
a := v.Args[len(v.Args)-1]
if v.Op == OpSelect1 {
a = a.Args[len(a.Args)-1]
}
sset.add(a.ID) // record that a is used
}
if v.Op == OpNilCheck {
hasNilCheck = true
}
}
if len(stores) == 0 || !hasNilCheck && f.pass.name == "nilcheckelim" {
// there is no store, or (when called from nilcheckelim) no nil check -- the order does not matter
return values
}
// find last store, which is the one that is not used by other stores
var last *Value
for _, v := range stores {
if !sset.contains(v.ID) {
if last != nil {
f.Fatalf("two stores live simutaneously: %v and %v", v, last)
}
last = v
}
}
// We assign a store number to each value. Store number is the
// index of the latest store that this value transitively depends on.
// The i-th store in the current block gets store number 3*i. A nil
// check that depends on the i-th store gets store number 3*i+1.
// Other values that depend on the i-th store get store number 3*i+2.
// Special case: 0 -- unassigned, 1 or 2 -- the latest store it depends
// on is in the previous block (or no store at all, e.g. value is Const).
// First we assign the number to all stores by walking back the store chain,
// then assign the number to other values in DFS order.
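// For example, with two stores in the block, a possible assignment is:
//   Const (no store dependency in this block) -> 2
//   store1                                    -> 3
//   NilCheck depending on store1              -> 4
//   Load depending on store1                  -> 5
//   store2                                    -> 6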
count := make([]int32, 3*(len(stores)+1))
sset.clear() // reuse sparse set to ensure that a value is pushed to stack only once
for n, w := len(stores), last; n > 0; n-- {
storeNumber[w.ID] = int32(3 * n)
count[3*n]++
sset.add(w.ID)
if w.Op == OpInitMem || w.Op == OpPhi {
if n != 1 {
f.Fatalf("store order is wrong: there are stores before %v", w)
}
break
}
if w.Op == OpSelect1 {
w = w.Args[0]
}
w = w.Args[len(w.Args)-1]
}
var stack []*Value
for _, v := range values {
if sset.contains(v.ID) {
// in sset means v is a store, or already pushed to stack, or already assigned a store number
continue
}
stack = append(stack, v)
sset.add(v.ID)
for len(stack) > 0 {
w := stack[len(stack)-1]
if storeNumber[w.ID] != 0 {
stack = stack[:len(stack)-1]
continue
}
if w.Op == OpPhi {
// Phi value doesn't depend on a store in the current block.
// Do this early to avoid dependency cycle.
storeNumber[w.ID] = 2
count[2]++
stack = stack[:len(stack)-1]
continue
}
max := int32(0) // latest store dependency
argsdone := true
for _, a := range w.Args {
if a.Block != w.Block {
continue
}
if !sset.contains(a.ID) {
stack = append(stack, a)
sset.add(a.ID)
argsdone = false
continue
}
if storeNumber[a.ID]/3 > max {
max = storeNumber[a.ID] / 3
}
}
if !argsdone {
continue
}
n := 3*max + 2
if w.Op == OpNilCheck {
n = 3*max + 1
}
storeNumber[w.ID] = n
count[n]++
stack = stack[:len(stack)-1]
}
}
// convert count to prefix sum of counts: count'[i] = sum_{j<=i} count[j]
for i := range count {
if i == 0 {
continue
}
count[i] += count[i-1]
}
if count[len(count)-1] != int32(len(values)) {
f.Fatalf("storeOrder: value is missing, total count = %d, values = %v", count[len(count)-1], values)
}
// place values in count-indexed bins, which are in the desired store order
order := make([]*Value, len(values))
for _, v := range values {
s := storeNumber[v.ID]
order[count[s-1]] = v
count[s-1]++
}
return order
}
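
Aside (editor's sketch, not part of the commit): the final placement in
storeOrder is a counting sort keyed by store number. Below is a minimal
self-contained toy of just that placement step, with hand-assigned store
numbers and a toy type standing in for the compiler's *Value:

    package main

    import "fmt"

    // value is a toy stand-in for *ssa.Value: a name plus the store
    // number the DFS in storeOrder would have assigned to it.
    type value struct {
        name string
        num  int32
    }

    // orderByStoreNumber mirrors storeOrder's count / prefix-sum /
    // placement logic for a block containing nstores stores.
    func orderByStoreNumber(values []value, nstores int) []value {
        count := make([]int32, 3*(nstores+1))
        for _, v := range values {
            count[v.num]++
        }
        for i := 1; i < len(count); i++ {
            count[i] += count[i-1] // count[i] is now one past the end of bin i
        }
        order := make([]value, len(values))
        for _, v := range values {
            order[count[v.num-1]] = v // count[v.num-1] is the start of bin v.num
            count[v.num-1]++
        }
        return order
    }

    func main() {
        // store1 -> 3, its nil check -> 4, a dependent load -> 5,
        // store2 -> 6, a constant with no store dependency -> 2.
        vs := []value{{"load", 5}, {"const", 2}, {"store2", 6}, {"nilcheck", 4}, {"store1", 3}}
        for _, v := range orderByStoreNumber(vs, 2) {
            fmt.Print(v.name, " ")
        }
        fmt.Println() // prints: const store1 nilcheck load store2
    }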
@@ -22,21 +22,18 @@ import (
// and a normal store will be used.
// A sequence of WB stores for many pointer fields of a single type will
// be emitted together, with a single branch.
//
// Expanding WB ops introduces new control flows, and we would need to
// split a block into two if there were values after WB ops, which would
// require scheduling the values. To avoid this complexity, when building
// SSA, we make sure that WB ops are always at the end of a block. We do
// this before fuse as it may merge blocks. It also helps to reduce
// number of blocks as fuse merges blocks introduced in this phase.
func writebarrier(f *Func) {
var sb, sp, wbaddr *Value
var sb, sp, wbaddr, const0 *Value
var writebarrierptr, typedmemmove, typedmemclr *obj.LSym
var storeWBs, others []*Value
var wbs *sparseSet
for _, b := range f.Blocks { // range loop is safe since the blocks we added contain no WB stores
valueLoop:
for i, v := range b.Values {
var stores, after []*Value
var sset *sparseSet
var storeNumber []int32
for _, b := range f.Blocks { // range loop is safe since the blocks we added contain no stores to expand
// rewrite write barrier for stack writes to ordinary Store/Move/Zero,
// record presence of non-stack WB ops.
hasStore := false
for _, v := range b.Values {
switch v.Op {
case OpStoreWB, OpMoveWB, OpMoveWBVolatile, OpZeroWB:
if IsStackAddr(v.Args[0]) {
@@ -52,11 +49,18 @@ func writebarrier(f *Func) {
}
continue
}
hasStore = true
break
}
}
if !hasStore {
continue
}
if wbaddr == nil {
// initalize global values for write barrier test and calls
// lazily initialize global values for write barrier test and calls
// find SB and SP values in entry block
initln := f.Entry.Pos
initpos := f.Entry.Pos
for _, v := range f.Entry.Values {
if v.Op == OpSB {
sb = v
@@ -64,72 +68,67 @@ func writebarrier(f *Func) {
if v.Op == OpSP {
sp = v
}
if sb != nil && sp != nil {
break
}
}
if sb == nil {
sb = f.Entry.NewValue0(initln, OpSB, f.Config.fe.TypeUintptr())
sb = f.Entry.NewValue0(initpos, OpSB, f.Config.fe.TypeUintptr())
}
if sp == nil {
sp = f.Entry.NewValue0(initln, OpSP, f.Config.fe.TypeUintptr())
sp = f.Entry.NewValue0(initpos, OpSP, f.Config.fe.TypeUintptr())
}
wbsym := &ExternSymbol{Typ: f.Config.fe.TypeBool(), Sym: f.Config.fe.Syslook("writeBarrier")}
wbaddr = f.Entry.NewValue1A(initln, OpAddr, f.Config.fe.TypeUInt32().PtrTo(), wbsym, sb)
wbaddr = f.Entry.NewValue1A(initpos, OpAddr, f.Config.fe.TypeUInt32().PtrTo(), wbsym, sb)
writebarrierptr = f.Config.fe.Syslook("writebarrierptr")
typedmemmove = f.Config.fe.Syslook("typedmemmove")
typedmemclr = f.Config.fe.Syslook("typedmemclr")
const0 = f.ConstInt32(initpos, f.Config.fe.TypeUInt32(), 0)
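// const0 is created once per function and shared by every write
// barrier test below (it used to be recreated for each sequence)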
wbs = f.newSparseSet(f.NumValues())
defer f.retSparseSet(wbs)
// allocate auxiliary data structures for computing store order
sset = f.newSparseSet(f.NumValues())
defer f.retSparseSet(sset)
storeNumber = make([]int32, f.NumValues())
}
pos := v.Pos
// order values in store order
b.Values = storeOrder(b.Values, sset, storeNumber)
// there may be a sequence of WB stores in the current block. find them.
storeWBs = storeWBs[:0]
others = others[:0]
wbs.clear()
for _, w := range b.Values[i:] {
again:
// find the start and end of the last contiguous WB store sequence.
// a branch will be inserted there. values after it will be moved
// to a new block.
var last *Value
var start, end int
values := b.Values
for i := len(values) - 1; i >= 0; i-- {
w := values[i]
if w.Op == OpStoreWB || w.Op == OpMoveWB || w.Op == OpMoveWBVolatile || w.Op == OpZeroWB {
storeWBs = append(storeWBs, w)
wbs.add(w.ID)
} else {
others = append(others, w)
}
}
// make sure that no value in this block depends on WB stores
for _, w := range b.Values {
if w.Op == OpStoreWB || w.Op == OpMoveWB || w.Op == OpMoveWBVolatile || w.Op == OpZeroWB {
continue
if last == nil {
last = w
end = i + 1
}
for _, a := range w.Args {
if wbs.contains(a.ID) {
f.Fatalf("value %v depends on WB store %v in the same block %v", w, a, b)
} else {
if last != nil {
start = i + 1
break
}
}
}
stores = append(stores[:0], b.Values[start:end]...) // copy to avoid aliasing
after = append(after[:0], b.Values[end:]...)
b.Values = b.Values[:start]
// find the memory before the WB stores
// this memory is not a WB store but it is used in a WB store.
var mem *Value
for _, w := range storeWBs {
a := w.Args[len(w.Args)-1]
if wbs.contains(a.ID) {
continue
}
if mem != nil {
b.Fatalf("two stores live simultaneously: %s, %s", mem, a)
}
mem = a
}
b.Values = append(b.Values[:i], others...) // move WB ops out of this block
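// the memory state entering the sequence is the memory argument of its first store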
mem := stores[0].Args[len(stores[0].Args)-1]
pos := stores[0].Pos
bThen := f.NewBlock(BlockPlain)
bElse := f.NewBlock(BlockPlain)
bEnd := f.NewBlock(b.Kind)
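// resulting control flow: b (flag test) branches to bThen (barrier
// stores) and bElse (plain stores), which both rejoin at bEnd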
bThen.Pos = pos
bElse.Pos = pos
bEnd.Pos = pos
bEnd.Pos = b.Pos
b.Pos = pos
// set up control flow for end block
bEnd.SetControl(b.Control)
@@ -142,7 +141,6 @@ func writebarrier(f *Func) {
// set up control flow for write barrier test
// load word, test word, avoiding partial register write from load byte.
flag := b.NewValue2(pos, OpLoad, f.Config.fe.TypeUInt32(), wbaddr, mem)
const0 := f.ConstInt32(pos, f.Config.fe.TypeUInt32(), 0)
flag = b.NewValue2(pos, OpNeq32, f.Config.fe.TypeBool(), flag, const0)
b.Kind = BlockIf
b.SetControl(flag)
@@ -153,13 +151,16 @@ func writebarrier(f *Func) {
bThen.AddEdgeTo(bEnd)
bElse.AddEdgeTo(bEnd)
// for each write barrier store, append write barrier version to bThen
// and simple store version to bElse
memThen := mem
memElse := mem
for _, w := range storeWBs {
for _, w := range stores {
var val *Value
ptr := w.Args[0]
siz := w.AuxInt
typ := w.Aux // only non-nil for MoveWB, MoveWBVolatile, ZeroWB
pos = w.Pos
var op Op
var fn *obj.LSym
@@ -186,6 +187,10 @@ func writebarrier(f *Func) {
} else {
memElse = bElse.NewValue3I(pos, op, TypeMem, siz, ptr, val, memElse)
}
if f.Config.fe.Debug_wb() {
f.Config.Warnl(pos, "write barrier")
}
}
// merge memory
@@ -193,46 +198,33 @@ func writebarrier(f *Func) {
// which may be used in subsequent blocks. Other memories in the
// sequence must be dead after this block since there can be only
// one memory live.
last := storeWBs[0]
if len(storeWBs) > 1 {
// find the last store
last = nil
wbs.clear() // we reuse wbs to record WB stores that are used in another WB store
for _, w := range storeWBs {
wbs.add(w.Args[len(w.Args)-1].ID)
}
for _, w := range storeWBs {
if wbs.contains(w.ID) {
continue
}
if last != nil {
b.Fatalf("two stores live simultaneously: %s, %s", last, w)
}
last = w
}
}
bEnd.Values = append(bEnd.Values, last)
last.Block = bEnd
last.reset(OpPhi)
last.Type = TypeMem
last.AddArg(memThen)
last.AddArg(memElse)
for _, w := range storeWBs {
for _, w := range stores {
if w != last {
w.resetArgs()
}
}
for _, w := range storeWBs {
for _, w := range stores {
if w != last {
f.freeValue(w)
}
}
if f.Config.fe.Debug_wb() {
f.Config.Warnl(pos, "write barrier")
// put values after the store sequence into the end block
bEnd.Values = append(bEnd.Values, after...)
for _, w := range after {
w.Block = bEnd
}
break valueLoop
// if we have more stores in this block, do this block again
for _, w := range b.Values {
if w.Op == OpStoreWB || w.Op == OpMoveWB || w.Op == OpMoveWBVolatile || w.Op == OpZeroWB {
goto again
}
}
}
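
Aside (editor's sketch, not part of the commit): the expanded form of one
contiguous WB store sequence behaves like the Go below -- a single load
and test of the runtime's write barrier flag word guards the whole run
of stores. The names writeBarrierFlag and barrierStore are invented for
the toy; the real flag and writebarrierptr live in the runtime and are
reached via Syslook, not via Go source like this:

    package main

    import "fmt"

    var writeBarrierFlag uint32 // toy stand-in for the runtime's flag word

    // barrierStore stands in for the writebarrierptr call on the bThen path.
    func barrierStore(dst **int, src *int) {
        fmt.Println("writebarrierptr")
        *dst = src
    }

    func main() {
        var x, y int
        var p, q *int
        if writeBarrierFlag != 0 { // one branch for the whole sequence (bThen)
            barrierStore(&p, &x)
            barrierStore(&q, &y)
        } else { // plain stores (bElse)
            p = &x
            q = &y
        }
        fmt.Println(p != nil, q != nil) // rest of the block (bEnd)
    }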