Commit 30da79d9 authored by Keith Randall's avatar Keith Randall Committed by Keith Randall

cmd/compile: improve write barrier removal

We're allowed to remove a write barrier when both the old
value in memory and the new value we're writing are not heap pointers.

Improve both those checks a little bit.

A pointer is known to not be a heap pointer if it is read from
read-only memory. This sometimes happens for loads of pointers
from string constants in read-only memory.

Do a better job of tracking which parts of memory are known to be
zero.  Before we just kept track of a range of offsets in the most
recently allocated object. For code that initializes the new object's
fields in a nonstandard order, that tracking is imprecise. Instead,
keep a bit map of the first 64 words of that object, so we can track
precisely what we know to be zeroed.

The new scheme is only precise up to the first 512 bytes of the object.
After that, we'll use write barriers unnecessarily. Hopefully most
initializers of large objects will use typedmemmove, which does only one
write barrier check for the whole initialization.

Fixes #34723
Update #21561

Change-Id: Idf6e1b7d525042fb67961302d4fc6f941393cac8
Reviewed-on: https://go-review.googlesource.com/c/go/+/199558
Run-TryBot: Keith Randall <khr@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: default avatarCherry Zhang <cherryyz@google.com>
parent 54001652
......@@ -195,6 +195,7 @@ var knownFormats = map[string]string{
"uint32 %v": "",
"uint32 %x": "",
"uint64 %08x": "",
"uint64 %b": "",
"uint64 %d": "",
"uint64 %x": "",
"uint8 %d": "",
......
......@@ -8,15 +8,19 @@ import (
"cmd/compile/internal/types"
"cmd/internal/obj"
"cmd/internal/src"
"fmt"
"strings"
)
// A ZeroRegion records a range of an object which is known to be zero.
// A ZeroRegion records parts of an object which are known to be zero.
// A ZeroRegion only applies to a single memory state.
// Each bit in mask is set if the corresponding pointer-sized word of
// the base object is known to be zero.
// In other words, if mask & (1<<i) != 0, then [base+i*ptrSize, base+(i+1)*ptrSize)
// is known to be zero.
type ZeroRegion struct {
base *Value
min int64
max int64
mask uint64
}
// needwb reports whether we need write barrier for store op v.
......@@ -46,10 +50,25 @@ func needwb(v *Value, zeroes map[ID]ZeroRegion) bool {
off += ptr.AuxInt
ptr = ptr.Args[0]
}
ptrSize := v.Block.Func.Config.PtrSize
if off%ptrSize != 0 || size%ptrSize != 0 {
v.Fatalf("unaligned pointer write")
}
if off < 0 || off+size > 64*ptrSize {
// write goes off end of tracked offsets
return true
}
z := zeroes[v.MemoryArg().ID]
if ptr == z.base && off >= z.min && off+size <= z.max {
return false
if ptr != z.base {
return true
}
for i := off; i < off+size; i += ptrSize {
if z.mask>>uint(i/ptrSize)&1 == 0 {
return true // not known to be zero
}
}
// All written locations are known to be zero - write barrier not needed.
return false
}
return true
}
......@@ -375,10 +394,11 @@ func writebarrier(f *Func) {
// computeZeroMap returns a map from an ID of a memory value to
// a set of locations that are known to be zeroed at that memory value.
func (f *Func) computeZeroMap() map[ID]ZeroRegion {
ptrSize := f.Config.PtrSize
// Keep track of which parts of memory are known to be zero.
// This helps with removing write barriers for various initialization patterns.
// This analysis is conservative. We only keep track, for each memory state, of
// a single constant range of a single object which is known to be zero.
// which of the first 64 words of a single object are known to be zero.
zeroes := map[ID]ZeroRegion{}
// Find new objects.
for _, b := range f.Blocks {
......@@ -388,7 +408,11 @@ func (f *Func) computeZeroMap() map[ID]ZeroRegion {
}
mem := v.MemoryArg()
if IsNewObject(v, mem) {
zeroes[mem.ID] = ZeroRegion{v, 0, v.Type.Elem().Size()}
nptr := v.Type.Elem().Size() / ptrSize
if nptr > 64 {
nptr = 64
}
zeroes[mem.ID] = ZeroRegion{base: v, mask: 1<<uint(nptr) - 1}
}
}
}
......@@ -420,26 +444,36 @@ func (f *Func) computeZeroMap() map[ID]ZeroRegion {
// So we have to throw all the zero information we have away.
continue
}
if off < z.min || off+size > z.max {
// Writing, at least partially, outside the known zeroes.
// We could salvage some zero information, but probably
// not worth it.
continue
// Round to cover any partially written pointer slots.
// Pointer writes should never be unaligned like this, but non-pointer
// writes to pointer-containing types will do this.
if d := off % ptrSize; d != 0 {
off -= d
size += d
}
// We now know we're storing to a zeroed area.
// We need to make a smaller zero range for the result of this store.
if off == z.min {
z.min += size
} else if off+size == z.max {
z.max -= size
} else {
// The store splits the known zero range in two.
// Keep track of the upper one, as we tend to initialize
// things in increasing memory order.
// TODO: keep track of larger one instead?
z.min = off + size
if d := size % ptrSize; d != 0 {
size += ptrSize - d
}
// Clip to the 64 words that we track.
min := off
max := off + size
if min < 0 {
min = 0
}
if max > 64*ptrSize {
max = 64 * ptrSize
}
// Save updated zero range.
// Clear bits for parts that we are writing (and hence
// will no longer necessarily be zero).
for i := min; i < max; i += ptrSize {
bit := i / ptrSize
z.mask &^= 1 << uint(bit)
}
if z.mask == 0 {
// No more known zeros - don't bother keeping.
continue
}
// Save updated known zero contents for new store.
if zeroes[v.ID] != z {
zeroes[v.ID] = z
changed = true
......@@ -450,6 +484,12 @@ func (f *Func) computeZeroMap() map[ID]ZeroRegion {
break
}
}
if f.pass.debug > 0 {
fmt.Printf("func %s\n", f.Name)
for mem, z := range zeroes {
fmt.Printf(" memory=v%d ptr=%v zeromask=%b\n", mem, z.base, z.mask)
}
}
return zeroes
}
......@@ -512,20 +552,23 @@ func IsGlobalAddr(v *Value) bool {
if v.Op == OpConstNil {
return true
}
if v.Op == OpLoad && IsReadOnlyGlobalAddr(v.Args[0]) {
return true // loading from a read-only global - the resulting address can't be a heap address.
}
return false
}
// IsReadOnlyGlobalAddr reports whether v is known to be an address of a read-only global.
func IsReadOnlyGlobalAddr(v *Value) bool {
if !IsGlobalAddr(v) {
return false
}
if v.Op == OpConstNil {
// Nil pointers are read only. See issue 33438.
return true
}
// See TODO in OpAddr case in IsSanitizerSafeAddr below.
return strings.HasPrefix(v.Aux.(*obj.LSym).Name, `""..stmp_`)
if v.Op == OpAddr && strings.HasPrefix(v.Aux.(*obj.LSym).Name, `""..stmp_`) {
return true
}
return false
}
// IsNewObject reports whether v is a pointer to a freshly allocated & zeroed object at memory state mem.
......
// errorcheck -0 -d=wb
// Copyright 2019 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Make sure we don't introduce write barriers where we
// don't need them. These cases are writing pointers to
// globals to zeroed memory.
package main
func f1() []string {
return []string{"a"}
}
func f2() []string {
return []string{"a", "b"}
}
type T struct {
a [6]*int
}
func f3() *T {
t := new(T)
t.a[0] = &g
t.a[1] = &g
t.a[2] = &g
t.a[3] = &g
t.a[4] = &g
t.a[5] = &g
return t
}
func f4() *T {
t := new(T)
t.a[5] = &g
t.a[4] = &g
t.a[3] = &g
t.a[2] = &g
t.a[1] = &g
t.a[0] = &g
return t
}
func f5() *T {
t := new(T)
t.a[4] = &g
t.a[2] = &g
t.a[0] = &g
t.a[3] = &g
t.a[1] = &g
t.a[5] = &g
return t
}
type U struct {
a [65]*int
}
func f6() *U {
u := new(U)
u.a[63] = &g
// This offset is too large: we only track the first 64 pointers for zeroness.
u.a[64] = &g // ERROR "write barrier"
return u
}
var g int
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment