Commit 30da79d9 authored by Keith Randall, committed by Keith Randall

cmd/compile: improve write barrier removal

We're allowed to remove a write barrier when both the old
value in memory and the new value we're writing are not heap pointers.

Improve both those checks a little bit.

A pointer is known to not be a heap pointer if it is read from
read-only memory. This sometimes happens for loads of pointers
from string constants in read-only memory.
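
Illustration (not part of this CL; a minimal sketch of the pattern this
helps): the data pointer in the header of a constant string lives in
read-only memory, so storing it into a freshly zeroed slice backing
array cannot create a heap-to-heap pointer and needs no barrier.

	package main

	// sliceOfConst mirrors f1 in the new test below: the slice literal
	// copies the (read-only, non-heap) data pointer of "a" into a fresh,
	// zeroed allocation, so no write barrier is required.
	func sliceOfConst() []string {
		return []string{"a"}
	}

	func main() { _ = sliceOfConst() }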

Do a better job of tracking which parts of memory are known to be
zero. Before, we just kept track of a range of offsets in the most
recently allocated object. For code that initializes the new object's
fields in a nonstandard order, that tracking is imprecise. Instead,
keep a bitmap of the first 64 words of that object, so we can track
precisely what we know to be zeroed.
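
A standalone sketch of that bookkeeping (illustrative names, assuming
8-byte words; this is not the compiler's code): one bit per
pointer-sized word, set while the word is known to be zero and cleared
when it is written, which handles out-of-order initialization naturally.

	package main

	import "fmt"

	const ptrSize = 8 // assumes a 64-bit target

	// zeroMask: bit i is set while word i of the object is known zero.
	type zeroMask uint64

	// newObject returns the mask for a fresh, fully zeroed allocation,
	// capped at the 64 words we track.
	func newObject(size int64) zeroMask {
		nptr := size / ptrSize
		if nptr > 64 {
			nptr = 64
		}
		return zeroMask(1)<<uint(nptr) - 1
	}

	// store clears the bits for the words covered by a write to [off, off+size).
	func (m *zeroMask) store(off, size int64) {
		for i := off; i < off+size; i += ptrSize {
			*m &^= 1 << uint(i/ptrSize)
		}
	}

	func main() {
		m := newObject(48)      // 6-word object: mask = 111111
		m.store(40, 8)          // initialize the last word first...
		m.store(0, 8)           // ...then the first; order doesn't matter
		fmt.Printf("%06b\n", m) // 011110: words 1-4 still known zero
	}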

The new scheme is only precise for the first 64 words of the object
(512 bytes with 8-byte words). Past that point we'll use write barriers
unnecessarily (f6 in the new test below shows this). Hopefully most
initializers of large objects will use typedmemmove, which does only one
write barrier check for the whole initialization.

Fixes #34723
Update #21561

Change-Id: Idf6e1b7d525042fb67961302d4fc6f941393cac8
Reviewed-on: https://go-review.googlesource.com/c/go/+/199558
Run-TryBot: Keith Randall <khr@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Cherry Zhang <cherryyz@google.com>
parent 54001652
src/cmd/compile/fmt_test.go
@@ -195,6 +195,7 @@ var knownFormats = map[string]string{
 	"uint32 %v":   "",
 	"uint32 %x":   "",
 	"uint64 %08x": "",
+	"uint64 %b":   "",
 	"uint64 %d":   "",
 	"uint64 %x":   "",
 	"uint8 %d":    "",
src/cmd/compile/internal/ssa/writebarrier.go
@@ -8,15 +8,19 @@ import (
 	"cmd/compile/internal/types"
 	"cmd/internal/obj"
 	"cmd/internal/src"
+	"fmt"
 	"strings"
 )
 
-// A ZeroRegion records a range of an object which is known to be zero.
+// A ZeroRegion records parts of an object which are known to be zero.
 // A ZeroRegion only applies to a single memory state.
+// Each bit in mask is set if the corresponding pointer-sized word of
+// the base object is known to be zero.
+// In other words, if mask & (1<<i) != 0, then [base+i*ptrSize, base+(i+1)*ptrSize)
+// is known to be zero.
 type ZeroRegion struct {
 	base *Value
-	min  int64
-	max  int64
+	mask uint64
 }
 
 // needwb reports whether we need write barrier for store op v.
@@ -46,11 +50,26 @@ func needwb(v *Value, zeroes map[ID]ZeroRegion) bool {
 			off += ptr.AuxInt
 			ptr = ptr.Args[0]
 		}
+		ptrSize := v.Block.Func.Config.PtrSize
+		if off%ptrSize != 0 || size%ptrSize != 0 {
+			v.Fatalf("unaligned pointer write")
+		}
+		if off < 0 || off+size > 64*ptrSize {
+			// write goes off end of tracked offsets
+			return true
+		}
 		z := zeroes[v.MemoryArg().ID]
-		if ptr == z.base && off >= z.min && off+size <= z.max {
-			return false
+		if ptr != z.base {
+			return true
 		}
+		for i := off; i < off+size; i += ptrSize {
+			if z.mask>>uint(i/ptrSize)&1 == 0 {
+				return true // not known to be zero
+			}
+		}
+		// All written locations are known to be zero - write barrier not needed.
+		return false
 	}
 	return true
 }
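
Aside (illustration only, with made-up values): the per-word test above,
run by hand. A 16-byte pointer write at offset 8 into an object whose
first 6 words are known zero touches words 1 and 2; both mask bits are
set, so the barrier can be dropped.

	package main

	import "fmt"

	func main() {
		const ptrSize = int64(8)
		mask := uint64(0b111111) // first 6 words known zero
		off, size := int64(8), int64(16)

		need := false
		for i := off; i < off+size; i += ptrSize {
			if mask>>uint(i/ptrSize)&1 == 0 {
				need = true // a touched word is not known to be zero
			}
		}
		fmt.Println("write barrier needed:", need) // false
	}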
@@ -375,10 +394,11 @@ func writebarrier(f *Func) {
 // computeZeroMap returns a map from an ID of a memory value to
 // a set of locations that are known to be zeroed at that memory value.
 func (f *Func) computeZeroMap() map[ID]ZeroRegion {
+	ptrSize := f.Config.PtrSize
 	// Keep track of which parts of memory are known to be zero.
 	// This helps with removing write barriers for various initialization patterns.
 	// This analysis is conservative. We only keep track, for each memory state, of
-	// a single constant range of a single object which is known to be zero.
+	// which of the first 64 words of a single object are known to be zero.
 	zeroes := map[ID]ZeroRegion{}
 	// Find new objects.
 	for _, b := range f.Blocks {
@@ -388,7 +408,11 @@ func (f *Func) computeZeroMap() map[ID]ZeroRegion {
 			}
 			mem := v.MemoryArg()
 			if IsNewObject(v, mem) {
-				zeroes[mem.ID] = ZeroRegion{v, 0, v.Type.Elem().Size()}
+				nptr := v.Type.Elem().Size() / ptrSize
+				if nptr > 64 {
+					nptr = 64
+				}
+				zeroes[mem.ID] = ZeroRegion{base: v, mask: 1<<uint(nptr) - 1}
 			}
 		}
 	}
@@ -420,26 +444,36 @@ func (f *Func) computeZeroMap() map[ID]ZeroRegion {
 				// So we have to throw all the zero information we have away.
 				continue
 			}
-			if off < z.min || off+size > z.max {
-				// Writing, at least partially, outside the known zeroes.
-				// We could salvage some zero information, but probably
-				// not worth it.
+			// Round to cover any partially written pointer slots.
+			// Pointer writes should never be unaligned like this, but non-pointer
+			// writes to pointer-containing types will do this.
+			if d := off % ptrSize; d != 0 {
+				off -= d
+				size += d
+			}
+			if d := size % ptrSize; d != 0 {
+				size += ptrSize - d
+			}
+			// Clip to the 64 words that we track.
+			min := off
+			max := off + size
+			if min < 0 {
+				min = 0
+			}
+			if max > 64*ptrSize {
+				max = 64 * ptrSize
+			}
+			// Clear bits for parts that we are writing (and hence
+			// will no longer necessarily be zero).
+			for i := min; i < max; i += ptrSize {
+				bit := i / ptrSize
+				z.mask &^= 1 << uint(bit)
+			}
+			if z.mask == 0 {
+				// No more known zeros - don't bother keeping.
 				continue
 			}
-			// We now know we're storing to a zeroed area.
-			// We need to make a smaller zero range for the result of this store.
-			if off == z.min {
-				z.min += size
-			} else if off+size == z.max {
-				z.max -= size
-			} else {
-				// The store splits the known zero range in two.
-				// Keep track of the upper one, as we tend to initialize
-				// things in increasing memory order.
-				// TODO: keep track of larger one instead?
-				z.min = off + size
-			}
-			// Save updated zero range.
+			// Save updated known zero contents for new store.
 			if zeroes[v.ID] != z {
 				zeroes[v.ID] = z
 				changed = true
@@ -450,6 +484,12 @@ func (f *Func) computeZeroMap() map[ID]ZeroRegion {
 			break
 		}
 	}
+	if f.pass.debug > 0 {
+		fmt.Printf("func %s\n", f.Name)
+		for mem, z := range zeroes {
+			fmt.Printf("  memory=v%d ptr=%v zeromask=%b\n", mem, z.base, z.mask)
+		}
+	}
 	return zeroes
 }
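
Aside (illustration only, with made-up values): the rounding above, run
by hand. A 2-byte non-pointer store at offset 10 must invalidate the
whole 8-byte word containing it, since zeroness is tracked per word.

	package main

	import "fmt"

	func main() {
		const ptrSize = int64(8)
		off, size := int64(10), int64(2)

		if d := off % ptrSize; d != 0 {
			off -= d
			size += d
		}
		if d := size % ptrSize; d != 0 {
			size += ptrSize - d
		}
		fmt.Println(off, size) // 8 8: only word 1 loses its known-zero bit
	}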
@@ -512,20 +552,23 @@ func IsGlobalAddr(v *Value) bool {
 	if v.Op == OpConstNil {
 		return true
 	}
+	if v.Op == OpLoad && IsReadOnlyGlobalAddr(v.Args[0]) {
+		return true // loading from a read-only global - the resulting address can't be a heap address.
+	}
 	return false
 }
 
 // IsReadOnlyGlobalAddr reports whether v is known to be an address of a read-only global.
 func IsReadOnlyGlobalAddr(v *Value) bool {
-	if !IsGlobalAddr(v) {
-		return false
-	}
 	if v.Op == OpConstNil {
 		// Nil pointers are read only. See issue 33438.
 		return true
 	}
 	// See TODO in OpAddr case in IsSanitizerSafeAddr below.
-	return strings.HasPrefix(v.Aux.(*obj.LSym).Name, `""..stmp_`)
+	if v.Op == OpAddr && strings.HasPrefix(v.Aux.(*obj.LSym).Name, `""..stmp_`) {
+		return true
+	}
+	return false
 }
 
 // IsNewObject reports whether v is a pointer to a freshly allocated & zeroed object at memory state mem.
test/fixedbugs/issue34723.go (new file)

// errorcheck -0 -d=wb

// Copyright 2019 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Make sure we don't introduce write barriers where we
// don't need them. These cases are writing pointers to
// globals to zeroed memory.

package main

func f1() []string {
	return []string{"a"}
}

func f2() []string {
	return []string{"a", "b"}
}

type T struct {
	a [6]*int
}

func f3() *T {
	t := new(T)
	t.a[0] = &g
	t.a[1] = &g
	t.a[2] = &g
	t.a[3] = &g
	t.a[4] = &g
	t.a[5] = &g
	return t
}

func f4() *T {
	t := new(T)
	t.a[5] = &g
	t.a[4] = &g
	t.a[3] = &g
	t.a[2] = &g
	t.a[1] = &g
	t.a[0] = &g
	return t
}

func f5() *T {
	t := new(T)
	t.a[4] = &g
	t.a[2] = &g
	t.a[0] = &g
	t.a[3] = &g
	t.a[1] = &g
	t.a[5] = &g
	return t
}

type U struct {
	a [65]*int
}

func f6() *U {
	u := new(U)
	u.a[63] = &g
	// This offset is too large: we only track the first 64 pointers for zeroness.
	u.a[64] = &g // ERROR "write barrier"
	return u
}

var g int
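
To exercise this test by hand, the usual harness invocation from the
repo's test directory is something like: go run run.go -- fixedbugs/issue34723.go.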