Commit 30da79d9 authored by Keith Randall, committed by Keith Randall

cmd/compile: improve write barrier removal

We're allowed to remove a write barrier when both the old
value in memory and the new value we're writing are not heap pointers.

Improve both those checks a little bit.

A pointer is known to not be a heap pointer if it is read from
read-only memory. This sometimes happens for loads of pointers
from string constants in read-only memory.
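
Illustration (not part of this CL; a minimal sketch of the pattern this
helps): the data pointer in the header of a constant string lives in
read-only memory, so storing it into a freshly zeroed slice backing
array cannot create a heap-to-heap pointer and needs no barrier.

	package main

	// sliceOfConst mirrors f1 in the new test below: the slice literal
	// copies the (read-only, non-heap) data pointer of "a" into a fresh,
	// zeroed allocation, so no write barrier is required.
	func sliceOfConst() []string {
		return []string{"a"}
	}

	func main() { _ = sliceOfConst() }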

Do a better job of tracking which parts of memory are known to be
zero. Before, we just kept track of a range of offsets in the most
recently allocated object. For code that initializes the new object's
fields in a nonstandard order, that tracking is imprecise. Instead,
keep a bitmap of the first 64 words of that object, so we can track
precisely what we know to be zeroed.
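
A standalone sketch of that bookkeeping (illustrative names, assuming
8-byte words; this is not the compiler's code): one bit per
pointer-sized word, set while the word is known to be zero and cleared
when it is written, which handles out-of-order initialization naturally.

	package main

	import "fmt"

	const ptrSize = 8 // assumes a 64-bit target

	// zeroMask: bit i is set while word i of the object is known zero.
	type zeroMask uint64

	// newObject returns the mask for a fresh, fully zeroed allocation,
	// capped at the 64 words we track.
	func newObject(size int64) zeroMask {
		nptr := size / ptrSize
		if nptr > 64 {
			nptr = 64
		}
		return zeroMask(1)<<uint(nptr) - 1
	}

	// store clears the bits for the words covered by a write to [off, off+size).
	func (m *zeroMask) store(off, size int64) {
		for i := off; i < off+size; i += ptrSize {
			*m &^= 1 << uint(i/ptrSize)
		}
	}

	func main() {
		m := newObject(48)      // 6-word object: mask = 111111
		m.store(40, 8)          // initialize the last word first...
		m.store(0, 8)           // ...then the first; order doesn't matter
		fmt.Printf("%06b\n", m) // 011110: words 1-4 still known zero
	}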

The new scheme is only precise for the first 64 words of the object
(512 bytes with 8-byte words). Past that point we'll use write barriers
unnecessarily (f6 in the new test below shows this). Hopefully most
initializers of large objects will use typedmemmove, which does only one
write barrier check for the whole initialization.

Fixes #34723
Update #21561

Change-Id: Idf6e1b7d525042fb67961302d4fc6f941393cac8
Reviewed-on: https://go-review.googlesource.com/c/go/+/199558
Run-TryBot: Keith Randall <khr@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Cherry Zhang <cherryyz@google.com>
parent 54001652
src/cmd/compile/fmt_test.go
@@ -195,6 +195,7 @@ var knownFormats = map[string]string{
 	"uint32 %v":   "",
 	"uint32 %x":   "",
 	"uint64 %08x": "",
+	"uint64 %b":   "",
 	"uint64 %d":   "",
 	"uint64 %x":   "",
 	"uint8 %d":    "",
src/cmd/compile/internal/ssa/writebarrier.go
@@ -8,15 +8,19 @@ import (
 	"cmd/compile/internal/types"
 	"cmd/internal/obj"
 	"cmd/internal/src"
+	"fmt"
 	"strings"
 )
 
-// A ZeroRegion records a range of an object which is known to be zero.
+// A ZeroRegion records parts of an object which are known to be zero.
 // A ZeroRegion only applies to a single memory state.
+// Each bit in mask is set if the corresponding pointer-sized word of
+// the base object is known to be zero.
+// In other words, if mask & (1<<i) != 0, then [base+i*ptrSize, base+(i+1)*ptrSize)
+// is known to be zero.
 type ZeroRegion struct {
 	base *Value
-	min  int64
-	max  int64
+	mask uint64
 }
 
 // needwb reports whether we need write barrier for store op v.
@@ -46,11 +50,26 @@ func needwb(v *Value, zeroes map[ID]ZeroRegion) bool {
 			off += ptr.AuxInt
 			ptr = ptr.Args[0]
 		}
+		ptrSize := v.Block.Func.Config.PtrSize
+		if off%ptrSize != 0 || size%ptrSize != 0 {
+			v.Fatalf("unaligned pointer write")
+		}
+		if off < 0 || off+size > 64*ptrSize {
+			// write goes off end of tracked offsets
+			return true
+		}
 		z := zeroes[v.MemoryArg().ID]
-		if ptr == z.base && off >= z.min && off+size <= z.max {
-			return false
+		if ptr != z.base {
+			return true
 		}
+		for i := off; i < off+size; i += ptrSize {
+			if z.mask>>uint(i/ptrSize)&1 == 0 {
+				return true // not known to be zero
+			}
+		}
+		// All written locations are known to be zero - write barrier not needed.
+		return false
 	}
 	return true
 }
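
Aside (illustration only, with made-up values): the per-word test above,
run by hand. A 16-byte pointer write at offset 8 into an object whose
first 6 words are known zero touches words 1 and 2; both mask bits are
set, so the barrier can be dropped.

	package main

	import "fmt"

	func main() {
		const ptrSize = int64(8)
		mask := uint64(0b111111) // first 6 words known zero
		off, size := int64(8), int64(16)

		need := false
		for i := off; i < off+size; i += ptrSize {
			if mask>>uint(i/ptrSize)&1 == 0 {
				need = true // a touched word is not known to be zero
			}
		}
		fmt.Println("write barrier needed:", need) // false
	}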
@@ -375,10 +394,11 @@ func writebarrier(f *Func) {
 // computeZeroMap returns a map from an ID of a memory value to
 // a set of locations that are known to be zeroed at that memory value.
 func (f *Func) computeZeroMap() map[ID]ZeroRegion {
+	ptrSize := f.Config.PtrSize
 	// Keep track of which parts of memory are known to be zero.
 	// This helps with removing write barriers for various initialization patterns.
 	// This analysis is conservative. We only keep track, for each memory state, of
-	// a single constant range of a single object which is known to be zero.
+	// which of the first 64 words of a single object are known to be zero.
 	zeroes := map[ID]ZeroRegion{}
 	// Find new objects.
 	for _, b := range f.Blocks {
@@ -388,7 +408,11 @@ func (f *Func) computeZeroMap() map[ID]ZeroRegion {
 			}
 			mem := v.MemoryArg()
 			if IsNewObject(v, mem) {
-				zeroes[mem.ID] = ZeroRegion{v, 0, v.Type.Elem().Size()}
+				nptr := v.Type.Elem().Size() / ptrSize
+				if nptr > 64 {
+					nptr = 64
+				}
+				zeroes[mem.ID] = ZeroRegion{base: v, mask: 1<<uint(nptr) - 1}
 			}
 		}
 	}
@@ -420,26 +444,36 @@ func (f *Func) computeZeroMap() map[ID]ZeroRegion {
 				// So we have to throw all the zero information we have away.
 				continue
 			}
-			if off < z.min || off+size > z.max {
-				// Writing, at least partially, outside the known zeroes.
-				// We could salvage some zero information, but probably
-				// not worth it.
+			// Round to cover any partially written pointer slots.
+			// Pointer writes should never be unaligned like this, but non-pointer
+			// writes to pointer-containing types will do this.
+			if d := off % ptrSize; d != 0 {
+				off -= d
+				size += d
+			}
+			if d := size % ptrSize; d != 0 {
+				size += ptrSize - d
+			}
+			// Clip to the 64 words that we track.
+			min := off
+			max := off + size
+			if min < 0 {
+				min = 0
+			}
+			if max > 64*ptrSize {
+				max = 64 * ptrSize
+			}
+			// Clear bits for parts that we are writing (and hence
+			// will no longer necessarily be zero).
+			for i := min; i < max; i += ptrSize {
+				bit := i / ptrSize
+				z.mask &^= 1 << uint(bit)
+			}
+			if z.mask == 0 {
+				// No more known zeros - don't bother keeping.
 				continue
 			}
-			// We now know we're storing to a zeroed area.
-			// We need to make a smaller zero range for the result of this store.
-			if off == z.min {
-				z.min += size
-			} else if off+size == z.max {
-				z.max -= size
-			} else {
-				// The store splits the known zero range in two.
-				// Keep track of the upper one, as we tend to initialize
-				// things in increasing memory order.
-				// TODO: keep track of larger one instead?
-				z.min = off + size
-			}
-			// Save updated zero range.
+			// Save updated known zero contents for new store.
 			if zeroes[v.ID] != z {
 				zeroes[v.ID] = z
 				changed = true
@@ -450,6 +484,12 @@ func (f *Func) computeZeroMap() map[ID]ZeroRegion {
 			break
 		}
 	}
+	if f.pass.debug > 0 {
+		fmt.Printf("func %s\n", f.Name)
+		for mem, z := range zeroes {
+			fmt.Printf("  memory=v%d ptr=%v zeromask=%b\n", mem, z.base, z.mask)
+		}
+	}
 	return zeroes
 }
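
Aside (illustration only, with made-up values): the rounding above, run
by hand. A 2-byte non-pointer store at offset 10 must invalidate the
whole 8-byte word containing it, since zeroness is tracked per word.

	package main

	import "fmt"

	func main() {
		const ptrSize = int64(8)
		off, size := int64(10), int64(2)

		if d := off % ptrSize; d != 0 {
			off -= d
			size += d
		}
		if d := size % ptrSize; d != 0 {
			size += ptrSize - d
		}
		fmt.Println(off, size) // 8 8: only word 1 loses its known-zero bit
	}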
@@ -512,20 +552,23 @@ func IsGlobalAddr(v *Value) bool {
 	if v.Op == OpConstNil {
 		return true
 	}
+	if v.Op == OpLoad && IsReadOnlyGlobalAddr(v.Args[0]) {
+		return true // loading from a read-only global - the resulting address can't be a heap address.
+	}
 	return false
 }
 
 // IsReadOnlyGlobalAddr reports whether v is known to be an address of a read-only global.
 func IsReadOnlyGlobalAddr(v *Value) bool {
-	if !IsGlobalAddr(v) {
-		return false
-	}
 	if v.Op == OpConstNil {
 		// Nil pointers are read only. See issue 33438.
 		return true
 	}
 	// See TODO in OpAddr case in IsSanitizerSafeAddr below.
-	return strings.HasPrefix(v.Aux.(*obj.LSym).Name, `""..stmp_`)
+	if v.Op == OpAddr && strings.HasPrefix(v.Aux.(*obj.LSym).Name, `""..stmp_`) {
+		return true
+	}
+	return false
 }
 
 // IsNewObject reports whether v is a pointer to a freshly allocated & zeroed object at memory state mem.
test/fixedbugs/issue34723.go (new file)

// errorcheck -0 -d=wb

// Copyright 2019 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Make sure we don't introduce write barriers where we
// don't need them. These cases are writing pointers to
// globals to zeroed memory.

package main

func f1() []string {
	return []string{"a"}
}

func f2() []string {
	return []string{"a", "b"}
}

type T struct {
	a [6]*int
}

func f3() *T {
	t := new(T)
	t.a[0] = &g
	t.a[1] = &g
	t.a[2] = &g
	t.a[3] = &g
	t.a[4] = &g
	t.a[5] = &g
	return t
}

func f4() *T {
	t := new(T)
	t.a[5] = &g
	t.a[4] = &g
	t.a[3] = &g
	t.a[2] = &g
	t.a[1] = &g
	t.a[0] = &g
	return t
}

func f5() *T {
	t := new(T)
	t.a[4] = &g
	t.a[2] = &g
	t.a[0] = &g
	t.a[3] = &g
	t.a[1] = &g
	t.a[5] = &g
	return t
}

type U struct {
	a [65]*int
}

func f6() *U {
	u := new(U)
	u.a[63] = &g
	// This offset is too large: we only track the first 64 pointers for zeroness.
	u.a[64] = &g // ERROR "write barrier"
	return u
}

var g int
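
To exercise this test by hand, the usual harness invocation from the
repo's test directory is something like: go run run.go -- fixedbugs/issue34723.go.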