Commit 877387e3 authored by Austin Clements

runtime: use buffered write barrier for bulkBarrierPreWrite

This modifies bulkBarrierPreWrite to use the buffered write barrier
instead of the eager write barrier. This reduces the number of system
stack switches and sanity checks by a factor of the buffer size
(currently 256). This affects both typedmemmove and typedmemclr.

Since this is purely a runtime change, it applies to all arches
(unlike the pointer write barrier).

name                 old time/op  new time/op  delta
BulkWriteBarrier-12  7.33ns ± 6%  4.46ns ± 9%  -39.10%  (p=0.000 n=20+19)
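The win comes from amortization: instead of one eager-barrier call (and system stack switch) per pointer slot, each slot now costs a cheap buffer append, and the expensive flush runs only once per 256 entries. A minimal, self-contained sketch of that calling pattern in plain Go (illustrative only; wbBuffer, putFast, flush, and bulkBarrier here are stand-ins for the runtime's wbBuf, wbBuf.putFast, wbBufFlush, and bulkBarrierPreWrite shown in the diff below):

package main

import "fmt"

// wbBuffer models the runtime's per-P write barrier buffer: appends
// are cheap, and the expensive marking work is deferred to a flush
// that runs once per 256 old/new pairs.
type wbBuffer struct {
	pairs   [256][2]uintptr
	n       int
	flushes int
}

// putFast records an old/new pointer pair and reports whether there
// is still room in the buffer (mirroring wbBuf.putFast below).
func (b *wbBuffer) putFast(old, new uintptr) bool {
	b.pairs[b.n] = [2]uintptr{old, new}
	b.n++
	return b.n != len(b.pairs)
}

// flush stands in for wbBufFlush: the real version greys every
// buffered pointer on the system stack, then resets the buffer.
func (b *wbBuffer) flush() {
	b.flushes++
	b.n = 0
}

// bulkBarrier shows the pattern bulkBarrierPreWrite now uses: one
// putFast per pointer slot, one flush per full buffer.
func bulkBarrier(buf *wbBuffer, dst, src []uintptr) {
	for i := range dst {
		if !buf.putFast(dst[i], src[i]) {
			buf.flush()
		}
	}
}

func main() {
	var buf wbBuffer
	dst := make([]uintptr, 4096)
	src := make([]uintptr, 4096)
	bulkBarrier(&buf, dst, src)
	fmt.Println("flushes:", buf.flushes) // 16 flushes instead of 4096 eager barrier calls
}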

Updates #22460.

Change-Id: I6a686a63bbf08be02b9b97250e37163c5a90cdd8
Reviewed-on: https://go-review.googlesource.com/73832
Run-TryBot: Austin Clements <austin@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Rick Hudson <rlh@golang.org>
parent 6a5f1e58
@@ -239,6 +239,10 @@ func writebarrierptr_prewrite(dst *uintptr, src uintptr) {
// typedmemmove copies a value of type t to dst from src.
// Must be nosplit, see #16026.
//
// TODO: Perfect for go:nosplitrec since we can't have a safe point
// anywhere in the bulk barrier or memmove.
//
//go:nosplit
func typedmemmove(typ *_type, dst, src unsafe.Pointer) {
if typ.kind&kindNoPointers == 0 {
......
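For reference, the collapsed body of typedmemmove above follows the ordering the commit relies on: run the bulk pre-write barrier over the destination first, then do the copy, with no safe point permitted between the two. A paraphrased sketch of that shape (a fragment; hasPointers and the barrier callback stand in for the kindNoPointers check and bulkBarrierPreWrite, which this commit makes buffered):

// typedCopy sketches the typedmemmove ordering: the pre-write barrier
// must observe the destination's old pointers (and the incoming ones)
// before the copy overwrites them.
func typedCopy(hasPointers bool, dst, src []uintptr, barrier func(dst, src []uintptr)) {
	if hasPointers {
		barrier(dst, src) // stands in for bulkBarrierPreWrite(dst, src, size)
	}
	copy(dst, src) // stands in for memmove; no safe point may occur in between
}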
@@ -523,12 +523,13 @@ func (h heapBits) setCheckmarked(size uintptr) {
atomic.Or8(h.bitp, bitScan<<(heapBitsShift+h.shift))
}
// bulkBarrierPreWrite executes writebarrierptr_prewrite1
// bulkBarrierPreWrite executes a write barrier
// for every pointer slot in the memory range [src, src+size),
// using pointer/scalar information from [dst, dst+size).
// This executes the write barriers necessary before a memmove.
// src, dst, and size must be pointer-aligned.
// The range [dst, dst+size) must lie within a single object.
// It does not perform the actual writes.
//
// As a special case, src == 0 indicates that this is being used for a
// memclr. bulkBarrierPreWrite will pass 0 for the src of each write
@@ -578,12 +579,15 @@ func bulkBarrierPreWrite(dst, src, size uintptr) {
return
}
buf := &getg().m.p.ptr().wbBuf
h := heapBitsForAddr(dst)
if src == 0 {
for i := uintptr(0); i < size; i += sys.PtrSize {
if h.isPointer() {
dstx := (*uintptr)(unsafe.Pointer(dst + i))
writebarrierptr_prewrite1(dstx, 0)
if !buf.putFast(*dstx, 0) {
wbBufFlush(nil, 0)
}
}
h = h.next()
}
@@ -592,7 +596,9 @@ func bulkBarrierPreWrite(dst, src, size uintptr) {
if h.isPointer() {
dstx := (*uintptr)(unsafe.Pointer(dst + i))
srcx := (*uintptr)(unsafe.Pointer(src + i))
writebarrierptr_prewrite1(dstx, *srcx)
if !buf.putFast(*dstx, *srcx) {
wbBufFlush(nil, 0)
}
}
h = h.next()
}
@@ -612,6 +618,7 @@ func bulkBarrierBitmap(dst, src, size, maskOffset uintptr, bits *uint8) {
bits = addb(bits, word/8)
mask := uint8(1) << (word % 8)
buf := &getg().m.p.ptr().wbBuf
for i := uintptr(0); i < size; i += sys.PtrSize {
if mask == 0 {
bits = addb(bits, 1)
@@ -625,10 +632,14 @@ func bulkBarrierBitmap(dst, src, size, maskOffset uintptr, bits *uint8) {
if *bits&mask != 0 {
dstx := (*uintptr)(unsafe.Pointer(dst + i))
if src == 0 {
writebarrierptr_prewrite1(dstx, 0)
if !buf.putFast(*dstx, 0) {
wbBufFlush(nil, 0)
}
} else {
srcx := (*uintptr)(unsafe.Pointer(src + i))
writebarrierptr_prewrite1(dstx, *srcx)
if !buf.putFast(*dstx, *srcx) {
wbBufFlush(nil, 0)
}
}
}
mask <<= 1
......
@@ -20,6 +20,7 @@
package runtime
import (
"runtime/internal/sys"
"unsafe"
)
@@ -94,6 +95,37 @@ func (b *wbBuf) reset() {
}
}
// putFast adds old and new to the write barrier buffer and returns
// false if a flush is necessary. Callers should use this as:
//
// buf := &getg().m.p.ptr().wbBuf
// if !buf.putFast(old, new) {
// wbBufFlush(...)
// }
//
// The arguments to wbBufFlush depend on whether the caller is doing
// its own cgo pointer checks. If it is, then this can be
// wbBufFlush(nil, 0). Otherwise, it must pass the slot address and
// new.
//
// Since buf is a per-P resource, the caller must ensure there are no
// preemption points while buf is in use.
//
// It must be nowritebarrierrec because write barriers here would
// corrupt the write barrier buffer. It (and everything it calls, if
// it called anything) has to be nosplit to avoid scheduling onto a
// different P and a different buffer.
//
//go:nowritebarrierrec
//go:nosplit
func (b *wbBuf) putFast(old, new uintptr) bool {
p := (*[2]uintptr)(unsafe.Pointer(b.next))
p[0] = old
p[1] = new
b.next += 2 * sys.PtrSize
return b.next != b.end
}
// wbBufFlush flushes the current P's write barrier buffer to the GC
// workbufs. It is passed the slot and value of the write barrier that
// caused the flush so that it can implement cgocheck.
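One subtlety of the putFast contract above: the old/new pair is always stored before the fullness check, so a false return means "the buffer is now full", not "the entry was dropped"; the flush the caller then performs drains the just-recorded pair along with the rest. A tiny self-contained illustration (a hypothetical two-entry buffer rather than the real 256):

package main

import "fmt"

// tinyBuf is a two-pair stand-in for wbBuf, just to show the
// store-then-report-full behavior of putFast.
type tinyBuf struct {
	pairs [2][2]uintptr
	n     int
}

func (b *tinyBuf) putFast(old, new uintptr) bool {
	b.pairs[b.n] = [2]uintptr{old, new} // the pair is recorded unconditionally
	b.n++
	return b.n != len(b.pairs) // false only once the buffer has just filled up
}

func main() {
	var b tinyBuf
	fmt.Println(b.putFast(1, 2)) // true: room remains
	fmt.Println(b.putFast(3, 4)) // false: full; the caller flushes, and the
	// flush sees both pairs, including (3, 4)
}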
@@ -118,7 +150,7 @@ func wbBufFlush(dst *uintptr, src uintptr) {
return
}
if writeBarrier.cgo {
if writeBarrier.cgo && dst != nil {
// This must be called from the stack that did the
// write. It's nosplit all the way down.
cgoCheckWriteBarrier(dst, src)
......
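Finally, the dst != nil guard added to wbBufFlush ties back to the putFast comment: the bulk-barrier paths above call wbBufFlush(nil, 0) because they do their own cgo pointer checking, so the flush has to skip the per-slot cgoCheckWriteBarrier call when no slot was supplied. A minimal sketch of that dispatch (a fragment; every name except cgoCheckWriteBarrier is hypothetical):

// flushSketch mirrors the guard above: run the per-slot cgo check only
// when the caller supplied the written slot; bulk callers pass nil.
func flushSketch(cgoEnabled bool, dst *uintptr, src uintptr,
	cgoCheck func(*uintptr, uintptr), drainBuffer func()) {
	if cgoEnabled && dst != nil {
		cgoCheck(dst, src) // stands in for cgoCheckWriteBarrier(dst, src)
	}
	drainBuffer() // grey every buffered pointer and reset the buffer
}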