Commit 32d6fbcb authored by Russ Cox's avatar Russ Cox

runtime: replace needwb() with writeBarrierEnabled

Reduce the write barrier check to a single load and compare
so that it can be inlined into write barrier use sites.
Makes the standard write barrier a little faster too.

name                                       old                     new          delta
BenchmarkBinaryTree17              17.9s × (0.99,1.01)     17.9s × (1.00,1.01)  ~
BenchmarkFannkuch11                4.35s × (1.00,1.00)     4.43s × (1.00,1.00)  +1.81%
BenchmarkFmtFprintfEmpty           120ns × (0.93,1.06)     110ns × (1.00,1.06)  -7.92%
BenchmarkFmtFprintfString          479ns × (0.99,1.00)     487ns × (0.99,1.00)  +1.67%
BenchmarkFmtFprintfInt             452ns × (0.99,1.02)     450ns × (0.99,1.00)  ~
BenchmarkFmtFprintfIntInt          766ns × (0.99,1.01)     762ns × (1.00,1.00)  ~
BenchmarkFmtFprintfPrefixedInt     576ns × (0.98,1.01)     584ns × (0.99,1.01)  ~
BenchmarkFmtFprintfFloat           730ns × (1.00,1.01)     738ns × (1.00,1.00)  +1.16%
BenchmarkFmtManyArgs              2.84µs × (0.99,1.00)    2.80µs × (1.00,1.01)  -1.22%
BenchmarkGobDecode                39.3ms × (0.98,1.01)    39.0ms × (0.99,1.00)  ~
BenchmarkGobEncode                39.5ms × (0.99,1.01)    37.8ms × (0.98,1.01)  -4.33%
BenchmarkGzip                      663ms × (1.00,1.01)     661ms × (0.99,1.01)  ~
BenchmarkGunzip                    143ms × (1.00,1.00)     142ms × (1.00,1.00)  ~
BenchmarkHTTPClientServer          132µs × (0.99,1.01)     132µs × (0.99,1.01)  ~
BenchmarkJSONEncode               57.4ms × (0.99,1.01)    56.3ms × (0.99,1.01)  -1.96%
BenchmarkJSONDecode                139ms × (0.99,1.00)     138ms × (0.99,1.01)  ~
BenchmarkMandelbrot200            6.03ms × (1.00,1.00)    6.01ms × (1.00,1.00)  ~
BenchmarkGoParse                  10.3ms × (0.89,1.14)    10.2ms × (0.87,1.05)  ~
BenchmarkRegexpMatchEasy0_32       209ns × (1.00,1.00)     208ns × (1.00,1.00)  ~
BenchmarkRegexpMatchEasy0_1K       591ns × (0.99,1.00)     588ns × (1.00,1.00)  ~
BenchmarkRegexpMatchEasy1_32       184ns × (0.99,1.02)     182ns × (0.99,1.01)  ~
BenchmarkRegexpMatchEasy1_1K      1.01µs × (1.00,1.00)    0.99µs × (1.00,1.01)  -2.33%
BenchmarkRegexpMatchMedium_32      330ns × (1.00,1.00)     323ns × (1.00,1.01)  -2.12%
BenchmarkRegexpMatchMedium_1K     92.6µs × (1.00,1.00)    89.9µs × (1.00,1.00)  -2.92%
BenchmarkRegexpMatchHard_32       4.80µs × (0.95,1.00)    4.72µs × (0.95,1.01)  ~
BenchmarkRegexpMatchHard_1K        136µs × (1.00,1.00)     133µs × (1.00,1.01)  -1.86%
BenchmarkRevcomp                   900ms × (0.99,1.04)     900ms × (1.00,1.05)  ~
BenchmarkTemplate                  172ms × (1.00,1.00)     168ms × (0.99,1.01)  -2.07%
BenchmarkTimeParse                 637ns × (1.00,1.00)     637ns × (1.00,1.00)  ~
BenchmarkTimeFormat                744ns × (1.00,1.01)     738ns × (1.00,1.00)  -0.67%

Change-Id: I4ecc925805da1f5ee264377f1f7574f54ee575e7
Reviewed-on: https://go-review.googlesource.com/9321Reviewed-by: default avatarAustin Clements <austin@google.com>
parent 2050f571
...@@ -76,13 +76,6 @@ func gcmarkwb_m(slot *uintptr, ptr uintptr) { ...@@ -76,13 +76,6 @@ func gcmarkwb_m(slot *uintptr, ptr uintptr) {
} }
} }
// needwb reports whether a write barrier is needed now
// (otherwise the write can be made directly).
//go:nosplit
func needwb() bool {
return gcphase == _GCmark || gcphase == _GCmarktermination || mheap_.shadow_enabled
}
// Write barrier calls must not happen during critical GC and scheduler // Write barrier calls must not happen during critical GC and scheduler
// related operations. In particular there are times when the GC assumes // related operations. In particular there are times when the GC assumes
// that the world is stopped but scheduler related code is still being // that the world is stopped but scheduler related code is still being
...@@ -114,7 +107,7 @@ func writebarrierptr_nostore1(dst *uintptr, src uintptr) { ...@@ -114,7 +107,7 @@ func writebarrierptr_nostore1(dst *uintptr, src uintptr) {
// but if we do that, Go inserts a write barrier on *dst = src. // but if we do that, Go inserts a write barrier on *dst = src.
//go:nosplit //go:nosplit
func writebarrierptr(dst *uintptr, src uintptr) { func writebarrierptr(dst *uintptr, src uintptr) {
if !needwb() { if !writeBarrierEnabled {
*dst = src *dst = src
return return
} }
...@@ -155,7 +148,7 @@ func writebarrierptr_shadow(dst *uintptr, src uintptr) { ...@@ -155,7 +148,7 @@ func writebarrierptr_shadow(dst *uintptr, src uintptr) {
// Do not reapply. // Do not reapply.
//go:nosplit //go:nosplit
func writebarrierptr_nostore(dst *uintptr, src uintptr) { func writebarrierptr_nostore(dst *uintptr, src uintptr) {
if !needwb() { if !writeBarrierEnabled {
return return
} }
...@@ -224,7 +217,7 @@ func writebarrieriface(dst *[2]uintptr, src [2]uintptr) { ...@@ -224,7 +217,7 @@ func writebarrieriface(dst *[2]uintptr, src [2]uintptr) {
// typedmemmove copies a value of type t to dst from src. // typedmemmove copies a value of type t to dst from src.
//go:nosplit //go:nosplit
func typedmemmove(typ *_type, dst, src unsafe.Pointer) { func typedmemmove(typ *_type, dst, src unsafe.Pointer) {
if !needwb() || (typ.kind&kindNoPointers) != 0 { if !writeBarrierEnabled || (typ.kind&kindNoPointers) != 0 {
memmove(dst, src, typ.size) memmove(dst, src, typ.size)
return return
} }
...@@ -266,7 +259,7 @@ func reflect_typedmemmove(typ *_type, dst, src unsafe.Pointer) { ...@@ -266,7 +259,7 @@ func reflect_typedmemmove(typ *_type, dst, src unsafe.Pointer) {
// dst and src point off bytes into the value and only copies size bytes. // dst and src point off bytes into the value and only copies size bytes.
//go:linkname reflect_typedmemmovepartial reflect.typedmemmovepartial //go:linkname reflect_typedmemmovepartial reflect.typedmemmovepartial
func reflect_typedmemmovepartial(typ *_type, dst, src unsafe.Pointer, off, size uintptr) { func reflect_typedmemmovepartial(typ *_type, dst, src unsafe.Pointer, off, size uintptr) {
if !needwb() || (typ.kind&kindNoPointers) != 0 || size < ptrSize { if !writeBarrierEnabled || (typ.kind&kindNoPointers) != 0 || size < ptrSize {
memmove(dst, src, size) memmove(dst, src, size)
return return
} }
...@@ -309,7 +302,7 @@ func reflect_typedmemmovepartial(typ *_type, dst, src unsafe.Pointer, off, size ...@@ -309,7 +302,7 @@ func reflect_typedmemmovepartial(typ *_type, dst, src unsafe.Pointer, off, size
// not to be preempted before the write barriers have been run. // not to be preempted before the write barriers have been run.
//go:nosplit //go:nosplit
func callwritebarrier(typ *_type, frame unsafe.Pointer, framesize, retoffset uintptr) { func callwritebarrier(typ *_type, frame unsafe.Pointer, framesize, retoffset uintptr) {
if !needwb() || typ == nil || (typ.kind&kindNoPointers) != 0 || framesize-retoffset < ptrSize { if !writeBarrierEnabled || typ == nil || (typ.kind&kindNoPointers) != 0 || framesize-retoffset < ptrSize {
return return
} }
...@@ -349,7 +342,7 @@ func typedslicecopy(typ *_type, dst, src slice) int { ...@@ -349,7 +342,7 @@ func typedslicecopy(typ *_type, dst, src slice) int {
racereadrangepc(srcp, uintptr(n)*typ.size, callerpc, pc) racereadrangepc(srcp, uintptr(n)*typ.size, callerpc, pc)
} }
if !needwb() { if !writeBarrierEnabled {
memmove(dstp, srcp, uintptr(n)*typ.size) memmove(dstp, srcp, uintptr(n)*typ.size)
return n return n
} }
...@@ -465,6 +458,7 @@ func wbshadowinit() { ...@@ -465,6 +458,7 @@ func wbshadowinit() {
} }
mheap_.shadow_enabled = true mheap_.shadow_enabled = true
writeBarrierEnabled = true
} }
// shadowptr returns a pointer to the shadow value for addr. // shadowptr returns a pointer to the shadow value for addr.
......
...@@ -170,6 +170,32 @@ func setGCPercent(in int32) (out int32) { ...@@ -170,6 +170,32 @@ func setGCPercent(in int32) (out int32) {
return out return out
} }
// Garbage collector phase.
// Indicates to write barrier and sychronization task to preform.
var gcphase uint32
var writeBarrierEnabled bool // compiler emits references to this in write barriers
// gcBlackenEnabled is 1 if mutator assists and background mark
// workers are allowed to blacken objects. This must only be set when
// gcphase == _GCmark.
var gcBlackenEnabled uint32
const (
_GCoff = iota // GC not running, write barrier disabled
_GCquiesce // unused state
_GCstw // unused state
_GCscan // GC collecting roots into workbufs, write barrier disabled
_GCmark // GC marking from workbufs, write barrier ENABLED
_GCmarktermination // GC mark termination: allocate black, P's help GC, write barrier ENABLED
_GCsweep // GC mark completed; sweeping in background, write barrier disabled
)
//go:nosplit
func setGCPhase(x uint32) {
atomicstore(&gcphase, x)
writeBarrierEnabled = gcphase == _GCmark || gcphase == _GCmarktermination || mheap_.shadow_enabled
}
// gcMarkWorkerMode represents the mode that a concurrent mark worker // gcMarkWorkerMode represents the mode that a concurrent mark worker
// should operate in. // should operate in.
// //
...@@ -753,7 +779,7 @@ func gc(mode int) { ...@@ -753,7 +779,7 @@ func gc(mode int) {
heapGoal = gcController.heapGoal heapGoal = gcController.heapGoal
systemstack(func() { systemstack(func() {
gcphase = _GCscan setGCPhase(_GCscan)
// Concurrent scan. // Concurrent scan.
starttheworld() starttheworld()
...@@ -769,7 +795,7 @@ func gc(mode int) { ...@@ -769,7 +795,7 @@ func gc(mode int) {
if debug.gctrace > 0 { if debug.gctrace > 0 {
tInstallWB = nanotime() tInstallWB = nanotime()
} }
atomicstore(&gcphase, _GCmark) setGCPhase(_GCmark)
// Ensure all Ps have observed the phase // Ensure all Ps have observed the phase
// change and have write barriers enabled // change and have write barriers enabled
// before any blackening occurs. // before any blackening occurs.
...@@ -826,7 +852,7 @@ func gc(mode int) { ...@@ -826,7 +852,7 @@ func gc(mode int) {
// World is stopped. // World is stopped.
// Start marktermination which includes enabling the write barrier. // Start marktermination which includes enabling the write barrier.
atomicstore(&gcBlackenEnabled, 0) atomicstore(&gcBlackenEnabled, 0)
gcphase = _GCmarktermination setGCPhase(_GCmarktermination)
if debug.gctrace > 0 { if debug.gctrace > 0 {
heap1 = memstats.heap_live heap1 = memstats.heap_live
...@@ -862,7 +888,7 @@ func gc(mode int) { ...@@ -862,7 +888,7 @@ func gc(mode int) {
} }
// marking is complete so we can turn the write barrier off // marking is complete so we can turn the write barrier off
gcphase = _GCoff setGCPhase(_GCoff)
gcSweep(mode) gcSweep(mode)
if debug.gctrace > 1 { if debug.gctrace > 1 {
...@@ -876,9 +902,9 @@ func gc(mode int) { ...@@ -876,9 +902,9 @@ func gc(mode int) {
// Still in STW but gcphase is _GCoff, reset to _GCmarktermination // Still in STW but gcphase is _GCoff, reset to _GCmarktermination
// At this point all objects will be found during the gcMark which // At this point all objects will be found during the gcMark which
// does a complete STW mark and object scan. // does a complete STW mark and object scan.
gcphase = _GCmarktermination setGCPhase(_GCmarktermination)
gcMark(startTime) gcMark(startTime)
gcphase = _GCoff // marking is done, turn off wb. setGCPhase(_GCoff) // marking is done, turn off wb.
gcSweep(mode) gcSweep(mode)
} }
}) })
......
...@@ -514,30 +514,12 @@ type lfnode struct { ...@@ -514,30 +514,12 @@ type lfnode struct {
pushcnt uintptr pushcnt uintptr
} }
// Indicates to write barrier and sychronization task to preform.
const (
_GCoff = iota // GC not running, write barrier disabled
_GCquiesce // unused state
_GCstw // unused state
_GCscan // GC collecting roots into workbufs, write barrier disabled
_GCmark // GC marking from workbufs, write barrier ENABLED
_GCmarktermination // GC mark termination: allocate black, P's help GC, write barrier ENABLED
_GCsweep // GC mark completed; sweeping in background, write barrier disabled
)
type forcegcstate struct { type forcegcstate struct {
lock mutex lock mutex
g *g g *g
idle uint32 idle uint32
} }
var gcphase uint32
// gcBlackenEnabled is 1 if mutator assists and background mark
// workers are allowed to blacken objects. This must only be set when
// gcphase == _GCmark.
var gcBlackenEnabled uint32
/* /*
* known to compiler * known to compiler
*/ */
......
...@@ -84,7 +84,7 @@ func growslice(t *slicetype, old slice, n int) slice { ...@@ -84,7 +84,7 @@ func growslice(t *slicetype, old slice, n int) slice {
memclr(add(p, lenmem), capmem-lenmem) memclr(add(p, lenmem), capmem-lenmem)
} else { } else {
// Note: can't use rawmem (which avoids zeroing of memory), because then GC can scan unitialized memory. // Note: can't use rawmem (which avoids zeroing of memory), because then GC can scan unitialized memory.
// TODO(rsc): Use memmove when !needwb(). // TODO(rsc): Use memmove when !writeBarrierEnabled.
p = newarray(et, uintptr(newcap)) p = newarray(et, uintptr(newcap))
for i := 0; i < old.len; i++ { for i := 0; i < old.len; i++ {
typedmemmove(et, add(p, uintptr(i)*et.size), add(old.array, uintptr(i)*et.size)) typedmemmove(et, add(p, uintptr(i)*et.size), add(old.array, uintptr(i)*et.size))
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment