Commit 0bae74e8 authored by Austin Clements's avatar Austin Clements

runtime: wake idle Ps when enqueuing GC work

If the scheduler has no user work and there's no GC work visible, it
puts the P to sleep (or blocks on the network). However, if we later
enqueue more GC work, there's currently nothing that specifically
wakes up the scheduler to let it start an idle GC worker. As a result,
we can underutilize the CPU during GC if Ps have been put to sleep.

Fix this by making GC wake idle Ps when work buffers are put on the
full list. We already have a hook to do this, since we use this to
preempt a random P if we need more dedicated workers. We expand this
hook to instead wake an idle P if there is one. The logic we use for
this is identical to the logic used to wake an idle P when we ready a
goroutine.

To make this really sound, we also fix the scheduler to re-check the
idle GC worker condition after releasing its P. This closes a race
where 1) the scheduler checks for idle work and finds none, 2) new
work is enqueued but there are no idle Ps so none are woken, and 3)
the scheduler releases its P.

There is one subtlety here. Currently we call enlistWorker directly
from putfull, but the gcWork is in an inconsistent state in the places
that call putfull. This isn't a problem right now because nothing that
enlistWorker does touches the gcWork, but with the added call to
wakep, it's possible to get a recursive call into the gcWork
(specifically, while write barriers are disallowed, this can do an
allocation, which can dispose a gcWork, which can put a workbuf). To
handle this, we lift the enlistWorker calls up a layer and delay them
until the gcWork is in a consistent state.

Fixes #14179.

Change-Id: Ia2467a52e54c9688c3c1752e1fc00f5b37bbfeeb
Reviewed-on: https://go-review.googlesource.com/32434
Run-TryBot: Austin Clements <austin@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: default avatarDmitry Vyukov <dvyukov@google.com>
parent 49ea9207
...@@ -624,6 +624,14 @@ func (c *gcControllerState) endCycle() { ...@@ -624,6 +624,14 @@ func (c *gcControllerState) endCycle() {
// //
//go:nowritebarrier //go:nowritebarrier
func (c *gcControllerState) enlistWorker() { func (c *gcControllerState) enlistWorker() {
// If there are idle Ps, wake one so it will run an idle worker.
if atomic.Load(&sched.npidle) != 0 && atomic.Load(&sched.nmspinning) == 0 {
wakep()
return
}
// There are no idle Ps. If we need more dedicated workers,
// try to preempt a running P so it will switch to a worker.
if c.dedicatedMarkWorkersNeeded <= 0 { if c.dedicatedMarkWorkersNeeded <= 0 {
return return
} }
......
...@@ -99,6 +99,7 @@ func (w *gcWork) init() { ...@@ -99,6 +99,7 @@ func (w *gcWork) init() {
// obj must point to the beginning of a heap object or an oblet. // obj must point to the beginning of a heap object or an oblet.
//go:nowritebarrier //go:nowritebarrier
func (w *gcWork) put(obj uintptr) { func (w *gcWork) put(obj uintptr) {
flushed := false
wbuf := w.wbuf1.ptr() wbuf := w.wbuf1.ptr()
if wbuf == nil { if wbuf == nil {
w.init() w.init()
...@@ -111,11 +112,20 @@ func (w *gcWork) put(obj uintptr) { ...@@ -111,11 +112,20 @@ func (w *gcWork) put(obj uintptr) {
putfull(wbuf) putfull(wbuf)
wbuf = getempty() wbuf = getempty()
w.wbuf1 = wbufptrOf(wbuf) w.wbuf1 = wbufptrOf(wbuf)
flushed = true
} }
} }
wbuf.obj[wbuf.nobj] = obj wbuf.obj[wbuf.nobj] = obj
wbuf.nobj++ wbuf.nobj++
// If we put a buffer on full, let the GC controller know so
// it can encourage more workers to run. We delay this until
// the end of put so that w is in a consistent state, since
// enlistWorker may itself manipulate w.
if flushed && gcphase == _GCmark {
gcController.enlistWorker()
}
} }
// putFast does a put and returns true if it can be done quickly // putFast does a put and returns true if it can be done quickly
...@@ -263,6 +273,12 @@ func (w *gcWork) balance() { ...@@ -263,6 +273,12 @@ func (w *gcWork) balance() {
w.wbuf2 = wbufptrOf(getempty()) w.wbuf2 = wbufptrOf(getempty())
} else if wbuf := w.wbuf1.ptr(); wbuf.nobj > 4 { } else if wbuf := w.wbuf1.ptr(); wbuf.nobj > 4 {
w.wbuf1 = wbufptrOf(handoff(wbuf)) w.wbuf1 = wbufptrOf(handoff(wbuf))
} else {
return
}
// We flushed a buffer to the full list, so wake a worker.
if gcphase == _GCmark {
gcController.enlistWorker()
} }
} }
...@@ -337,12 +353,6 @@ func putempty(b *workbuf) { ...@@ -337,12 +353,6 @@ func putempty(b *workbuf) {
func putfull(b *workbuf) { func putfull(b *workbuf) {
b.checknonempty() b.checknonempty()
lfstackpush(&work.full, &b.node) lfstackpush(&work.full, &b.node)
// We just made more work available. Let the GC controller
// know so it can encourage more workers to run.
if gcphase == _GCmark {
gcController.enlistWorker()
}
} }
// trygetfull tries to get a full or partially empty workbuffer. // trygetfull tries to get a full or partially empty workbuffer.
......
...@@ -2023,6 +2023,26 @@ stop: ...@@ -2023,6 +2023,26 @@ stop:
} }
} }
// Check for idle-priority GC work again.
if gcBlackenEnabled != 0 && gcMarkWorkAvailable(nil) {
lock(&sched.lock)
_p_ = pidleget()
if _p_ != nil && _p_.gcBgMarkWorker == 0 {
pidleput(_p_)
_p_ = nil
}
unlock(&sched.lock)
if _p_ != nil {
acquirep(_p_)
if wasSpinning {
_g_.m.spinning = true
atomic.Xadd(&sched.nmspinning, 1)
}
// Go back to idle GC check.
goto stop
}
}
// poll network // poll network
if netpollinited() && atomic.Xchg64(&sched.lastpoll, 0) != 0 { if netpollinited() && atomic.Xchg64(&sched.lastpoll, 0) != 0 {
if _g_.m.p != 0 { if _g_.m.p != 0 {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment