Commit eb3b1830 authored by Austin Clements

runtime: attach mark workers to P after they park

Currently mark workers attach to their designated Ps before parking,
either during initialization or after performing a phase transition.
However, in both of these cases, it's possible that the mark worker is
running on a different P than the one it attaches to. This is a
problem, because as soon as the worker attaches to a P, that P's
scheduler can execute the worker. If the worker hasn't yet parked on
the P it's actually running on, this means the worker G will be
running in two places at once. The most visible consequence of this is
that once the first instance of the worker does park, it will clear
g.m and the second instance will crash shortly when it tries to use
g.m.

Fix this by moving the attach to the gopark callback. At this point,
the G is genuinely stopped and the callback is running on the system
stack, so it's safe for another P's scheduler to pick up the worker G.

Fixes #13363. Fixes #13978.

Change-Id: If2f7c4a4174f9511f6227e14a27c56fb842d1cc8
Reviewed-on: https://go-review.googlesource.com/18761
Reviewed-by: Rick Hudson <rlh@golang.org>
Reviewed-by: Russ Cox <rsc@golang.org>
Run-TryBot: Austin Clements <austin@google.com>
parent 73d590b4
...@@ -1349,15 +1349,22 @@ func gcBgMarkPrepare() { ...@@ -1349,15 +1349,22 @@ func gcBgMarkPrepare() {
work.nwait = ^uint32(0) work.nwait = ^uint32(0)
} }
func gcBgMarkWorker(p *p) { func gcBgMarkWorker(_p_ *p) {
// Register this G as the background mark worker for p. type parkInfo struct {
m *m // Release this m on park.
attach *p // If non-nil, attach to this p on park.
}
var park parkInfo
// casgp is casp for *g's.
casgp := func(gpp **g, old, new *g) bool { casgp := func(gpp **g, old, new *g) bool {
return casp((*unsafe.Pointer)(unsafe.Pointer(gpp)), unsafe.Pointer(old), unsafe.Pointer(new)) return casp((*unsafe.Pointer)(unsafe.Pointer(gpp)), unsafe.Pointer(old), unsafe.Pointer(new))
} }
gp := getg() gp := getg()
mp := acquirem() park.m = acquirem()
owned := casgp(&p.gcBgMarkWorker, nil, gp) park.attach = _p_
// Inform gcBgMarkStartWorkers that this worker is ready.
// After this point, the background mark worker is scheduled // After this point, the background mark worker is scheduled
// cooperatively by gcController.findRunnable. Hence, it must // cooperatively by gcController.findRunnable. Hence, it must
// never be preempted, as this would put it into _Grunnable // never be preempted, as this would put it into _Grunnable
...@@ -1365,33 +1372,51 @@ func gcBgMarkWorker(p *p) { ...@@ -1365,33 +1372,51 @@ func gcBgMarkWorker(p *p) {
// is set, this puts itself into _Gwaiting to be woken up by // is set, this puts itself into _Gwaiting to be woken up by
// gcController.findRunnable at the appropriate time. // gcController.findRunnable at the appropriate time.
notewakeup(&work.bgMarkReady) notewakeup(&work.bgMarkReady)
if !owned {
// A sleeping worker came back and reassociated with
// the P. That's fine.
releasem(mp)
return
}
for { for {
// Go to sleep until woken by gcContoller.findRunnable. // Go to sleep until woken by gcContoller.findRunnable.
// We can't releasem yet since even the call to gopark // We can't releasem yet since even the call to gopark
// may be preempted. // may be preempted.
gopark(func(g *g, mp unsafe.Pointer) bool { gopark(func(g *g, parkp unsafe.Pointer) bool {
releasem((*m)(mp)) park := (*parkInfo)(parkp)
// The worker G is no longer running, so it's
// now safe to allow preemption.
releasem(park.m)
// If the worker isn't attached to its P,
// attach now. During initialization and after
// a phase change, the worker may have been
// running on a different P. As soon as we
// attach, the owner P may schedule the
// worker, so this must be done after the G is
// stopped.
if park.attach != nil {
p := park.attach
park.attach = nil
// cas the worker because we may be
// racing with a new worker starting
// on this P.
if !casgp(&p.gcBgMarkWorker, nil, g) {
// The P got a new worker.
// Exit this worker.
return false
}
}
return true return true
}, unsafe.Pointer(mp), "GC worker (idle)", traceEvGoBlock, 0) }, noescape(unsafe.Pointer(&park)), "GC worker (idle)", traceEvGoBlock, 0)
// Loop until the P dies and disassociates this // Loop until the P dies and disassociates this
// worker. (The P may later be reused, in which case // worker (the P may later be reused, in which case
// it will get a new worker.) // it will get a new worker) or we failed to associate.
if p.gcBgMarkWorker != gp { if _p_.gcBgMarkWorker != gp {
break break
} }
// Disable preemption so we can use the gcw. If the // Disable preemption so we can use the gcw. If the
// scheduler wants to preempt us, we'll stop draining, // scheduler wants to preempt us, we'll stop draining,
// dispose the gcw, and then preempt. // dispose the gcw, and then preempt.
mp = acquirem() park.m = acquirem()
if gcBlackenEnabled == 0 { if gcBlackenEnabled == 0 {
throw("gcBgMarkWorker: blackening not enabled") throw("gcBgMarkWorker: blackening not enabled")
...@@ -1405,13 +1430,13 @@ func gcBgMarkWorker(p *p) { ...@@ -1405,13 +1430,13 @@ func gcBgMarkWorker(p *p) {
throw("work.nwait was > work.nproc") throw("work.nwait was > work.nproc")
} }
switch p.gcMarkWorkerMode { switch _p_.gcMarkWorkerMode {
default: default:
throw("gcBgMarkWorker: unexpected gcMarkWorkerMode") throw("gcBgMarkWorker: unexpected gcMarkWorkerMode")
case gcMarkWorkerDedicatedMode: case gcMarkWorkerDedicatedMode:
gcDrain(&p.gcw, gcDrainNoBlock|gcDrainFlushBgCredit) gcDrain(&_p_.gcw, gcDrainNoBlock|gcDrainFlushBgCredit)
case gcMarkWorkerFractionalMode, gcMarkWorkerIdleMode: case gcMarkWorkerFractionalMode, gcMarkWorkerIdleMode:
gcDrain(&p.gcw, gcDrainUntilPreempt|gcDrainFlushBgCredit) gcDrain(&_p_.gcw, gcDrainUntilPreempt|gcDrainFlushBgCredit)
} }
// If we are nearing the end of mark, dispose // If we are nearing the end of mark, dispose
...@@ -1421,12 +1446,12 @@ func gcBgMarkWorker(p *p) { ...@@ -1421,12 +1446,12 @@ func gcBgMarkWorker(p *p) {
// no workers and no work while we have this // no workers and no work while we have this
// cached, and before we compute done. // cached, and before we compute done.
if gcBlackenPromptly { if gcBlackenPromptly {
p.gcw.dispose() _p_.gcw.dispose()
} }
// Account for time. // Account for time.
duration := nanotime() - startTime duration := nanotime() - startTime
switch p.gcMarkWorkerMode { switch _p_.gcMarkWorkerMode {
case gcMarkWorkerDedicatedMode: case gcMarkWorkerDedicatedMode:
atomic.Xaddint64(&gcController.dedicatedMarkTime, duration) atomic.Xaddint64(&gcController.dedicatedMarkTime, duration)
atomic.Xaddint64(&gcController.dedicatedMarkWorkersNeeded, 1) atomic.Xaddint64(&gcController.dedicatedMarkWorkersNeeded, 1)
...@@ -1441,7 +1466,7 @@ func gcBgMarkWorker(p *p) { ...@@ -1441,7 +1466,7 @@ func gcBgMarkWorker(p *p) {
// of work? // of work?
incnwait := atomic.Xadd(&work.nwait, +1) incnwait := atomic.Xadd(&work.nwait, +1)
if incnwait > work.nproc { if incnwait > work.nproc {
println("runtime: p.gcMarkWorkerMode=", p.gcMarkWorkerMode, println("runtime: p.gcMarkWorkerMode=", _p_.gcMarkWorkerMode,
"work.nwait=", incnwait, "work.nproc=", work.nproc) "work.nwait=", incnwait, "work.nproc=", work.nproc)
throw("work.nwait > work.nproc") throw("work.nwait > work.nproc")
} }
...@@ -1453,21 +1478,19 @@ func gcBgMarkWorker(p *p) { ...@@ -1453,21 +1478,19 @@ func gcBgMarkWorker(p *p) {
// as the worker for this P so // as the worker for this P so
// findRunnableGCWorker doesn't try to // findRunnableGCWorker doesn't try to
// schedule it. // schedule it.
p.gcBgMarkWorker = nil _p_.gcBgMarkWorker = nil
releasem(mp) releasem(park.m)
gcMarkDone() gcMarkDone()
// Disable preemption and reassociate with the P. // Disable preemption and prepare to reattach
// to the P.
// //
// We may be running on a different P at this // We may be running on a different P at this
// point, so this has to be done carefully. // point, so we can't reattach until this G is
mp = acquirem() // parked.
if !casgp(&p.gcBgMarkWorker, nil, gp) { park.m = acquirem()
// The P got a new worker. park.attach = _p_
releasem(mp)
break
}
} }
} }
} }
......
...@@ -3212,6 +3212,8 @@ func procresize(nprocs int32) *p { ...@@ -3212,6 +3212,8 @@ func procresize(nprocs int32) *p {
traceGoUnpark(p.gcBgMarkWorker, 0) traceGoUnpark(p.gcBgMarkWorker, 0)
} }
globrunqput(p.gcBgMarkWorker) globrunqput(p.gcBgMarkWorker)
// This assignment doesn't race because the
// world is stopped.
p.gcBgMarkWorker = nil p.gcBgMarkWorker = nil
} }
for i := range p.sudogbuf { for i := range p.sudogbuf {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment