Commit 550dfc8a authored by Austin Clements

runtime: eliminate work.markrootdone and second root marking pass

Before STW and concurrent GC were unified, there could be either one
or two root marking passes per GC cycle. There were several tasks we
had to make sure happened once and only once (whether that was at the
beginning of concurrent mark for concurrent GC or during mark
termination for STW GC). We kept track of this in work.markrootdone.

Now that STW and concurrent GC both use the concurrent marking code
and we've eliminated all work done by the second root marking pass, we
only ever need a single root marking pass. Hence, we can eliminate
work.markrootdone and all of the code that's conditional on it.

Updates #26903.

Change-Id: I654a0f5e21b9322279525560a31e64b8d33b790f
Reviewed-on: https://go-review.googlesource.com/c/134784
Run-TryBot: Austin Clements <austin@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Rick Hudson <rlh@golang.org>
parent 873bd47d
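
For context on the diff below: the mechanism being deleted is a once-per-cycle guard. A minimal sketch of the old shape (illustrative only; scanSpanRoots and scanStackRoots are hypothetical stand-ins for the real root-scanning work):

	// Before this change, root-marking tasks were guarded so they ran
	// only on the first of up to two passes per GC cycle.
	if !work.markrootDone {
		scanSpanRoots()  // hypothetical stand-in
		scanStackRoots() // hypothetical stand-in
	}
	// ...and after the first pass completed:
	work.markrootDone = true

With exactly one root marking pass per cycle, both the guard and the flag become dead weight, which is what this commit removes.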
src/runtime/mgc.go
@@ -948,14 +948,6 @@ var work struct {
 	nFlushCacheRoots                               int
 	nDataRoots, nBSSRoots, nSpanRoots, nStackRoots int
 
-	// markrootDone indicates that roots have been marked at least
-	// once during the current GC cycle. This is checked by root
-	// marking operations that have to happen only during the
-	// first root marking pass, whether that's during the
-	// concurrent mark phase in current GC or mark termination in
-	// STW GC.
-	markrootDone bool
-
 	// Each type of GC state transition is protected by a lock.
 	// Since multiple threads can simultaneously detect the state
 	// transition condition, any thread that detects a transition
@@ -1456,9 +1448,6 @@ top:
 	// below. The important thing is that the wb remains active until
 	// all marking is complete. This includes writes made by the GC.
 
-	// Record that one root marking pass has completed.
-	work.markrootDone = true
-
 	// Disable assists and background workers. We must do
 	// this before waking blocked assists.
 	atomic.Store(&gcBlackenEnabled, 0)
@@ -1909,19 +1898,20 @@ func gcMark(start_time int64) {
 	}
 	work.tstart = start_time
 
-	// Queue root marking jobs.
-	gcMarkRootPrepare()
-
 	work.nwait = 0
 	work.ndone = 0
 	work.nproc = uint32(gcprocs())
 
 	// Check that there's no marking work remaining.
-	if work.full != 0 || work.nDataRoots+work.nBSSRoots+work.nSpanRoots+work.nStackRoots != 0 {
-		print("runtime: full=", hex(work.full), " nDataRoots=", work.nDataRoots, " nBSSRoots=", work.nBSSRoots, " nSpanRoots=", work.nSpanRoots, " nStackRoots=", work.nStackRoots, "\n")
+	if work.full != 0 || work.markrootNext < work.markrootJobs {
+		print("runtime: full=", hex(work.full), " next=", work.markrootNext, " jobs=", work.markrootJobs, " nDataRoots=", work.nDataRoots, " nBSSRoots=", work.nBSSRoots, " nSpanRoots=", work.nSpanRoots, " nStackRoots=", work.nStackRoots, "\n")
 		panic("non-empty mark queue after concurrent mark")
 	}
 
+	// Clear root marking queue.
+	work.markrootNext = 0
+	work.markrootJobs = 0
+
 	if work.nproc > 1 {
 		noteclear(&work.alldone)
 		helpgc(int32(work.nproc))
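
The new exit check in gcMark relies on how root jobs are claimed: workers atomically advance a cursor, work.markrootNext, toward work.markrootJobs. Paraphrasing the claim loop in gcDrain (mgcmark.go) from this era of the runtime, with preemption handling omitted:

	// markrootNext < markrootJobs means some root job is unclaimed.
	if work.markrootNext < work.markrootJobs {
		for {
			job := atomic.Xadd(&work.markrootNext, +1) - 1
			if job >= work.markrootJobs {
				break
			}
			markroot(gcw, job)
		}
	}

So markrootNext < markrootJobs is the natural replacement for summing the per-class root counts in the check above.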
@@ -1945,9 +1935,6 @@ func gcMark(start_time int64) {
 		notesleep(&work.alldone)
 	}
 
-	// Record that at least one root marking pass has completed.
-	work.markrootDone = true
-
 	// Clear out buffers and double-check that all gcWork caches
 	// are empty. This should be ensured by gcMarkDone before we
 	// enter mark termination.
@@ -2061,7 +2048,6 @@ func gcResetMarkState() {
 
 	work.bytesMarked = 0
 	work.initialHeapLive = atomic.Load64(&memstats.heap_live)
-	work.markrootDone = false
 }
 
 // Hooks for other packages
src/runtime/mgcmark.go
@@ -62,57 +62,41 @@ func gcMarkRootPrepare() {
 	work.nDataRoots = 0
 	work.nBSSRoots = 0
 
-	// Only scan globals once per cycle; preferably concurrently.
-	if !work.markrootDone {
-		for _, datap := range activeModules() {
-			nDataRoots := nBlocks(datap.edata - datap.data)
-			if nDataRoots > work.nDataRoots {
-				work.nDataRoots = nDataRoots
-			}
-		}
-
-		for _, datap := range activeModules() {
-			nBSSRoots := nBlocks(datap.ebss - datap.bss)
-			if nBSSRoots > work.nBSSRoots {
-				work.nBSSRoots = nBSSRoots
-			}
-		}
-	}
-
-	if !work.markrootDone {
-		// On the first markroot, we need to scan span roots.
-		// In concurrent GC, this happens during concurrent
-		// mark and we depend on addfinalizer to ensure the
-		// above invariants for objects that get finalizers
-		// after concurrent mark. In STW GC, this will happen
-		// during mark termination.
-		//
-		// We're only interested in scanning the in-use spans,
-		// which will all be swept at this point. More spans
-		// may be added to this list during concurrent GC, but
-		// we only care about spans that were allocated before
-		// this mark phase.
-		work.nSpanRoots = mheap_.sweepSpans[mheap_.sweepgen/2%2].numBlocks()
-
-		// On the first markroot, we need to scan all Gs. Gs
-		// may be created after this point, but it's okay that
-		// we ignore them because they begin life without any
-		// roots, so there's nothing to scan, and any roots
-		// they create during the concurrent phase will be
-		// scanned during mark termination. During mark
-		// termination, allglen isn't changing, so we'll scan
-		// all Gs.
-		work.nStackRoots = int(atomic.Loaduintptr(&allglen))
-	} else {
-		// We've already scanned span roots and kept the scan
-		// up-to-date during concurrent mark.
-		work.nSpanRoots = 0
-
-		// The hybrid barrier ensures that stacks can't
-		// contain pointers to unmarked objects, so on the
-		// second markroot, there's no need to scan stacks.
-		work.nStackRoots = 0
-	}
+	// Scan globals.
+	for _, datap := range activeModules() {
+		nDataRoots := nBlocks(datap.edata - datap.data)
+		if nDataRoots > work.nDataRoots {
+			work.nDataRoots = nDataRoots
+		}
+	}
+
+	for _, datap := range activeModules() {
+		nBSSRoots := nBlocks(datap.ebss - datap.bss)
+		if nBSSRoots > work.nBSSRoots {
+			work.nBSSRoots = nBSSRoots
+		}
+	}
+
+	// Scan span roots for finalizer specials.
+	//
+	// We depend on addfinalizer to mark objects that get
+	// finalizers after root marking.
+	//
+	// We're only interested in scanning the in-use spans,
+	// which will all be swept at this point. More spans
+	// may be added to this list during concurrent GC, but
+	// we only care about spans that were allocated before
+	// this mark phase.
+	work.nSpanRoots = mheap_.sweepSpans[mheap_.sweepgen/2%2].numBlocks()
+
+	// Scan stacks.
+	//
+	// Gs may be created after this point, but it's okay that we
+	// ignore them because they begin life without any roots, so
+	// there's nothing to scan, and any roots they create during
+	// the concurrent phase will be scanned during mark
+	// termination.
+	work.nStackRoots = int(atomic.Loaduintptr(&allglen))
 
 	work.markrootNext = 0
 	work.markrootJobs = uint32(fixedRootCount + work.nFlushCacheRoots + work.nDataRoots + work.nBSSRoots + work.nSpanRoots + work.nStackRoots)
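
work.markrootJobs lays all root jobs out in one flat index space, and markroot decodes an index back into a root class by computing the base of each range. Paraphrasing the top of markroot in mgcmark.go from this era:

	// Fixed roots occupy [0, fixedRootCount); per-class ranges follow.
	baseFlushCache := uint32(fixedRootCount)
	baseData := baseFlushCache + uint32(work.nFlushCacheRoots)
	baseBSS := baseData + uint32(work.nDataRoots)
	baseSpans := baseBSS + uint32(work.nBSSRoots)
	baseStacks := baseSpans + uint32(work.nSpanRoots)
	end := baseStacks + uint32(work.nStackRoots)

An index i is then dispatched by the switch shown in the next hunk.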
@@ -183,24 +167,15 @@ func markroot(gcw *gcWork, i uint32) {
 		}
 
 	case i == fixedRootFinalizers:
-		// Only do this once per GC cycle since we don't call
-		// queuefinalizer during marking.
-		if work.markrootDone {
-			break
-		}
 		for fb := allfin; fb != nil; fb = fb.alllink {
 			cnt := uintptr(atomic.Load(&fb.cnt))
 			scanblock(uintptr(unsafe.Pointer(&fb.fin[0])), cnt*unsafe.Sizeof(fb.fin[0]), &finptrmask[0], gcw)
 		}
 
 	case i == fixedRootFreeGStacks:
-		// Only do this once per GC cycle; preferably
-		// concurrently.
-		if !work.markrootDone {
-			// Switch to the system stack so we can call
-			// stackfree.
-			systemstack(markrootFreeGStacks)
-		}
+		// Switch to the system stack so we can call
+		// stackfree.
+		systemstack(markrootFreeGStacks)
 
 	case baseSpans <= i && i < baseStacks:
 		// mark MSpan.specials
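
The fixedRoot* names in the switch above are small constants enumerating the root jobs that exist every cycle; roughly, paraphrased from mgcmark.go of this era:

	const (
		fixedRootFinalizers = iota // scan the finalizer queue
		fixedRootFreeGStacks       // free stacks of dead Gs
		fixedRootCount
	)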
@@ -324,10 +299,6 @@ func markrootSpans(gcw *gcWork, shard int) {
 	// TODO(austin): There are several ideas for making this more
 	// efficient in issue #11485.
 
-	if work.markrootDone {
-		throw("markrootSpans during second markroot")
-	}
-
 	sg := mheap_.sweepgen
 	spans := mheap_.sweepSpans[mheap_.sweepgen/2%2].block(shard)
 	// Note that work.spans may not include spans that were
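
The sweepSpans[sweepgen/2%2] expression selects one side of a double buffer: sweepgen advances by 2 every GC cycle, so the index alternates between 0 and 1 and the two span stacks trade roles each cycle. A sketch, paraphrasing the mheap.sweepSpans comment:

	swept := mheap_.sweepSpans[mheap_.sweepgen/2%2]     // swept, in-use spans
	unswept := mheap_.sweepSpans[1-mheap_.sweepgen/2%2] // not yet swept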
@@ -719,11 +690,8 @@ func scanstack(gp *g, gcw *gcWork) {
 		throw("can't scan gchelper stack")
 	}
 
-	// Shrink the stack if not much of it is being used. During
-	// concurrent GC, we can do this during concurrent mark.
-	if !work.markrootDone {
-		shrinkstack(gp)
-	}
+	// Shrink the stack if not much of it is being used.
+	shrinkstack(gp)
 
 	// Scan the saved context register. This is effectively a live
 	// register that gets moved back and forth between the
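
With the second pass gone, scanstack runs exactly once per cycle, so the shrink no longer needs a guard. For reference, shrinkstack (stack.go) only acts when a goroutine is using well under its allocation; roughly, and paraphrased since the details vary by version:

	oldsize := gp.stack.hi - gp.stack.lo
	newsize := oldsize / 2
	if newsize < _FixedStack {
		return // never shrink below the minimum stack size
	}
	// Only shrink if less than 1/4 of the stack is in use.
	if used := gp.stack.hi - gp.sched.sp; used >= oldsize/4 {
		return
	}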