runtime: don't start workers between mark 1 & 2

Currently we clear both the mark 1 and mark 2 signals at the beginning of concurrent mark. If either if these is clear, it acts as a signal to the scheduler that it should start background workers. However, this means that in the interim *between* mark 1 and mark 2, the scheduler basically loops starting up new workers only to have them return with nothing to do. In addition to harming performance and delaying mutator work, this approach has a race where workers started for mark 1 can mistakenly signal mark 2, causing it to complete prematurely. This approach also interferes with starting assists earlier to fix #11677. Fix this by initially setting both mark 1 and mark 2 to "signaled". The scheduler will not start background mark workers, though assists can still run. When we're ready to enter mark 1, we clear the mark 1 signal and wait for it. Then, when we're ready to enter mark 2, we clear the mark 2 signal and wait for it. This structure also lets us deal cleanly with the situation where all work is drained *prior* to the mark 2 wait, meaning that there may be no workers to signal completion. Currently we deal with this using a racy (and possibly incorrect) check for work in the coordinator itself to skip the mark 2 wait if there's no work. This change makes the coordinator unconditionally wait for mark completion and makes the scheduler itself signal completion by slightly extending the logic it already has to determine that there's no work and hence no use in starting a new worker. This is a prerequisite to fixing the remaining component of #11677, which will require enabling assists during the scan phase. However, we don't want to enable background workers until the mark phase because they will compete with the scan. This change lets us use bgMark1 and bgMark2 to indicate when it's okay to start background workers independent of assists. This is also a prerequisite to fixing #11694. It significantly reduces the occurrence of long mark termination pauses in #11694 (from 64 out of 1000 to 2 out of 1000 in one experiment). Coincidentally, this also reduces the final heap size (and hence run time) of TestTraceStress from ~100 MB and ~1.9 seconds to ~14 MB and ~0.4 seconds because it significantly shortens concurrent mark duration. Rick Hudson <rlh> did the hard work of tracking this down. Change-Id: I12ea9ee2db9a0ae9d3a90dde4944a75fcf408f4c Reviewed-on: https://go-review.googlesource.com/12672Reviewed-by: Russ Cox <rsc@golang.org>

runtime: don't start workers between mark 1 & 2
Currently we clear both the mark 1 and mark 2 signals at the beginning of concurrent mark. If either if these is clear, it acts as a signal to the scheduler that it should start background workers. However, this means that in the interim *between* mark 1 and mark 2, the scheduler basically loops starting up new workers only to have them return with nothing to do. In addition to harming performance and delaying mutator work, this approach has a race where workers started for mark 1 can mistakenly signal mark 2, causing it to complete prematurely. This approach also interferes with starting assists earlier to fix #11677. Fix this by initially setting both mark 1 and mark 2 to "signaled". The scheduler will not start background mark workers, though assists can still run. When we're ready to enter mark 1, we clear the mark 1 signal and wait for it. Then, when we're ready to enter mark 2, we clear the mark 2 signal and wait for it. This structure also lets us deal cleanly with the situation where all work is drained *prior* to the mark 2 wait, meaning that there may be no workers to signal completion. Currently we deal with this using a racy (and possibly incorrect) check for work in the coordinator itself to skip the mark 2 wait if there's no work. This change makes the coordinator unconditionally wait for mark completion and makes the scheduler itself signal completion by slightly extending the logic it already has to determine that there's no work and hence no use in starting a new worker. This is a prerequisite to fixing the remaining component of #11677, which will require enabling assists during the scan phase. However, we don't want to enable background workers until the mark phase because they will compete with the scan. This change lets us use bgMark1 and bgMark2 to indicate when it's okay to start background workers independent of assists. This is also a prerequisite to fixing #11694. It significantly reduces the occurrence of long mark termination pauses in #11694 (from 64 out of 1000 to 2 out of 1000 in one experiment). Coincidentally, this also reduces the final heap size (and hence run time) of TestTraceStress from ~100 MB and ~1.9 seconds to ~14 MB and ~0.4 seconds because it significantly shortens concurrent mark duration. Rick Hudson <rlh> did the hard work of tracking this down. Change-Id: I12ea9ee2db9a0ae9d3a90dde4944a75fcf408f4c Reviewed-on: https://go-review.googlesource.com/12672Reviewed-by: Russ Cox <rsc@golang.org>
64a32ffe · Austin Clements · 8f34b253 · 64a32ffe
Commit 64a32ffe authored Jul 24, 2015 by Austin Clements
Hide whitespace changes
Inline Side-by-side

Showing with 32 additions and 13 deletions

src/runtime/mgc.go src/runtime/mgc.go +32 -13

No files found.
--- a/src/runtime/mgc.go
+++ b/src/runtime/mgc.go
@@ -594,6 +594,28 @@ func (c *gcControllerState) findRunnableGCWorker(_p_ *p) *g {
 			// there are still assists tapering off. Don't
 			// bother running background mark because
 			// it'll just return immediately.
+			if work.nwait == work.nproc {
+				// There are also no workers, which
+				// means we've reached a completion point.
+				// There may not be any workers to
+				// signal it, so signal it here.
+				readied := false
+				if gcBlackenPromptly {
+					if work.bgMark1.done == 0 {
+						throw("completing mark 2, but bgMark1.done == 0")
+					}
+					readied = work.bgMark2.complete()
+				} else {
+					readied = work.bgMark1.complete()
+				}
+				if readied {
+					// complete just called ready,
+					// but we're inside the
+					// scheduler. Let it know that
+					// that's okay.
+					resetspinning()
+				}
+			}
 			return nil
 		}
 		if !decIfPositive(&c.fractionalMarkWorkersNeeded) {
@@ -710,7 +732,7 @@ func (s *bgMarkSignal) wait() {
 // The caller should arrange to deschedule itself as soon as possible
 // after calling complete in order to let the coordinator goroutine
 // run.
-func (s *bgMarkSignal) complete() {
+func (s *bgMarkSignal) complete() bool {
 	if cas(&s.done, 0, 1) {
 		// This is the first worker to reach this completion point.
 		// Signal the main GC goroutine.
@@ -722,7 +744,9 @@ func (s *bgMarkSignal) complete() {
 			ready(s.g, 0)
 		}
 		unlock(&s.lock)
+		return true
 	}
+	return false
 }

 func (s *bgMarkSignal) clear() {
@@ -963,6 +987,7 @@ func gc(mode int) {
 		}

 		// Wait for background mark completion.
+		work.bgMark1.clear()
 		work.bgMark1.wait()

 		// The global work list is empty, but there can still be work
@@ -978,16 +1003,10 @@ func gc(mode int) {
 			})
 		})

-		if atomicload64(&work.full) != 0 || atomicload64(&work.partial) != 0 {
-			if work.bgMark2.done != 0 {
-				throw("work.bgMark2.done != 0")
-			}
-			gcBlackenPromptly = true
-			// Wait for this more aggressive background mark to complete.
-			work.bgMark2.wait()
-		} else {
-			work.bgMark2.done = 1
-		}
+		gcBlackenPromptly = true
+		// Wait for this more aggressive background mark to complete.
+		work.bgMark2.clear()
+		work.bgMark2.wait()

 		// Begin mark termination.
 		now = nanotime()
@@ -1209,8 +1228,8 @@ func gcBgMarkPrepare() {
 	work.nwait = ^uint32(0)

 	// Reset background mark completion points.
-	work.bgMark1.clear()
-	work.bgMark2.clear()
+	work.bgMark1.done = 1
+	work.bgMark2.done = 1
 	gcController.bgMarkStartTime = nanotime()
 }