runtime: fix races on mheap.allspans

This is based on the crash dump provided by Alan and on mental experiments: sweep 0 74 fatal error: gc: unswept span runtime stack: runtime.throw(0x9df60d) markroot(0xc208002000, 0x3) runtime.parfordo(0xc208002000) runtime.gchelper() I think that when we moved all stacks into heap, we introduced a bunch of bad data races. This was later worsened by parallel stack shrinking. Observation 1: exitsyscall can allocate a stack from heap at any time (including during STW). Observation 2: parallel stack shrinking can (surprisingly) grow heap during marking. Consider that we steadily grow stacks of a number of goroutines from 8K to 16K. And during GC they all can be shrunk back to 8K. Shrinking will allocate lots of 8K stacks, and we do not necessary have that many in heap at this moment. So shrinking can grow heap as well. Consequence: any access to mheap.allspans in GC (and otherwise) must take heap lock. This is not true in several places. Fix this by protecting accesses to mheap.allspans and introducing allspans cache for marking, similar to what we use for sweeping. LGTM=rsc R=golang-codereviews, rsc CC=adonovan, golang-codereviews, khr, rlh https://golang.org/cl/126510043

runtime: fix races on mheap.allspans
This is based on the crash dump provided by Alan and on mental experiments: sweep 0 74 fatal error: gc: unswept span runtime stack: runtime.throw(0x9df60d) markroot(0xc208002000, 0x3) runtime.parfordo(0xc208002000) runtime.gchelper() I think that when we moved all stacks into heap, we introduced a bunch of bad data races. This was later worsened by parallel stack shrinking. Observation 1: exitsyscall can allocate a stack from heap at any time (including during STW). Observation 2: parallel stack shrinking can (surprisingly) grow heap during marking. Consider that we steadily grow stacks of a number of goroutines from 8K to 16K. And during GC they all can be shrunk back to 8K. Shrinking will allocate lots of 8K stacks, and we do not necessary have that many in heap at this moment. So shrinking can grow heap as well. Consequence: any access to mheap.allspans in GC (and otherwise) must take heap lock. This is not true in several places. Fix this by protecting accesses to mheap.allspans and introducing allspans cache for marking, similar to what we use for sweeping. LGTM=rsc R=golang-codereviews, rsc CC=adonovan, golang-codereviews, khr, rlh https://golang.org/cl/126510043
7f223e3b · Dmitriy Vyukov · b91223ed · 7f223e3b · 7f223e3b · 7f223e3b
Commit 7f223e3b authored Aug 24, 2014 by Dmitriy Vyukov
Hide whitespace changes
Inline Side-by-side

Showing with 45 additions and 27 deletions

src/pkg/runtime/malloc.h src/pkg/runtime/malloc.h +1 -1

src/pkg/runtime/mgc0.c src/pkg/runtime/mgc0.c +43 -25

src/pkg/runtime/mheap.c src/pkg/runtime/mheap.c +1 -1

No files found.
--- a/src/pkg/runtime/malloc.h
+++ b/src/pkg/runtime/malloc.h
@@ -465,7 +465,7 @@ struct MHeap
 	MSpan busy[MaxMHeapList];	// busy lists of large objects of given length
 	MSpan busylarge;		// busy lists of large objects length >= MaxMHeapList
 	MSpan **allspans;		// all spans out there
-	MSpan **sweepspans;		// copy of allspans referenced by sweeper
+	MSpan **gcspans;		// copy of allspans referenced by GC marker or sweeper
 	uint32	nspan;
 	uint32	nspancap;
 	uint32	sweepgen;		// sweep generation, see comment in MSpan

--- a/src/pkg/runtime/mgc0.c
+++ b/src/pkg/runtime/mgc0.c
@@ -202,6 +202,10 @@ static struct {
 	volatile uint32	ndone;
 	Note	alldone;
 	ParFor*	markfor;
+
+	// Copy of mheap.allspans for marker or sweeper.
+	MSpan**	spans;
+	uint32	nspan;
 } work;

 // scanblock scans a block of n bytes starting at pointer b for references
@@ -500,8 +504,7 @@ static void
 markroot(ParFor *desc, uint32 i)
 {
 	FinBlock *fb;
-	MHeap *h;
-	MSpan **allspans, *s;
+	MSpan *s;
 	uint32 spanidx, sg;
 	G *gp;
 	void *p;
@@ -524,14 +527,12 @@ markroot(ParFor *desc, uint32 i)

 	case RootSpans:
 		// mark MSpan.specials
-		h = &runtime·mheap;
-		sg = h->sweepgen;
-		allspans = h->allspans;
-		for(spanidx=0; spanidx<runtime·mheap.nspan; spanidx++) {
+		sg = runtime·mheap.sweepgen;
+		for(spanidx=0; spanidx<work.nspan; spanidx++) {
 			Special *sp;
 			SpecialFinalizer *spf;

-			s = allspans[spanidx];
+			s = work.spans[spanidx];
 			if(s->state != MSpanInUse)
 				continue;
 			if(s->sweepgen != sg) {
@@ -1084,9 +1085,7 @@ static struct
 	G*	g;
 	bool	parked;

-	MSpan**	spans;
-	uint32	nspan;
-	uint32	spanidx;
+	uint32	spanidx;	// background sweeper position

 	uint32	nbgsweep;
 	uint32	npausesweep;
@@ -1131,12 +1130,12 @@ runtime·sweepone(void)
 	sg = runtime·mheap.sweepgen;
 	for(;;) {
 		idx = runtime·xadd(&sweep.spanidx, 1) - 1;
-		if(idx >= sweep.nspan) {
+		if(idx >= work.nspan) {
 			runtime·mheap.sweepdone = true;
 			g->m->locks--;
 			return -1;
 		}
-		s = sweep.spans[idx];
+		s = work.spans[idx];
 		if(s->state != MSpanInUse) {
 			s->sweepgen = sg;
 			continue;
@@ -1259,6 +1258,7 @@ runtime·updatememstats(GCStats *stats)
 	cachestats();

 	// Scan all spans and count number of alive objects.
+	runtime·lock(&runtime·mheap.lock);
 	for(i = 0; i < runtime·mheap.nspan; i++) {
 		s = runtime·mheap.allspans[i];
 		if(s->state != MSpanInUse)
@@ -1272,6 +1272,7 @@ runtime·updatememstats(GCStats *stats)
 			mstats.alloc += s->ref*s->elemsize;
 		}
 	}
+	runtime·unlock(&runtime·mheap.lock);

 	// Aggregate by size class.
 	smallfree = 0;
@@ -1441,6 +1442,24 @@ gc(struct gc_args *args)
 	while(runtime·sweepone() != -1)
 		sweep.npausesweep++;

+	// Cache runtime.mheap.allspans in work.spans to avoid conflicts with
+	// resizing/freeing allspans.
+	// New spans can be created while GC progresses, but they are not garbage for
+	// this round:
+	//  - new stack spans can be created even while the world is stopped.
+	//  - new malloc spans can be created during the concurrent sweep
+
+	// Even if this is stop-the-world, a concurrent exitsyscall can allocate a stack from heap.
+	runtime·lock(&runtime·mheap.lock);
+	// Free the old cached sweep array if necessary.
+	if(work.spans != nil && work.spans != runtime·mheap.allspans)
+		runtime·SysFree(work.spans, work.nspan*sizeof(work.spans[0]), &mstats.other_sys);
+	// Cache the current array for marking.
+	runtime·mheap.gcspans = runtime·mheap.allspans;
+	work.spans = runtime·mheap.allspans;
+	work.nspan = runtime·mheap.nspan;
+	runtime·unlock(&runtime·mheap.lock);
+
 	work.nwait = 0;
 	work.ndone = 0;
 	work.nproc = runtime·gcprocs();
@@ -1500,28 +1519,27 @@ gc(struct gc_args *args)
 			mstats.numgc, work.nproc, (t1-t0)/1000, (t2-t1)/1000, (t3-t2)/1000, (t4-t3)/1000,
 			heap0>>20, heap1>>20, obj,
 			mstats.nmalloc, mstats.nfree,
-			sweep.nspan, sweep.nbgsweep, sweep.npausesweep,
+			work.nspan, sweep.nbgsweep, sweep.npausesweep,
 			stats.nhandoff, stats.nhandoffcnt,
 			work.markfor->nsteal, work.markfor->nstealcnt,
 			stats.nprocyield, stats.nosyield, stats.nsleep);
 		sweep.nbgsweep = sweep.npausesweep = 0;
 	}

-	// We cache current runtime·mheap.allspans array in sweep.spans,
-	// because the former can be resized and freed.
-	// Otherwise we would need to take heap lock every time
-	// we want to convert span index to span pointer.
-
-	// Free the old cached array if necessary.
-	if(sweep.spans && sweep.spans != runtime·mheap.allspans)
-		runtime·SysFree(sweep.spans, sweep.nspan*sizeof(sweep.spans[0]), &mstats.other_sys);
-	// Cache the current array.
-	runtime·mheap.sweepspans = runtime·mheap.allspans;
+	// See the comment in the beginning of this function as to why we need the following.
+	// Even if this is still stop-the-world, a concurrent exitsyscall can allocate a stack from heap.
+	runtime·lock(&runtime·mheap.lock);
+	// Free the old cached mark array if necessary.
+	if(work.spans != nil && work.spans != runtime·mheap.allspans)
+		runtime·SysFree(work.spans, work.nspan*sizeof(work.spans[0]), &mstats.other_sys);
+	// Cache the current array for sweeping.
+	runtime·mheap.gcspans = runtime·mheap.allspans;
 	runtime·mheap.sweepgen += 2;
 	runtime·mheap.sweepdone = false;
-	sweep.spans = runtime·mheap.allspans;
-	sweep.nspan = runtime·mheap.nspan;
+	work.spans = runtime·mheap.allspans;
+	work.nspan = runtime·mheap.nspan;
 	sweep.spanidx = 0;
+	runtime·unlock(&runtime·mheap.lock);

 	// Temporary disable concurrent sweep, because we see failures on builders.
 	if(ConcurrentSweep && !args->eagersweep) {

--- a/src/pkg/runtime/mheap.c
+++ b/src/pkg/runtime/mheap.c
@@ -43,7 +43,7 @@ RecordSpan(void *vh, byte *p)
 			runtime·memmove(all, h->allspans, h->nspancap*sizeof(all[0]));
 			// Don't free the old array if it's referenced by sweep.
 			// See the comment in mgc0.c.
-			if(h->allspans != runtime·mheap.sweepspans)
+			if(h->allspans != runtime·mheap.gcspans)
 				runtime·SysFree(h->allspans, h->nspancap*sizeof(all[0]), &mstats.other_sys);
 		}
 		h->allspans = all;