Commit cb133c66 authored by Dmitriy Vyukov's avatar Dmitriy Vyukov

runtime: do not collect GC roots explicitly

Currently we collect (add) all roots into a global array in a single-threaded GC phase.
This hinders parallelism.
With this change we just kick off parallel for for number_of_goroutines+5 iterations.
Then parallel for callback decides whether it needs to scan stack of a goroutine
scan data segment, scan finalizers, etc. This eliminates the single-threaded phase entirely.
This requires to store all goroutines in an array instead of a linked list
(to allow direct indexing).
This CL also removes DebugScan functionality. It is broken because it uses
unbounded stack, so it can not run on g0. When it was working, I've found
it helpless for debugging issues because the two algorithms are too different now.
This change would require updating the DebugScan, so it's simpler to just delete it.

With 8 threads this change reduces GC pause by ~6%, while keeping cputime roughly the same.

garbage-8
allocated                 2987886      2989221      +0.04%
allocs                      62885        62887      +0.00%
cputime                  21286000     21272000      -0.07%
gc-pause-one             26633247     24885421      -6.56%
gc-pause-total             873570       811264      -7.13%
rss                     242089984    242515968      +0.18%
sys-gc                   13934336     13869056      -0.47%
sys-heap                205062144    205062144      +0.00%
sys-other                12628288     12628288      +0.00%
sys-stack                11534336     11927552      +3.41%
sys-total               243159104    243487040      +0.13%
time                      2809477      2740795      -2.44%

R=golang-codereviews, rsc
CC=cshapiro, golang-codereviews, khr
https://golang.org/cl/46860043
parent 0e027fca
...@@ -17,7 +17,6 @@ ...@@ -17,7 +17,6 @@
enum { enum {
Debug = 0, Debug = 0,
DebugMark = 0, // run second pass to check mark
CollectStats = 0, CollectStats = 0,
ScanStackByFrames = 1, ScanStackByFrames = 1,
IgnorePreciseGC = 0, IgnorePreciseGC = 0,
...@@ -40,6 +39,13 @@ enum { ...@@ -40,6 +39,13 @@ enum {
BitsPointer = 1, BitsPointer = 1,
BitsIface = 2, BitsIface = 2,
BitsEface = 3, BitsEface = 3,
RootData = 0,
RootBss = 1,
RootFinalizers = 2,
RootSpanTypes = 3,
RootFlushCaches = 4,
RootCount = 5,
}; };
static struct static struct
...@@ -190,7 +196,10 @@ static Workbuf* getfull(Workbuf*); ...@@ -190,7 +196,10 @@ static Workbuf* getfull(Workbuf*);
static void putempty(Workbuf*); static void putempty(Workbuf*);
static Workbuf* handoff(Workbuf*); static Workbuf* handoff(Workbuf*);
static void gchelperstart(void); static void gchelperstart(void);
static void scanstack(G* gp, void *scanbuf); static void addfinroots(void *wbufp, void *v);
static void flushallmcaches(void);
static void scanframe(Stkframe *frame, void *wbufp);
static void addstackroots(G *gp, Workbuf **wbufp);
static struct { static struct {
uint64 full; // lock-free list of full blocks uint64 full; // lock-free list of full blocks
...@@ -200,7 +209,6 @@ static struct { ...@@ -200,7 +209,6 @@ static struct {
int64 tstart; int64 tstart;
volatile uint32 nwait; volatile uint32 nwait;
volatile uint32 ndone; volatile uint32 ndone;
volatile uint32 debugmarkdone;
Note alldone; Note alldone;
ParFor *markfor; ParFor *markfor;
ParFor *sweepfor; ParFor *sweepfor;
...@@ -208,16 +216,11 @@ static struct { ...@@ -208,16 +216,11 @@ static struct {
Lock; Lock;
byte *chunk; byte *chunk;
uintptr nchunk; uintptr nchunk;
Obj *roots;
uint32 nroot;
uint32 rootcap;
} work; } work;
enum { enum {
GC_DEFAULT_PTR = GC_NUM_INSTR, GC_DEFAULT_PTR = GC_NUM_INSTR,
GC_CHAN, GC_CHAN,
GC_G_PTR,
GC_NUM_INSTR2 GC_NUM_INSTR2
}; };
...@@ -636,9 +639,6 @@ static uintptr defaultProg[2] = {PtrSize, GC_DEFAULT_PTR}; ...@@ -636,9 +639,6 @@ static uintptr defaultProg[2] = {PtrSize, GC_DEFAULT_PTR};
// Hchan program // Hchan program
static uintptr chanProg[2] = {0, GC_CHAN}; static uintptr chanProg[2] = {0, GC_CHAN};
// G* program
static uintptr gptrProg[2] = {0, GC_G_PTR};
// Local variables of a program fragment or loop // Local variables of a program fragment or loop
typedef struct Frame Frame; typedef struct Frame Frame;
struct Frame { struct Frame {
...@@ -707,15 +707,11 @@ checkptr(void *obj, uintptr objti) ...@@ -707,15 +707,11 @@ checkptr(void *obj, uintptr objti)
// a work list in the Workbuf* structures and loops in the main function // a work list in the Workbuf* structures and loops in the main function
// body. Keeping an explicit work list is easier on the stack allocator and // body. Keeping an explicit work list is easier on the stack allocator and
// more efficient. // more efficient.
//
// wbuf: current work buffer
// wp: storage for next queued pointer (write pointer)
// nobj: number of queued objects
static void static void
scanblock(Workbuf *wbuf, Obj *wp, uintptr nobj, bool keepworking) scanblock(Workbuf *wbuf, bool keepworking)
{ {
byte *b, *arena_start, *arena_used; byte *b, *arena_start, *arena_used;
uintptr n, i, end_b, elemsize, size, ti, objti, count, type; uintptr n, i, end_b, elemsize, size, ti, objti, count, type, nobj;
uintptr *pc, precise_type, nominal_size; uintptr *pc, precise_type, nominal_size;
uintptr *chan_ret, chancap; uintptr *chan_ret, chancap;
void *obj; void *obj;
...@@ -728,6 +724,7 @@ scanblock(Workbuf *wbuf, Obj *wp, uintptr nobj, bool keepworking) ...@@ -728,6 +724,7 @@ scanblock(Workbuf *wbuf, Obj *wp, uintptr nobj, bool keepworking)
Iface *iface; Iface *iface;
Hchan *chan; Hchan *chan;
ChanType *chantype; ChanType *chantype;
Obj *wp;
if(sizeof(Workbuf) % PageSize != 0) if(sizeof(Workbuf) % PageSize != 0)
runtime·throw("scanblock: size of Workbuf is suboptimal"); runtime·throw("scanblock: size of Workbuf is suboptimal");
...@@ -741,6 +738,14 @@ scanblock(Workbuf *wbuf, Obj *wp, uintptr nobj, bool keepworking) ...@@ -741,6 +738,14 @@ scanblock(Workbuf *wbuf, Obj *wp, uintptr nobj, bool keepworking)
precise_type = false; precise_type = false;
nominal_size = 0; nominal_size = 0;
if(wbuf) {
nobj = wbuf->nobj;
wp = &wbuf->obj[nobj];
} else {
nobj = 0;
wp = nil;
}
// Initialize sbuf // Initialize sbuf
scanbuffers = &bufferList[m->helpgc]; scanbuffers = &bufferList[m->helpgc];
...@@ -1075,11 +1080,6 @@ scanblock(Workbuf *wbuf, Obj *wp, uintptr nobj, bool keepworking) ...@@ -1075,11 +1080,6 @@ scanblock(Workbuf *wbuf, Obj *wp, uintptr nobj, bool keepworking)
pc = chan_ret; pc = chan_ret;
continue; continue;
case GC_G_PTR:
obj = (void*)stack_top.b;
scanstack(obj, &sbuf);
goto next_block;
default: default:
runtime·throw("scanblock: invalid GC instruction"); runtime·throw("scanblock: invalid GC instruction");
return; return;
...@@ -1124,82 +1124,6 @@ scanblock(Workbuf *wbuf, Obj *wp, uintptr nobj, bool keepworking) ...@@ -1124,82 +1124,6 @@ scanblock(Workbuf *wbuf, Obj *wp, uintptr nobj, bool keepworking)
} }
} }
// debug_scanblock is the debug copy of scanblock.
// it is simpler, slower, single-threaded, recursive,
// and uses bitSpecial as the mark bit.
static void
debug_scanblock(byte *b, uintptr n)
{
byte *obj, *p;
void **vp;
uintptr size, *bitp, bits, shift, i, xbits, off;
MSpan *s;
if(!DebugMark)
runtime·throw("debug_scanblock without DebugMark");
if((intptr)n < 0) {
runtime·printf("debug_scanblock %p %D\n", b, (int64)n);
runtime·throw("debug_scanblock");
}
// Align b to a word boundary.
off = (uintptr)b & (PtrSize-1);
if(off != 0) {
b += PtrSize - off;
n -= PtrSize - off;
}
vp = (void**)b;
n /= PtrSize;
for(i=0; i<n; i++) {
obj = (byte*)vp[i];
// Words outside the arena cannot be pointers.
if((byte*)obj < runtime·mheap.arena_start || (byte*)obj >= runtime·mheap.arena_used)
continue;
// Round down to word boundary.
obj = (void*)((uintptr)obj & ~((uintptr)PtrSize-1));
// Consult span table to find beginning.
s = runtime·MHeap_LookupMaybe(&runtime·mheap, obj);
if(s == nil)
continue;
p = (byte*)((uintptr)s->start<<PageShift);
size = s->elemsize;
if(s->sizeclass == 0) {
obj = p;
} else {
int32 i = ((byte*)obj - p)/size;
obj = p+i*size;
}
// Now that we know the object header, reload bits.
off = (uintptr*)obj - (uintptr*)runtime·mheap.arena_start;
bitp = (uintptr*)runtime·mheap.arena_start - off/wordsPerBitmapWord - 1;
shift = off % wordsPerBitmapWord;
xbits = *bitp;
bits = xbits >> shift;
// Now we have bits, bitp, and shift correct for
// obj pointing at the base of the object.
// If not allocated or already marked, done.
if((bits & bitAllocated) == 0 || (bits & bitSpecial) != 0) // NOTE: bitSpecial not bitMarked
continue;
*bitp |= bitSpecial<<shift;
if(!(bits & bitMarked))
runtime·printf("found unmarked block %p in %p\n", obj, vp+i);
// If object has no pointers, don't need to scan further.
if((bits & bitScan) == 0)
continue;
debug_scanblock(obj, size);
}
}
// Append obj to the work buffer. // Append obj to the work buffer.
// _wbuf, _wp, _nobj are input/output parameters and are specifying the work buffer. // _wbuf, _wp, _nobj are input/output parameters and are specifying the work buffer.
static void static void
...@@ -1255,19 +1179,92 @@ enqueue(Obj obj, Workbuf **_wbuf, Obj **_wp, uintptr *_nobj) ...@@ -1255,19 +1179,92 @@ enqueue(Obj obj, Workbuf **_wbuf, Obj **_wp, uintptr *_nobj)
*_nobj = nobj; *_nobj = nobj;
} }
static void
enqueue1(Workbuf **wbufp, Obj obj)
{
Workbuf *wbuf;
wbuf = *wbufp;
if(wbuf->nobj >= nelem(wbuf->obj))
*wbufp = wbuf = getempty(wbuf);
wbuf->obj[wbuf->nobj++] = obj;
}
static void static void
markroot(ParFor *desc, uint32 i) markroot(ParFor *desc, uint32 i)
{ {
Obj *wp;
Workbuf *wbuf; Workbuf *wbuf;
uintptr nobj; FinBlock *fb;
MSpan **allspans, *s;
uint32 spanidx;
G *gp;
USED(&desc); USED(&desc);
wp = nil; wbuf = getempty(nil);
wbuf = nil; switch(i) {
nobj = 0; case RootData:
enqueue(work.roots[i], &wbuf, &wp, &nobj); enqueue1(&wbuf, (Obj){data, edata - data, (uintptr)gcdata});
scanblock(wbuf, wp, nobj, false); break;
case RootBss:
enqueue1(&wbuf, (Obj){bss, ebss - bss, (uintptr)gcbss});
break;
case RootFinalizers:
for(fb=allfin; fb; fb=fb->alllink)
enqueue1(&wbuf, (Obj){(byte*)fb->fin, fb->cnt*sizeof(fb->fin[0]), 0});
break;
case RootSpanTypes:
// mark span types and MSpan.specials (to walk spans only once)
allspans = runtime·mheap.allspans;
for(spanidx=0; spanidx<runtime·mheap.nspan; spanidx++) {
Special *sp;
SpecialFinalizer *spf;
s = allspans[spanidx];
if(s->state != MSpanInUse)
continue;
// The garbage collector ignores type pointers stored in MSpan.types:
// - Compiler-generated types are stored outside of heap.
// - The reflect package has runtime-generated types cached in its data structures.
// The garbage collector relies on finding the references via that cache.
if(s->types.compression == MTypes_Words || s->types.compression == MTypes_Bytes)
markonly((byte*)s->types.data);
for(sp = s->specials; sp != nil; sp = sp->next) {
if(sp->kind != KindSpecialFinalizer)
continue;
// don't mark finalized object, but scan it so we
// retain everything it points to.
spf = (SpecialFinalizer*)sp;
enqueue1(&wbuf, (Obj){(void*)((s->start << PageShift) + spf->offset), s->elemsize, 0});
enqueue1(&wbuf, (Obj){(void*)&spf->fn, PtrSize, 0});
enqueue1(&wbuf, (Obj){(void*)&spf->fint, PtrSize, 0});
enqueue1(&wbuf, (Obj){(void*)&spf->ot, PtrSize, 0});
}
}
break;
case RootFlushCaches:
flushallmcaches();
break;
default:
// the rest is scanning goroutine stacks
if(i - RootCount >= runtime·allglen)
runtime·throw("markroot: bad index");
gp = runtime·allg[i - RootCount];
// remember when we've first observed the G blocked
// needed only to output in traceback
if((gp->status == Gwaiting || gp->status == Gsyscall) && gp->waitsince == 0)
gp->waitsince = work.tstart;
addstackroots(gp, &wbuf);
break;
}
if(wbuf)
scanblock(wbuf, false);
} }
// Get an empty work buffer off the work.empty list, // Get an empty work buffer off the work.empty list,
...@@ -1364,30 +1361,6 @@ handoff(Workbuf *b) ...@@ -1364,30 +1361,6 @@ handoff(Workbuf *b)
return b1; return b1;
} }
static void
addroot(Obj obj)
{
uint32 cap;
Obj *new;
if(work.nroot >= work.rootcap) {
cap = PageSize/sizeof(Obj);
if(cap < 2*work.rootcap)
cap = 2*work.rootcap;
new = (Obj*)runtime·SysAlloc(cap*sizeof(Obj), &mstats.gc_sys);
if(new == nil)
runtime·throw("runtime: cannot allocate memory");
if(work.roots != nil) {
runtime·memmove(new, work.roots, work.rootcap*sizeof(Obj));
runtime·SysFree(work.roots, work.rootcap*sizeof(Obj), &mstats.gc_sys);
}
work.roots = new;
work.rootcap = cap;
}
work.roots[work.nroot] = obj;
work.nroot++;
}
extern byte pclntab[]; // base for f->ptrsoff extern byte pclntab[]; // base for f->ptrsoff
typedef struct BitVector BitVector; typedef struct BitVector BitVector;
...@@ -1427,7 +1400,7 @@ stackmapdata(StackMap *stackmap, int32 n) ...@@ -1427,7 +1400,7 @@ stackmapdata(StackMap *stackmap, int32 n)
// Scans an interface data value when the interface type indicates // Scans an interface data value when the interface type indicates
// that it is a pointer. // that it is a pointer.
static void static void
scaninterfacedata(uintptr bits, byte *scanp, bool afterprologue, Scanbuf *sbuf) scaninterfacedata(uintptr bits, byte *scanp, bool afterprologue, void *wbufp)
{ {
Itab *tab; Itab *tab;
Type *type; Type *type;
...@@ -1443,14 +1416,12 @@ scaninterfacedata(uintptr bits, byte *scanp, bool afterprologue, Scanbuf *sbuf) ...@@ -1443,14 +1416,12 @@ scaninterfacedata(uintptr bits, byte *scanp, bool afterprologue, Scanbuf *sbuf)
return; return;
} }
} }
*sbuf->obj.pos++ = (Obj){scanp+PtrSize, PtrSize, 0}; enqueue1(wbufp, (Obj){scanp+PtrSize, PtrSize, 0});
if(sbuf->obj.pos == sbuf->obj.end)
flushobjbuf(sbuf);
} }
// Starting from scanp, scans words corresponding to set bits. // Starting from scanp, scans words corresponding to set bits.
static void static void
scanbitvector(byte *scanp, BitVector *bv, bool afterprologue, Scanbuf *sbuf) scanbitvector(byte *scanp, BitVector *bv, bool afterprologue, void *wbufp)
{ {
uintptr word, bits; uintptr word, bits;
uint32 *wordp; uint32 *wordp;
...@@ -1467,12 +1438,10 @@ scanbitvector(byte *scanp, BitVector *bv, bool afterprologue, Scanbuf *sbuf) ...@@ -1467,12 +1438,10 @@ scanbitvector(byte *scanp, BitVector *bv, bool afterprologue, Scanbuf *sbuf)
for(; i > 0; i--) { for(; i > 0; i--) {
bits = word & 3; bits = word & 3;
if(bits != BitsNoPointer && *(void**)scanp != nil) if(bits != BitsNoPointer && *(void**)scanp != nil)
if(bits == BitsPointer) { if(bits == BitsPointer)
*sbuf->obj.pos++ = (Obj){scanp, PtrSize, 0}; enqueue1(wbufp, (Obj){scanp, PtrSize, 0});
if(sbuf->obj.pos == sbuf->obj.end) else
flushobjbuf(sbuf); scaninterfacedata(bits, scanp, afterprologue, wbufp);
} else
scaninterfacedata(bits, scanp, afterprologue, sbuf);
word >>= BitsPerPointer; word >>= BitsPerPointer;
scanp += PtrSize; scanp += PtrSize;
} }
...@@ -1481,10 +1450,9 @@ scanbitvector(byte *scanp, BitVector *bv, bool afterprologue, Scanbuf *sbuf) ...@@ -1481,10 +1450,9 @@ scanbitvector(byte *scanp, BitVector *bv, bool afterprologue, Scanbuf *sbuf)
// Scan a stack frame: local variables and function arguments/results. // Scan a stack frame: local variables and function arguments/results.
static void static void
scanframe(Stkframe *frame, void *arg) scanframe(Stkframe *frame, void *wbufp)
{ {
Func *f; Func *f;
Scanbuf *sbuf;
StackMap *stackmap; StackMap *stackmap;
BitVector *bv; BitVector *bv;
uintptr size; uintptr size;
...@@ -1504,7 +1472,6 @@ scanframe(Stkframe *frame, void *arg) ...@@ -1504,7 +1472,6 @@ scanframe(Stkframe *frame, void *arg)
pcdata = 0; pcdata = 0;
} }
sbuf = arg;
// Scan local variables if stack frame has been allocated. // Scan local variables if stack frame has been allocated.
// Use pointer information if known. // Use pointer information if known.
afterprologue = (frame->varp > (byte*)frame->sp); afterprologue = (frame->varp > (byte*)frame->sp);
...@@ -1513,25 +1480,23 @@ scanframe(Stkframe *frame, void *arg) ...@@ -1513,25 +1480,23 @@ scanframe(Stkframe *frame, void *arg)
if(stackmap == nil) { if(stackmap == nil) {
// No locals information, scan everything. // No locals information, scan everything.
size = frame->varp - (byte*)frame->sp; size = frame->varp - (byte*)frame->sp;
*sbuf->obj.pos++ = (Obj){frame->varp - size, size, 0}; enqueue1(wbufp, (Obj){frame->varp - size, size, 0});
if(sbuf->obj.pos == sbuf->obj.end)
flushobjbuf(sbuf);
} else if(stackmap->n < 0) { } else if(stackmap->n < 0) {
// Locals size information, scan just the locals. // Locals size information, scan just the locals.
size = -stackmap->n; size = -stackmap->n;
*sbuf->obj.pos++ = (Obj){frame->varp - size, size, 0}; enqueue1(wbufp, (Obj){frame->varp - size, size, 0});
if(sbuf->obj.pos == sbuf->obj.end) } else if(stackmap->n > 0) {
flushobjbuf(sbuf); } else if(stackmap->n > 0) {
// Locals bitmap information, scan just the pointers in // Locals bitmap information, scan just the pointers in
// locals. // locals.
if(pcdata < 0 || pcdata >= stackmap->n) { if(pcdata < 0 || pcdata >= stackmap->n) {
// don't know where we are // don't know where we are
runtime·printf("pcdata is %d and %d stack map entries\n", pcdata, stackmap->n); runtime·printf("pcdata is %d and %d stack map entries for %s (targetpc=%p)\n",
runtime·throw("addframeroots: bad symbol table"); pcdata, stackmap->n, runtime·funcname(f), targetpc);
runtime·throw("scanframe: bad symbol table");
} }
bv = stackmapdata(stackmap, pcdata); bv = stackmapdata(stackmap, pcdata);
size = (bv->n * PtrSize) / BitsPerPointer; size = (bv->n * PtrSize) / BitsPerPointer;
scanbitvector(frame->varp - size, bv, afterprologue, sbuf); scanbitvector(frame->varp - size, bv, afterprologue, wbufp);
} }
} }
...@@ -1540,22 +1505,13 @@ scanframe(Stkframe *frame, void *arg) ...@@ -1540,22 +1505,13 @@ scanframe(Stkframe *frame, void *arg)
stackmap = runtime·funcdata(f, FUNCDATA_ArgsPointerMaps); stackmap = runtime·funcdata(f, FUNCDATA_ArgsPointerMaps);
if(stackmap != nil) { if(stackmap != nil) {
bv = stackmapdata(stackmap, pcdata); bv = stackmapdata(stackmap, pcdata);
scanbitvector(frame->argp, bv, false, sbuf); scanbitvector(frame->argp, bv, false, wbufp);
} else { } else
*sbuf->obj.pos++ = (Obj){frame->argp, frame->arglen, 0}; enqueue1(wbufp, (Obj){frame->argp, frame->arglen, 0});
if(sbuf->obj.pos == sbuf->obj.end)
flushobjbuf(sbuf);
}
}
static void
scanstack(G* gp, void *scanbuf)
{
runtime·gentraceback(~(uintptr)0, ~(uintptr)0, 0, gp, 0, nil, 0x7fffffff, scanframe, scanbuf, false);
} }
static void static void
addstackroots(G *gp) addstackroots(G *gp, Workbuf **wbufp)
{ {
M *mp; M *mp;
int32 n; int32 n;
...@@ -1564,6 +1520,20 @@ addstackroots(G *gp) ...@@ -1564,6 +1520,20 @@ addstackroots(G *gp)
void *base; void *base;
uintptr size; uintptr size;
switch(gp->status){
default:
runtime·printf("unexpected G.status %d (goroutine %p %D)\n", gp->status, gp, gp->goid);
runtime·throw("mark - bad status");
case Gdead:
return;
case Grunning:
runtime·throw("mark - world not stopped");
case Grunnable:
case Gsyscall:
case Gwaiting:
break;
}
if(gp == g) if(gp == g)
runtime·throw("can't scan our own stack"); runtime·throw("can't scan our own stack");
if((mp = gp->m) != nil && mp->helpgc) if((mp = gp->m) != nil && mp->helpgc)
...@@ -1585,13 +1555,13 @@ addstackroots(G *gp) ...@@ -1585,13 +1555,13 @@ addstackroots(G *gp)
guard = gp->stackguard; guard = gp->stackguard;
// For function about to start, context argument is a root too. // For function about to start, context argument is a root too.
if(gp->sched.ctxt != 0 && runtime·mlookup(gp->sched.ctxt, &base, &size, nil)) if(gp->sched.ctxt != 0 && runtime·mlookup(gp->sched.ctxt, &base, &size, nil))
addroot((Obj){base, size, 0}); enqueue1(wbufp, (Obj){base, size, 0});
} }
if(ScanStackByFrames) { if(ScanStackByFrames) {
USED(sp); USED(sp);
USED(stk); USED(stk);
USED(guard); USED(guard);
addroot((Obj){(byte*)gp, PtrSize, (uintptr)gptrProg}); runtime·gentraceback(~(uintptr)0, ~(uintptr)0, 0, gp, 0, nil, 0x7fffffff, scanframe, wbufp, false);
} else { } else {
n = 0; n = 0;
while(stk) { while(stk) {
...@@ -1599,7 +1569,7 @@ addstackroots(G *gp) ...@@ -1599,7 +1569,7 @@ addstackroots(G *gp)
runtime·printf("scanstack inconsistent: g%D#%d sp=%p not in [%p,%p]\n", gp->goid, n, sp, guard-StackGuard, stk); runtime·printf("scanstack inconsistent: g%D#%d sp=%p not in [%p,%p]\n", gp->goid, n, sp, guard-StackGuard, stk);
runtime·throw("scanstack"); runtime·throw("scanstack");
} }
addroot((Obj){(byte*)sp, (uintptr)stk - sp, (uintptr)defaultProg | PRECISE | LOOP}); enqueue1(wbufp, (Obj){(byte*)sp, (uintptr)stk - sp, (uintptr)defaultProg | PRECISE | LOOP});
sp = stk->gobuf.sp; sp = stk->gobuf.sp;
guard = stk->stackguard; guard = stk->stackguard;
stk = (Stktop*)stk->stackbase; stk = (Stktop*)stk->stackbase;
...@@ -1608,116 +1578,6 @@ addstackroots(G *gp) ...@@ -1608,116 +1578,6 @@ addstackroots(G *gp)
} }
} }
static void
addroots(void)
{
G *gp;
FinBlock *fb;
MSpan *s, **allspans;
uint32 spanidx;
Special *sp;
SpecialFinalizer *spf;
work.nroot = 0;
// data & bss
// TODO(atom): load balancing
addroot((Obj){data, edata - data, (uintptr)gcdata});
addroot((Obj){bss, ebss - bss, (uintptr)gcbss});
// MSpan.types
allspans = runtime·mheap.allspans;
for(spanidx=0; spanidx<runtime·mheap.nspan; spanidx++) {
s = allspans[spanidx];
if(s->state == MSpanInUse) {
// The garbage collector ignores type pointers stored in MSpan.types:
// - Compiler-generated types are stored outside of heap.
// - The reflect package has runtime-generated types cached in its data structures.
// The garbage collector relies on finding the references via that cache.
switch(s->types.compression) {
case MTypes_Empty:
case MTypes_Single:
break;
case MTypes_Words:
case MTypes_Bytes:
markonly((byte*)s->types.data);
break;
}
}
}
// MSpan.specials
allspans = runtime·mheap.allspans;
for(spanidx=0; spanidx<runtime·mheap.nspan; spanidx++) {
s = allspans[spanidx];
if(s->state != MSpanInUse)
continue;
for(sp = s->specials; sp != nil; sp = sp->next) {
switch(sp->kind) {
case KindSpecialFinalizer:
spf = (SpecialFinalizer*)sp;
// don't mark finalized object, but scan it so we
// retain everything it points to.
addroot((Obj){(void*)((s->start << PageShift) + spf->offset), s->elemsize, 0});
addroot((Obj){(void*)&spf->fn, PtrSize, 0});
addroot((Obj){(void*)&spf->fint, PtrSize, 0});
addroot((Obj){(void*)&spf->ot, PtrSize, 0});
break;
case KindSpecialProfile:
break;
}
}
}
// stacks
for(gp=runtime·allg; gp!=nil; gp=gp->alllink) {
switch(gp->status){
default:
runtime·printf("unexpected G.status %d\n", gp->status);
runtime·throw("mark - bad status");
case Gdead:
break;
case Grunning:
runtime·throw("mark - world not stopped");
case Grunnable:
case Gsyscall:
case Gwaiting:
addstackroots(gp);
break;
}
// remember when we've first observed the G blocked
// needed only to output in traceback
if((gp->status == Gwaiting || gp->status == Gsyscall) && gp->waitsince == 0)
gp->waitsince = work.tstart;
}
for(fb=allfin; fb; fb=fb->alllink)
addroot((Obj){(byte*)fb->fin, fb->cnt*sizeof(fb->fin[0]), 0});
}
static void
addfreelists(void)
{
int32 i;
P *p, **pp;
MCache *c;
MLink *m;
// Mark objects in the MCache of each P so we don't collect them.
for(pp=runtime·allp; p=*pp; pp++) {
c = p->mcache;
if(c==nil)
continue;
for(i = 0; i < NumSizeClasses; i++) {
for(m = c->list[i].list; m != nil; m = m->next) {
markonly(m);
}
}
}
// Note: the sweeper will mark objects in each span's freelist.
}
void void
runtime·queuefinalizer(byte *p, FuncVal *fn, uintptr nret, Type *fint, PtrType *ot) runtime·queuefinalizer(byte *p, FuncVal *fn, uintptr nret, Type *fint, PtrType *ot)
{ {
...@@ -1844,11 +1704,6 @@ sweepspan(ParFor *desc, uint32 idx) ...@@ -1844,11 +1704,6 @@ sweepspan(ParFor *desc, uint32 idx)
continue; continue;
if((bits & bitMarked) != 0) { if((bits & bitMarked) != 0) {
if(DebugMark) {
if(!(bits & bitSpecial))
runtime·printf("found spurious mark on %p\n", p);
*bitp &= ~(bitSpecial<<shift);
}
*bitp &= ~(bitMarked<<shift); *bitp &= ~(bitMarked<<shift);
continue; continue;
} }
...@@ -1981,13 +1836,7 @@ runtime·gchelper(void) ...@@ -1981,13 +1836,7 @@ runtime·gchelper(void)
runtime·parfordo(work.markfor); runtime·parfordo(work.markfor);
// help other threads scan secondary blocks // help other threads scan secondary blocks
scanblock(nil, nil, 0, true); scanblock(nil, true);
if(DebugMark) {
// wait while the main thread executes mark(debug_scanblock)
while(runtime·atomicload(&work.debugmarkdone) == 0)
runtime·usleep(10);
}
runtime·parfordo(work.sweepfor); runtime·parfordo(work.sweepfor);
bufferList[m->helpgc].busy = 0; bufferList[m->helpgc].busy = 0;
...@@ -2023,13 +1872,26 @@ cachestats(void) ...@@ -2023,13 +1872,26 @@ cachestats(void)
} }
} }
static void
flushallmcaches(void)
{
P *p, **pp;
MCache *c;
// Flush MCache's to MCentral.
for(pp=runtime·allp; p=*pp; pp++) {
c = p->mcache;
if(c==nil)
continue;
runtime·MCache_ReleaseAll(c);
}
}
static void static void
updatememstats(GCStats *stats) updatememstats(GCStats *stats)
{ {
M *mp; M *mp;
MSpan *s; MSpan *s;
MCache *c;
P *p, **pp;
int32 i; int32 i;
uint64 stacks_inuse, smallfree; uint64 stacks_inuse, smallfree;
uint64 *src, *dst; uint64 *src, *dst;
...@@ -2070,12 +1932,7 @@ updatememstats(GCStats *stats) ...@@ -2070,12 +1932,7 @@ updatememstats(GCStats *stats)
} }
// Flush MCache's to MCentral. // Flush MCache's to MCentral.
for(pp=runtime·allp; p=*pp; pp++) { flushallmcaches();
c = p->mcache;
if(c==nil)
continue;
runtime·MCache_ReleaseAll(c);
}
// Aggregate local stats. // Aggregate local stats.
cachestats(); cachestats();
...@@ -2278,11 +2135,8 @@ gc(struct gc_args *args) ...@@ -2278,11 +2135,8 @@ gc(struct gc_args *args)
work.nwait = 0; work.nwait = 0;
work.ndone = 0; work.ndone = 0;
work.debugmarkdone = 0;
work.nproc = runtime·gcprocs(); work.nproc = runtime·gcprocs();
addroots(); runtime·parforsetup(work.markfor, work.nproc, RootCount + runtime·allglen, nil, false, markroot);
addfreelists();
runtime·parforsetup(work.markfor, work.nproc, work.nroot, nil, false, markroot);
runtime·parforsetup(work.sweepfor, work.nproc, runtime·mheap.nspan, nil, true, sweepspan); runtime·parforsetup(work.sweepfor, work.nproc, runtime·mheap.nspan, nil, true, sweepspan);
if(work.nproc > 1) { if(work.nproc > 1) {
runtime·noteclear(&work.alldone); runtime·noteclear(&work.alldone);
...@@ -2293,13 +2147,8 @@ gc(struct gc_args *args) ...@@ -2293,13 +2147,8 @@ gc(struct gc_args *args)
gchelperstart(); gchelperstart();
runtime·parfordo(work.markfor); runtime·parfordo(work.markfor);
scanblock(nil, nil, 0, true); scanblock(nil, true);
if(DebugMark) {
for(i=0; i<work.nroot; i++)
debug_scanblock(work.roots[i].p, work.roots[i].n);
runtime·atomicstore(&work.debugmarkdone, 1);
}
t2 = runtime·nanotime(); t2 = runtime·nanotime();
runtime·parfordo(work.sweepfor); runtime·parfordo(work.sweepfor);
......
...@@ -436,7 +436,7 @@ saveg(uintptr pc, uintptr sp, G *gp, TRecord *r) ...@@ -436,7 +436,7 @@ saveg(uintptr pc, uintptr sp, G *gp, TRecord *r)
} }
func GoroutineProfile(b Slice) (n int, ok bool) { func GoroutineProfile(b Slice) (n int, ok bool) {
uintptr pc, sp; uintptr pc, sp, i;
TRecord *r; TRecord *r;
G *gp; G *gp;
...@@ -455,7 +455,8 @@ func GoroutineProfile(b Slice) (n int, ok bool) { ...@@ -455,7 +455,8 @@ func GoroutineProfile(b Slice) (n int, ok bool) {
ok = true; ok = true;
r = (TRecord*)b.array; r = (TRecord*)b.array;
saveg(pc, sp, g, r++); saveg(pc, sp, g, r++);
for(gp = runtime·allg; gp != nil; gp = gp->alllink) { for(i = 0; i < runtime·allglen; i++) {
gp = runtime·allg[i];
if(gp == g || gp->status == Gdead) if(gp == g || gp->status == Gdead)
continue; continue;
saveg(~(uintptr)0, ~(uintptr)0, gp, r++); saveg(~(uintptr)0, ~(uintptr)0, gp, r++);
......
...@@ -68,7 +68,6 @@ uint32 runtime·needextram; ...@@ -68,7 +68,6 @@ uint32 runtime·needextram;
bool runtime·iscgo; bool runtime·iscgo;
M runtime·m0; M runtime·m0;
G runtime·g0; // idle goroutine for m0 G runtime·g0; // idle goroutine for m0
G* runtime·allg;
G* runtime·lastg; G* runtime·lastg;
M* runtime·allm; M* runtime·allm;
M* runtime·extram; M* runtime·extram;
...@@ -76,6 +75,11 @@ int8* runtime·goos; ...@@ -76,6 +75,11 @@ int8* runtime·goos;
int32 runtime·ncpu; int32 runtime·ncpu;
static int32 newprocs; static int32 newprocs;
static Lock allglock; // the following vars are protected by this lock or by stoptheworld
G** runtime·allg;
uintptr runtime·allglen;
static uintptr allgcap;
void runtime·mstart(void); void runtime·mstart(void);
static void runqput(P*, G*); static void runqput(P*, G*);
static G* runqget(P*); static G* runqget(P*);
...@@ -115,6 +119,7 @@ static bool preemptall(void); ...@@ -115,6 +119,7 @@ static bool preemptall(void);
static bool preemptone(P*); static bool preemptone(P*);
static bool exitsyscallfast(void); static bool exitsyscallfast(void);
static bool haveexperiment(int8*); static bool haveexperiment(int8*);
static void allgadd(G*);
// The bootstrap sequence is: // The bootstrap sequence is:
// //
...@@ -278,6 +283,7 @@ runtime·tracebackothers(G *me) ...@@ -278,6 +283,7 @@ runtime·tracebackothers(G *me)
{ {
G *gp; G *gp;
int32 traceback; int32 traceback;
uintptr i;
traceback = runtime·gotraceback(nil); traceback = runtime·gotraceback(nil);
...@@ -288,7 +294,9 @@ runtime·tracebackothers(G *me) ...@@ -288,7 +294,9 @@ runtime·tracebackothers(G *me)
runtime·traceback(~(uintptr)0, ~(uintptr)0, 0, gp); runtime·traceback(~(uintptr)0, ~(uintptr)0, 0, gp);
} }
for(gp = runtime·allg; gp != nil; gp = gp->alllink) { runtime·lock(&allglock);
for(i = 0; i < runtime·allglen; i++) {
gp = runtime·allg[i];
if(gp == me || gp == m->curg || gp->status == Gdead) if(gp == me || gp == m->curg || gp->status == Gdead)
continue; continue;
if(gp->issystem && traceback < 2) if(gp->issystem && traceback < 2)
...@@ -301,6 +309,7 @@ runtime·tracebackothers(G *me) ...@@ -301,6 +309,7 @@ runtime·tracebackothers(G *me)
} else } else
runtime·traceback(~(uintptr)0, ~(uintptr)0, 0, gp); runtime·traceback(~(uintptr)0, ~(uintptr)0, 0, gp);
} }
runtime·unlock(&allglock);
} }
static void static void
...@@ -792,13 +801,7 @@ runtime·newextram(void) ...@@ -792,13 +801,7 @@ runtime·newextram(void)
if(raceenabled) if(raceenabled)
gp->racectx = runtime·racegostart(runtime·newextram); gp->racectx = runtime·racegostart(runtime·newextram);
// put on allg for garbage collector // put on allg for garbage collector
runtime·lock(&runtime·sched); allgadd(gp);
if(runtime·lastg == nil)
runtime·allg = gp;
else
runtime·lastg->alllink = gp;
runtime·lastg = gp;
runtime·unlock(&runtime·sched);
// Add m to the extra list. // Add m to the extra list.
mnext = lockextra(true); mnext = lockextra(true);
...@@ -1766,13 +1769,7 @@ runtime·newproc1(FuncVal *fn, byte *argp, int32 narg, int32 nret, void *callerp ...@@ -1766,13 +1769,7 @@ runtime·newproc1(FuncVal *fn, byte *argp, int32 narg, int32 nret, void *callerp
runtime·throw("invalid stack in newg"); runtime·throw("invalid stack in newg");
} else { } else {
newg = runtime·malg(StackMin); newg = runtime·malg(StackMin);
runtime·lock(&runtime·sched); allgadd(newg);
if(runtime·lastg == nil)
runtime·allg = newg;
else
runtime·lastg->alllink = newg;
runtime·lastg = newg;
runtime·unlock(&runtime·sched);
} }
sp = (byte*)newg->stackbase; sp = (byte*)newg->stackbase;
...@@ -1805,6 +1802,31 @@ runtime·newproc1(FuncVal *fn, byte *argp, int32 narg, int32 nret, void *callerp ...@@ -1805,6 +1802,31 @@ runtime·newproc1(FuncVal *fn, byte *argp, int32 narg, int32 nret, void *callerp
return newg; return newg;
} }
static void
allgadd(G *gp)
{
G **new;
uintptr cap;
runtime·lock(&allglock);
if(runtime·allglen >= allgcap) {
cap = 4096/sizeof(new[0]);
if(cap < 2*allgcap)
cap = 2*allgcap;
new = runtime·malloc(cap*sizeof(new[0]));
if(new == nil)
runtime·throw("runtime: cannot allocate memory");
if(runtime·allg != nil) {
runtime·memmove(new, runtime·allg, runtime·allglen*sizeof(new[0]));
runtime·free(runtime·allg);
}
runtime·allg = new;
allgcap = cap;
}
runtime·allg[runtime·allglen++] = gp;
runtime·unlock(&allglock);
}
// Put on gfree list. // Put on gfree list.
// If local list is too long, transfer a batch to the global list. // If local list is too long, transfer a batch to the global list.
static void static void
...@@ -1994,19 +2016,21 @@ runtime·gcount(void) ...@@ -1994,19 +2016,21 @@ runtime·gcount(void)
{ {
G *gp; G *gp;
int32 n, s; int32 n, s;
uintptr i;
n = 0; n = 0;
runtime·lock(&runtime·sched); runtime·lock(&allglock);
// TODO(dvyukov): runtime.NumGoroutine() is O(N). // TODO(dvyukov): runtime.NumGoroutine() is O(N).
// We do not want to increment/decrement centralized counter in newproc/goexit, // We do not want to increment/decrement centralized counter in newproc/goexit,
// just to make runtime.NumGoroutine() faster. // just to make runtime.NumGoroutine() faster.
// Compromise solution is to introduce per-P counters of active goroutines. // Compromise solution is to introduce per-P counters of active goroutines.
for(gp = runtime·allg; gp; gp = gp->alllink) { for(i = 0; i < runtime·allglen; i++) {
gp = runtime·allg[i];
s = gp->status; s = gp->status;
if(s == Grunnable || s == Grunning || s == Gsyscall || s == Gwaiting) if(s == Grunnable || s == Grunning || s == Gsyscall || s == Gwaiting)
n++; n++;
} }
runtime·unlock(&runtime·sched); runtime·unlock(&allglock);
return n; return n;
} }
...@@ -2345,6 +2369,7 @@ checkdead(void) ...@@ -2345,6 +2369,7 @@ checkdead(void)
{ {
G *gp; G *gp;
int32 run, grunning, s; int32 run, grunning, s;
uintptr i;
// -1 for sysmon // -1 for sysmon
run = runtime·sched.mcount - runtime·sched.nmidle - runtime·sched.nmidlelocked - 1; run = runtime·sched.mcount - runtime·sched.nmidle - runtime·sched.nmidlelocked - 1;
...@@ -2356,17 +2381,21 @@ checkdead(void) ...@@ -2356,17 +2381,21 @@ checkdead(void)
runtime·throw("checkdead: inconsistent counts"); runtime·throw("checkdead: inconsistent counts");
} }
grunning = 0; grunning = 0;
for(gp = runtime·allg; gp; gp = gp->alllink) { runtime·lock(&allglock);
for(i = 0; i < runtime·allglen; i++) {
gp = runtime·allg[i];
if(gp->isbackground) if(gp->isbackground)
continue; continue;
s = gp->status; s = gp->status;
if(s == Gwaiting) if(s == Gwaiting)
grunning++; grunning++;
else if(s == Grunnable || s == Grunning || s == Gsyscall) { else if(s == Grunnable || s == Grunning || s == Gsyscall) {
runtime·unlock(&allglock);
runtime·printf("checkdead: find g %D in status %d\n", gp->goid, s); runtime·printf("checkdead: find g %D in status %d\n", gp->goid, s);
runtime·throw("checkdead: runnable g"); runtime·throw("checkdead: runnable g");
} }
} }
runtime·unlock(&allglock);
if(grunning == 0) // possible if main goroutine calls runtime·Goexit() if(grunning == 0) // possible if main goroutine calls runtime·Goexit()
runtime·exit(0); runtime·exit(0);
m->throwing = -1; // do not dump full stacks m->throwing = -1; // do not dump full stacks
...@@ -2553,6 +2582,7 @@ runtime·schedtrace(bool detailed) ...@@ -2553,6 +2582,7 @@ runtime·schedtrace(bool detailed)
int64 now; int64 now;
int64 id1, id2, id3; int64 id1, id2, id3;
int32 i, t, h; int32 i, t, h;
uintptr gi;
int8 *fmt; int8 *fmt;
M *mp, *lockedm; M *mp, *lockedm;
G *gp, *lockedg; G *gp, *lockedg;
...@@ -2620,13 +2650,16 @@ runtime·schedtrace(bool detailed) ...@@ -2620,13 +2650,16 @@ runtime·schedtrace(bool detailed)
mp->mallocing, mp->throwing, mp->gcing, mp->locks, mp->dying, mp->helpgc, mp->mallocing, mp->throwing, mp->gcing, mp->locks, mp->dying, mp->helpgc,
mp->spinning, id3); mp->spinning, id3);
} }
for(gp = runtime·allg; gp; gp = gp->alllink) { runtime·lock(&allglock);
for(gi = 0; gi < runtime·allglen; gi++) {
gp = runtime·allg[gi];
mp = gp->m; mp = gp->m;
lockedm = gp->lockedm; lockedm = gp->lockedm;
runtime·printf(" G%D: status=%d(%s) m=%d lockedm=%d\n", runtime·printf(" G%D: status=%d(%s) m=%d lockedm=%d\n",
gp->goid, gp->status, gp->waitreason, mp ? mp->id : -1, gp->goid, gp->status, gp->waitreason, mp ? mp->id : -1,
lockedm ? lockedm->id : -1); lockedm ? lockedm->id : -1);
} }
runtime·unlock(&allglock);
runtime·unlock(&runtime·sched); runtime·unlock(&runtime·sched);
} }
......
...@@ -263,7 +263,6 @@ struct G ...@@ -263,7 +263,6 @@ struct G
uintptr stackguard; // same as stackguard0, but not set to StackPreempt uintptr stackguard; // same as stackguard0, but not set to StackPreempt
uintptr stack0; uintptr stack0;
uintptr stacksize; uintptr stacksize;
G* alllink; // on allg
void* param; // passed parameter on wakeup void* param; // passed parameter on wakeup
int16 status; int16 status;
int64 goid; int64 goid;
...@@ -719,7 +718,8 @@ bool runtime·topofstack(Func*); ...@@ -719,7 +718,8 @@ bool runtime·topofstack(Func*);
*/ */
extern String runtime·emptystring; extern String runtime·emptystring;
extern uintptr runtime·zerobase; extern uintptr runtime·zerobase;
extern G* runtime·allg; extern G** runtime·allg;
extern uintptr runtime·allglen;
extern G* runtime·lastg; extern G* runtime·lastg;
extern M* runtime·allm; extern M* runtime·allm;
extern P** runtime·allp; extern P** runtime·allp;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment