Commit d839a809 authored by Dmitriy Vyukov's avatar Dmitriy Vyukov

runtime: make GC stats per-M

This is factored out part of:
https://golang.org/cl/5279048/
(Parallel GC)

benchmark                             old ns/op    new ns/op    delta
garbage.BenchmarkParser              3999106750   3975026500   -0.60%
garbage.BenchmarkParser-2            3720553750   3719196500   -0.04%
garbage.BenchmarkParser-4            3502857000   3474980500   -0.80%
garbage.BenchmarkParser-8            3375448000   3341310500   -1.01%
garbage.BenchmarkParserLastPause      329401000    324097000   -1.61%
garbage.BenchmarkParserLastPause-2    208953000    214222000   +2.52%
garbage.BenchmarkParserLastPause-4    110933000    111656000   +0.65%
garbage.BenchmarkParserLastPause-8     71969000     78230000   +8.70%
garbage.BenchmarkParserPause          230808842    197237400  -14.55%
garbage.BenchmarkParserPause-2        123674365    125197595   +1.23%
garbage.BenchmarkParserPause-4         80518525     85710333   +6.45%
garbage.BenchmarkParserPause-8         58310243     56940512   -2.35%
garbage.BenchmarkTree2                 31471700     31289400   -0.58%
garbage.BenchmarkTree2-2               21536800     21086300   -2.09%
garbage.BenchmarkTree2-4               11074700     10880000   -1.76%
garbage.BenchmarkTree2-8                7568600      7351400   -2.87%
garbage.BenchmarkTree2LastPause       314664000    312840000   -0.58%
garbage.BenchmarkTree2LastPause-2     215319000    210815000   -2.09%
garbage.BenchmarkTree2LastPause-4     110698000    108751000   -1.76%
garbage.BenchmarkTree2LastPause-8      75635000     73463000   -2.87%
garbage.BenchmarkTree2Pause           174280857    173147571   -0.65%
garbage.BenchmarkTree2Pause-2         131332714    129665761   -1.27%
garbage.BenchmarkTree2Pause-4          93803095     93422904   -0.41%
garbage.BenchmarkTree2Pause-8          86242333     85146761   -1.27%

R=rsc
CC=golang-dev
https://golang.org/cl/5987045
parent 77e1227a
...@@ -67,9 +67,6 @@ enum { ...@@ -67,9 +67,6 @@ enum {
// //
uint32 runtime·worldsema = 1; uint32 runtime·worldsema = 1;
// TODO: Make these per-M.
static uint64 nhandoff;
static int32 gctrace; static int32 gctrace;
typedef struct Workbuf Workbuf; typedef struct Workbuf Workbuf;
...@@ -529,12 +526,16 @@ getfull(Workbuf *b) ...@@ -529,12 +526,16 @@ getfull(Workbuf *b)
} }
if(work.nwait == work.nproc) if(work.nwait == work.nproc)
return nil; return nil;
if(i < 10) if(i < 10) {
m->gcstats.nprocyield++;
runtime·procyield(20); runtime·procyield(20);
else if(i < 20) } else if(i < 20) {
m->gcstats.nosyield++;
runtime·osyield(); runtime·osyield();
else } else {
m->gcstats.nsleep++;
runtime·usleep(100); runtime·usleep(100);
}
} }
} }
...@@ -550,7 +551,8 @@ handoff(Workbuf *b) ...@@ -550,7 +551,8 @@ handoff(Workbuf *b)
b->nobj -= n; b->nobj -= n;
b1->nobj = n; b1->nobj = n;
runtime·memmove(b1->obj, b->obj+b->nobj, n*sizeof b1->obj[0]); runtime·memmove(b1->obj, b->obj+b->nobj, n*sizeof b1->obj[0]);
nhandoff += n; m->gcstats.nhandoff++;
m->gcstats.nhandoffcnt += n;
// Put b on full list - let first half of b get stolen. // Put b on full list - let first half of b get stolen.
runtime·lock(&work.fmu); runtime·lock(&work.fmu);
...@@ -852,20 +854,30 @@ stealcache(void) ...@@ -852,20 +854,30 @@ stealcache(void)
} }
static void static void
cachestats(void) cachestats(GCStats *stats)
{ {
M *m; M *m;
MCache *c; MCache *c;
int32 i; int32 i;
uint64 stacks_inuse; uint64 stacks_inuse;
uint64 stacks_sys; uint64 stacks_sys;
uint64 *src, *dst;
if(stats)
runtime·memclr((byte*)stats, sizeof(*stats));
stacks_inuse = 0; stacks_inuse = 0;
stacks_sys = 0; stacks_sys = 0;
for(m=runtime·allm; m; m=m->alllink) { for(m=runtime·allm; m; m=m->alllink) {
runtime·purgecachedstats(m); runtime·purgecachedstats(m);
stacks_inuse += m->stackalloc->inuse; stacks_inuse += m->stackalloc->inuse;
stacks_sys += m->stackalloc->sys; stacks_sys += m->stackalloc->sys;
if(stats) {
src = (uint64*)&m->gcstats;
dst = (uint64*)stats;
for(i=0; i<sizeof(*stats)/sizeof(uint64); i++)
dst[i] += src[i];
runtime·memclr((byte*)&m->gcstats, sizeof(m->gcstats));
}
c = m->mcache; c = m->mcache;
for(i=0; i<nelem(c->local_by_size); i++) { for(i=0; i<nelem(c->local_by_size); i++) {
mstats.by_size[i].nmalloc += c->local_by_size[i].nmalloc; mstats.by_size[i].nmalloc += c->local_by_size[i].nmalloc;
...@@ -885,6 +897,7 @@ runtime·gc(int32 force) ...@@ -885,6 +897,7 @@ runtime·gc(int32 force)
uint64 heap0, heap1, obj0, obj1; uint64 heap0, heap1, obj0, obj1;
byte *p; byte *p;
bool extra; bool extra;
GCStats stats;
// The gc is turned off (via enablegc) until // The gc is turned off (via enablegc) until
// the bootstrap has completed. // the bootstrap has completed.
...@@ -920,12 +933,11 @@ runtime·gc(int32 force) ...@@ -920,12 +933,11 @@ runtime·gc(int32 force)
} }
t0 = runtime·nanotime(); t0 = runtime·nanotime();
nhandoff = 0;
m->gcing = 1; m->gcing = 1;
runtime·stoptheworld(); runtime·stoptheworld();
cachestats(); cachestats(nil);
heap0 = mstats.heap_alloc; heap0 = mstats.heap_alloc;
obj0 = mstats.nmalloc - mstats.nfree; obj0 = mstats.nmalloc - mstats.nfree;
...@@ -955,13 +967,13 @@ runtime·gc(int32 force) ...@@ -955,13 +967,13 @@ runtime·gc(int32 force)
t2 = runtime·nanotime(); t2 = runtime·nanotime();
stealcache(); stealcache();
cachestats(); cachestats(&stats);
mstats.next_gc = mstats.heap_alloc+mstats.heap_alloc*gcpercent/100; mstats.next_gc = mstats.heap_alloc+mstats.heap_alloc*gcpercent/100;
m->gcing = 0; m->gcing = 0;
m->locks++; // disable gc during the mallocs in newproc
if(finq != nil) { if(finq != nil) {
m->locks++; // disable gc during the mallocs in newproc
// kick off or wake up goroutine to run queued finalizers // kick off or wake up goroutine to run queued finalizers
if(fing == nil) if(fing == nil)
fing = runtime·newproc1((byte*)runfinq, nil, 0, 0, runtime·gc); fing = runtime·newproc1((byte*)runfinq, nil, 0, 0, runtime·gc);
...@@ -969,10 +981,9 @@ runtime·gc(int32 force) ...@@ -969,10 +981,9 @@ runtime·gc(int32 force)
fingwait = 0; fingwait = 0;
runtime·ready(fing); runtime·ready(fing);
} }
m->locks--;
} }
m->locks--;
cachestats();
heap1 = mstats.heap_alloc; heap1 = mstats.heap_alloc;
obj1 = mstats.nmalloc - mstats.nfree; obj1 = mstats.nmalloc - mstats.nfree;
...@@ -985,11 +996,13 @@ runtime·gc(int32 force) ...@@ -985,11 +996,13 @@ runtime·gc(int32 force)
runtime·printf("pause %D\n", t3-t0); runtime·printf("pause %D\n", t3-t0);
if(gctrace) { if(gctrace) {
runtime·printf("gc%d(%d): %D+%D+%D ms %D -> %D MB %D -> %D (%D-%D) objects %D handoff\n", runtime·printf("gc%d(%d): %D+%D+%D ms, %D -> %D MB %D -> %D (%D-%D) objects,"
" %D(%D) handoff, %D/%D/%D yields\n",
mstats.numgc, work.nproc, (t1-t0)/1000000, (t2-t1)/1000000, (t3-t2)/1000000, mstats.numgc, work.nproc, (t1-t0)/1000000, (t2-t1)/1000000, (t3-t2)/1000000,
heap0>>20, heap1>>20, obj0, obj1, heap0>>20, heap1>>20, obj0, obj1,
mstats.nmalloc, mstats.nfree, mstats.nmalloc, mstats.nfree,
nhandoff); stats.nhandoff, stats.nhandoffcnt,
stats.nprocyield, stats.nosyield, stats.nsleep);
} }
runtime·MProf_GC(); runtime·MProf_GC();
...@@ -1022,7 +1035,7 @@ runtime·ReadMemStats(MStats *stats) ...@@ -1022,7 +1035,7 @@ runtime·ReadMemStats(MStats *stats)
runtime·semacquire(&runtime·worldsema); runtime·semacquire(&runtime·worldsema);
m->gcing = 1; m->gcing = 1;
runtime·stoptheworld(); runtime·stoptheworld();
cachestats(); cachestats(nil);
*stats = mstats; *stats = mstats;
m->gcing = 0; m->gcing = 0;
runtime·semrelease(&runtime·worldsema); runtime·semrelease(&runtime·worldsema);
......
...@@ -71,6 +71,7 @@ typedef struct Complex128 Complex128; ...@@ -71,6 +71,7 @@ typedef struct Complex128 Complex128;
typedef struct WinCall WinCall; typedef struct WinCall WinCall;
typedef struct Timers Timers; typedef struct Timers Timers;
typedef struct Timer Timer; typedef struct Timer Timer;
typedef struct GCStats GCStats;
/* /*
* per-cpu declaration. * per-cpu declaration.
...@@ -166,6 +167,16 @@ struct Gobuf ...@@ -166,6 +167,16 @@ struct Gobuf
byte* pc; byte* pc;
G* g; G* g;
}; };
struct GCStats
{
// the struct must consist of only uint64's,
// because it is casted to uint64[].
uint64 nhandoff;
uint64 nhandoffcnt;
uint64 nprocyield;
uint64 nosyield;
uint64 nsleep;
};
struct G struct G
{ {
byte* stackguard; // cannot move - also known to linker, libmach, runtime/cgo byte* stackguard; // cannot move - also known to linker, libmach, runtime/cgo
...@@ -243,6 +254,7 @@ struct M ...@@ -243,6 +254,7 @@ struct M
uintptr waitsema; // semaphore for parking on locks uintptr waitsema; // semaphore for parking on locks
uint32 waitsemacount; uint32 waitsemacount;
uint32 waitsemalock; uint32 waitsemalock;
GCStats gcstats;
#ifdef GOOS_windows #ifdef GOOS_windows
void* thread; // thread handle void* thread; // thread handle
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment