Commit e4b02bfd authored by Russ Cox

runtime: goroutine profile, stack dumps

R=golang-dev, r, r
CC=golang-dev
https://golang.org/cl/5687076
parent 133c6bf7
@@ -30,7 +30,7 @@ func NumCPU() int
 func NumCgoCall() int64
 
 // NumGoroutine returns the number of goroutines that currently exist.
-func NumGoroutine() int32
+func NumGoroutine() int
 
 // MemProfileRate controls the fraction of memory allocations
 // that are recorded and reported in the memory profile.
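For orientation, a minimal caller of the updated NumGoroutine signature; the example program is illustrative and not part of this change:

	package main

	import (
		"fmt"
		"runtime"
	)

	func main() {
		done := make(chan struct{})
		for i := 0; i < 3; i++ {
			go func() { <-done }()
		}
		// NumGoroutine now returns a plain int, so it composes with
		// loops, len, and fmt verbs without an int32 conversion.
		fmt.Println("goroutines:", runtime.NumGoroutine())
		close(done)
	}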
@@ -89,15 +89,14 @@ func (r *MemProfileRecord) Stack() []uintptr {
 // of calling MemProfile directly.
 func MemProfile(p []MemProfileRecord, inuseZero bool) (n int, ok bool)
 
-// A ThreadProfileRecord describes the execution stack that
-// caused a new thread to be created.
-type ThreadProfileRecord struct {
+// A StackRecord describes a single execution stack.
+type StackRecord struct {
 	Stack0 [32]uintptr // stack trace for this record; ends at first 0 entry
 }
 
 // Stack returns the stack trace associated with the record,
 // a prefix of r.Stack0.
-func (r *ThreadProfileRecord) Stack() []uintptr {
+func (r *StackRecord) Stack() []uintptr {
 	for i, v := range r.Stack0 {
 		if v == 0 {
 			return r.Stack0[0:i]
@@ -106,13 +105,21 @@ func (r *ThreadProfileRecord) Stack() []uintptr {
 	return r.Stack0[0:]
 }
 
-// ThreadProfile returns n, the number of records in the current thread profile.
-// If len(p) >= n, ThreadProfile copies the profile into p and returns n, true.
-// If len(p) < n, ThreadProfile does not change p and returns n, false.
+// ThreadCreateProfile returns n, the number of records in the thread creation profile.
+// If len(p) >= n, ThreadCreateProfile copies the profile into p and returns n, true.
+// If len(p) < n, ThreadCreateProfile does not change p and returns n, false.
 //
 // Most clients should use the runtime/pprof package instead
-// of calling ThreadProfile directly.
-func ThreadProfile(p []ThreadProfileRecord) (n int, ok bool)
+// of calling ThreadCreateProfile directly.
+func ThreadCreateProfile(p []StackRecord) (n int, ok bool)
+
+// GoroutineProfile returns n, the number of records in the active goroutine stack profile.
+// If len(p) >= n, GoroutineProfile copies the profile into p and returns n, true.
+// If len(p) < n, GoroutineProfile does not change p and returns n, false.
+//
+// Most clients should use the runtime/pprof package instead
+// of calling GoroutineProfile directly.
+func GoroutineProfile(p []StackRecord) (n int, ok bool)
+
 // CPUProfile returns the next chunk of binary CPU profiling stack trace data,
 // blocking until data is available. If profiling is turned off and all the profile
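A rough sketch of how the two new profile entry points above might be called; the retry pattern and slice headroom are assumptions about typical use, not part of this change:

	package main

	import (
		"fmt"
		"runtime"
	)

	func main() {
		// Call with a nil slice to learn the record count, then retry with
		// enough room; a real caller would loop until ok is true.
		n, _ := runtime.GoroutineProfile(nil)
		p := make([]runtime.StackRecord, n+10) // headroom for goroutines started meanwhile
		if n, ok := runtime.GoroutineProfile(p); ok {
			for _, r := range p[:n] {
				// Stack() is the prefix of Stack0 ending before the first 0 entry.
				fmt.Println(r.Stack())
			}
		}

		// ThreadCreateProfile follows the same convention, also using []StackRecord.
		if tn, ok := runtime.ThreadCreateProfile(make([]runtime.StackRecord, 64)); ok {
			fmt.Println("thread-creation records:", tn)
		}
	}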
@@ -130,3 +137,9 @@ func CPUProfile() []byte
 // the testing package's -test.cpuprofile flag instead of calling
 // SetCPUProfileRate directly.
 func SetCPUProfileRate(hz int)
+
+// Stack formats a stack trace of the calling goroutine into buf
+// and returns the number of bytes written to buf.
+// If all is true, Stack formats stack traces of all other goroutines
+// into buf after the trace for the current goroutine.
+func Stack(buf []byte, all bool) int
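And a minimal sketch of the new Stack function; the fixed 64 KB buffer is an arbitrary choice for illustration:

	package main

	import (
		"os"
		"runtime"
	)

	func main() {
		buf := make([]byte, 1<<16)
		// With all=true, Stack briefly stops the world and appends the traces
		// of all other goroutines after the calling goroutine's own trace.
		n := runtime.Stack(buf, true)
		os.Stdout.Write(buf[:n])
	}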
@@ -411,17 +411,9 @@ enum
 void runtime·MProf_Malloc(void*, uintptr);
 void runtime·MProf_Free(void*, uintptr);
+void runtime·MProf_GC(void);
 int32 runtime·helpgc(bool*);
 void runtime·gchelper(void);
 
-// Malloc profiling settings.
-// Must match definition in extern.go.
-enum {
-	MProf_None = 0,
-	MProf_Sample = 1,
-	MProf_All = 2,
-};
-extern int32 runtime·malloc_profile;
-
 bool runtime·getfinalizer(void *p, bool del, void (**fn)(void*), int32 *nret);
 void runtime·walkfintab(void (*fn)(void*));
@@ -52,6 +52,21 @@ enum {
 #define bitMask (bitBlockBoundary | bitAllocated | bitMarked | bitSpecial)
 
+// Holding worldsema grants an M the right to try to stop the world.
+// The procedure is:
+//
+//	runtime·semacquire(&runtime·worldsema);
+//	m->gcing = 1;
+//	runtime·stoptheworld();
+//
+//	... do stuff ...
+//
+//	m->gcing = 0;
+//	runtime·semrelease(&runtime·worldsema);
+//	runtime·starttheworld();
+//
+uint32 runtime·worldsema = 1;
+
 // TODO: Make these per-M.
 static uint64 nhandoff;
@@ -816,11 +831,6 @@ runtime·gchelper(void)
 	runtime·notewakeup(&work.alldone);
 }
 
-// Semaphore, not Lock, so that the goroutine
-// reschedules when there is contention rather
-// than spinning.
-static uint32 gcsema = 1;
-
 // Initialized from $GOGC. GOGC=off means no gc.
 //
 // Next gc is after we've allocated an extra amount of
@@ -903,9 +913,9 @@ runtime·gc(int32 force)
 	if(gcpercent < 0)
 		return;
 
-	runtime·semacquire(&gcsema);
+	runtime·semacquire(&runtime·worldsema);
 	if(!force && mstats.heap_alloc < mstats.next_gc) {
-		runtime·semrelease(&gcsema);
+		runtime·semrelease(&runtime·worldsema);
 		return;
 	}
@@ -981,8 +991,9 @@ runtime·gc(int32 force)
 			mstats.nmalloc, mstats.nfree,
 			nhandoff);
 	}
 
-	runtime·semrelease(&gcsema);
+	runtime·MProf_GC();
+	runtime·semrelease(&runtime·worldsema);
 
 	// If we could have used another helper proc, start one now,
 	// in the hope that it will be available next time.
@@ -1004,17 +1015,17 @@ runtime·gc(int32 force)
 void
 runtime·ReadMemStats(MStats *stats)
 {
-	// Have to acquire gcsema to stop the world,
+	// Have to acquire worldsema to stop the world,
 	// because stoptheworld can only be used by
 	// one goroutine at a time, and there might be
 	// a pending garbage collection already calling it.
-	runtime·semacquire(&gcsema);
+	runtime·semacquire(&runtime·worldsema);
 	m->gcing = 1;
 	runtime·stoptheworld();
 	cachestats();
 	*stats = mstats;
 	m->gcing = 0;
-	runtime·semrelease(&gcsema);
+	runtime·semrelease(&runtime·worldsema);
 	runtime·starttheworld(false);
 }
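The Go-level ReadMemStats API is unchanged by the switch from gcsema to worldsema; for orientation, a minimal caller (the field choice is illustrative):

	package main

	import (
		"fmt"
		"runtime"
	)

	func main() {
		var ms runtime.MemStats
		// Internally this acquires worldsema and briefly stops the world,
		// so it serializes with any in-progress garbage collection.
		runtime.ReadMemStats(&ms)
		fmt.Println("heap in use:", ms.HeapAlloc, "bytes")
	}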
......
@@ -26,6 +26,10 @@ struct Bucket
 	uintptr frees;
 	uintptr alloc_bytes;
 	uintptr free_bytes;
+	uintptr recent_allocs;  // since last gc
+	uintptr recent_frees;
+	uintptr recent_alloc_bytes;
+	uintptr recent_free_bytes;
 	uintptr hash;
 	uintptr nstk;
 	uintptr stk[1];
@@ -39,7 +43,7 @@ static uintptr bucketmem;
 // Return the bucket for stk[0:nstk], allocating new bucket if needed.
 static Bucket*
-stkbucket(uintptr *stk, int32 nstk)
+stkbucket(uintptr *stk, int32 nstk, bool alloc)
 {
 	int32 i;
 	uintptr h;
@@ -66,6 +70,9 @@ stkbucket(uintptr *stk, int32 nstk)
 	   runtime·mcmp((byte*)b->stk, (byte*)stk, nstk*sizeof stk[0]) == 0)
 			return b;
 
+	if(!alloc)
+		return nil;
+
 	b = runtime·mallocgc(sizeof *b + nstk*sizeof stk[0], FlagNoProfiling, 0, 1);
 	bucketmem += sizeof *b + nstk*sizeof stk[0];
 	runtime·memmove(b->stk, stk, nstk*sizeof stk[0]);
+// Record that a gc just happened: all the 'recent' statistics are now real.
+void
+runtime·MProf_GC(void)
+{
+	Bucket *b;
+
+	runtime·lock(&proflock);
+	for(b=buckets; b; b=b->allnext) {
+		b->allocs += b->recent_allocs;
+		b->frees += b->recent_frees;
+		b->alloc_bytes += b->recent_alloc_bytes;
+		b->free_bytes += b->recent_free_bytes;
+		b->recent_allocs = 0;
+		b->recent_frees = 0;
+		b->recent_alloc_bytes = 0;
+		b->recent_free_bytes = 0;
+	}
+	runtime·unlock(&proflock);
+}
+
 // Map from pointer to Bucket* that allocated it.
 // Three levels:
 //	Linked-list hash table for top N-20 bits.
@@ -198,9 +225,9 @@ runtime·MProf_Malloc(void *p, uintptr size)
 	m->nomemprof++;
 	nstk = runtime·callers(1, stk, 32);
 	runtime·lock(&proflock);
-	b = stkbucket(stk, nstk);
-	b->allocs++;
-	b->alloc_bytes += size;
+	b = stkbucket(stk, nstk, true);
+	b->recent_allocs++;
+	b->recent_alloc_bytes += size;
 	setaddrbucket((uintptr)p, b);
 	runtime·unlock(&proflock);
 	m->nomemprof--;
@@ -219,8 +246,8 @@ runtime·MProf_Free(void *p, uintptr size)
 	runtime·lock(&proflock);
 	b = getaddrbucket((uintptr)p);
 	if(b != nil) {
-		b->frees++;
-		b->free_bytes += size;
+		b->recent_frees++;
+		b->recent_free_bytes += size;
 	}
 	runtime·unlock(&proflock);
 	m->nomemprof--;
@@ -274,13 +301,13 @@ func MemProfile(p Slice, include_inuse_zero bool) (n int32, ok bool) {
 	runtime·unlock(&proflock);
 }
 
-// Must match ThreadProfileRecord in debug.go.
+// Must match StackRecord in debug.go.
 typedef struct TRecord TRecord;
 struct TRecord {
 	uintptr stk[32];
 };
 
-func ThreadProfile(p Slice) (n int32, ok bool) {
+func ThreadCreateProfile(p Slice) (n int32, ok bool) {
 	TRecord *r;
 	M *first, *m;
@@ -298,3 +325,80 @@ func ThreadProfile(p Slice) (n int32, ok bool) {
 		}
 	}
 }
+
+func Stack(b Slice, all bool) (n int32) {
+	byte *pc, *sp;
+
+	sp = runtime·getcallersp(&b);
+	pc = runtime·getcallerpc(&b);
+
+	if(all) {
+		runtime·semacquire(&runtime·worldsema);
+		m->gcing = 1;
+		runtime·stoptheworld();
+	}
+
+	if(b.len == 0)
+		n = 0;
+	else{
+		g->writebuf = (byte*)b.array;
+		g->writenbuf = b.len;
+		runtime·goroutineheader(g);
+		runtime·traceback(pc, sp, 0, g);
+		if(all)
+			runtime·tracebackothers(g);
+		n = b.len - g->writenbuf;
+		g->writebuf = nil;
+		g->writenbuf = 0;
+	}
+
+	if(all) {
+		m->gcing = 0;
+		runtime·semrelease(&runtime·worldsema);
+		runtime·starttheworld(false);
+	}
+}
+
+static void
+saveg(byte *pc, byte *sp, G *g, TRecord *r)
+{
+	int32 n;
+
+	n = runtime·gentraceback(pc, sp, 0, g, 0, r->stk, nelem(r->stk));
+	if(n < nelem(r->stk))
+		r->stk[n] = 0;
+}
+
+func GoroutineProfile(b Slice) (n int32, ok bool) {
+	byte *pc, *sp;
+	TRecord *r;
+	G *gp;
+
+	sp = runtime·getcallersp(&b);
+	pc = runtime·getcallerpc(&b);
+
+	ok = false;
+	n = runtime·gcount();
+	if(n <= b.len) {
+		runtime·semacquire(&runtime·worldsema);
+		m->gcing = 1;
+		runtime·stoptheworld();
+
+		n = runtime·gcount();
+		if(n <= b.len) {
+			ok = true;
+			r = (TRecord*)b.array;
+			saveg(pc, sp, g, r++);
+			for(gp = runtime·allg; gp != nil; gp = gp->alllink) {
+				if(gp == g || gp->status == Gdead)
+					continue;
+				saveg(gp->sched.pc, gp->sched.sp, gp, r++);
+			}
+		}
+
+		m->gcing = 0;
+		runtime·semrelease(&runtime·worldsema);
+		runtime·starttheworld(false);
+	}
+}
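One user-visible consequence of runtime·MProf_GC above is that allocations and frees accumulate in the recent_* counters and only enter the totals reported by MemProfile at the next collection. A hedged sketch of reading the profile from Go (the forced GC, the sizing loop, and the large allocation are illustrative assumptions):

	package main

	import (
		"fmt"
		"runtime"
	)

	var sink []byte // keep the allocation reachable so it counts as in-use

	func main() {
		sink = make([]byte, 1<<20) // large enough that the default sampling rate likely records it
		runtime.GC()               // folds the recent_* counters into the reported totals

		var p []runtime.MemProfileRecord
		n, ok := runtime.MemProfile(nil, true)
		for !ok {
			p = make([]runtime.MemProfileRecord, n+50)
			n, ok = runtime.MemProfile(p, true)
		}
		var inuse int64
		for _, r := range p[:n] {
			inuse += r.InUseBytes()
		}
		fmt.Println("profiled bytes in use:", inuse)
	}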
@@ -9,6 +9,26 @@
 static void vprintf(int8*, byte*);
 
+// write to goroutine-local buffer if diverting output,
+// or else standard error.
+static void
+gwrite(void *v, int32 n)
+{
+	if(g == nil || g->writebuf == nil) {
+		runtime·write(2, v, n);
+		return;
+	}
+
+	if(g->writenbuf == 0)
+		return;
+
+	if(n > g->writenbuf)
+		n = g->writenbuf;
+	runtime·memmove(g->writebuf, v, n);
+	g->writebuf += n;
+	g->writenbuf -= n;
+}
+
 void
 runtime·dump(byte *p, int32 n)
 {
@@ -29,7 +49,7 @@ runtime·dump(byte *p, int32 n)
 void
 runtime·prints(int8 *s)
 {
-	runtime·write(2, s, runtime·findnull((byte*)s));
+	gwrite(s, runtime·findnull((byte*)s));
 }
 
 #pragma textflag 7
@@ -59,7 +79,7 @@ vprintf(int8 *s, byte *base)
 		if(*p != '%')
 			continue;
 		if(p > lp)
-			runtime·write(2, lp, p-lp);
+			gwrite(lp, p-lp);
 		p++;
 		narg = 0;
 		switch(*p) {
@@ -150,7 +170,7 @@ vprintf(int8 *s, byte *base)
 		lp = p+1;
 	}
 	if(p > lp)
-		runtime·write(2, lp, p-lp);
+		gwrite(lp, p-lp);
 
 	//runtime·unlock(&debuglock);
 }
@@ -176,10 +196,10 @@ void
 runtime·printbool(bool v)
 {
 	if(v) {
-		runtime·write(2, (byte*)"true", 4);
+		gwrite((byte*)"true", 4);
 		return;
 	}
-	runtime·write(2, (byte*)"false", 5);
+	gwrite((byte*)"false", 5);
 }
 
 void
@@ -190,15 +210,15 @@ runtime·printfloat(float64 v)
 	float64 h;
 
 	if(runtime·isNaN(v)) {
-		runtime·write(2, "NaN", 3);
+		gwrite("NaN", 3);
 		return;
 	}
 	if(runtime·isInf(v, 1)) {
-		runtime·write(2, "+Inf", 4);
+		gwrite("+Inf", 4);
 		return;
 	}
 	if(runtime·isInf(v, -1)) {
-		runtime·write(2, "-Inf", 4);
+		gwrite("-Inf", 4);
 		return;
 	}
@@ -257,16 +277,16 @@ runtime·printfloat(float64 v)
 	buf[n+4] = (e/100) + '0';
 	buf[n+5] = (e/10)%10 + '0';
 	buf[n+6] = (e%10) + '0';
-	runtime·write(2, buf, n+7);
+	gwrite(buf, n+7);
 }
 
 void
 runtime·printcomplex(Complex128 v)
 {
-	runtime·write(2, "(", 1);
+	gwrite("(", 1);
 	runtime·printfloat(v.real);
 	runtime·printfloat(v.imag);
-	runtime·write(2, "i)", 2);
+	gwrite("i)", 2);
 }
 
 void
@@ -281,14 +301,14 @@ runtime·printuint(uint64 v)
 			break;
 		v = v/10;
 	}
-	runtime·write(2, buf+i, nelem(buf)-i);
+	gwrite(buf+i, nelem(buf)-i);
 }
 
 void
 runtime·printint(int64 v)
 {
 	if(v < 0) {
-		runtime·write(2, "-", 1);
+		gwrite("-", 1);
 		v = -v;
 	}
 	runtime·printuint(v);
@@ -308,7 +328,7 @@ runtime·printhex(uint64 v)
 	buf[--i] = '0';
 	buf[--i] = 'x';
 	buf[--i] = '0';
-	runtime·write(2, buf+i, nelem(buf)-i);
+	gwrite(buf+i, nelem(buf)-i);
 }
 
 void
@@ -323,23 +343,23 @@ runtime·printstring(String v)
 	extern uint32 runtime·maxstring;
 
 	if(v.len > runtime·maxstring) {
-		runtime·write(2, "[invalid string]", 16);
+		gwrite("[invalid string]", 16);
 		return;
 	}
 	if(v.len > 0)
-		runtime·write(2, v.str, v.len);
+		gwrite(v.str, v.len);
 }
 
 void
 runtime·printsp(void)
 {
-	runtime·write(2, " ", 1);
+	gwrite(" ", 1);
 }
 
 void
 runtime·printnl(void)
 {
-	runtime·write(2, "\n", 1);
+	gwrite("\n", 1);
 }
 
 void
......
@@ -1664,6 +1664,12 @@ runtime·NumGoroutine(int32 ret)
 	FLUSH(&ret);
 }
 
+int32
+runtime·gcount(void)
+{
+	return runtime·sched.gcount;
+}
+
 int32
 runtime·mcount(void)
 {
......
@@ -191,6 +191,8 @@ struct G
 	M* lockedm;
 	M* idlem;
 	int32 sig;
+	int32 writenbuf;
+	byte* writebuf;
 	uintptr sigcode0;
 	uintptr sigcode1;
 	uintptr sigpc;
@@ -545,6 +547,7 @@ bool runtime·addfinalizer(void*, void(*fn)(void*), int32);
 void runtime·runpanic(Panic*);
 void* runtime·getcallersp(void*);
 int32 runtime·mcount(void);
+int32 runtime·gcount(void);
 void runtime·mcall(void(*)(G*));
 uint32 runtime·fastrand1(void);
@@ -585,10 +588,9 @@ int64 runtime·cputicks(void);
 #pragma varargck type "s" uint8*
 #pragma varargck type "S" String
 
-// TODO(rsc): Remove. These are only temporary,
-// for the mark and sweep collector.
 void runtime·stoptheworld(void);
 void runtime·starttheworld(bool);
+extern uint32 runtime·worldsema;
 
 /*
  * mutual exclusion locks. in the uncontended case,
......