Commit 5e72fae9 authored by Dmitriy Vyukov's avatar Dmitriy Vyukov

runtime: improve cpu profiles for GC/syscalls/cgo

Current "System->etext" is not very informative.
Add parent "GC" frame.
Replace un-unwindable syscall/cgo frames with Go stack that leads to the call.

LGTM=rsc
R=rsc, alex.brainman, ality
CC=golang-codereviews
https://golang.org/cl/61270043
parent 44c252bd
......@@ -291,7 +291,14 @@ runtime·stdcall(void *fn, int32 count, ...)
m->libcall.fn = fn;
m->libcall.n = count;
m->libcall.args = (uintptr*)&count + 1;
if(m->profilehz != 0) {
// leave pc/sp for cpu profiler
m->libcallpc = (uintptr)runtime·getcallerpc(&fn);
m->libcallsp = (uintptr)runtime·getcallersp(&fn);
m->libcallg = g;
}
runtime·asmcgocall(runtime·asmstdcall, &m->libcall);
m->libcallsp = 0;
return (void*)m->libcall.r1;
}
......
......@@ -2104,10 +2104,10 @@ static struct {
uintptr pcbuf[100];
} prof;
static void
System(void)
{
}
static void System(void) {}
static void ExternalCode(void) {}
static void GC(void) {}
extern byte etext[];
// Called if we receive a SIGPROF signal.
void
......@@ -2221,9 +2221,35 @@ runtime·sigprof(uint8 *pc, uint8 *sp, uint8 *lr, G *gp, M *mp)
if(traceback)
n = runtime·gentraceback((uintptr)pc, (uintptr)sp, (uintptr)lr, gp, 0, prof.pcbuf, nelem(prof.pcbuf), nil, nil, false);
if(!traceback || n <= 0) {
n = 2;
prof.pcbuf[0] = (uintptr)pc;
prof.pcbuf[1] = (uintptr)System + 1;
// Normal traceback is impossible or has failed.
// See if it falls into several common cases.
n = 0;
if(mp->ncgo > 0 && mp->curg != nil &&
mp->curg->syscallpc != 0 && mp->curg->syscallsp != 0) {
// Cgo, we can't unwind and symbolize arbitrary C code,
// so instead collect Go stack that leads to the cgo call.
// This is especially important on windows, since all syscalls are cgo calls.
n = runtime·gentraceback(mp->curg->syscallpc, mp->curg->syscallsp, 0, mp->curg, 0, prof.pcbuf, nelem(prof.pcbuf), nil, nil, false);
}
#ifdef GOOS_windows
if(n == 0 && mp->libcallg != nil && mp->libcallpc != 0 && mp->libcallsp != 0) {
// Libcall, i.e. runtime syscall on windows.
// Collect Go stack that leads to the call.
n = runtime·gentraceback(mp->libcallpc, mp->libcallsp, 0, mp->libcallg, 0, prof.pcbuf, nelem(prof.pcbuf), nil, nil, false);
}
#endif
if(n == 0) {
// If all of the above has failed, account it against abstract "System" or "GC".
n = 2;
// "ExternalCode" is better than "etext".
if((uintptr)pc > (uintptr)etext)
pc = (byte*)ExternalCode + PCQuantum;
prof.pcbuf[0] = (uintptr)pc;
if(mp->gcing || mp->helpgc)
prof.pcbuf[1] = (uintptr)GC + PCQuantum;
else
prof.pcbuf[1] = (uintptr)System + PCQuantum;
}
}
prof.fn(prof.pcbuf, n);
runtime·unlock(&prof);
......
......@@ -350,6 +350,9 @@ struct M
// these are here because they are too large to be on the stack
// of low-level NOSPLIT functions.
LibCall libcall;
uintptr libcallpc; // for cpu profiler
uintptr libcallsp;
G* libcallg;
#endif
#ifdef GOOS_solaris
int32* perrno; // pointer to TLS errno
......
......@@ -343,19 +343,34 @@ TEXT runtime·usleep1(SB),NOSPLIT,$0
RET
MOVL m(CX), BP
// leave pc/sp for cpu profiler
MOVL (SP), SI
MOVL SI, m_libcallpc(BP)
LEAL 4(SP), SI
MOVL SI, m_libcallsp(BP)
MOVL g(BP), SI
MOVL SI, m_libcallg(BP)
MOVL m_g0(BP), SI
CMPL g(CX), SI
JNE 3(PC)
JNE usleep1_switch
// executing on m->g0 already
CALL AX
RET
JMP usleep1_ret
usleep1_switch:
// Switch to m->g0 stack and back.
MOVL (g_sched+gobuf_sp)(SI), SI
MOVL SP, -4(SI)
LEAL -4(SI), SP
CALL AX
MOVL 0(SP), SP
usleep1_ret:
get_tls(CX)
MOVL m(CX), BP
MOVL $0, m_libcallsp(BP)
RET
// Runs on OS stack. duration (in 100ns units) is in BX.
......
......@@ -337,20 +337,33 @@ TEXT runtime·usleep1(SB),NOSPLIT,$0
CALL AX
RET
MOVQ m(R15), R14
MOVQ m_g0(R14), R14
MOVQ m(R15), R13
// leave pc/sp for cpu profiler
MOVQ (SP), R12
MOVQ R12, m_libcallpc(R13)
LEAQ 8(SP), R12
MOVQ R12, m_libcallsp(R13)
MOVQ g(R13), R12
MOVQ R12, m_libcallg(R13)
MOVQ m_g0(R13), R14
CMPQ g(R15), R14
JNE 3(PC)
JNE usleep1_switch
// executing on m->g0 already
CALL AX
RET
JMP usleep1_ret
usleep1_switch:
// Switch to m->g0 stack and back.
MOVQ (g_sched+gobuf_sp)(R14), R14
MOVQ SP, -8(R14)
LEAQ -8(R14), SP
CALL AX
MOVQ 0(SP), SP
usleep1_ret:
MOVQ $0, m_libcallsp(R13)
RET
// Runs on OS stack. duration (in 100ns units) is in BX.
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment