Commit dba623b1 authored by Russ Cox's avatar Russ Cox

runtime: reduce frame size for runtime.cgocallback_gofunc

Tying preemption to stack splits means that we have to able to
complete the call to exitsyscall (inside cgocallbackg at least for now)
without any stack split checks, meaning that the whole sequence
has to work within 128 bytes of stack, unless we increase the size
of the red zone. This CL frees up 24 bytes along that critical path
on amd64. (The 32-bit systems have plenty of space because all
their words are smaller.)

R=dvyukov
CC=golang-dev
https://golang.org/cl/11676043
parent e97c8706
......@@ -524,7 +524,7 @@ TEXT runtime·cgocallback(SB),7,$12-12
// cgocallback_gofunc(FuncVal*, void *frame, uintptr framesize)
// See cgocall.c for more details.
TEXT runtime·cgocallback_gofunc(SB),7,$12-12
TEXT runtime·cgocallback_gofunc(SB),7,$8-12
// If m is nil, Go did not create the current thread.
// Call needm to obtain one for temporary use.
// In this case, we're running on the thread stack, so there's
......@@ -532,13 +532,12 @@ TEXT runtime·cgocallback_gofunc(SB),7,$12-12
// the linker analysis by using an indirect call through AX.
get_tls(CX)
#ifdef GOOS_windows
MOVL $0, BP
CMPL CX, $0
JNE 3(PC)
PUSHL $0
JMP needm
JNE 2(PC)
#endif
MOVL m(CX), BP
PUSHL BP
MOVL BP, 4(SP)
CMPL BP, $0
JNE havem
needm:
......@@ -552,55 +551,42 @@ havem:
// Save current m->g0->sched.sp on stack and then set it to SP.
// Save current sp in m->g0->sched.sp in preparation for
// switch back to m->curg stack.
// NOTE: unwindm knows that the saved g->sched.sp is at 0(SP).
MOVL m_g0(BP), SI
PUSHL (g_sched+gobuf_sp)(SI)
MOVL (g_sched+gobuf_sp)(SI), AX
MOVL AX, 0(SP)
MOVL SP, (g_sched+gobuf_sp)(SI)
// Switch to m->curg stack and call runtime.cgocallbackg
// with the three arguments. Because we are taking over
// the execution of m->curg but *not* resuming what had
// been running, we need to save that information (m->curg->sched)
// so that we can restore it when we're done.
// Switch to m->curg stack and call runtime.cgocallbackg.
// Because we are taking over the execution of m->curg
// but *not* resuming what had been running, we need to
// save that information (m->curg->sched) so we can restore it.
// We can restore m->curg->sched.sp easily, because calling
// runtime.cgocallbackg leaves SP unchanged upon return.
// To save m->curg->sched.pc, we push it onto the stack.
// This has the added benefit that it looks to the traceback
// routine like cgocallbackg is going to return to that
// PC (because we defined cgocallbackg to have
// a frame size of 12, the same amount that we use below),
// PC (because the frame we allocate below has the same
// size as cgocallback_gofunc's frame declared above)
// so that the traceback will seamlessly trace back into
// the earlier calls.
MOVL fn+0(FP), AX
MOVL frame+4(FP), BX
MOVL framesize+8(FP), DX
//
// In the new goroutine, 0(SP) and 4(SP) are unused except
// on Windows, where they are the SEH block.
MOVL m_curg(BP), SI
MOVL SI, g(CX)
MOVL (g_sched+gobuf_sp)(SI), DI // prepare stack as DI
// Push gobuf.pc
MOVL (g_sched+gobuf_sp)(SI), DI // prepare stack as DI
MOVL (g_sched+gobuf_pc)(SI), BP
SUBL $4, DI
MOVL BP, 0(DI)
// Push arguments to cgocallbackg.
// Frame size here must match the frame size above plus the pushes
// to trick traceback routines into doing the right thing.
SUBL $20, DI
MOVL AX, 0(DI)
MOVL BX, 4(DI)
MOVL DX, 8(DI)
// Switch stack and make the call.
MOVL DI, SP
MOVL BP, -4(DI)
LEAL -(4+8)(DI), SP
CALL runtime·cgocallbackg(SB)
// Restore g->sched (== m->curg->sched) from saved values.
get_tls(CX)
MOVL g(CX), SI
MOVL 20(SP), BP
MOVL 8(SP), BP
MOVL BP, (g_sched+gobuf_pc)(SI)
LEAL (20+4)(SP), DI
LEAL (8+4)(SP), DI
MOVL DI, (g_sched+gobuf_sp)(SI)
// Switch back to m->g0's stack and restore m->g0->sched.sp.
......@@ -610,11 +596,12 @@ havem:
MOVL m_g0(BP), SI
MOVL SI, g(CX)
MOVL (g_sched+gobuf_sp)(SI), SP
POPL (g_sched+gobuf_sp)(SI)
MOVL 0(SP), AX
MOVL AX, (g_sched+gobuf_sp)(SI)
// If the m on entry was nil, we called needm above to borrow an m
// for the duration of the call. Since the call is over, return it with dropm.
POPL BP
MOVL 8(SP), BP
CMPL BP, $0
JNE 3(PC)
MOVL $runtime·dropm(SB), AX
......
......@@ -563,7 +563,7 @@ TEXT runtime·cgocallback(SB),7,$24-24
// cgocallback_gofunc(FuncVal*, void *frame, uintptr framesize)
// See cgocall.c for more details.
TEXT runtime·cgocallback_gofunc(SB),7,$24-24
TEXT runtime·cgocallback_gofunc(SB),7,$16-24
// If m is nil, Go did not create the current thread.
// Call needm to obtain one for temporary use.
// In this case, we're running on the thread stack, so there's
......@@ -571,13 +571,12 @@ TEXT runtime·cgocallback_gofunc(SB),7,$24-24
// the linker analysis by using an indirect call through AX.
get_tls(CX)
#ifdef GOOS_windows
MOVL $0, BP
CMPQ CX, $0
JNE 3(PC)
PUSHQ $0
JMP needm
JNE 2(PC)
#endif
MOVQ m(CX), BP
PUSHQ BP
MOVQ BP, 8(SP)
CMPQ BP, $0
JNE havem
needm:
......@@ -591,55 +590,42 @@ havem:
// Save current m->g0->sched.sp on stack and then set it to SP.
// Save current sp in m->g0->sched.sp in preparation for
// switch back to m->curg stack.
// NOTE: unwindm knows that the saved g->sched.sp is at 0(SP).
MOVQ m_g0(BP), SI
PUSHQ (g_sched+gobuf_sp)(SI)
MOVQ (g_sched+gobuf_sp)(SI), AX
MOVQ AX, 0(SP)
MOVQ SP, (g_sched+gobuf_sp)(SI)
// Switch to m->curg stack and call runtime.cgocallbackg
// with the three arguments. Because we are taking over
// the execution of m->curg but *not* resuming what had
// been running, we need to save that information (m->curg->sched)
// so that we can restore it when we're done.
// Switch to m->curg stack and call runtime.cgocallbackg.
// Because we are taking over the execution of m->curg
// but *not* resuming what had been running, we need to
// save that information (m->curg->sched) so we can restore it.
// We can restore m->curg->sched.sp easily, because calling
// runtime.cgocallbackg leaves SP unchanged upon return.
// To save m->curg->sched.pc, we push it onto the stack.
// This has the added benefit that it looks to the traceback
// routine like cgocallbackg is going to return to that
// PC (because we defined cgocallbackg to have
// a frame size of 24, the same amount that we use below),
// PC (because the frame we allocate below has the same
// size as cgocallback_gofunc's frame declared above)
// so that the traceback will seamlessly trace back into
// the earlier calls.
MOVQ fn+0(FP), AX
MOVQ frame+8(FP), BX
MOVQ framesize+16(FP), DX
//
// In the new goroutine, 0(SP) and 8(SP) are unused except
// on Windows, where they are the SEH block.
MOVQ m_curg(BP), SI
MOVQ SI, g(CX)
MOVQ (g_sched+gobuf_sp)(SI), DI // prepare stack as DI
// Push gobuf.pc
MOVQ (g_sched+gobuf_pc)(SI), BP
SUBQ $8, DI
MOVQ BP, 0(DI)
// Push arguments to cgocallbackg.
// Frame size here must match the frame size above plus the pushes
// to trick traceback routines into doing the right thing.
SUBQ $40, DI
MOVQ AX, 0(DI)
MOVQ BX, 8(DI)
MOVQ DX, 16(DI)
// Switch stack and make the call.
MOVQ DI, SP
MOVQ BP, -8(DI)
LEAQ -(8+16)(DI), SP
CALL runtime·cgocallbackg(SB)
// Restore g->sched (== m->curg->sched) from saved values.
get_tls(CX)
MOVQ g(CX), SI
MOVQ 40(SP), BP
MOVQ 16(SP), BP
MOVQ BP, (g_sched+gobuf_pc)(SI)
LEAQ (40+8)(SP), DI
LEAQ (16+8)(SP), DI
MOVQ DI, (g_sched+gobuf_sp)(SI)
// Switch back to m->g0's stack and restore m->g0->sched.sp.
......@@ -649,11 +635,12 @@ havem:
MOVQ m_g0(BP), SI
MOVQ SI, g(CX)
MOVQ (g_sched+gobuf_sp)(SI), SP
POPQ (g_sched+gobuf_sp)(SI)
MOVQ 0(SP), AX
MOVQ AX, (g_sched+gobuf_sp)(SI)
// If the m on entry was nil, we called needm above to borrow an m
// for the duration of the call. Since the call is over, return it with dropm.
POPQ BP
MOVQ 8(SP), BP
CMPQ BP, $0
JNE 3(PC)
MOVQ $runtime·dropm(SB), AX
......
......@@ -331,7 +331,7 @@ TEXT runtime·cgocallback(SB),7,$12-12
// cgocallback_gofunc(void (*fn)(void*), void *frame, uintptr framesize)
// See cgocall.c for more details.
TEXT runtime·cgocallback_gofunc(SB),7,$12-12
TEXT runtime·cgocallback_gofunc(SB),7,$8-12
// Load m and g from thread-local storage.
MOVW _cgo_load_gm(SB), R0
CMP $0, R0
......@@ -342,7 +342,7 @@ TEXT runtime·cgocallback_gofunc(SB),7,$12-12
// In this case, we're running on the thread stack, so there's
// lots of space, but the linker doesn't know. Hide the call from
// the linker analysis by using an indirect call.
MOVW m, savedm-12(SP)
MOVW m, savedm-4(SP)
CMP $0, m
B.NE havem
MOVW $runtime·needm(SB), R0
......@@ -353,51 +353,41 @@ havem:
// Save current m->g0->sched.sp on stack and then set it to SP.
// Save current sp in m->g0->sched.sp in preparation for
// switch back to m->curg stack.
// NOTE: unwindm knows that the saved g->sched.sp is at 4(R13) aka savedsp-8(SP).
MOVW m_g0(m), R3
MOVW (g_sched+gobuf_sp)(R3), R4
MOVW.W R4, -4(R13)
MOVW R4, savedsp-8(SP)
MOVW R13, (g_sched+gobuf_sp)(R3)
// Switch to m->curg stack and call runtime.cgocallbackg
// with the three arguments. Because we are taking over
// the execution of m->curg but *not* resuming what had
// been running, we need to save that information (m->curg->sched)
// so that we can restore it when we're done.
// Switch to m->curg stack and call runtime.cgocallbackg.
// Because we are taking over the execution of m->curg
// but *not* resuming what had been running, we need to
// save that information (m->curg->sched) so we can restore it.
// We can restore m->curg->sched.sp easily, because calling
// runtime.cgocallbackg leaves SP unchanged upon return.
// To save m->curg->sched.pc, we push it onto the stack.
// This has the added benefit that it looks to the traceback
// routine like cgocallbackg is going to return to that
// PC (because we defined cgocallbackg to have
// a frame size of 12, the same amount that we use below),
// PC (because the frame we allocate below has the same
// size as cgocallback_gofunc's frame declared above)
// so that the traceback will seamlessly trace back into
// the earlier calls.
//
// In the new goroutine, -8(SP) and -4(SP) are unused.
MOVW fn+4(FP), R0
MOVW frame+8(FP), R1
MOVW framesize+12(FP), R2
MOVW m_curg(m), g
MOVW (g_sched+gobuf_sp)(g), R4 // prepare stack as R4
// Push gobuf.pc
// Frame size here must match the frame size above plus the push
// to trick traceback routines into doing the right thing.
MOVW (g_sched+gobuf_pc)(g), R5
MOVW.W R5, -20(R4)
// Push arguments to cgocallbackg.
MOVW R0, 4(R4)
MOVW R1, 8(R4)
MOVW R2, 12(R4)
// Switch stack and make the call.
MOVW R4, R13
MOVW R5, -12(R4)
MOVW $-12(R4), R13
BL runtime·cgocallbackg(SB)
// Restore g->sched (== m->curg->sched) from saved values.
MOVW 0(R13), R5
MOVW R5, (g_sched+gobuf_pc)(g)
ADD $(16+4), R13, R4
MOVW $12(R13), R4
MOVW R4, (g_sched+gobuf_sp)(g)
// Switch back to m->g0's stack and restore m->g0->sched.sp.
......@@ -405,14 +395,12 @@ havem:
// so we do not have to restore it.)
MOVW m_g0(m), g
MOVW (g_sched+gobuf_sp)(g), R13
// POP R6
MOVW 0(R13), R6
ADD $4, R13
MOVW R6, (g_sched+gobuf_sp)(g)
MOVW savedsp-8(SP), R4
MOVW R4, (g_sched+gobuf_sp)(g)
// If the m on entry was nil, we called needm above to borrow an m
// for the duration of the call. Since the call is over, return it with dropm.
MOVW savedm-12(SP), R6
MOVW savedm-4(SP), R6
CMP $0, R6
B.NE 3(PC)
MOVW $runtime·dropm(SB), R0
......
......@@ -228,13 +228,25 @@ runtime·cfree(void *p)
static FuncVal unwindmf = {unwindm};
typedef struct CallbackArgs CallbackArgs;
struct CallbackArgs
{
FuncVal *fn;
void *arg;
uintptr argsize;
};
#define CBARGS (CallbackArgs*)((byte*)m->g0->sched.sp+(3+(thechar=='5'))*sizeof(void*))
void
runtime·cgocallbackg(FuncVal *fn, void *arg, uintptr argsize)
runtime·cgocallbackg(void)
{
Defer d;
CallbackArgs *cb;
if(m->racecall) {
reflect·call(fn, arg, argsize);
cb = CBARGS;
reflect·call(cb->fn, cb->arg, cb->argsize);
return;
}
......@@ -261,7 +273,8 @@ runtime·cgocallbackg(FuncVal *fn, void *arg, uintptr argsize)
runtime·raceacquire(&cgosync);
// Invoke callback.
reflect·call(fn, arg, argsize);
cb = CBARGS;
reflect·call(cb->fn, cb->arg, cb->argsize);
if(raceenabled)
runtime·racereleasemerge(&cgosync);
......@@ -286,9 +299,11 @@ unwindm(void)
runtime·throw("runtime: unwindm not implemented");
case '8':
case '6':
case '5':
m->g0->sched.sp = *(uintptr*)m->g0->sched.sp;
break;
case '5':
m->g0->sched.sp = *(uintptr*)((byte*)m->g0->sched.sp + 4);
break;
}
}
......
......@@ -651,10 +651,10 @@ runtime·needm(byte x)
g->stackguard0 = g->stackguard;
// On windows/386, we need to put an SEH frame (two words)
// somewhere on the current stack. We are called
// from needm, and we know there is some available
// space one word into the argument frame. Use that.
m->seh = (SEH*)((uintptr*)&x + 1);
// somewhere on the current stack. We are called from cgocallback_gofunc
// and we know that it will leave two unused words below m->curg->sched.sp.
// Use those.
m->seh = (SEH*)((uintptr*)m->curg->sched.sp - 3);
// Initialize this thread to use the m.
runtime·asminit();
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment