Commit e455002a authored by David Mosberger

ia64: Fix pdflush-triggered stack-overflow due to long thread-creation chains.

The pdflush kernel threads can lead to an unbounded chain of thread creations,
which can overflow the kernel stack because we did not use to reset the stack
on kernel-thread creation.  We do now.

Reported by Andreas Schwab, tracked down with help from Keith Owens.
parent ac2b25e8
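
The assembly hunk below adds the new kernel-thread entry stub to the ia64 entry code; the C hunks rework copy_thread() and kernel_thread() in arch/ia64/kernel/process.c. Before the change, copy_thread() copied everything between the freshly pushed switch_stack and the top of the parent's kernel stack, so a kernel thread created from inside another kernel thread started life with its parent's accumulated frames already on its own stack; pdflush, which spawns its worker threads from kernel context, could chain this deeply enough to overflow the stack. The toy program below is not kernel code and its byte counts are made-up, illustrative values; it only models that arithmetic against the fixed behaviour, in which every child starts from a reset stack.

#include <stdio.h>

/*
 * Toy model of the overflow mechanism described above -- NOT kernel code.
 * All sizes are made-up, illustrative values.  Old scheme: each child
 * inherits its parent's used kernel stack plus fresh pt_regs/switch_stack
 * frames.  New scheme: every kernel thread starts from a reset stack, so
 * usage stays constant no matter how long the creation chain gets.
 */
int main(void)
{
	const unsigned long pt_regs_sz = 400;		/* illustrative */
	const unsigned long switch_stack_sz = 560;	/* illustrative */
	const unsigned long caller_frames = 1024;	/* illustrative */
	unsigned long old_used = pt_regs_sz + switch_stack_sz;
	unsigned long new_used = pt_regs_sz + switch_stack_sz;

	for (int gen = 1; gen <= 8; gen++) {
		/* old scheme: the child's stack mirrors everything the parent had used */
		old_used += caller_frames + pt_regs_sz + switch_stack_sz;
		/* new scheme: the child always starts from a freshly reset stack */
		new_used = pt_regs_sz + switch_stack_sz;
		printf("generation %d: old scheme %5lu bytes used, new scheme %4lu bytes used\n",
		       gen, old_used, new_used);
	}
	return 0;
}
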
@@ -816,6 +816,19 @@ GLOBAL_ENTRY(ia64_delay_loop)
 	br.ret.sptk.many rp
 END(ia64_delay_loop)
+GLOBAL_ENTRY(ia64_invoke_kernel_thread_helper)
+	.prologue
+	.save rp, r0			// this is the end of the call-chain
+	.body
+	alloc r2 = ar.pfs, 0, 0, 2, 0
+	mov out0 = r9
+	mov out1 = r11;;
+	br.call.sptk.many rp = kernel_thread_helper;;
+	mov out0 = r8
+	br.call.sptk.many rp = sys_exit;;
+1:	br.sptk.few 1b			// not reached
+END(ia64_invoke_kernel_thread_helper)
 #ifdef CONFIG_IA64_BRL_EMU
 /*
@@ -259,10 +259,12 @@ ia64_load_extra (struct task_struct *task)
  *
  * We get here through the following call chain:
  *
- *	<clone syscall>
- *	sys_clone
- *	do_fork
- *	copy_thread
+ *	from user-level:	from kernel:
+ *
+ *	<clone syscall>		<some kernel call frames>
+ *	sys_clone		:
+ *	do_fork			do_fork
+ *	copy_thread		copy_thread
  *
  * This means that the stack layout is as follows:
  *
@@ -276,9 +278,6 @@ ia64_load_extra (struct task_struct *task)
  *	|                     | <-- sp (lowest addr)
  *	+---------------------+
  *
- * Note: if we get called through kernel_thread() then the memory above "(highest addr)"
- * is valid kernel stack memory that needs to be copied as well.
- *
  * Observe that we copy the unat values that are in pt_regs and switch_stack.  Spilling an
  * integer to address X causes bit N in ar.unat to be set to the NaT bit of the register,
  * with N=(X & 0x1ff)/8.  Thus, copying the unat value preserves the NaT bits ONLY if the
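
A quick worked example of the unat formula quoted in the comment above (N = (X & 0x1ff)/8). The visible text is cut off by the hunk boundary, but the formula itself already implies the constraint: copying the unat word only preserves NaT bits when source and destination spill addresses agree modulo 0x200. The addresses in the sketch are hypothetical.

#include <stdio.h>

/*
 * Worked example of the ar.unat mapping described in the comment above:
 * spilling a register to address X records its NaT bit in bit N of ar.unat,
 * where N = (X & 0x1ff) / 8.  The spill address below is hypothetical.
 */
static unsigned int unat_bit(unsigned long x)
{
	return (unsigned int) ((x & 0x1ff) / 8);
}

int main(void)
{
	const unsigned long src = 0xe0000000000100e8UL;	/* hypothetical, 8-byte aligned */

	printf("spill at src         -> ar.unat bit %u\n", unat_bit(src));
	printf("spill at src + 0x200 -> ar.unat bit %u (same bit: offset is a multiple of 0x200)\n",
	       unat_bit(src + 0x200));
	printf("spill at src + 0x10  -> ar.unat bit %u (different bit: a copied unat word would be wrong)\n",
	       unat_bit(src + 0x10));
	return 0;
}
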
@@ -291,9 +290,9 @@ copy_thread (int nr, unsigned long clone_flags,
 	      unsigned long user_stack_base, unsigned long user_stack_size,
 	      struct task_struct *p, struct pt_regs *regs)
 {
-	unsigned long rbs, child_rbs, rbs_size, stack_offset, stack_top, stack_used;
-	struct switch_stack *child_stack, *stack;
 	extern char ia64_ret_from_clone, ia32_ret_from_clone;
+	struct switch_stack *child_stack, *stack;
+	unsigned long rbs, child_rbs, rbs_size;
 	struct pt_regs *child_ptregs;
 	int retval = 0;
@@ -306,16 +305,13 @@ copy_thread (int nr, unsigned long clone_flags,
 	return 0;
 #endif
-	stack_top = (unsigned long) current + IA64_STK_OFFSET;
 	stack = ((struct switch_stack *) regs) - 1;
-	stack_used = stack_top - (unsigned long) stack;
-	stack_offset = IA64_STK_OFFSET - stack_used;
-	child_stack = (struct switch_stack *) ((unsigned long) p + stack_offset);
-	child_ptregs = (struct pt_regs *) (child_stack + 1);
+	child_ptregs = (struct pt_regs *) ((unsigned long) p + IA64_STK_OFFSET) - 1;
+	child_stack = (struct switch_stack *) child_ptregs - 1;
 	/* copy parent's switch_stack & pt_regs to child: */
-	memcpy(child_stack, stack, stack_used);
+	memcpy(child_stack, stack, sizeof(*child_ptregs) + sizeof(*child_stack));
 	rbs = (unsigned long) current + IA64_RBS_OFFSET;
 	child_rbs = (unsigned long) p + IA64_RBS_OFFSET;
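
Spelled out, the new placement in the hunk above parks pt_regs at the very top of the child's kernel stack with switch_stack immediately below it, so each child costs a small constant amount of copying. The sketch below only illustrates that address arithmetic; IA64_STK_OFFSET, the structure sizes, and the task address are made-up values, not the real ones.

#include <stdio.h>

/* Sketch of the child-stack layout computed in the new copy_thread() code
 * above.  The constants are illustrative; only the relationships matter:
 * pt_regs sits at the top of the stack, switch_stack immediately below it. */
#define STK_OFFSET		(32 * 1024)	/* stands in for IA64_STK_OFFSET */
#define PT_REGS_SIZE		400		/* stands in for sizeof(struct pt_regs) */
#define SWITCH_STACK_SIZE	560		/* stands in for sizeof(struct switch_stack) */

int main(void)
{
	unsigned long p = 0x100000;	/* pretend task_struct address */
	unsigned long child_ptregs = p + STK_OFFSET - PT_REGS_SIZE;
	unsigned long child_stack  = child_ptregs - SWITCH_STACK_SIZE;

	printf("task %#lx: stack top %#lx, pt_regs at %#lx, switch_stack at %#lx\n",
	       p, p + STK_OFFSET, child_ptregs, child_stack);
	return 0;
}
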
@@ -324,7 +320,7 @@ copy_thread (int nr, unsigned long clone_flags,
 	/* copy the parent's register backing store to the child: */
 	memcpy((void *) child_rbs, (void *) rbs, rbs_size);
-	if (user_mode(child_ptregs)) {
+	if (likely(user_mode(child_ptregs))) {
 		if ((clone_flags & CLONE_SETTLS) && !IS_IA32_PROCESS(regs))
 			child_ptregs->r13 = regs->r16;	/* see sys_clone2() in entry.S */
 		if (user_stack_base) {
@@ -341,14 +337,14 @@ copy_thread (int nr, unsigned long clone_flags,
 		 * been taken care of by the caller of sys_clone()
 		 * already.
 		 */
-		child_ptregs->r12 = (unsigned long) (child_ptregs + 1); /* kernel sp */
+		child_ptregs->r12 = (unsigned long) child_ptregs - 16;	/* kernel sp */
 		child_ptregs->r13 = (unsigned long) p;		/* set `current' pointer */
 	}
+	child_stack->ar_bspstore = child_rbs + rbs_size;
 	if (IS_IA32_PROCESS(regs))
 		child_stack->b0 = (unsigned long) &ia32_ret_from_clone;
 	else
 		child_stack->b0 = (unsigned long) &ia64_ret_from_clone;
-	child_stack->ar_bspstore = child_rbs + rbs_size;
 	/* copy parts of thread_struct: */
 	p->thread.ksp = (unsigned long) child_stack - 16;
@@ -578,12 +574,32 @@ ia64_set_personality (struct elf64_hdr *elf_ex, int ibcs2_interpreter)
 pid_t
 kernel_thread (int (*fn)(void *), void *arg, unsigned long flags)
 {
-	struct task_struct *parent = current;
-	int result;
-	pid_t tid;
+	extern void ia64_invoke_kernel_thread_helper (void);
+	unsigned long *helper_fptr = (unsigned long *) &ia64_invoke_kernel_thread_helper;
+	struct {
+		struct switch_stack sw;
+		struct pt_regs pt;
+	} regs;
+	memset(&regs, 0, sizeof(regs));
+	regs.pt.cr_iip = helper_fptr[0];	/* set entry point (IP) */
+	regs.pt.r1 = helper_fptr[1];		/* set GP */
+	regs.pt.r9 = (unsigned long) fn;	/* 1st argument */
+	regs.pt.r11 = (unsigned long) arg;	/* 2nd argument */
+	/* Preserve PSR bits, except for bits 32-34 and 37-45, which we can't read.  */
+	regs.pt.cr_ipsr = ia64_getreg(_IA64_REG_PSR) | IA64_PSR_BN;
+	regs.pt.cr_ifs = 1UL << 63;		/* mark as valid, empty frame */
+	regs.sw.ar_fpsr = regs.pt.ar_fpsr = ia64_getreg(_IA64_REG_AR_FPSR);
+	regs.sw.ar_bspstore = (unsigned long) current + IA64_RBS_OFFSET;
+	return do_fork(flags | CLONE_VM | CLONE_UNTRACED, 0, &regs.pt, 0, NULL, NULL);
+}
+EXPORT_SYMBOL(kernel_thread);
-	tid = clone(flags | CLONE_VM | CLONE_UNTRACED, 0);
-	if (parent != current) {
+/* This gets called from kernel_thread() via ia64_invoke_thread_helper().  */
+int
+kernel_thread_helper (int (*fn)(void *), void *arg)
+{
 #ifdef CONFIG_IA32_SUPPORT
 	if (IS_IA32_PROCESS(ia64_task_regs(current))) {
 		/* A kernel thread is always a 64-bit process.  */
@@ -593,12 +609,8 @@ kernel_thread (int (*fn)(void *), void *arg, unsigned long flags)
 		ia64_set_kr(IA64_KR_TSSD, current->thread.old_k1);
 	}
 #endif
-		result = (*fn)(arg);
-		_exit(result);
-	}
-	return tid;
+	return (*fn)(arg);
 }
-EXPORT_SYMBOL(kernel_thread);
 /*
  * Flush thread state.  This is called when a thread does an execve().
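
Taken together, kernel_thread() now builds a synthetic switch_stack/pt_regs pair, points the saved IP at ia64_invoke_kernel_thread_helper, stashes fn and arg in r9/r11, and hands the result to do_fork(); the child wakes up in the helper on a freshly initialized stack instead of in the middle of its parent's call chain, which is what breaks the unbounded growth. The user-space sketch below illustrates the same trampoline idea with clone(2); it is only an analogy, not the kernel code, and every name in it (trampoline, worker, start_args) is made up rather than taken from the kernel.

#define _GNU_SOURCE
#include <sched.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>
#include <sys/wait.h>

#define CHILD_STACK_SIZE	(256 * 1024)

/* Plays the role that r9/r11 play in the ia64 code: carry fn and arg
 * to the freshly started child. */
struct start_args {
	int (*fn)(void *);
	void *arg;
};

/* Analogue of ia64_invoke_kernel_thread_helper/kernel_thread_helper: the
 * child begins life here, on its own stack, and simply runs fn(arg); the
 * return value becomes the child's exit status. */
static int trampoline(void *p)
{
	struct start_args *a = p;
	return a->fn(a->arg);
}

static int worker(void *arg)
{
	printf("child running, arg=\"%s\"\n", (const char *) arg);
	fflush(stdout);
	return 0;
}

int main(void)
{
	char *stack = malloc(CHILD_STACK_SIZE);
	struct start_args args = { .fn = worker, .arg = "hello" };
	int status;
	pid_t pid;

	if (!stack)
		return 1;
	/* The kernel code passes CLONE_VM | CLONE_UNTRACED to do_fork(); here a
	 * plain SIGCHLD child is enough to show the trampoline idea.  The stack
	 * grows down, so the top of the allocation is passed to clone(). */
	pid = clone(trampoline, stack + CHILD_STACK_SIZE, SIGCHLD, &args);
	if (pid < 0)
		return 1;
	waitpid(pid, &status, 0);
	free(stack);
	return 0;
}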