Commit d89f3847 authored by Ingo Molnar's avatar Ingo Molnar

[PATCH] thread-aware coredumps, 2.5.43-C3

This is the second iteration of thread-aware coredumps.

Changes:

- Ulrich Drepper has reviewed the data structures and checked actual
  coredumps via readelf - everything looks fine and according to the spec.

- a serious bug has been fixed in the thread-state dumping code - it was
  still based on the 2.4 assumption that the task struct points to the
  kernel stack - it's task->thread_info in 2.5. This bug caused bogus
  register info to be filled in for threads.

- properly wait for all threads that share the same MM to serialize with
  the coredumping thread. This is CLONE_VM based, not tied to
  CLONE_THREAD and/or signal semantics, ie. old-style (or different-style)
  threaded apps will be properly stopped as well.

  The locking might look a bit complex, but I wanted to keep the
  __exit_mm() overhead as low as possible. It's not quite trivial to get
  these bits right, because 'sharing the MM' is detached from signals
  semantics, so we cannot rely on broadcast-kill catching all threads. So
  zap_threads() iterates through every thread and zaps those which were
  left out. (There's a minimal race left where a newly forked child
  might escape the attention of zap_threads() - this race is fixed by the
  OOM fixes in the mmap-speedup patch.)

- fill_psinfo() is now called with the thread group leader, for the
  coredump to get 'process' state.

- initialize the elf_thread_status structure with zeroes.

the IA64 ELF bits are not included, yet, to reduce complexity of the
patch. The patch has been tested on x86 UP and SMP.
parent 5a7728c6
......@@ -528,3 +528,40 @@ int dump_extended_fpu( struct pt_regs *regs, struct user_fxsr_struct *fpu )
return fpvalid;
}
/*
 * Dump the i387 FPU state of @tsk into @fpu for a core file.
 *
 * Returns nonzero when the task has used the FPU and its state was
 * copied out, 0 when there is no FPU state to dump.
 */
int dump_task_fpu(struct task_struct *tsk, struct user_i387_struct *fpu)
{
	int fpvalid = tsk->used_math;

	if (!fpvalid)
		return 0;

	/* Flush any live FPU registers into the thread struct first. */
	if (tsk == current)
		unlazy_fpu(tsk);

	if (cpu_has_fxsr)
		copy_fpu_fxsave(tsk, fpu);
	else
		copy_fpu_fsave(tsk, fpu);

	return fpvalid;
}
/*
 * Dump the extended (fxsave) FPU state of @tsk into @fpu.
 *
 * Returns nonzero when the task has used the FPU and the CPU supports
 * FXSR, 0 otherwise (nothing copied).
 */
int dump_task_extended_fpu(struct task_struct *tsk, struct user_fxsr_struct *fpu)
{
	int fpvalid = tsk->used_math && cpu_has_fxsr;

	if (!fpvalid)
		return 0;

	/* Flush any live FPU registers into the thread struct first. */
	if (tsk == current)
		unlazy_fpu(tsk);
	memcpy(fpu, &tsk->thread.i387.fxsave, sizeof(*fpu));
	return fpvalid;
}
#ifdef CONFIG_SMP
/*
 * ELF_CORE_SYNC helper: flush this CPU's lazily-held FPU state for the
 * current task before a coredump gathers per-thread register state.
 */
void dump_smp_unlazy_fpu(void)
{
	unlazy_fpu(current);
}
#endif
......@@ -19,6 +19,7 @@
#include <linux/fs.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/elfcore.h>
#include <linux/smp.h>
#include <linux/smp_lock.h>
#include <linux/stddef.h>
......@@ -373,6 +374,25 @@ void dump_thread(struct pt_regs * regs, struct user * dump)
dump->u_fpvalid = dump_fpu (regs, &dump->i387);
}
/*
* Capture the user space registers if the task is not running (in user space)
*/
/*
 * Capture the user-space registers of @tsk into @regs for a core dump.
 * The task must not be running in user space.
 *
 * The user-mode pt_regs frame is saved at the top of the task's kernel
 * stack, i.e. at tsk->thread_info + THREAD_SIZE.  Always returns 1.
 */
int dump_task_regs(struct task_struct *tsk, elf_gregset_t *regs)
{
	unsigned long stack_top = (unsigned long)tsk->thread_info + THREAD_SIZE;
	struct pt_regs ptregs;

	ptregs = *(struct pt_regs *)(stack_top - sizeof(ptregs));

	/* Only the low 16 bits of the segment registers are meaningful. */
	ptregs.xcs &= 0xffff;
	ptregs.xds &= 0xffff;
	ptregs.xes &= 0xffff;
	ptregs.xss &= 0xffff;

	elf_core_copy_regs(regs, &ptregs);

	return 1;
}
/*
* This special macro can be used to load a debugging register
*/
......
......@@ -381,6 +381,31 @@ copy_thread (int nr, unsigned long clone_flags,
/* unw_init_running() callback: copy current's registers into the arg buffer. */
void do_copy_regs(struct unw_frame_info *info, void *arg)
{
	do_copy_task_regs(current, info, arg);
}
/* unw_init_running() callback: dump current's FP registers into the arg buffer. */
void do_dump_fpu(struct unw_frame_info *info, void *arg)
{
	do_dump_task_fpu(current, info, arg);
}
/*
 * Copy current's register state into @dst via the kernel unwinder.
 * The @pt argument is not used here; the unwinder walks the live frame.
 */
void ia64_elf_core_copy_regs(struct pt_regs *pt, elf_gregset_t dst)
{
	unw_init_running(do_copy_regs, dst);
}
/*
 * Dump current's floating-point registers into @dst.
 * f0-f31 are always valid, so this always returns 1.
 */
int dump_fpu(struct pt_regs *pt, elf_fpregset_t dst)
{
	unw_init_running(do_dump_fpu, dst);
	return 1;
}
static void
do_copy_task_regs (struct task_struct *task, struct unw_frame_info *info, void *arg)
{
unsigned long mask, sp, nat_bits = 0, ip, ar_rnat, urbs_end, cfm;
elf_greg_t *dst = arg;
......@@ -396,12 +421,12 @@ do_copy_regs (struct unw_frame_info *info, void *arg)
unw_get_sp(info, &sp);
pt = (struct pt_regs *) (sp + 16);
urbs_end = ia64_get_user_rbs_end(current, pt, &cfm);
urbs_end = ia64_get_user_rbs_end(task, pt, &cfm);
if (ia64_sync_user_rbs(current, info->sw, pt->ar_bspstore, urbs_end) < 0)
if (ia64_sync_user_rbs(task, info->sw, pt->ar_bspstore, urbs_end) < 0)
return;
ia64_peek(current, info->sw, urbs_end, (long) ia64_rse_rnat_addr((long *) urbs_end),
ia64_peek(task, info->sw, urbs_end, (long) ia64_rse_rnat_addr((long *) urbs_end),
&ar_rnat);
/*
......@@ -450,7 +475,7 @@ do_copy_regs (struct unw_frame_info *info, void *arg)
}
void
do_dump_fpu (struct unw_frame_info *info, void *arg)
do_dump_task_fpu (struct task_struct *task, struct unw_frame_info *info, void *arg)
{
elf_fpreg_t *dst = arg;
int i;
......@@ -465,22 +490,41 @@ do_dump_fpu (struct unw_frame_info *info, void *arg)
for (i = 2; i < 32; ++i)
unw_get_fr(info, i, dst + i);
ia64_flush_fph(current);
if ((current->thread.flags & IA64_THREAD_FPH_VALID) != 0)
memcpy(dst + 32, current->thread.fph, 96*16);
ia64_flush_fph(task);
if ((task->thread.flags & IA64_THREAD_FPH_VALID) != 0)
memcpy(dst + 32, task->thread.fph, 96*16);
}
/*
 * Capture @task's register state into @regs for a core dump.
 * Always returns 1.
 *
 * For the current task we unwind from the live frame; for any other
 * (blocked) task we unwind from its saved switch stack.
 *
 * (This span contained removed-line residue from the pre-patch
 * ia64_elf_core_copy_regs() interleaved with the new function; only
 * the new-side definition is kept.)
 */
int dump_task_regs(struct task_struct *task, elf_gregset_t *regs)
{
	struct unw_frame_info tcore_info;

	if (current == task) {
		unw_init_running(do_copy_regs, regs);
	} else {
		memset(&tcore_info, 0, sizeof(tcore_info));
		unw_init_from_blocked_task(&tcore_info, task);
		do_copy_task_regs(task, &tcore_info, regs);
	}
	return 1;
}
/*
 * Dump @task's floating-point registers into @dst for a core dump.
 * f0-f31 are always valid, so this always returns 1.
 *
 * For the current task we unwind from the live frame; for any other
 * (blocked) task we unwind from its saved switch stack.
 *
 * (This span contained removed-line residue from the pre-patch
 * dump_fpu() interleaved with the new function; only the new-side
 * definition is kept.)
 */
int dump_task_fpu (struct task_struct *task, elf_fpregset_t *dst)
{
	struct unw_frame_info tcore_info;

	if (current == task) {
		unw_init_running(do_dump_fpu, dst);
	} else {
		memset(&tcore_info, 0, sizeof(tcore_info));
		unw_init_from_blocked_task(&tcore_info, task);
		do_dump_task_fpu(task, &tcore_info, dst);
	}
	return 1;
}
asmlinkage long
......
This diff is collapsed.
......@@ -1209,6 +1209,35 @@ void format_corename(char *corename, const char *pattern, long signr)
*out_ptr = 0;
}
/*
 * Send SIGKILL to every task sharing @mm that has not yet entered the
 * coredump-wait path (p->core_waiter set in __exit_mm()).  Called by
 * coredump_wait() so all MM users serialize with the dumping thread.
 */
static void zap_threads (struct mm_struct *mm)
{
struct task_struct *g, *p;
/* give other threads a chance to run: */
yield();
read_lock(&tasklist_lock);
do_each_thread(g,p)
/* zap only MM-sharers that are not already waiting on the dump */
if (mm == p->mm && !p->core_waiter)
force_sig_specific(SIGKILL, p);
while_each_thread(g,p);
read_unlock(&tasklist_lock);
}
/*
 * Block the coredumping thread until every other user of @mm has
 * accounted itself in mm->core_waiters via __exit_mm().
 *
 * We queue ourselves on mm->core_wait *before* zapping the other
 * threads, so the wake_up() issued by the last exiting MM user cannot
 * be missed.  If all users are already accounted for there is nothing
 * to wait for and we just reset our state.
 */
static void coredump_wait(struct mm_struct *mm)
{
	DECLARE_WAITQUEUE(wait, current);

	atomic_inc(&mm->core_waiters);
	add_wait_queue(&mm->core_wait, &wait);
	zap_threads(mm);

	/*
	 * set_current_state() (rather than a plain store) provides the
	 * memory barrier needed between queueing ourselves above and
	 * testing the counters below.
	 */
	set_current_state(TASK_UNINTERRUPTIBLE);
	if (atomic_read(&mm->core_waiters) != atomic_read(&mm->mm_users))
		schedule();
	else
		set_current_state(TASK_RUNNING);

	/*
	 * Fix: remove the on-stack wait queue entry before returning.
	 * Leaving it queued would leave a dangling pointer on
	 * mm->core_wait and corrupt the queue on any later wakeup.
	 */
	remove_wait_queue(&mm->core_wait, &wait);
}
int do_coredump(long signr, struct pt_regs * regs)
{
struct linux_binfmt * binfmt;
......@@ -1224,13 +1253,16 @@ int do_coredump(long signr, struct pt_regs * regs)
if (!current->mm->dumpable)
goto fail;
current->mm->dumpable = 0;
if (down_trylock(&current->mm->core_sem))
BUG();
coredump_wait(current->mm);
if (current->rlim[RLIMIT_CORE].rlim_cur < binfmt->min_coredump)
goto fail;
goto fail_unlock;
format_corename(corename, core_pattern, signr);
file = filp_open(corename, O_CREAT | 2 | O_NOFOLLOW, 0600);
if (IS_ERR(file))
goto fail;
goto fail_unlock;
inode = file->f_dentry->d_inode;
if (inode->i_nlink > 1)
goto close_fail; /* multiple links - don't dump */
......@@ -1250,6 +1282,8 @@ int do_coredump(long signr, struct pt_regs * regs)
close_fail:
filp_close(file, NULL);
fail_unlock:
up(&current->mm->core_sem);
fail:
unlock_kernel();
return retval;
......
......@@ -7,6 +7,7 @@
#include <asm/ptrace.h>
#include <asm/user.h>
#include <asm/processor.h>
#include <linux/utsname.h>
......@@ -59,6 +60,9 @@ typedef struct user_fxsr_struct elf_fpxregset_t;
/* Wow, the "main" arch needs arch dependent functions too.. :) */
#define savesegment(seg,value) \
asm volatile("movl %%" #seg ",%0":"=m" (*(int *)&(value)))
/* regs is struct pt_regs, pr_reg is elf_gregset_t (which is
now struct_user_regs, they are different) */
......@@ -72,9 +76,8 @@ typedef struct user_fxsr_struct elf_fpxregset_t;
pr_reg[6] = regs->eax; \
pr_reg[7] = regs->xds; \
pr_reg[8] = regs->xes; \
/* fake once used fs and gs selectors? */ \
pr_reg[9] = regs->xds; /* was fs and __fs */ \
pr_reg[10] = regs->xds; /* was gs and __gs */ \
savesegment(fs,pr_reg[9]); \
savesegment(gs,pr_reg[10]); \
pr_reg[11] = regs->orig_eax; \
pr_reg[12] = regs->eip; \
pr_reg[13] = regs->xcs; \
......@@ -99,6 +102,20 @@ typedef struct user_fxsr_struct elf_fpxregset_t;
#ifdef __KERNEL__
#define SET_PERSONALITY(ex, ibcs2) set_personality((ibcs2)?PER_SVR4:PER_LINUX)
extern int dump_task_regs (struct task_struct *, elf_gregset_t *);
extern int dump_task_fpu (struct task_struct *, elf_fpregset_t *);
extern int dump_task_extended_fpu (struct task_struct *, struct user_fxsr_struct *);
#define ELF_CORE_COPY_TASK_REGS(tsk, elf_regs) dump_task_regs(tsk, elf_regs)
#define ELF_CORE_COPY_FPREGS(tsk, elf_fpregs) dump_task_fpu(tsk, elf_fpregs)
#define ELF_CORE_COPY_XFPREGS(tsk, elf_xfpregs) dump_task_extended_fpu(tsk, elf_xfpregs)
#ifdef CONFIG_SMP
extern void dump_smp_unlazy_fpu(void);
#define ELF_CORE_SYNC dump_smp_unlazy_fpu
#endif
#endif
#endif
......@@ -65,12 +65,16 @@ extern void ia64_init_addr_space (void);
#define ELF_NGREG 128 /* we really need just 72 but let's leave some headroom... */
#define ELF_NFPREG 128 /* f0 and f1 could be omitted, but so what... */
typedef unsigned long elf_fpxregset_t;
typedef unsigned long elf_greg_t;
typedef elf_greg_t elf_gregset_t[ELF_NGREG];
typedef struct ia64_fpreg elf_fpreg_t;
typedef elf_fpreg_t elf_fpregset_t[ELF_NFPREG];
struct pt_regs; /* forward declaration... */
extern void ia64_elf_core_copy_regs (struct pt_regs *src, elf_gregset_t dst);
#define ELF_CORE_COPY_REGS(_dest,_regs) ia64_elf_core_copy_regs(_regs, _dest);
......@@ -88,6 +92,14 @@ extern void ia64_elf_core_copy_regs (struct pt_regs *src, elf_gregset_t dst);
struct elf64_hdr;
extern void ia64_set_personality (struct elf64_hdr *elf_ex, int ibcs2_interpreter);
#define SET_PERSONALITY(ex, ibcs2) ia64_set_personality(&(ex), ibcs2)
extern int dump_task_regs(struct task_struct *, elf_gregset_t *);
extern int dump_task_fpu (struct task_struct *, elf_fpregset_t *);
#define ELF_CORE_COPY_TASK_REGS(tsk, elf_gregs) dump_task_regs(tsk, elf_gregs)
#define ELF_CORE_COPY_FPREGS(tsk, elf_fpregs) dump_task_fpu(tsk, elf_fpregs)
#endif
#endif /* _ASM_IA64_ELF_H */
#ifndef _LINUX_ELF_H
#define _LINUX_ELF_H
#include <linux/sched.h>
#include <linux/types.h>
#include <asm/elf.h>
......@@ -575,7 +576,8 @@ typedef struct elf64_shdr {
#define NT_PRFPREG 2
#define NT_PRPSINFO 3
#define NT_TASKSTRUCT 4
#define NT_PRFPXREG 20
#define NT_PRXFPREG 0x46e62b7f /* copied from gdb5.1/include/elf/common.h */
/* Note header in a PT_NOTE section */
typedef struct elf32_note {
......
......@@ -85,4 +85,45 @@ typedef struct elf_prpsinfo prpsinfo_t;
#define PRARGSZ ELF_PRARGSZ
#endif
#ifdef __KERNEL__
/*
 * Copy a pt_regs snapshot into an ELF general-register set.
 * Architectures defining ELF_CORE_COPY_REGS supply their own layout
 * conversion; otherwise the two structures must be the same size and
 * are copied raw.
 */
static inline void elf_core_copy_regs(elf_gregset_t *elfregs, struct pt_regs *regs)
{
#ifdef ELF_CORE_COPY_REGS
ELF_CORE_COPY_REGS((*elfregs), regs)
#else
/* fallback: layouts must match exactly for a straight struct copy */
BUG_ON(sizeof(*elfregs) != sizeof(*regs));
*(struct pt_regs *)elfregs = *regs;
#endif
}
/*
 * Copy @t's user registers into @elfregs.  Returns the architecture
 * hook's result, or 0 when the architecture provides no way to fetch
 * another task's registers.
 */
static inline int elf_core_copy_task_regs(struct task_struct *t, elf_gregset_t* elfregs)
{
#ifdef ELF_CORE_COPY_TASK_REGS
	return ELF_CORE_COPY_TASK_REGS(t, elfregs);
#else
	return 0;
#endif
}
extern int dump_fpu (struct pt_regs *, elf_fpregset_t *);
/*
 * Copy @t's FPU state into @fpu.  Architectures without
 * ELF_CORE_COPY_FPREGS fall back to dump_fpu().
 * NOTE(review): the fallback passes a NULL pt_regs — assumes dump_fpu
 * ignores that argument on such architectures; confirm per-arch.
 */
static inline int elf_core_copy_task_fpregs(struct task_struct *t, elf_fpregset_t *fpu)
{
#ifdef ELF_CORE_COPY_FPREGS
return ELF_CORE_COPY_FPREGS(t, fpu);
#else
return dump_fpu(NULL, fpu);
#endif
}
#ifdef ELF_CORE_COPY_XFPREGS
/*
 * Copy @t's extended FPU state; only available on architectures that
 * provide the ELF_CORE_COPY_XFPREGS hook.
 */
static inline int elf_core_copy_task_xfpregs(struct task_struct *t, elf_fpxregset_t *xfpu)
{
return ELF_CORE_COPY_XFPREGS(t, xfpu);
}
#endif
#endif /* __KERNEL__ */
#endif /* _LINUX_ELFCORE_H */
......@@ -208,6 +208,11 @@ struct mm_struct {
/* Architecture-specific MM context */
mm_context_t context;
/* coredumping support */
struct semaphore core_sem;
atomic_t core_waiters;
wait_queue_head_t core_wait;
/* aio bits */
rwlock_t ioctx_list_lock;
struct kioctx *ioctx_list;
......@@ -401,6 +406,8 @@ struct task_struct {
void *journal_info;
struct dentry *proc_dentry;
struct backing_dev_info *backing_dev_info;
/* threaded coredumping support */
int core_waiter;
};
extern void __put_task_struct(struct task_struct *tsk);
......@@ -540,6 +547,7 @@ extern int kill_proc_info(int, struct siginfo *, pid_t);
extern void notify_parent(struct task_struct *, int);
extern void do_notify_parent(struct task_struct *, int);
extern void force_sig(int, struct task_struct *);
extern void force_sig_specific(int, struct task_struct *);
extern int send_sig(int, struct task_struct *, int);
extern int __broadcast_thread_group(struct task_struct *p, int sig);
extern int kill_pg(pid_t, int, int);
......
......@@ -416,19 +416,31 @@ void end_lazy_tlb(struct mm_struct *mm)
*/
/*
 * Detach @tsk from its MM at exit time.
 *
 * If a coredump is pending (mm->dumpable was cleared by do_coredump()),
 * this thread accounts itself in mm->core_waiters, wakes the dumping
 * thread once every MM user is accounted for, and then blocks on
 * mm->core_sem until the dump has completed.
 *
 * (This span contained the removed pre-patch body interleaved with the
 * new one as diff residue; only the new-side definition is kept.  The
 * `==atomic_read` spacing typo is also fixed.)
 */
static inline void __exit_mm(struct task_struct *tsk)
{
	struct mm_struct *mm = tsk->mm;

	mm_release();
	if (!mm)
		return;
	/*
	 * Serialize with any possible pending coredump:
	 */
	if (!mm->dumpable) {
		current->core_waiter = 1;
		atomic_inc(&mm->core_waiters);
		/* last MM user in: wake the coredumping thread */
		if (atomic_read(&mm->core_waiters) == atomic_read(&mm->mm_users))
			wake_up(&mm->core_wait);
		/* block until do_coredump() drops core_sem, then release */
		down(&mm->core_sem);
		up(&mm->core_sem);
		atomic_dec(&mm->core_waiters);
	}
	atomic_inc(&mm->mm_count);
	if (mm != tsk->active_mm) BUG();
	/* more a memory barrier than a real lock */
	task_lock(tsk);
	tsk->mm = NULL;
	enter_lazy_tlb(mm, current, smp_processor_id());
	task_unlock(tsk);
	mmput(mm);
}
void exit_mm(struct task_struct *tsk)
......
......@@ -305,6 +305,9 @@ static struct mm_struct * mm_init(struct mm_struct * mm)
atomic_set(&mm->mm_users, 1);
atomic_set(&mm->mm_count, 1);
init_rwsem(&mm->mmap_sem);
init_MUTEX(&mm->core_sem);
init_waitqueue_head(&mm->core_wait);
atomic_set(&mm->core_waiters, 0);
mm->page_table_lock = SPIN_LOCK_UNLOCKED;
mm->ioctx_list_lock = RW_LOCK_UNLOCKED;
mm->default_kioctx = (struct kioctx)INIT_KIOCTX(mm->default_kioctx, *mm);
......@@ -771,6 +774,8 @@ static struct task_struct *copy_process(unsigned long clone_flags,
INIT_LIST_HEAD(&p->local_pages);
p->core_waiter = 0;
retval = -ENOMEM;
if (security_ops->task_alloc_security(p))
goto bad_fork_cleanup;
......
......@@ -768,7 +768,7 @@ force_sig_info(int sig, struct siginfo *info, struct task_struct *t)
}
static int
specific_force_sig_info(int sig, struct task_struct *t)
__specific_force_sig_info(int sig, struct task_struct *t)
{
if (!t->sig)
return -ESRCH;
......@@ -781,6 +781,20 @@ specific_force_sig_info(int sig, struct task_struct *t)
return specific_send_sig_info(sig, (void *)2, t, 0);
}
/*
 * Force delivery of @sig to the specific task @t: a SIG_IGN handler is
 * reset to SIG_DFL and the signal is unblocked, so the task cannot
 * ignore it.  Used by zap_threads() to SIGKILL MM-sharing threads.
 *
 * NOTE(review): unlike __specific_force_sig_info(), this dereferences
 * t->sig without a NULL check — confirm callers only pass tasks that
 * still have a signal struct.
 */
void
force_sig_specific(int sig, struct task_struct *t)
{
unsigned long int flags;
spin_lock_irqsave(&t->sig->siglock, flags);
/* an ignored signal would be discarded; force the default action */
if (t->sig->action[sig-1].sa.sa_handler == SIG_IGN)
t->sig->action[sig-1].sa.sa_handler = SIG_DFL;
sigdelset(&t->blocked, sig);
recalc_sigpending_tsk(t);
specific_send_sig_info(sig, (void *)2, t, 0);
spin_unlock_irqrestore(&t->sig->siglock, flags);
}
#define can_take_signal(p, sig) \
(((unsigned long) p->sig->action[sig-1].sa.sa_handler > 1) && \
!sigismember(&p->blocked, sig) && (task_curr(p) || !signal_pending(p)))
......@@ -846,7 +860,7 @@ int __broadcast_thread_group(struct task_struct *p, int sig)
int err = 0;
for_each_task_pid(p->tgid, PIDTYPE_TGID, tmp, l, pid)
err = specific_force_sig_info(sig, tmp);
err = __specific_force_sig_info(sig, tmp);
return err;
}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment