Commit d79c07a4 authored by Ingo Molnar

[PATCH] O(1) sys_exit(), threading, scalable-exit-2.5.31-B4

the attached patch updates a number of items:

 - adds cleanups suggested by Christoph Hellwig: needed unlikely()
   statements, a superfluous #define and line length problems.

 - splits up the global ptrace list into per-task ptrace lists. This was
   pretty straightforward, and this makes the worst-case exit() latency
   O(nr_children).

The per-task ptrace lists unearthed a bug that the previous code did not
handle: tasks on the ptrace list have to be correctly reparented as
well. This patch has passed my stress tests, too.
parent 5d6df147
#include <linux/config.h>
#include <linux/ptrace.h>
#include <linux/errno.h>
#include <linux/signal.h>
#include <linux/sched.h>
......
......@@ -18,7 +18,6 @@
*/
#include <linux/config.h>
#include <linux/ptrace.h>
#include <linux/errno.h>
#include <linux/signal.h>
#include <linux/sched.h>
......
......@@ -23,7 +23,6 @@
#include <linux/smp_lock.h>
#include <linux/stddef.h>
#include <linux/unistd.h>
#include <linux/ptrace.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/user.h>
......
......@@ -15,7 +15,6 @@
#include <linux/signal.h>
#include <linux/errno.h>
#include <linux/wait.h>
#include <linux/ptrace.h>
#include <linux/unistd.h>
#include <linux/stddef.h>
#include <linux/personality.h>
......
......@@ -16,7 +16,6 @@
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/ptrace.h>
#include <linux/timer.h>
#include <linux/mm.h>
#include <linux/init.h>
......
......@@ -8,7 +8,6 @@
#include <linux/kernel.h>
#include <linux/signal.h>
#include <linux/string.h>
#include <linux/ptrace.h>
#include <linux/mm.h>
#include <linux/smp.h>
#include <linux/smp_lock.h>
......
......@@ -7,7 +7,6 @@
#ifndef __ASSEMBLY__
#include <linux/config.h>
#include <linux/threads.h>
#include <linux/ptrace.h>
#endif
#ifdef CONFIG_X86_LOCAL_APIC
......
......@@ -2,7 +2,6 @@
#define _I386_USER_H
#include <asm/page.h>
#include <linux/ptrace.h>
/* Core file format: The core file is written in such a way that gdb
can understand it and provide useful information to the user (under
linux we use the 'trad-core' bfd). There are quite a number of
......
#ifndef _LINUX_BINFMTS_H
#define _LINUX_BINFMTS_H
#include <linux/ptrace.h>
#include <linux/capability.h>
/*
......
......@@ -4,7 +4,6 @@
#include <linux/types.h>
#include <linux/signal.h>
#include <linux/time.h>
#include <linux/ptrace.h>
#include <linux/user.h>
struct elf_siginfo
......
......@@ -54,6 +54,8 @@
.run_list = LIST_HEAD_INIT(tsk.run_list), \
.time_slice = HZ, \
.tasks = LIST_HEAD_INIT(tsk.tasks), \
.ptrace_children= LIST_HEAD_INIT(tsk.ptrace_children), \
.ptrace_list = LIST_HEAD_INIT(tsk.ptrace_list), \
.real_parent = &tsk, \
.parent = &tsk, \
.children = LIST_HEAD_INIT(tsk.children), \
......
......@@ -354,12 +354,6 @@ extern pte_t *FASTCALL(pte_alloc_map(struct mm_struct *mm, pmd_t *pmd, unsigned
extern int handle_mm_fault(struct mm_struct *mm,struct vm_area_struct *vma, unsigned long address, int write_access);
extern int make_pages_present(unsigned long addr, unsigned long end);
extern int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, int len, int write);
extern int ptrace_readdata(struct task_struct *tsk, unsigned long src, char *dst, int len);
extern int ptrace_writedata(struct task_struct *tsk, char * src, unsigned long dst, int len);
extern int ptrace_attach(struct task_struct *tsk);
extern int ptrace_detach(struct task_struct *, unsigned int);
extern void ptrace_disable(struct task_struct *);
extern int ptrace_check_attach(struct task_struct *task, int kill);
int get_user_pages(struct task_struct *tsk, struct mm_struct *mm, unsigned long start,
int len, int write, int force, struct page **pages, struct vm_area_struct **vmas);
......
......@@ -3,6 +3,8 @@
/* ptrace.h */
/* structs and defines to help the user use the ptrace system call. */
#include <linux/compiler.h>
/* has the defines to get at the registers. */
#define PTRACE_TRACEME 0
......@@ -23,4 +25,26 @@
#include <asm/ptrace.h>
extern int ptrace_readdata(struct task_struct *tsk, unsigned long src, char *dst, int len);
extern int ptrace_writedata(struct task_struct *tsk, char * src, unsigned long dst, int len);
extern int ptrace_attach(struct task_struct *tsk);
extern int ptrace_detach(struct task_struct *, unsigned int);
extern void ptrace_disable(struct task_struct *);
extern int ptrace_check_attach(struct task_struct *task, int kill);
extern void __ptrace_link(struct task_struct *child,
struct task_struct *new_parent);
extern void __ptrace_unlink(struct task_struct *child);
/*
 * Link a task to its tracer, but only when the task is actually being
 * traced (child->ptrace is non-zero). Untraced tasks, the expected common
 * case, return immediately without calling __ptrace_link().
 */
static inline void ptrace_link(struct task_struct *child,
			       struct task_struct *new_parent)
{
	if (!unlikely(child->ptrace))
		return;

	__ptrace_link(child, new_parent);
}
/*
 * Undo the ptrace linkage of a task, but only when the task is actually
 * being traced (child->ptrace is non-zero). Untraced tasks, the expected
 * common case, return immediately without calling __ptrace_unlink().
 */
static inline void ptrace_unlink(struct task_struct *child)
{
	if (!unlikely(child->ptrace))
		return;

	__ptrace_unlink(child);
}
#endif
......@@ -270,6 +270,8 @@ struct task_struct {
unsigned int time_slice, first_time_slice;
struct list_head tasks;
struct list_head ptrace_children;
struct list_head ptrace_list;
struct mm_struct *mm, *active_mm;
struct list_head local_pages;
......
......@@ -18,6 +18,7 @@
#include <linux/acct.h>
#include <linux/file.h>
#include <linux/binfmts.h>
#include <linux/ptrace.h>
#include <asm/uaccess.h>
#include <asm/pgtable.h>
......@@ -65,6 +66,8 @@ static void release_task(struct task_struct * p)
atomic_dec(&p->user->processes);
security_ops->task_free_security(p);
free_uid(p->user);
BUG_ON(p->ptrace || !list_empty(&p->ptrace_list) ||
!list_empty(&p->ptrace_children));
unhash_process(p);
release_thread(p);
......@@ -177,6 +180,7 @@ void reparent_to_init(void)
{
write_lock_irq(&tasklist_lock);
ptrace_unlink(current);
/* Reparent to init */
REMOVE_LINKS(current);
current->parent = child_reaper;
......@@ -231,31 +235,8 @@ void daemonize(void)
atomic_inc(&current->files->count);
}
/*
* When we die, we re-parent all our children.
* Try to give them to another thread in our thread
* group, and if no such member exists, give it to
* the global child reaper process (ie "init")
*/
static inline void forget_original_parent(struct task_struct * father)
static void reparent_thread(task_t *p, task_t *reaper, task_t *child_reaper)
{
struct task_struct * p, *reaper;
read_lock(&tasklist_lock);
/* Next in our thread group, if they're not already exiting */
reaper = father;
do {
reaper = next_thread(reaper);
if (!(reaper->flags & PF_EXITING))
break;
} while (reaper != father);
if (reaper == father)
reaper = child_reaper;
for_each_task(p) {
if (p->real_parent == father) {
/* We dont want people slaying init */
p->exit_signal = SIGCHLD;
p->self_exec_id++;
......@@ -266,10 +247,8 @@ static inline void forget_original_parent(struct task_struct * father)
else
p->real_parent = reaper;
if (p->pdeath_signal) send_sig(p->pdeath_signal, p, 0);
}
}
read_unlock(&tasklist_lock);
if (p->pdeath_signal)
send_sig(p->pdeath_signal, p, 0);
}
static inline void close_files(struct files_struct * files)
......@@ -419,13 +398,86 @@ void exit_mm(struct task_struct *tsk)
__exit_mm(tsk);
}
/*
 * When we die, we re-parent all our children.
 *
 * The preferred new parent is another thread of our thread group that
 * is not itself exiting; if no such thread exists the children go to
 * the global child reaper process (ie "init"). The per-child work is
 * delegated to reparent_thread().
 *
 * The whole walk runs under read_lock(&tasklist_lock).
 */
static inline void forget_original_parent(struct task_struct * father)
{
	struct task_struct *reaper, *child;
	list_t *pos;

	read_lock(&tasklist_lock);

	/* Walk the thread group once, stopping at the first live thread. */
	for (reaper = next_thread(father); reaper != father;
	     reaper = next_thread(reaper)) {
		if (!(reaper->flags & PF_EXITING))
			break;
	}

	/* Came full circle: nobody in our group can adopt the children. */
	if (reaper == father)
		reaper = child_reaper;

	/*
	 * There are only two places where our children can be:
	 *
	 * - on our children list (linked through ->sibling)
	 * - on our own ptrace_children list (linked through ->ptrace_list)
	 *
	 * Visit both lists and reparent every entry.
	 */
	list_for_each(pos, &father->children) {
		child = list_entry(pos, struct task_struct, sibling);
		reparent_thread(child, reaper, child_reaper);
	}
	list_for_each(pos, &father->ptrace_children) {
		child = list_entry(pos, struct task_struct, ptrace_list);
		reparent_thread(child, reaper, child_reaper);
	}

	read_unlock(&tasklist_lock);
}
/*
 * Detach one task from the exiting task.
 *
 * Any ptrace linkage on @p is dropped, @p is moved onto the children
 * list of its real parent and, when @p is already a zombie with an exit
 * signal set, that parent is notified.
 *
 * Afterwards the process group orphan check is done. For that check the
 * tasklist lock is released with write_unlock_irq() and re-acquired with
 * write_lock_irq() before returning, so the function expects to be
 * entered with the lock write-held, and list state may have changed by
 * the time it returns.
 *
 * Note: @father is not used; the pgrp/session comparison is made against
 * current.
 */
static inline void zap_thread(task_t *p, task_t *father)
{
	int orphan_pgrp;

	ptrace_unlink(p);

	/* Requeue p below its real parent. */
	list_del_init(&p->sibling);
	p->ptrace = 0;
	p->parent = p->real_parent;
	list_add_tail(&p->sibling, &p->parent->children);

	if (p->state == TASK_ZOMBIE && p->exit_signal != -1)
		do_notify_parent(p, p->exit_signal);

	/*
	 * process group orphan check
	 * Case ii: Our child is in a different pgrp
	 * than we are, and it was the only connection
	 * outside, so the child pgrp is now orphaned.
	 *
	 * Nothing to do for a child in our own pgrp or in a
	 * different session.
	 */
	if (p->pgrp == current->pgrp || p->session != current->session)
		return;

	/* Sample the pgrp while the lock is still held. */
	orphan_pgrp = p->pgrp;

	write_unlock_irq(&tasklist_lock);
	if (is_orphaned_pgrp(orphan_pgrp) && has_stopped_jobs(orphan_pgrp)) {
		kill_pg(orphan_pgrp, SIGHUP, 1);
		kill_pg(orphan_pgrp, SIGCONT, 1);
	}
	write_lock_irq(&tasklist_lock);
}
/*
* Send signals to all our closest relatives so that they know
* to properly mourn us..
*/
static void exit_notify(void)
{
struct task_struct * p, *t;
struct task_struct *t;
list_t *_p, *_n;
forget_original_parent(current);
/*
......@@ -484,33 +536,20 @@ static void exit_notify(void)
current->state = TASK_ZOMBIE;
if (current->exit_signal != -1)
do_notify_parent(current, current->exit_signal);
while ((p = eldest_child(current))) {
list_del_init(&p->sibling);
p->ptrace = 0;
p->parent = p->real_parent;
list_add_tail(&p->sibling,&p->parent->children);
if (p->state == TASK_ZOMBIE && p->exit_signal != -1)
do_notify_parent(p, p->exit_signal);
zap_again:
list_for_each_safe(_p, _n, &current->children)
zap_thread(list_entry(_p,struct task_struct,sibling), current);
list_for_each_safe(_p, _n, &current->ptrace_children)
zap_thread(list_entry(_p,struct task_struct,ptrace_list), current);
/*
* process group orphan check
* Case ii: Our child is in a different pgrp
* than we are, and it was the only connection
* outside, so the child pgrp is now orphaned.
* zap_thread might drop the tasklist lock, thus we could
* have new children queued back from the ptrace list into the
* child list:
*/
if ((p->pgrp != current->pgrp) &&
(p->session == current->session)) {
int pgrp = p->pgrp;
write_unlock_irq(&tasklist_lock);
if (is_orphaned_pgrp(pgrp) && has_stopped_jobs(pgrp)) {
kill_pg(pgrp,SIGHUP,1);
kill_pg(pgrp,SIGCONT,1);
}
write_lock_irq(&tasklist_lock);
}
}
if (unlikely(!list_empty(&current->children) ||
!list_empty(&current->ptrace_children)))
goto zap_again;
/*
* No need to unlock IRQs, we'll schedule() immediately
* anyway. In the preemption case this also makes it
......@@ -623,6 +662,12 @@ asmlinkage long sys_wait4(pid_t pid,unsigned int * stat_addr, int options, struc
if (p->pgrp != -pid)
continue;
}
/*
* Do not consider detached threads that are
* not ptraced:
*/
if (p->exit_signal == -1 && !p->ptrace)
continue;
/* Wait for all children (clone and not) if __WALL is set;
* otherwise, wait for clone children *only* if __WCLONE is
* set; otherwise, wait for non-clone children *only*. (Note:
......@@ -667,7 +712,7 @@ asmlinkage long sys_wait4(pid_t pid,unsigned int * stat_addr, int options, struc
if (retval)
goto end_wait4;
retval = p->pid;
if (p->real_parent != p->parent) {
if (p->real_parent != p->parent || p->ptrace) {
write_lock_irq(&tasklist_lock);
remove_parent(p);
p->parent = p->real_parent;
......
......@@ -27,6 +27,7 @@
#include <linux/fs.h>
#include <linux/security.h>
#include <linux/futex.h>
#include <linux/ptrace.h>
#include <asm/pgtable.h>
#include <asm/pgalloc.h>
......@@ -808,6 +809,8 @@ static struct task_struct *copy_process(unsigned long clone_flags,
*/
p->tgid = p->pid;
INIT_LIST_HEAD(&p->thread_group);
INIT_LIST_HEAD(&p->ptrace_children);
INIT_LIST_HEAD(&p->ptrace_list);
/* Need tasklist lock for parent etc handling! */
write_lock_irq(&tasklist_lock);
......@@ -827,6 +830,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
}
SET_LINKS(p);
ptrace_link(p, p->parent);
hash_pid(p);
nr_threads++;
write_unlock_irq(&tasklist_lock);
......
......@@ -13,10 +13,48 @@
#include <linux/highmem.h>
#include <linux/pagemap.h>
#include <linux/smp_lock.h>
#include <linux/ptrace.h>
#include <asm/pgtable.h>
#include <asm/uaccess.h>
/*
 * ptrace a task: make the debugger its new parent and
 * move it to the ptrace list.
 *
 * @child:      the task that is being traced
 * @new_parent: the tracer, which becomes child->parent
 *
 * If the tracer already is the child's parent there is nothing to
 * relink: the child stays on its parent's children list and is not put
 * on any ptrace list. __ptrace_unlink() copes with that state, since it
 * returns early when child->ptrace_list is empty.
 *
 * Must be called with the tasklist lock write-held.
 */
void __ptrace_link(task_t *child, task_t *new_parent)
{
	/* A task may sit on at most one ptrace list. */
	if (!list_empty(&child->ptrace_list))
		BUG();
	/*
	 * A process tracing its own child is a legitimate request, not
	 * a kernel bug: ptrace_attach() calls us unconditionally, so
	 * simply leave the existing parent/child links alone.
	 */
	if (child->parent == new_parent)
		return;
	/*
	 * Record the child on the ptrace list of the parent it is being
	 * taken away from, then requeue it below the tracer.
	 */
	list_add(&child->ptrace_list, &child->parent->ptrace_children);
	REMOVE_LINKS(child);
	child->parent = new_parent;
	SET_LINKS(child);
}
/*
 * unptrace a task: clear its ptrace state and, if it had been moved
 * to a ptrace list, take it off that list and hand it back to its
 * real parent.
 *
 * Calling this on a task whose ->ptrace is already zero is a bug.
 *
 * Must be called with the tasklist lock write-held.
 */
void __ptrace_unlink(task_t *child)
{
	if (!child->ptrace)
		BUG();
	child->ptrace = 0;

	/* Only tasks that were actually relinked need to be moved back. */
	if (!list_empty(&child->ptrace_list)) {
		list_del_init(&child->ptrace_list);
		REMOVE_LINKS(child);
		child->parent = child->real_parent;
		SET_LINKS(child);
	}
}
/*
* Check that we have indeed attached to the thing..
*/
......@@ -75,11 +113,7 @@ int ptrace_attach(struct task_struct *task)
task_unlock(task);
write_lock_irq(&tasklist_lock);
if (task->parent != current) {
REMOVE_LINKS(task);
task->parent = current;
SET_LINKS(task);
}
__ptrace_link(task, current);
write_unlock_irq(&tasklist_lock);
send_sig(SIGSTOP, task, 1);
......@@ -99,16 +133,15 @@ int ptrace_detach(struct task_struct *child, unsigned int data)
ptrace_disable(child);
/* .. re-parent .. */
child->ptrace = 0;
child->exit_code = data;
write_lock_irq(&tasklist_lock);
REMOVE_LINKS(child);
child->parent = child->real_parent;
SET_LINKS(child);
write_unlock_irq(&tasklist_lock);
write_lock_irq(&tasklist_lock);
__ptrace_unlink(child);
/* .. and wake it up. */
if (child->state != TASK_ZOMBIE)
wake_up_process(child);
write_unlock_irq(&tasklist_lock);
return 0;
}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment