Commit 0b7570e7, authored by Oleg Nesterov, committed by Linus Torvalds

do_wait() wakeup optimization: change __wake_up_parent() to use filtered wakeup

Ratan Nalumasu reported that a process with many waiting threads does
many unnecessary wakeups.  Every waiting thread in the process wakes up to
loop through the children, only to see that the ones it cares about are
still not ready.

Now that we have struct wait_opts we can change do_wait/__wake_up_parent
to use filtered wakeups.

We can make child_wait_callback() more clever later; right now it only
checks eligible_child().

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Acked-by: Roland McGrath <roland@redhat.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Ratan Nalumasu <rnalumasu@gmail.com>
Cc: Vitaly Mayatskikh <vmayatsk@redhat.com>
Acked-by: James Morris <jmorris@namei.org>
Tested-by: Valdis Kletnieks <valdis.kletnieks@vt.edu>
Acked-by: David Howells <dhowells@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent a2322e1d
...@@ -1097,6 +1097,7 @@ struct wait_opts { ...@@ -1097,6 +1097,7 @@ struct wait_opts {
int __user *wo_stat; int __user *wo_stat;
struct rusage __user *wo_rusage; struct rusage __user *wo_rusage;
wait_queue_t child_wait;
int notask_error; int notask_error;
}; };
...@@ -1570,20 +1571,35 @@ static int ptrace_do_wait(struct wait_opts *wo, struct task_struct *tsk) ...@@ -1570,20 +1571,35 @@ static int ptrace_do_wait(struct wait_opts *wo, struct task_struct *tsk)
return 0; return 0;
} }
/*
 * Wake function installed on the wait_opts' child_wait entry by do_wait().
 * The wakeup key is interpreted as the task being reported; if
 * eligible_child() rejects that task for this waiter, the function returns 0
 * without waking it. Otherwise the wakeup is delegated to
 * default_wake_function() and its result is returned.
 */
static int child_wait_callback(wait_queue_t *wait, unsigned mode,
int sync, void *key)
{
/* Recover the wait_opts that embeds this wait-queue entry. */
struct wait_opts *wo = container_of(wait, struct wait_opts,
child_wait);
/* __wake_up_parent() passes the task as the wakeup key. */
struct task_struct *p = key;
/* Not a task this waiter is interested in: skip the wakeup. */
if (!eligible_child(wo, p))
return 0;
return default_wake_function(wait, mode, sync, key);
}
void __wake_up_parent(struct task_struct *p, struct task_struct *parent) void __wake_up_parent(struct task_struct *p, struct task_struct *parent)
{ {
wake_up_interruptible_sync(&parent->signal->wait_chldexit); __wake_up_sync_key(&parent->signal->wait_chldexit,
TASK_INTERRUPTIBLE, 1, p);
} }
static long do_wait(struct wait_opts *wo) static long do_wait(struct wait_opts *wo)
{ {
DECLARE_WAITQUEUE(wait, current);
struct task_struct *tsk; struct task_struct *tsk;
int retval; int retval;
trace_sched_process_wait(wo->wo_pid); trace_sched_process_wait(wo->wo_pid);
add_wait_queue(&current->signal->wait_chldexit,&wait); init_waitqueue_func_entry(&wo->child_wait, child_wait_callback);
wo->child_wait.private = current;
add_wait_queue(&current->signal->wait_chldexit, &wo->child_wait);
repeat: repeat:
/* /*
* If there is nothing that can match our critiera just get out. * If there is nothing that can match our critiera just get out.
...@@ -1624,7 +1640,8 @@ static long do_wait(struct wait_opts *wo) ...@@ -1624,7 +1640,8 @@ static long do_wait(struct wait_opts *wo)
} }
end: end:
__set_current_state(TASK_RUNNING); __set_current_state(TASK_RUNNING);
remove_wait_queue(&current->signal->wait_chldexit,&wait); remove_wait_queue(&current->signal->wait_chldexit, &wo->child_wait);
if (wo->wo_info) { if (wo->wo_info) {
struct siginfo __user *infop = wo->wo_info; struct siginfo __user *infop = wo->wo_info;
......
...@@ -2411,7 +2411,7 @@ static void selinux_bprm_committed_creds(struct linux_binprm *bprm) ...@@ -2411,7 +2411,7 @@ static void selinux_bprm_committed_creds(struct linux_binprm *bprm)
/* Wake up the parent if it is waiting so that it can recheck /* Wake up the parent if it is waiting so that it can recheck
* wait permission to the new task SID. */ * wait permission to the new task SID. */
read_lock(&tasklist_lock); read_lock(&tasklist_lock);
wake_up_interruptible(&current->real_parent->signal->wait_chldexit); __wake_up_parent(current, current->real_parent);
read_unlock(&tasklist_lock); read_unlock(&tasklist_lock);
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment