Commit 79e4dd94 authored by Ingo Molnar's avatar Ingo Molnar Committed by Linus Torvalds

[PATCH] signal latency fixes

This fixes an SMP window where the kernel could miss to handle a signal,
and increase signal delivery latency up to 200 msecs.  Sun has reported
to Ulrich that their JVM sees occasional unexpected signal delays under
Linux.  The more CPUs, the more delays.

The cause of the problem is that the current signal wakeup
implementation is racy in kernel/signal.c:signal_wake_up():

        if (t->state == TASK_RUNNING)
                kick_if_running(t);
	...
        if (t->state & mask) {
                wake_up_process(t);
                return;
        }

If thread (or process) 't' is woken up on another CPU right after the
TASK_RUNNING check, and thread starts to run, then the wake_up_process()
here will do nothing, and the signal stays pending up until the thread
will call into the kernel next time - which can be up to 200 msecs
later.

The solution is to do the 'kicking' of a running thread on a remote CPU
atomically with the wakeup.  For this i've added wake_up_process_kick().
There is no slowdown for the other wakeup codepaths, the new flag to
try_to_wake_up() is compiled off for them.  Some other subsystems might
want to use this wakeup facility as well in the future (eg.  AIO).

In fact this race triggers quite often under Volanomark rusg, with this
change added, Volanomark performance is up from 500-800 to 2000-3000, on
a 4-way x86 box.
parent bf4928ae
......@@ -530,6 +530,7 @@ extern void do_timer(struct pt_regs *);
extern int FASTCALL(wake_up_state(struct task_struct * tsk, unsigned int state));
extern int FASTCALL(wake_up_process(struct task_struct * tsk));
extern int FASTCALL(wake_up_process_kick(struct task_struct * tsk));
extern void FASTCALL(wake_up_forked_process(struct task_struct * tsk));
extern void FASTCALL(sched_exit(task_t * p));
......@@ -643,7 +644,6 @@ extern void wait_task_inactive(task_t * p);
#else
#define wait_task_inactive(p) do { } while (0)
#endif
extern void kick_if_running(task_t * p);
#define remove_parent(p) list_del_init(&(p)->sibling)
#define add_parent(p, parent) list_add_tail(&(p)->sibling,&(parent)->children)
......
......@@ -454,27 +454,12 @@ void wait_task_inactive(task_t * p)
}
#endif
/*
* kick_if_running - kick the remote CPU if the task is running currently.
*
* This code is used by the signal code to signal tasks
* which are in user-mode, as quickly as possible.
*
* (Note that we do this lockless - if the task does anything
* while the message is in flight then it will notice the
* sigpending condition anyway.)
*/
void kick_if_running(task_t * p)
{
if ((task_running(task_rq(p), p)) && (task_cpu(p) != smp_processor_id()))
resched_task(p);
}
/***
* try_to_wake_up - wake up a thread
* @p: the to-be-woken-up thread
* @state: the mask of task states that can be woken
* @sync: do a synchronous wakeup?
* @kick: kick the CPU if the task is already running?
*
* Put it on the run-queue if it's not already there. The "current"
* thread is always on the run-queue (except when the actual
......@@ -484,7 +469,7 @@ void kick_if_running(task_t * p)
*
* returns failure only if the task is already active.
*/
static int try_to_wake_up(task_t * p, unsigned int state, int sync)
static int try_to_wake_up(task_t * p, unsigned int state, int sync, int kick)
{
int success = 0, requeue_waker = 0;
unsigned long flags;
......@@ -518,7 +503,9 @@ static int try_to_wake_up(task_t * p, unsigned int state, int sync)
resched_task(rq->curr);
}
success = 1;
}
} else
if (unlikely(kick) && task_running(rq, p))
resched_task(rq->curr);
p->state = TASK_RUNNING;
}
task_rq_unlock(rq, &flags);
......@@ -543,12 +530,17 @@ static int try_to_wake_up(task_t * p, unsigned int state, int sync)
int wake_up_process(task_t * p)
{
return try_to_wake_up(p, TASK_STOPPED | TASK_INTERRUPTIBLE | TASK_UNINTERRUPTIBLE, 0);
return try_to_wake_up(p, TASK_STOPPED | TASK_INTERRUPTIBLE | TASK_UNINTERRUPTIBLE, 0, 0);
}
int wake_up_process_kick(task_t * p)
{
return try_to_wake_up(p, TASK_STOPPED | TASK_INTERRUPTIBLE | TASK_UNINTERRUPTIBLE, 0, 1);
}
int wake_up_state(task_t *p, unsigned int state)
{
return try_to_wake_up(p, state, 0);
return try_to_wake_up(p, state, 0, 0);
}
/*
......@@ -1389,7 +1381,7 @@ asmlinkage void preempt_schedule(void)
int default_wake_function(wait_queue_t *curr, unsigned mode, int sync)
{
task_t *p = curr->task;
return try_to_wake_up(p, mode, sync);
return try_to_wake_up(p, mode, sync, 0);
}
/*
......
......@@ -520,18 +520,6 @@ inline void signal_wake_up(struct task_struct *t, int resume)
set_tsk_thread_flag(t,TIF_SIGPENDING);
/*
* If the task is running on a different CPU
* force a reschedule on the other CPU to make
* it notice the new signal quickly.
*
* The code below is a tad loose and might occasionally
* kick the wrong CPU if we catch the process in the
* process of changing - but no harm is done by that
* other than doing an extra (lightweight) IPI interrupt.
*/
if (t->state == TASK_RUNNING)
kick_if_running(t);
/*
* If resume is set, we want to wake it up in the TASK_STOPPED case.
* We don't check for TASK_STOPPED because there is a race with it
......@@ -543,7 +531,7 @@ inline void signal_wake_up(struct task_struct *t, int resume)
if (resume)
mask |= TASK_STOPPED;
if (t->state & mask) {
wake_up_process(t);
wake_up_process_kick(t);
return;
}
}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment