Commit 326e5830, authored by Robert Love, committed by Linus Torvalds

[PATCH] preemptive kernel behavior change: don't be rude

- do not manually set task->state
- instead, in preempt_schedule, set a flag in preempt_count that
  denotes that this task is entering schedule off a kernel preemption.
- use this flag in schedule to jump to pick_next_task
- in preempt_schedule, upon return from schedule, unset the flag
- have entry.S just call preempt_schedule and not duplicate this work,
  as Linus suggested.  I agree.  Note this makes debugging easier as
  we keep a single point of entry for kernel preemptions.

The result: we can safely preempt non-TASK_RUNNING tasks.  If one is
preempted, it safely survives schedule() because, with PREEMPT_ACTIVE
set, schedule() skips the special-case handling of non-TASK_RUNNING
states at its top.  Thus other tasks can run as desired and our
non-TASK_RUNNING task will eventually be rescheduled, in its original
state, and complete happily.

This is the behavior we have in the 2.4 patches and had in 2.5 until
~2.5.6-pre.  This works.  It requires no other changes elsewhere (it
actually removes some special-casing Ingo did in the signal code).
parent 60c06d75
...@@ -240,9 +240,7 @@ ENTRY(resume_kernel) ...@@ -240,9 +240,7 @@ ENTRY(resume_kernel)
jnz restore_all jnz restore_all
incl TI_PRE_COUNT(%ebx) incl TI_PRE_COUNT(%ebx)
sti sti
movl TI_TASK(%ebx), %ecx # ti->task call SYMBOL_NAME(preempt_schedule)
movl $0,(%ecx) # current->state = TASK_RUNNING
call SYMBOL_NAME(schedule)
jmp ret_from_intr jmp ret_from_intr
#endif #endif
......
...@@ -455,11 +455,9 @@ void do_syscall_trace(struct pt_regs *regs, int entryexit) ...@@ -455,11 +455,9 @@ void do_syscall_trace(struct pt_regs *regs, int entryexit)
between a syscall stop and SIGTRAP delivery */ between a syscall stop and SIGTRAP delivery */
current->exit_code = SIGTRAP | ((current->ptrace & PT_TRACESYSGOOD) current->exit_code = SIGTRAP | ((current->ptrace & PT_TRACESYSGOOD)
? 0x80 : 0); ? 0x80 : 0);
preempt_disable();
current->state = TASK_STOPPED; current->state = TASK_STOPPED;
notify_parent(current, SIGCHLD); notify_parent(current, SIGCHLD);
schedule(); schedule();
preempt_enable();
/* /*
* this isn't the same as continuing with a signal, but it will do * this isn't the same as continuing with a signal, but it will do
* for normal use. strace only continues with a signal if the * for normal use. strace only continues with a signal if the
......
...@@ -610,11 +610,9 @@ int do_signal(struct pt_regs *regs, sigset_t *oldset) ...@@ -610,11 +610,9 @@ int do_signal(struct pt_regs *regs, sigset_t *oldset)
if ((current->ptrace & PT_PTRACED) && signr != SIGKILL) { if ((current->ptrace & PT_PTRACED) && signr != SIGKILL) {
/* Let the debugger run. */ /* Let the debugger run. */
current->exit_code = signr; current->exit_code = signr;
preempt_disable();
current->state = TASK_STOPPED; current->state = TASK_STOPPED;
notify_parent(current, SIGCHLD); notify_parent(current, SIGCHLD);
schedule(); schedule();
preempt_enable();
/* We're back. Did the debugger cancel the sig? */ /* We're back. Did the debugger cancel the sig? */
if (!(signr = current->exit_code)) if (!(signr = current->exit_code))
...@@ -669,14 +667,12 @@ int do_signal(struct pt_regs *regs, sigset_t *oldset) ...@@ -669,14 +667,12 @@ int do_signal(struct pt_regs *regs, sigset_t *oldset)
case SIGSTOP: { case SIGSTOP: {
struct signal_struct *sig; struct signal_struct *sig;
current->state = TASK_STOPPED;
current->exit_code = signr; current->exit_code = signr;
sig = current->parent->sig; sig = current->parent->sig;
preempt_disable();
current->state = TASK_STOPPED;
if (sig && !(sig->action[SIGCHLD-1].sa.sa_flags & SA_NOCLDSTOP)) if (sig && !(sig->action[SIGCHLD-1].sa.sa_flags & SA_NOCLDSTOP))
notify_parent(current, SIGCHLD); notify_parent(current, SIGCHLD);
schedule(); schedule();
preempt_enable();
continue; continue;
} }
......
...@@ -91,6 +91,7 @@ extern unsigned long nr_running(void); ...@@ -91,6 +91,7 @@ extern unsigned long nr_running(void);
#define TASK_UNINTERRUPTIBLE 2 #define TASK_UNINTERRUPTIBLE 2
#define TASK_ZOMBIE 4 #define TASK_ZOMBIE 4
#define TASK_STOPPED 8 #define TASK_STOPPED 8
#define PREEMPT_ACTIVE 0x4000000
#define __set_task_state(tsk, state_value) \ #define __set_task_state(tsk, state_value) \
do { (tsk)->state = (state_value); } while (0) do { (tsk)->state = (state_value); } while (0)
......
...@@ -764,6 +764,13 @@ asmlinkage void schedule(void) ...@@ -764,6 +764,13 @@ asmlinkage void schedule(void)
prev->sleep_timestamp = jiffies; prev->sleep_timestamp = jiffies;
spin_lock_irq(&rq->lock); spin_lock_irq(&rq->lock);
/*
* if entering from preempt_schedule, off a kernel preemption,
* go straight to picking the next task.
*/
if (unlikely(preempt_get_count() & PREEMPT_ACTIVE))
goto pick_next_task;
switch (prev->state) { switch (prev->state) {
case TASK_INTERRUPTIBLE: case TASK_INTERRUPTIBLE:
if (unlikely(signal_pending(prev))) { if (unlikely(signal_pending(prev))) {
...@@ -775,7 +782,7 @@ asmlinkage void schedule(void) ...@@ -775,7 +782,7 @@ asmlinkage void schedule(void)
case TASK_RUNNING: case TASK_RUNNING:
; ;
} }
#if CONFIG_SMP #if CONFIG_SMP || CONFIG_PREEMPT
pick_next_task: pick_next_task:
#endif #endif
if (unlikely(!rq->nr_running)) { if (unlikely(!rq->nr_running)) {
...@@ -843,9 +850,11 @@ asmlinkage void preempt_schedule(void) ...@@ -843,9 +850,11 @@ asmlinkage void preempt_schedule(void)
{ {
if (unlikely(preempt_get_count())) if (unlikely(preempt_get_count()))
return; return;
if (current->state != TASK_RUNNING)
return; current_thread_info()->preempt_count += PREEMPT_ACTIVE;
schedule(); schedule();
current_thread_info()->preempt_count -= PREEMPT_ACTIVE;
barrier();
} }
#endif /* CONFIG_PREEMPT */ #endif /* CONFIG_PREEMPT */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment