Commit 873cbfcf authored by Ingo Molnar

- i've extended the scheduler context-switch mechanism with the following
  per-arch defines:

        prepare_arch_schedule(prev_task);
        finish_arch_schedule(prev_task);
        prepare_arch_switch(rq);
        finish_arch_switch(rq);

- plus switch_to() takes 3 parameters again:

        switch_to(prev, next, last);

- schedule_tail() has the 'prev' task parameter again, it must be passed
  over in switch_to() and passed in to the fork() startup path.

architectures that need to unlock the runqueue before doing the switch can
do the following:

 #define prepare_arch_schedule(prev)             task_lock(prev)
 #define finish_arch_schedule(prev)              task_unlock(prev)
 #define prepare_arch_switch(rq)                 spin_unlock(&(rq)->lock)
 #define finish_arch_switch(rq)                  __sti()

this way the task-lock makes sure that a task is not scheduled on some
other CPU before the switch-out finishes, but the runqueue lock is
dropped. (Local interrupts are kept disabled in this variant, just to
exclude things like TLB flushes - if that matters.)

architectures that can hold the runqueue lock during context-switch can do
the following simplification:

 #define prepare_arch_schedule(prev)             do { } while(0)
 #define finish_arch_schedule(prev)              do { } while(0)
 #define prepare_arch_switch(rq)                 do { } while(0)
 #define finish_arch_switch(rq)                  spin_unlock_irq(&(rq)->lock)

further optimizations are possible in the 'simple' variant:

- an architecture does not have to handle the 'last' parameter in
  switch_to() if the 'prev' parameter is unused in finish_arch_schedule().
  This way the inlined return value of context_switch() also gets
  optimized away at compile-time.

- an architecture does not have to pass the 'prev' pointer to
  schedule_tail(), if the 'prev' parameter is unused in
  finish_arch_schedule().

the x86 architecture makes use of these optimizations.

Via this solution we have a reasonably flexible context-switch setup which
falls back to the current (faster) code on x86, but on other platforms the
runqueue lock can be dropped before doing the context-switch as well.

        Ingo

NOTE: i have coded and tested the 'complex' variant on x86 as well to make
      sure it works for you on Sparc64 - but since x86's switch_mm() is
      not too subtle it can use the simpler variant. [ The following
      things had to be done to make the x86 arch use the complex variant:
      the 4 complex macros had to be used in system.h, entry.S had to
      'pushl %ebx' and 'addl $4, %esp' around the call to schedule_tail(),
      and switch_to() had to be reverted to the 3-parameter variant
      present in the 2.4 kernels. ]

NOTE2: prepare_to_switch() functionality has been moved into the
       prepare_arch_switch() macro.

NOTE3: please use macros for prepare|finish_arch_switch() so that we can
       keep the scheduler data structures internal to sched.c.
parent b533e812
...@@ -193,6 +193,7 @@ ENTRY(lcall27) ...@@ -193,6 +193,7 @@ ENTRY(lcall27)
ENTRY(ret_from_fork) ENTRY(ret_from_fork)
#if CONFIG_SMP || CONFIG_PREEMPT #if CONFIG_SMP || CONFIG_PREEMPT
# NOTE: this function takes a parameter but it's unused on x86.
call schedule_tail call schedule_tail
#endif #endif
GET_THREAD_INFO(%ebx) GET_THREAD_INFO(%ebx)
......
...@@ -11,9 +11,12 @@ ...@@ -11,9 +11,12 @@
struct task_struct; /* one of the stranger aspects of C forward declarations.. */ struct task_struct; /* one of the stranger aspects of C forward declarations.. */
extern void FASTCALL(__switch_to(struct task_struct *prev, struct task_struct *next)); extern void FASTCALL(__switch_to(struct task_struct *prev, struct task_struct *next));
#define prepare_to_switch() do { } while(0) #define prepare_arch_schedule(prev) do { } while(0)
#define finish_arch_schedule(prev) do { } while(0)
#define prepare_arch_switch(rq) do { } while(0)
#define finish_arch_switch(rq) spin_unlock_irq(&(rq)->lock)
#define switch_to(prev,next) do { \ #define switch_to(prev,next,last) do { \
asm volatile("pushl %%esi\n\t" \ asm volatile("pushl %%esi\n\t" \
"pushl %%edi\n\t" \ "pushl %%edi\n\t" \
"pushl %%ebp\n\t" \ "pushl %%ebp\n\t" \
......
...@@ -451,19 +451,18 @@ void sched_exit(task_t * p) ...@@ -451,19 +451,18 @@ void sched_exit(task_t * p)
} }
#if CONFIG_SMP || CONFIG_PREEMPT #if CONFIG_SMP || CONFIG_PREEMPT
asmlinkage void schedule_tail(void) asmlinkage void schedule_tail(task_t *prev)
{ {
spin_unlock_irq(&this_rq()->lock); finish_arch_switch(this_rq());
finish_arch_schedule(prev);
} }
#endif #endif
static inline void context_switch(task_t *prev, task_t *next) static inline task_t * context_switch(task_t *prev, task_t *next)
{ {
struct mm_struct *mm = next->mm; struct mm_struct *mm = next->mm;
struct mm_struct *oldmm = prev->active_mm; struct mm_struct *oldmm = prev->active_mm;
prepare_to_switch();
if (unlikely(!mm)) { if (unlikely(!mm)) {
next->active_mm = oldmm; next->active_mm = oldmm;
atomic_inc(&oldmm->mm_count); atomic_inc(&oldmm->mm_count);
...@@ -477,7 +476,9 @@ static inline void context_switch(task_t *prev, task_t *next) ...@@ -477,7 +476,9 @@ static inline void context_switch(task_t *prev, task_t *next)
} }
/* Here we just switch the register state and the stack. */ /* Here we just switch the register state and the stack. */
switch_to(prev, next); switch_to(prev, next, prev);
return prev;
} }
unsigned long nr_running(void) unsigned long nr_running(void)
...@@ -823,6 +824,7 @@ asmlinkage void schedule(void) ...@@ -823,6 +824,7 @@ asmlinkage void schedule(void)
rq = this_rq(); rq = this_rq();
release_kernel_lock(prev, smp_processor_id()); release_kernel_lock(prev, smp_processor_id());
prepare_arch_schedule(prev);
prev->sleep_timestamp = jiffies; prev->sleep_timestamp = jiffies;
spin_lock_irq(&rq->lock); spin_lock_irq(&rq->lock);
...@@ -878,23 +880,20 @@ asmlinkage void schedule(void) ...@@ -878,23 +880,20 @@ asmlinkage void schedule(void)
if (likely(prev != next)) { if (likely(prev != next)) {
rq->nr_switches++; rq->nr_switches++;
rq->curr = next; rq->curr = next;
context_switch(prev, next);
/* prepare_arch_switch(rq);
* The runqueue pointer might be from another CPU prev = context_switch(prev, next);
* if the new task was last running on a different barrier();
* CPU - thus re-load it.
*/
mb();
rq = this_rq(); rq = this_rq();
} finish_arch_switch(rq);
} else
spin_unlock_irq(&rq->lock); spin_unlock_irq(&rq->lock);
finish_arch_schedule(prev);
reacquire_kernel_lock(current); reacquire_kernel_lock(current);
preempt_enable_no_resched(); preempt_enable_no_resched();
if (test_thread_flag(TIF_NEED_RESCHED)) if (test_thread_flag(TIF_NEED_RESCHED))
goto need_resched; goto need_resched;
return;
} }
#ifdef CONFIG_PREEMPT #ifdef CONFIG_PREEMPT
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment