Commit 97db62cc authored by Ingo Molnar, committed by Linus Torvalds

[PATCH] scheduler fixes

 - introduce a new type of context-switch locking; this is a must-have for
   ia64 and sparc64.

 - load_balance() bug noticed by Scott Rhine and myself: scan the
   whole list to find an imbalance's worth of tasks, not just the tail
   of the list.

 - sched_yield() fix: use current->array, not rq->active.
parent 9e7cec88
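
The core of the change is a new pair of per-architecture context-switch hooks,
prepare_arch_switch(rq, next) and finish_arch_switch(rq, prev), replacing the old
prepare/finish_arch_schedule() and single-argument switch macros. Below is a minimal
sketch of how the hooks pair up around switch_to(); it is an illustration written for
this note, not code quoted from the (collapsed) kernel/sched.c diff:

        /*
         * Sketch: schedule() brackets the context switch with the hooks.
         * An architecture that cannot keep rq->lock held across the switch
         * (ia64, sparc64) pins the incoming task via its per-task
         * switch_lock before dropping rq->lock, and the outgoing task's
         * lock is released only once the switch has completed:
         *
         *      prepare_arch_switch(rq, next);
         *      ... switch_to(prev, next, prev) ...
         *      finish_arch_switch(rq, prev);
         *
         * Architectures with no special requirements presumably fall back
         * to defaults equivalent to the ones deleted from the per-arch
         * headers below:
         */
        #ifndef prepare_arch_switch
        # define prepare_arch_switch(rq, next)  do { } while (0)
        # define finish_arch_switch(rq, prev)   spin_unlock_irq(&(rq)->lock)
        #endif
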
@@ -11,11 +11,6 @@
 struct task_struct;	/* one of the stranger aspects of C forward declarations.. */
 extern void FASTCALL(__switch_to(struct task_struct *prev, struct task_struct *next));
 
-#define prepare_arch_schedule(prev)	do { } while(0)
-#define finish_arch_schedule(prev)	do { } while(0)
-#define prepare_arch_switch(rq)	do { } while(0)
-#define finish_arch_switch(rq)	spin_unlock_irq(&(rq)->lock)
-
 #define switch_to(prev,next,last) do {			\
 	asm volatile("pushl %%esi\n\t"			\
 		     "pushl %%edi\n\t"			\
...
@@ -83,11 +83,6 @@ extern void cacheable_memzero(void *p, unsigned int nb);
 struct device_node;
 extern void note_scsi_host(struct device_node *, void *);
 
-#define prepare_arch_schedule(prev)	do { } while(0)
-#define finish_arch_schedule(prev)	do { } while(0)
-#define prepare_arch_switch(rq)	do { } while(0)
-#define finish_arch_switch(rq)	spin_unlock_irq(&(rq)->lock)
-
 struct task_struct;
 extern void __switch_to(struct task_struct *, struct task_struct *);
 #define switch_to(prev, next, last)	__switch_to((prev), (next))
...
@@ -18,11 +18,6 @@
 #endif
 #include <linux/kernel.h>
 
-#define prepare_arch_schedule(prev)	do { } while (0)
-#define finish_arch_schedule(prev)	do { } while (0)
-#define prepare_arch_switch(rq)	do { } while (0)
-#define finish_arch_switch(rq)	spin_unlock_irq(&(rq)->lock)
-
 #define switch_to(prev,next,last) do {	\
 	if (prev == next)		\
 		break;			\
...
@@ -18,11 +18,6 @@
 #endif
 #include <linux/kernel.h>
 
-#define prepare_arch_schedule(prev)	do { } while (0)
-#define finish_arch_schedule(prev)	do { } while (0)
-#define prepare_arch_switch(rq)	do { } while (0)
-#define finish_arch_switch(rq)	spin_unlock_irq(&(rq)->lock)
-
 #define switch_to(prev,next,last) do {	\
 	if (prev == next)		\
 		break;			\
...
@@ -140,13 +140,17 @@ extern void __flushw_user(void);
 #define flush_user_windows flushw_user
 #define flush_register_windows flushw_all
 
-#define prepare_arch_schedule(prev)	task_lock(prev)
-#define finish_arch_schedule(prev)	task_unlock(prev)
-#define prepare_arch_switch(rq)	\
-do {	spin_unlock(&(rq)->lock);	\
-	flushw_all();			\
+#define prepare_arch_switch(rq, next)		\
+do {	spin_lock(&(next)->switch_lock);	\
+	spin_unlock(&(rq)->lock);		\
+	flushw_all();				\
 } while (0)
-#define finish_arch_switch(rq)	local_irq_enable()
+
+#define finish_arch_switch(rq, prev)		\
+do {	spin_unlock_irq(&(prev)->switch_lock);	\
+} while (0)
 
 #ifndef CONFIG_DEBUG_SPINLOCK
 #define CHECK_LOCKS(PREV)	do { } while(0)
...
@@ -13,11 +13,6 @@
 #define LOCK_PREFIX ""
 #endif
 
-#define prepare_arch_schedule(prev)	do { } while(0)
-#define finish_arch_schedule(prev)	do { } while(0)
-#define prepare_arch_switch(rq)	do { } while(0)
-#define finish_arch_switch(rq)	spin_unlock_irq(&(rq)->lock)
-
 #define __STR(x) #x
 #define STR(x) __STR(x)
...
@@ -47,7 +47,7 @@
 	lock_depth:	-1,						\
 	prio:		MAX_PRIO-20,					\
 	static_prio:	MAX_PRIO-20,					\
-	policy:		SCHED_OTHER,					\
+	policy:		SCHED_NORMAL,					\
 	cpus_allowed:	-1,						\
 	mm:		NULL,						\
 	active_mm:	&init_mm,					\
@@ -78,6 +78,7 @@
 	pending:	{ NULL, &tsk.pending.head, {{0}}},		\
 	blocked:	{{0}},						\
 	alloc_lock:	SPIN_LOCK_UNLOCKED,				\
+	switch_lock:	SPIN_LOCK_UNLOCKED,				\
 	journal_info:	NULL,						\
 }
...
@@ -116,7 +116,7 @@ extern unsigned long nr_uninterruptible(void);
 /*
  * Scheduling policies
  */
-#define SCHED_OTHER		0
+#define SCHED_NORMAL		0
 #define SCHED_FIFO		1
 #define SCHED_RR		2
@@ -207,7 +207,7 @@ struct signal_struct {
 /*
  * Priority of a process goes from 0..MAX_PRIO-1, valid RT
- * priority is 0..MAX_RT_PRIO-1, and SCHED_OTHER tasks are
+ * priority is 0..MAX_RT_PRIO-1, and SCHED_NORMAL tasks are
  * in the range MAX_RT_PRIO..MAX_PRIO-1. Priority values
  * are inverted: lower p->prio value means higher priority.
  *
@@ -264,7 +264,7 @@ struct task_struct {
 	unsigned long policy;
 	unsigned long cpus_allowed;
-	unsigned int time_slice;
+	unsigned int time_slice, first_time_slice;
 
 	struct list_head tasks;
@@ -361,6 +361,8 @@ struct task_struct {
 	u32 self_exec_id;
 /* Protection of (de-)allocation: mm, files, fs, tty */
 	spinlock_t alloc_lock;
+/* context-switch lock */
+	spinlock_t switch_lock;
 
 /* journalling filesystem info */
 	void *journal_info;
...
@@ -184,7 +184,7 @@ void reparent_to_init(void)
 	current->exit_signal = SIGCHLD;
 
 	current->ptrace = 0;
-	if ((current->policy == SCHED_OTHER) && (task_nice(current) < 0))
+	if ((current->policy == SCHED_NORMAL) && (task_nice(current) < 0))
 		set_user_nice(current, 0);
 	/* cpus_allowed? */
 	/* rt_priority? */
...
@@ -611,7 +611,6 @@ struct task_struct *do_fork(unsigned long clone_flags,
 			    unsigned long stack_size)
 {
 	int retval;
-	unsigned long flags;
 	struct task_struct *p = NULL;
 	struct completion vfork;
@@ -675,6 +674,7 @@ struct task_struct *do_fork(unsigned long clone_flags,
 		init_completion(&vfork);
 	}
 	spin_lock_init(&p->alloc_lock);
+	spin_lock_init(&p->switch_lock);
 	clear_tsk_thread_flag(p,TIF_SIGPENDING);
 	init_sigpending(&p->pending);
@@ -740,8 +740,13 @@ struct task_struct *do_fork(unsigned long clone_flags,
 	 * total amount of pending timeslices in the system doesnt change,
 	 * resulting in more scheduling fairness.
 	 */
-	local_irq_save(flags);
+	local_irq_disable();
 	p->time_slice = (current->time_slice + 1) >> 1;
+	/*
+	 * The remainder of the first timeslice might be recovered by
+	 * the parent if the child exits early enough.
+	 */
+	p->first_time_slice = 1;
 	current->time_slice >>= 1;
 	p->sleep_timestamp = jiffies;
 	if (!current->time_slice) {
@@ -753,11 +758,10 @@ struct task_struct *do_fork(unsigned long clone_flags,
 		current->time_slice = 1;
 		preempt_disable();
 		scheduler_tick(0, 0);
-		local_irq_restore(flags);
+		local_irq_enable();
 		preempt_enable();
 	} else
-		local_irq_restore(flags);
+		local_irq_enable();
 
 	/*
 	 * Ok, add it to the run-queues and make it
 	 * visible to the rest of the system.
...
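
The split above conserves timeslices: the child gets the rounded-up half, the parent
keeps the rounded-down half, and the two always sum to the original value, so fork()
cannot be used to mint extra CPU time. A small stand-alone demonstration of the
arithmetic (ordinary user-space C, not kernel code):

        #include <stdio.h>

        /* Demonstrate the do_fork() timeslice split: child gets the
         * rounded-up half, parent keeps the rounded-down half, and the
         * total is conserved for every starting value. */
        int main(void)
        {
                for (unsigned int ts = 1; ts <= 8; ts++) {
                        unsigned int child  = (ts + 1) >> 1;    /* p->time_slice */
                        unsigned int parent = ts >> 1;          /* current->time_slice */
                        printf("slice %u -> child %u + parent %u = %u\n",
                               ts, child, parent, child + parent);
                }
                return 0;
        }

When the parent's half rounds down to zero, the code above hands the parent a single
tick and immediately charges it via scheduler_tick(), which is why the irq-disabled
region was widened from local_irq_save()/restore() to plain disable/enable.
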
@@ -190,16 +190,19 @@ int request_module(const char * module_name)
 	pid_t pid;
 	int waitpid_result;
 	sigset_t tmpsig;
-	int i;
+	int i, ret;
 	static atomic_t kmod_concurrent = ATOMIC_INIT(0);
 #define MAX_KMOD_CONCURRENT 50	/* Completely arbitrary value - KAO */
 	static int kmod_loop_msg;
+	unsigned long saved_policy = current->policy;
 
+	current->policy = SCHED_NORMAL;
 	/* Don't allow request_module() before the root fs is mounted! */
 	if ( ! current->fs->root ) {
 		printk(KERN_ERR "request_module[%s]: Root fs not mounted\n",
 			module_name);
-		return -EPERM;
+		ret = -EPERM;
+		goto out;
 	}
 
 	/* If modprobe needs a service that is in a module, we get a recursive
@@ -220,14 +223,16 @@ int request_module(const char * module_name)
 			printk(KERN_ERR
 			       "kmod: runaway modprobe loop assumed and stopped\n");
 		atomic_dec(&kmod_concurrent);
-		return -ENOMEM;
+		ret = -ENOMEM;
+		goto out;
 	}
 
 	pid = kernel_thread(exec_modprobe, (void*) module_name, 0);
 	if (pid < 0) {
 		printk(KERN_ERR "request_module[%s]: fork failed, errno %d\n", module_name, -pid);
 		atomic_dec(&kmod_concurrent);
-		return pid;
+		ret = pid;
+		goto out;
 	}
 
 	/* Block everything but SIGKILL/SIGSTOP */
@@ -250,7 +255,10 @@ int request_module(const char * module_name)
 		printk(KERN_ERR "request_module[%s]: waitpid(%d,...) failed, errno %d\n",
 		       module_name, pid, -waitpid_result);
 	}
-	return 0;
+	ret = 0;
+out:
+	current->policy = saved_policy;
+	return ret;
 }
 #endif /* CONFIG_KMOD */
...
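
The hunk above forces the caller to SCHED_NORMAL for the duration of the modprobe
run (a thread created by kernel_thread() inherits the caller's scheduling policy)
and restores the saved policy on every exit path through the single out: label.
A user-space analogue of the same save/override/restore shape, hypothetical and
only for illustration:

        #include <sched.h>

        /* Run work() with the caller temporarily demoted to SCHED_OTHER,
         * then restore whatever policy and priority the caller had. */
        static int run_at_normal_policy(int (*work)(void))
        {
                struct sched_param saved_param, normal = { .sched_priority = 0 };
                int saved_policy = sched_getscheduler(0);   /* 0 == this process */
                int ret;

                if (saved_policy < 0 || sched_getparam(0, &saved_param) < 0)
                        return -1;
                sched_setscheduler(0, SCHED_OTHER, &normal);

                ret = work ? work() : -1;

                /* single exit: undo the override no matter how we got here */
                sched_setscheduler(0, saved_policy, &saved_param);
                return ret;
        }
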
This diff is collapsed.
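
The collapsed file is presumably kernel/sched.c, which carries the changelog's other
two fixes. For the sched_yield() bug, the point is that a task must be requeued on
the prio_array it is actually linked into, which is current->array and not
necessarily rq->active. A minimal sketch of the shape of the fix, using O(1)
scheduler names but not quoted from the patch:

        asmlinkage long sys_sched_yield(void)
        {
                runqueue_t *rq = this_rq_lock();
                prio_array_t *array = current->array;   /* fix: was rq->active */

                /* rotate the task to the tail of its own priority list */
                dequeue_task(current, array);
                enqueue_task(current, array);
                spin_unlock_irq(&rq->lock);

                schedule();
                return 0;
        }

The load_balance() fix lives in the same file: when pulling tasks from the busiest
runqueue, the scan now walks the whole priority list looking for up to an
imbalance's worth of movable tasks, instead of examining only the entry at the tail.
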
@@ -500,7 +500,6 @@ inline void signal_wake_up(struct task_struct *t)
 {
 	set_tsk_thread_flag(t,TIF_SIGPENDING);
 
-#ifdef CONFIG_SMP
 	/*
 	 * If the task is running on a different CPU
 	 * force a reschedule on the other CPU to make
@@ -511,9 +510,8 @@ inline void signal_wake_up(struct task_struct *t)
 	 * process of changing - but no harm is done by that
 	 * other than doing an extra (lightweight) IPI interrupt.
 	 */
-	if ((t->state == TASK_RUNNING) && (t->thread_info->cpu != smp_processor_id()))
+	if (t->state == TASK_RUNNING)
 		kick_if_running(t);
-#endif
 
 	if (t->state & TASK_INTERRUPTIBLE) {
 		wake_up_process(t);
 		return;
...
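
With the CONFIG_SMP guard gone, the CPU test that used to be open-coded in
signal_wake_up() has to live inside kick_if_running() itself. A hypothetical shape
of that helper (kick_if_running() is real, but its body is not part of the hunks
shown here; task_rq() and resched_task() are existing O(1) scheduler names):

        static inline void kick_if_running(task_t *p)
        {
                /* IPI the remote CPU only if p is running there right now */
                if (task_rq(p)->curr == p &&
                    p->thread_info->cpu != smp_processor_id())
                        resched_task(p);
        }

On a UP kernel the condition can never be true for the only CPU, so the caller no
longer needs an #ifdef.
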
@@ -888,7 +888,7 @@ asmlinkage long sys_nanosleep(struct timespec *rqtp, struct timespec *rmtp)
 	if (t.tv_sec == 0 && t.tv_nsec <= 2000000L &&
-	    current->policy != SCHED_OTHER)
+	    current->policy != SCHED_NORMAL)
 	{
 		/*
 		 * Short delay requests up to 2 ms will be handled with
...