Commit 97db62cc authored by Ingo Molnar, committed by Linus Torvalds

[PATCH] scheduler fixes

 - introduce a new type of context-switch locking; this is a must-have for
   ia64 and sparc64 (see the sketch below).

 - load_balance() bug noticed by Scott Rhine and myself: scan the
   whole list to find an 'imbalance' number of tasks, not just the tail
   of the list (see the toy example further below).

 - sched_yield() fix: use current->array not rq->active.
parent 9e7cec88
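
The new context-switch locking works as the sparc64 hunk further down shows: prepare_arch_switch(rq, next) acquires the new per-task switch_lock of the task being switched to before the runqueue lock is dropped, and finish_arch_switch(rq, prev) releases the switch_lock of the task being switched away from once the switch has completed. A task's switch_lock is therefore held from the moment it is picked to run until the old CPU has fully switched away from it, so no other CPU can start executing a task whose stack and register state is still live. A minimal user-space sketch of that hand-off, with pthread spinlocks standing in for kernel spinlocks and all names chosen for illustration only (not the kernel's own code):

#include <pthread.h>
#include <stdio.h>

struct task {
	pthread_spinlock_t switch_lock;	/* held while this task's context is live on some CPU */
};

struct runqueue {
	pthread_spinlock_t lock;
};

/* analogue of the new prepare_arch_switch(rq, next) */
static void prepare_switch(struct runqueue *rq, struct task *next)
{
	pthread_spin_lock(&next->switch_lock);	/* pin next before anyone else may run it */
	pthread_spin_unlock(&rq->lock);		/* the runqueue lock need not be held across the switch */
}

/* analogue of the new finish_arch_switch(rq, prev), run after switch_to() */
static void finish_switch(struct runqueue *rq, struct task *prev)
{
	(void)rq;
	pthread_spin_unlock(&prev->switch_lock); /* prev's state is saved; another CPU may run it now */
}

int main(void)
{
	struct runqueue rq;
	struct task prev, next;

	pthread_spin_init(&rq.lock, PTHREAD_PROCESS_PRIVATE);
	pthread_spin_init(&prev.switch_lock, PTHREAD_PROCESS_PRIVATE);
	pthread_spin_init(&next.switch_lock, PTHREAD_PROCESS_PRIVATE);

	/* model the state at schedule() time: prev's switch_lock was taken
	   when prev itself was switched to, and the runqueue lock is held */
	pthread_spin_lock(&prev.switch_lock);
	pthread_spin_lock(&rq.lock);

	prepare_switch(&rq, &next);
	/* ... switch_to(prev, next, last) would run here ... */
	finish_switch(&rq, &prev);

	puts("switch done; prev may now be picked up elsewhere");
	return 0;
}

The commit message notes this is needed for ia64 and sparc64, which cannot keep the runqueue lock held across the low-level switch; the sparc64 hunk below drops rq->lock early for exactly that reason.
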
......@@ -11,11 +11,6 @@
struct task_struct; /* one of the stranger aspects of C forward declarations.. */
extern void FASTCALL(__switch_to(struct task_struct *prev, struct task_struct *next));
#define prepare_arch_schedule(prev) do { } while(0)
#define finish_arch_schedule(prev) do { } while(0)
#define prepare_arch_switch(rq) do { } while(0)
#define finish_arch_switch(rq) spin_unlock_irq(&(rq)->lock)
#define switch_to(prev,next,last) do { \
asm volatile("pushl %%esi\n\t" \
"pushl %%edi\n\t" \
......
......@@ -83,11 +83,6 @@ extern void cacheable_memzero(void *p, unsigned int nb);
struct device_node;
extern void note_scsi_host(struct device_node *, void *);
#define prepare_arch_schedule(prev) do { } while(0)
#define finish_arch_schedule(prev) do { } while(0)
#define prepare_arch_switch(rq) do { } while(0)
#define finish_arch_switch(rq) spin_unlock_irq(&(rq)->lock)
struct task_struct;
extern void __switch_to(struct task_struct *, struct task_struct *);
#define switch_to(prev, next, last) __switch_to((prev), (next))
......
......@@ -18,11 +18,6 @@
#endif
#include <linux/kernel.h>
#define prepare_arch_schedule(prev) do { } while (0)
#define finish_arch_schedule(prev) do { } while (0)
#define prepare_arch_switch(rq) do { } while (0)
#define finish_arch_switch(rq) spin_unlock_irq(&(rq)->lock)
#define switch_to(prev,next,last) do { \
if (prev == next) \
break; \
......
......@@ -18,11 +18,6 @@
#endif
#include <linux/kernel.h>
#define prepare_arch_schedule(prev) do { } while (0)
#define finish_arch_schedule(prev) do { } while (0)
#define prepare_arch_switch(rq) do { } while (0)
#define finish_arch_switch(rq) spin_unlock_irq(&(rq)->lock)
#define switch_to(prev,next,last) do { \
if (prev == next) \
break; \
......
......@@ -140,13 +140,17 @@ extern void __flushw_user(void);
#define flush_user_windows flushw_user
#define flush_register_windows flushw_all
#define prepare_arch_schedule(prev) task_lock(prev)
#define finish_arch_schedule(prev) task_unlock(prev)
#define prepare_arch_switch(rq) \
do { spin_unlock(&(rq)->lock); \
flushw_all(); \
#define prepare_arch_switch(rq, next) \
do { spin_lock(&(next)->switch_lock); \
spin_unlock(&(rq)->lock); \
flushw_all(); \
} while (0)
#define finish_arch_switch(rq) local_irq_enable()
#define finish_arch_switch(rq, prev) \
do { spin_unlock_irq(&(prev)->switch_lock); \
} while (0)
#ifndef CONFIG_DEBUG_SPINLOCK
#define CHECK_LOCKS(PREV) do { } while(0)
......
......@@ -13,11 +13,6 @@
#define LOCK_PREFIX ""
#endif
#define prepare_arch_schedule(prev) do { } while(0)
#define finish_arch_schedule(prev) do { } while(0)
#define prepare_arch_switch(rq) do { } while(0)
#define finish_arch_switch(rq) spin_unlock_irq(&(rq)->lock)
#define __STR(x) #x
#define STR(x) __STR(x)
......
......@@ -47,7 +47,7 @@
lock_depth: -1, \
prio: MAX_PRIO-20, \
static_prio: MAX_PRIO-20, \
policy: SCHED_OTHER, \
policy: SCHED_NORMAL, \
cpus_allowed: -1, \
mm: NULL, \
active_mm: &init_mm, \
......@@ -78,6 +78,7 @@
pending: { NULL, &tsk.pending.head, {{0}}}, \
blocked: {{0}}, \
alloc_lock: SPIN_LOCK_UNLOCKED, \
switch_lock: SPIN_LOCK_UNLOCKED, \
journal_info: NULL, \
}
......
......@@ -116,7 +116,7 @@ extern unsigned long nr_uninterruptible(void);
/*
* Scheduling policies
*/
#define SCHED_OTHER 0
#define SCHED_NORMAL 0
#define SCHED_FIFO 1
#define SCHED_RR 2
......@@ -207,7 +207,7 @@ struct signal_struct {
/*
* Priority of a process goes from 0..MAX_PRIO-1, valid RT
* priority is 0..MAX_RT_PRIO-1, and SCHED_OTHER tasks are
* priority is 0..MAX_RT_PRIO-1, and SCHED_NORMAL tasks are
* in the range MAX_RT_PRIO..MAX_PRIO-1. Priority values
* are inverted: lower p->prio value means higher priority.
*
......@@ -264,7 +264,7 @@ struct task_struct {
unsigned long policy;
unsigned long cpus_allowed;
unsigned int time_slice;
unsigned int time_slice, first_time_slice;
struct list_head tasks;
......@@ -361,6 +361,8 @@ struct task_struct {
u32 self_exec_id;
/* Protection of (de-)allocation: mm, files, fs, tty */
spinlock_t alloc_lock;
/* context-switch lock */
spinlock_t switch_lock;
/* journalling filesystem info */
void *journal_info;
......
......@@ -184,7 +184,7 @@ void reparent_to_init(void)
current->exit_signal = SIGCHLD;
current->ptrace = 0;
if ((current->policy == SCHED_OTHER) && (task_nice(current) < 0))
if ((current->policy == SCHED_NORMAL) && (task_nice(current) < 0))
set_user_nice(current, 0);
/* cpus_allowed? */
/* rt_priority? */
......
......@@ -611,7 +611,6 @@ struct task_struct *do_fork(unsigned long clone_flags,
unsigned long stack_size)
{
int retval;
unsigned long flags;
struct task_struct *p = NULL;
struct completion vfork;
......@@ -675,6 +674,7 @@ struct task_struct *do_fork(unsigned long clone_flags,
init_completion(&vfork);
}
spin_lock_init(&p->alloc_lock);
spin_lock_init(&p->switch_lock);
clear_tsk_thread_flag(p,TIF_SIGPENDING);
init_sigpending(&p->pending);
......@@ -740,8 +740,13 @@ struct task_struct *do_fork(unsigned long clone_flags,
* total amount of pending timeslices in the system doesnt change,
* resulting in more scheduling fairness.
*/
local_irq_save(flags);
p->time_slice = (current->time_slice + 1) >> 1;
local_irq_disable();
p->time_slice = (current->time_slice + 1) >> 1;
/*
* The remainder of the first timeslice might be recovered by
* the parent if the child exits early enough.
*/
p->first_time_slice = 1;
current->time_slice >>= 1;
p->sleep_timestamp = jiffies;
if (!current->time_slice) {
......@@ -753,11 +758,10 @@ struct task_struct *do_fork(unsigned long clone_flags,
current->time_slice = 1;
preempt_disable();
scheduler_tick(0, 0);
local_irq_restore(flags);
local_irq_enable();
preempt_enable();
} else
local_irq_restore(flags);
local_irq_enable();
/*
* Ok, add it to the run-queues and make it
* visible to the rest of the system.
......
......@@ -190,16 +190,19 @@ int request_module(const char * module_name)
pid_t pid;
int waitpid_result;
sigset_t tmpsig;
int i;
int i, ret;
static atomic_t kmod_concurrent = ATOMIC_INIT(0);
#define MAX_KMOD_CONCURRENT 50 /* Completely arbitrary value - KAO */
static int kmod_loop_msg;
unsigned long saved_policy = current->policy;
current->policy = SCHED_NORMAL;
/* Don't allow request_module() before the root fs is mounted! */
if ( ! current->fs->root ) {
printk(KERN_ERR "request_module[%s]: Root fs not mounted\n",
module_name);
return -EPERM;
ret = -EPERM;
goto out;
}
/* If modprobe needs a service that is in a module, we get a recursive
......@@ -220,14 +223,16 @@ int request_module(const char * module_name)
printk(KERN_ERR
"kmod: runaway modprobe loop assumed and stopped\n");
atomic_dec(&kmod_concurrent);
return -ENOMEM;
ret = -ENOMEM;
goto out;
}
pid = kernel_thread(exec_modprobe, (void*) module_name, 0);
if (pid < 0) {
printk(KERN_ERR "request_module[%s]: fork failed, errno %d\n", module_name, -pid);
atomic_dec(&kmod_concurrent);
return pid;
ret = pid;
goto out;
}
/* Block everything but SIGKILL/SIGSTOP */
......@@ -250,7 +255,10 @@ int request_module(const char * module_name)
printk(KERN_ERR "request_module[%s]: waitpid(%d,...) failed, errno %d\n",
module_name, pid, -waitpid_result);
}
return 0;
ret = 0;
out:
current->policy = saved_policy;
return ret;
}
#endif /* CONFIG_KMOD */
......
This diff is collapsed.
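
The collapsed diff is not reproduced here, but the load_balance() fix described in the commit message is simple to illustrate: instead of effectively considering only the tail of the busiest queue's list, the balancing pass should walk the whole list and keep pulling candidates until 'imbalance' tasks have been moved. A stand-alone toy version in plain C follows; the types and the migratable test are invented for illustration and are not the kernel's code:

#include <stdio.h>

struct task {
	int pid;
	int migratable;		/* stand-in for "can this task be moved to the idle CPU?" */
	struct task *next;
};

/* Walk the *whole* list, pulling up to `imbalance` migratable tasks;
 * looking only at the tail entry can miss movable tasks entirely. */
static int pull_tasks(struct task *busiest, int imbalance)
{
	int pulled = 0;
	struct task *p;

	for (p = busiest; p && pulled < imbalance; p = p->next) {
		if (!p->migratable)
			continue;	/* skip it and keep scanning */
		printf("pulling pid %d\n", p->pid);
		pulled++;
	}
	return pulled;
}

int main(void)
{
	struct task t3 = { 3, 0, NULL };	/* tail entry: cannot be moved */
	struct task t2 = { 2, 1, &t3 };
	struct task t1 = { 1, 1, &t2 };

	/* two candidates are still found even though the tail is pinned */
	printf("pulled %d task(s)\n", pull_tasks(&t1, 2));
	return 0;
}
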
......@@ -500,7 +500,6 @@ inline void signal_wake_up(struct task_struct *t)
{
set_tsk_thread_flag(t,TIF_SIGPENDING);
#ifdef CONFIG_SMP
/*
* If the task is running on a different CPU
* force a reschedule on the other CPU to make
......@@ -511,9 +510,8 @@ inline void signal_wake_up(struct task_struct *t)
* process of changing - but no harm is done by that
* other than doing an extra (lightweight) IPI interrupt.
*/
if ((t->state == TASK_RUNNING) && (t->thread_info->cpu != smp_processor_id()))
if (t->state == TASK_RUNNING)
kick_if_running(t);
#endif
if (t->state & TASK_INTERRUPTIBLE) {
wake_up_process(t);
return;
......
......@@ -888,7 +888,7 @@ asmlinkage long sys_nanosleep(struct timespec *rqtp, struct timespec *rmtp)
if (t.tv_sec == 0 && t.tv_nsec <= 2000000L &&
current->policy != SCHED_OTHER)
current->policy != SCHED_NORMAL)
{
/*
* Short delay requests up to 2 ms will be handled with
......