Commit fcd05809 authored by Linus Torvalds

Merge git://git.kernel.org/pub/scm/linux/kernel/git/mingo/linux-2.6-sched

* git://git.kernel.org/pub/scm/linux/kernel/git/mingo/linux-2.6-sched:
  sched: mark CONFIG_FAIR_GROUP_SCHED as !EXPERIMENTAL
  sched: isolate SMP balancing code a bit more
  sched: reduce balance-tasks overhead
  sched: make cpu_shares_{show,store}() static
  sched: clean up some control group code
  sched: constify sched.h
  sched: document profile=sleep requiring CONFIG_SCHEDSTATS
  sched: use show_regs() to improve __schedule_bug() output
  sched: clean up sched_domain_debug()
  sched: fix fastcall mismatch in completion APIs
  sched: fix sched_domain sysctl registration again
parents f9e83489 8ef93cf1
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -1444,7 +1444,8 @@ and is between 256 and 4096 characters. It is defined in the file
Param: "schedule" - profile schedule points.
Param: <number> - step/bucket size as a power of 2 for
statistical time based profiling.
Param: "sleep" - profile D-state sleeping (millisecs)
Param: "sleep" - profile D-state sleeping (millisecs).
Requires CONFIG_SCHEDSTATS
Param: "kvm" - profile VM exits.
processor.max_cstate= [HW,ACPI]
......
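For reference, sleep profiling is requested on the kernel command line; the figure after the comma is the shift/bucket size and is just an example value here:

    profile=sleep,2

Without CONFIG_SCHEDSTATS, the kernel/profile.c change further down now prints a boot-time warning instead of silently enabling a profiler that has no data source.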
--- a/include/linux/completion.h
+++ b/include/linux/completion.h
@@ -42,15 +42,15 @@ static inline void init_completion(struct completion *x)
init_waitqueue_head(&x->wait);
}
extern void FASTCALL(wait_for_completion(struct completion *));
extern int FASTCALL(wait_for_completion_interruptible(struct completion *x));
extern unsigned long FASTCALL(wait_for_completion_timeout(struct completion *x,
unsigned long timeout));
extern unsigned long FASTCALL(wait_for_completion_interruptible_timeout(
struct completion *x, unsigned long timeout));
extern void FASTCALL(complete(struct completion *));
extern void FASTCALL(complete_all(struct completion *));
extern void wait_for_completion(struct completion *);
extern int wait_for_completion_interruptible(struct completion *x);
extern unsigned long wait_for_completion_timeout(struct completion *x,
unsigned long timeout);
extern unsigned long wait_for_completion_interruptible_timeout(
struct completion *x, unsigned long timeout);
extern void complete(struct completion *);
extern void complete_all(struct completion *);
#define INIT_COMPLETION(x) ((x).done = 0)
......
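Dropping FASTCALL()/fastcall changes only the calling convention on i386 (register passing), not the API, so no caller needs updating. As a minimal usage sketch of the completion API itself (hypothetical module-style code, not part of this commit):

    #include <linux/completion.h>

    static DECLARE_COMPLETION(work_done);   /* illustrative name */

    /* waiter: sleeps uninterruptibly until work_done is completed */
    static void wait_side(void)
    {
            wait_for_completion(&work_done);
    }

    /* completer: wakes one waiter */
    static void complete_side(void)
    {
            complete(&work_done);
    }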
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -828,12 +828,17 @@ struct sched_class {
struct task_struct * (*pick_next_task) (struct rq *rq);
void (*put_prev_task) (struct rq *rq, struct task_struct *p);
#ifdef CONFIG_SMP
unsigned long (*load_balance) (struct rq *this_rq, int this_cpu,
struct rq *busiest,
unsigned long max_nr_move, unsigned long max_load_move,
struct rq *busiest, unsigned long max_load_move,
struct sched_domain *sd, enum cpu_idle_type idle,
int *all_pinned, int *this_best_prio);
int (*move_one_task) (struct rq *this_rq, int this_cpu,
struct rq *busiest, struct sched_domain *sd,
enum cpu_idle_type idle);
#endif
void (*set_curr_task) (struct rq *rq);
void (*task_tick) (struct rq *rq, struct task_struct *p);
void (*task_new) (struct rq *rq, struct task_struct *p);
@@ -1196,7 +1201,7 @@ static inline int rt_prio(int prio)
return 0;
}
static inline int rt_task(struct task_struct *p)
static inline int rt_task(const struct task_struct *p)
{
return rt_prio(p->prio);
}
@@ -1211,22 +1216,22 @@ static inline void set_task_pgrp(struct task_struct *tsk, pid_t pgrp)
tsk->signal->__pgrp = pgrp;
}
static inline struct pid *task_pid(struct task_struct *task)
static inline struct pid *task_pid(const struct task_struct *task)
{
return task->pids[PIDTYPE_PID].pid;
}
static inline struct pid *task_tgid(struct task_struct *task)
static inline struct pid *task_tgid(const struct task_struct *task)
{
return task->group_leader->pids[PIDTYPE_PID].pid;
}
static inline struct pid *task_pgrp(struct task_struct *task)
static inline struct pid *task_pgrp(const struct task_struct *task)
{
return task->group_leader->pids[PIDTYPE_PGID].pid;
}
static inline struct pid *task_session(struct task_struct *task)
static inline struct pid *task_session(const struct task_struct *task)
{
return task->group_leader->pids[PIDTYPE_SID].pid;
}
@@ -1255,7 +1260,7 @@ struct pid_namespace;
* see also pid_nr() etc in include/linux/pid.h
*/
static inline pid_t task_pid_nr(struct task_struct *tsk)
static inline pid_t task_pid_nr(const struct task_struct *tsk)
{
return tsk->pid;
}
@@ -1268,7 +1273,7 @@ static inline pid_t task_pid_vnr(struct task_struct *tsk)
}
static inline pid_t task_tgid_nr(struct task_struct *tsk)
static inline pid_t task_tgid_nr(const struct task_struct *tsk)
{
return tsk->tgid;
}
@@ -1281,7 +1286,7 @@ static inline pid_t task_tgid_vnr(struct task_struct *tsk)
}
static inline pid_t task_pgrp_nr(struct task_struct *tsk)
static inline pid_t task_pgrp_nr(const struct task_struct *tsk)
{
return tsk->signal->__pgrp;
}
@@ -1294,7 +1299,7 @@ static inline pid_t task_pgrp_vnr(struct task_struct *tsk)
}
static inline pid_t task_session_nr(struct task_struct *tsk)
static inline pid_t task_session_nr(const struct task_struct *tsk)
{
return tsk->signal->__session;
}
@@ -1321,7 +1326,7 @@ static inline pid_t task_ppid_nr_ns(struct task_struct *tsk,
* If pid_alive fails, then pointers within the task structure
* can be stale and must not be dereferenced.
*/
static inline int pid_alive(struct task_struct *p)
static inline int pid_alive(const struct task_struct *p)
{
return p->pids[PIDTYPE_PID].pid != NULL;
}
@@ -1332,7 +1337,7 @@ static inline int pid_alive(struct task_struct *p)
*
* Check if a task structure is the first user space task the kernel created.
*/
static inline int is_global_init(struct task_struct *tsk)
static inline int is_global_init(const struct task_struct *tsk)
{
return tsk->pid == 1;
}
@@ -1469,7 +1474,7 @@ extern int rt_mutex_getprio(struct task_struct *p);
extern void rt_mutex_setprio(struct task_struct *p, int prio);
extern void rt_mutex_adjust_pi(struct task_struct *p);
#else
static inline int rt_mutex_getprio(struct task_struct *p)
static inline int rt_mutex_getprio(const struct task_struct *p)
{
return p->normal_prio;
}
@@ -1721,7 +1726,7 @@ extern void wait_task_inactive(struct task_struct * p);
* all we care about is that we have a task with the appropriate
* pid, we don't actually care if we have the right task.
*/
static inline int has_group_leader_pid(struct task_struct *p)
static inline int has_group_leader_pid(const struct task_struct *p)
{
return p->pid == p->tgid;
}
@@ -1738,7 +1743,7 @@ static inline struct task_struct *next_thread(const struct task_struct *p)
struct task_struct, thread_group);
}
static inline int thread_group_empty(struct task_struct *p)
static inline int thread_group_empty(const struct task_struct *p)
{
return list_empty(&p->thread_group);
}
......
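The const qualifiers added above let purely read-only code pass a const struct task_struct * straight into these accessors without casting the qualifier away. A tiny sketch of the kind of helper this enables (the function name is made up for illustration):

    /* read-only inspection helper: compiles cleanly now that
     * pid_alive() and task_pid_nr() take const task pointers */
    static pid_t pid_of_task(const struct task_struct *p)
    {
            return pid_alive(p) ? task_pid_nr(p) : 0;
    }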
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -322,7 +322,6 @@ config CPUSETS
config FAIR_GROUP_SCHED
bool "Fair group CPU scheduler"
default y
depends on EXPERIMENTAL
help
This feature lets the CPU scheduler recognize task groups and control CPU
bandwidth allocation to such task groups.
......
--- a/kernel/profile.c
+++ b/kernel/profile.c
@@ -60,6 +60,7 @@ static int __init profile_setup(char * str)
int par;
if (!strncmp(str, sleepstr, strlen(sleepstr))) {
#ifdef CONFIG_SCHEDSTATS
prof_on = SLEEP_PROFILING;
if (str[strlen(sleepstr)] == ',')
str += strlen(sleepstr) + 1;
@@ -68,6 +69,10 @@ static int __init profile_setup(char * str)
printk(KERN_INFO
"kernel sleep profiling enabled (shift: %ld)\n",
prof_shift);
#else
printk(KERN_WARNING
"kernel sleep profiling requires CONFIG_SCHEDSTATS\n");
#endif /* CONFIG_SCHEDSTATS */
} else if (!strncmp(str, schedstr, strlen(schedstr))) {
prof_on = SCHED_PROFILING;
if (str[strlen(schedstr)] == ',')
......
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -66,6 +66,7 @@
#include <linux/pagemap.h>
#include <asm/tlb.h>
#include <asm/irq_regs.h>
/*
* Scheduler clock - returns current time in nanosec units.
@@ -837,11 +838,18 @@ struct rq_iterator {
struct task_struct *(*next)(void *);
};
static int balance_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest,
unsigned long max_nr_move, unsigned long max_load_move,
struct sched_domain *sd, enum cpu_idle_type idle,
int *all_pinned, unsigned long *load_moved,
int *this_best_prio, struct rq_iterator *iterator);
#ifdef CONFIG_SMP
static unsigned long
balance_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest,
unsigned long max_load_move, struct sched_domain *sd,
enum cpu_idle_type idle, int *all_pinned,
int *this_best_prio, struct rq_iterator *iterator);
static int
iter_move_one_task(struct rq *this_rq, int this_cpu, struct rq *busiest,
struct sched_domain *sd, enum cpu_idle_type idle,
struct rq_iterator *iterator);
#endif
#include "sched_stats.h"
#include "sched_idletask.c"
@@ -2223,17 +2231,17 @@ int can_migrate_task(struct task_struct *p, struct rq *rq, int this_cpu,
return 1;
}
static int balance_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest,
unsigned long max_nr_move, unsigned long max_load_move,
struct sched_domain *sd, enum cpu_idle_type idle,
int *all_pinned, unsigned long *load_moved,
int *this_best_prio, struct rq_iterator *iterator)
static unsigned long
balance_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest,
unsigned long max_load_move, struct sched_domain *sd,
enum cpu_idle_type idle, int *all_pinned,
int *this_best_prio, struct rq_iterator *iterator)
{
int pulled = 0, pinned = 0, skip_for_load;
struct task_struct *p;
long rem_load_move = max_load_move;
if (max_nr_move == 0 || max_load_move == 0)
if (max_load_move == 0)
goto out;
pinned = 1;
@@ -2266,7 +2274,7 @@ static int balance_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest,
* We only want to steal up to the prescribed number of tasks
* and the prescribed amount of weighted load.
*/
if (pulled < max_nr_move && rem_load_move > 0) {
if (rem_load_move > 0) {
if (p->prio < *this_best_prio)
*this_best_prio = p->prio;
p = iterator->next(iterator->arg);
@@ -2274,7 +2282,7 @@ static int balance_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest,
}
out:
/*
* Right now, this is the only place pull_task() is called,
* Right now, this is one of only two places pull_task() is called,
* so we can safely collect pull_task() stats here rather than
* inside pull_task().
*/
@@ -2282,8 +2290,8 @@ static int balance_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest,
if (all_pinned)
*all_pinned = pinned;
*load_moved = max_load_move - rem_load_move;
return pulled;
return max_load_move - rem_load_move;
}
/*
@@ -2305,7 +2313,7 @@ static int move_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest,
do {
total_load_moved +=
class->load_balance(this_rq, this_cpu, busiest,
ULONG_MAX, max_load_move - total_load_moved,
max_load_move - total_load_moved,
sd, idle, all_pinned, &this_best_prio);
class = class->next;
} while (class && max_load_move > total_load_moved);
@@ -2313,6 +2321,32 @@ static int move_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest,
return total_load_moved > 0;
}
static int
iter_move_one_task(struct rq *this_rq, int this_cpu, struct rq *busiest,
struct sched_domain *sd, enum cpu_idle_type idle,
struct rq_iterator *iterator)
{
struct task_struct *p = iterator->start(iterator->arg);
int pinned = 0;
while (p) {
if (can_migrate_task(p, busiest, this_cpu, sd, idle, &pinned)) {
pull_task(busiest, p, this_rq, this_cpu);
/*
* Right now, this is only the second place pull_task()
* is called, so we can safely collect pull_task()
* stats here rather than inside pull_task().
*/
schedstat_inc(sd, lb_gained[idle]);
return 1;
}
p = iterator->next(iterator->arg);
}
return 0;
}
/*
* move_one_task tries to move exactly one task from busiest to this_rq, as
* part of active balancing operations within "domain".
@@ -2324,12 +2358,9 @@ static int move_one_task(struct rq *this_rq, int this_cpu, struct rq *busiest,
struct sched_domain *sd, enum cpu_idle_type idle)
{
const struct sched_class *class;
int this_best_prio = MAX_PRIO;
for (class = sched_class_highest; class; class = class->next)
if (class->load_balance(this_rq, this_cpu, busiest,
1, ULONG_MAX, sd, idle, NULL,
&this_best_prio))
if (class->move_one_task(this_rq, this_cpu, busiest, sd, idle))
return 1;
return 0;
@@ -3266,18 +3297,6 @@ static inline void idle_balance(int cpu, struct rq *rq)
{
}
/* Avoid "used but not defined" warning on UP */
static int balance_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest,
unsigned long max_nr_move, unsigned long max_load_move,
struct sched_domain *sd, enum cpu_idle_type idle,
int *all_pinned, unsigned long *load_moved,
int *this_best_prio, struct rq_iterator *iterator)
{
*load_moved = 0;
return 0;
}
#endif
DEFINE_PER_CPU(struct kernel_stat, kstat);
@@ -3507,12 +3526,19 @@ EXPORT_SYMBOL(sub_preempt_count);
*/
static noinline void __schedule_bug(struct task_struct *prev)
{
printk(KERN_ERR "BUG: scheduling while atomic: %s/0x%08x/%d\n",
prev->comm, preempt_count(), task_pid_nr(prev));
struct pt_regs *regs = get_irq_regs();
printk(KERN_ERR "BUG: scheduling while atomic: %s/%d/0x%08x\n",
prev->comm, prev->pid, preempt_count());
debug_show_held_locks(prev);
if (irqs_disabled())
print_irqtrace_events(prev);
dump_stack();
if (regs)
show_regs(regs);
else
dump_stack();
}
/*
@@ -3820,7 +3846,7 @@ __wake_up_sync(wait_queue_head_t *q, unsigned int mode, int nr_exclusive)
}
EXPORT_SYMBOL_GPL(__wake_up_sync); /* For internal use only */
void fastcall complete(struct completion *x)
void complete(struct completion *x)
{
unsigned long flags;
@@ -3832,7 +3858,7 @@ void fastcall complete(struct completion *x)
}
EXPORT_SYMBOL(complete);
void fastcall complete_all(struct completion *x)
void complete_all(struct completion *x)
{
unsigned long flags;
@@ -3884,13 +3910,13 @@ wait_for_common(struct completion *x, long timeout, int state)
return timeout;
}
void fastcall __sched wait_for_completion(struct completion *x)
void __sched wait_for_completion(struct completion *x)
{
wait_for_common(x, MAX_SCHEDULE_TIMEOUT, TASK_UNINTERRUPTIBLE);
}
EXPORT_SYMBOL(wait_for_completion);
unsigned long fastcall __sched
unsigned long __sched
wait_for_completion_timeout(struct completion *x, unsigned long timeout)
{
return wait_for_common(x, timeout, TASK_UNINTERRUPTIBLE);
@@ -3906,7 +3932,7 @@ int __sched wait_for_completion_interruptible(struct completion *x)
}
EXPORT_SYMBOL(wait_for_completion_interruptible);
unsigned long fastcall __sched
unsigned long __sched
wait_for_completion_interruptible_timeout(struct completion *x,
unsigned long timeout)
{
@@ -5461,11 +5487,12 @@ static void register_sched_domain_sysctl(void)
struct ctl_table *entry = sd_alloc_ctl_entry(cpu_num + 1);
char buf[32];
WARN_ON(sd_ctl_dir[0].child);
sd_ctl_dir[0].child = entry;
if (entry == NULL)
return;
sd_ctl_dir[0].child = entry;
for_each_online_cpu(i) {
snprintf(buf, 32, "cpu%d", i);
entry->procname = kstrdup(buf, GFP_KERNEL);
@@ -5473,14 +5500,19 @@ static void register_sched_domain_sysctl(void)
entry->child = sd_alloc_ctl_cpu_table(i);
entry++;
}
WARN_ON(sd_sysctl_header);
sd_sysctl_header = register_sysctl_table(sd_ctl_root);
}
/* may be called multiple times per register */
static void unregister_sched_domain_sysctl(void)
{
unregister_sysctl_table(sd_sysctl_header);
if (sd_sysctl_header)
unregister_sysctl_table(sd_sysctl_header);
sd_sysctl_header = NULL;
sd_free_ctl_entry(&sd_ctl_dir[0].child);
if (sd_ctl_dir[0].child)
sd_free_ctl_entry(&sd_ctl_dir[0].child);
}
#else
static void register_sched_domain_sysctl(void)
@@ -5611,101 +5643,101 @@ int nr_cpu_ids __read_mostly = NR_CPUS;
EXPORT_SYMBOL(nr_cpu_ids);
#ifdef CONFIG_SCHED_DEBUG
static void sched_domain_debug(struct sched_domain *sd, int cpu)
static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level)
{
int level = 0;
struct sched_group *group = sd->groups;
cpumask_t groupmask;
char str[NR_CPUS];
if (!sd) {
printk(KERN_DEBUG "CPU%d attaching NULL sched-domain.\n", cpu);
return;
cpumask_scnprintf(str, NR_CPUS, sd->span);
cpus_clear(groupmask);
printk(KERN_DEBUG "%*s domain %d: ", level, "", level);
if (!(sd->flags & SD_LOAD_BALANCE)) {
printk("does not load-balance\n");
if (sd->parent)
printk(KERN_ERR "ERROR: !SD_LOAD_BALANCE domain"
" has parent");
return -1;
}
printk(KERN_DEBUG "CPU%d attaching sched-domain:\n", cpu);
printk(KERN_CONT "span %s\n", str);
if (!cpu_isset(cpu, sd->span)) {
printk(KERN_ERR "ERROR: domain->span does not contain "
"CPU%d\n", cpu);
}
if (!cpu_isset(cpu, group->cpumask)) {
printk(KERN_ERR "ERROR: domain->groups does not contain"
" CPU%d\n", cpu);
}
printk(KERN_DEBUG "%*s groups:", level + 1, "");
do {
int i;
char str[NR_CPUS];
struct sched_group *group = sd->groups;
cpumask_t groupmask;
cpumask_scnprintf(str, NR_CPUS, sd->span);
cpus_clear(groupmask);
printk(KERN_DEBUG);
for (i = 0; i < level + 1; i++)
printk(" ");
printk("domain %d: ", level);
if (!(sd->flags & SD_LOAD_BALANCE)) {
printk("does not load-balance\n");
if (sd->parent)
printk(KERN_ERR "ERROR: !SD_LOAD_BALANCE domain"
" has parent");
if (!group) {
printk("\n");
printk(KERN_ERR "ERROR: group is NULL\n");
break;
}
printk("span %s\n", str);
if (!group->__cpu_power) {
printk(KERN_CONT "\n");
printk(KERN_ERR "ERROR: domain->cpu_power not "
"set\n");
break;
}
if (!cpu_isset(cpu, sd->span))
printk(KERN_ERR "ERROR: domain->span does not contain "
"CPU%d\n", cpu);
if (!cpu_isset(cpu, group->cpumask))
printk(KERN_ERR "ERROR: domain->groups does not contain"
" CPU%d\n", cpu);
if (!cpus_weight(group->cpumask)) {
printk(KERN_CONT "\n");
printk(KERN_ERR "ERROR: empty group\n");
break;
}
printk(KERN_DEBUG);
for (i = 0; i < level + 2; i++)
printk(" ");
printk("groups:");
do {
if (!group) {
printk("\n");
printk(KERN_ERR "ERROR: group is NULL\n");
break;
}
if (cpus_intersects(groupmask, group->cpumask)) {
printk(KERN_CONT "\n");
printk(KERN_ERR "ERROR: repeated CPUs\n");
break;
}
if (!group->__cpu_power) {
printk(KERN_CONT "\n");
printk(KERN_ERR "ERROR: domain->cpu_power not "
"set\n");
break;
}
cpus_or(groupmask, groupmask, group->cpumask);
if (!cpus_weight(group->cpumask)) {
printk(KERN_CONT "\n");
printk(KERN_ERR "ERROR: empty group\n");
break;
}
cpumask_scnprintf(str, NR_CPUS, group->cpumask);
printk(KERN_CONT " %s", str);
if (cpus_intersects(groupmask, group->cpumask)) {
printk(KERN_CONT "\n");
printk(KERN_ERR "ERROR: repeated CPUs\n");
break;
}
group = group->next;
} while (group != sd->groups);
printk(KERN_CONT "\n");
cpus_or(groupmask, groupmask, group->cpumask);
if (!cpus_equal(sd->span, groupmask))
printk(KERN_ERR "ERROR: groups don't span domain->span\n");
cpumask_scnprintf(str, NR_CPUS, group->cpumask);
printk(KERN_CONT " %s", str);
if (sd->parent && !cpus_subset(groupmask, sd->parent->span))
printk(KERN_ERR "ERROR: parent span is not a superset "
"of domain->span\n");
return 0;
}
group = group->next;
} while (group != sd->groups);
printk(KERN_CONT "\n");
static void sched_domain_debug(struct sched_domain *sd, int cpu)
{
int level = 0;
if (!cpus_equal(sd->span, groupmask))
printk(KERN_ERR "ERROR: groups don't span "
"domain->span\n");
if (!sd) {
printk(KERN_DEBUG "CPU%d attaching NULL sched-domain.\n", cpu);
return;
}
printk(KERN_DEBUG "CPU%d attaching sched-domain:\n", cpu);
for (;;) {
if (sched_domain_debug_one(sd, cpu, level))
break;
level++;
sd = sd->parent;
if (!sd)
continue;
if (!cpus_subset(groupmask, sd->span))
printk(KERN_ERR "ERROR: parent span is not a superset "
"of domain->span\n");
} while (sd);
break;
}
}
#else
# define sched_domain_debug(sd, cpu) do { } while (0)
@@ -6424,13 +6456,17 @@ static cpumask_t fallback_doms;
*/
static int arch_init_sched_domains(const cpumask_t *cpu_map)
{
int err;
ndoms_cur = 1;
doms_cur = kmalloc(sizeof(cpumask_t), GFP_KERNEL);
if (!doms_cur)
doms_cur = &fallback_doms;
cpus_andnot(*doms_cur, *cpu_map, cpu_isolated_map);
err = build_sched_domains(doms_cur);
register_sched_domain_sysctl();
return build_sched_domains(doms_cur);
return err;
}
static void arch_destroy_sched_domains(const cpumask_t *cpu_map)
@@ -6479,6 +6515,9 @@ void partition_sched_domains(int ndoms_new, cpumask_t *doms_new)
{
int i, j;
/* always unregister in case we don't destroy any domains */
unregister_sched_domain_sysctl();
if (doms_new == NULL) {
ndoms_new = 1;
doms_new = &fallback_doms;
@@ -6514,6 +6553,8 @@ void partition_sched_domains(int ndoms_new, cpumask_t *doms_new)
kfree(doms_cur);
doms_cur = doms_new;
ndoms_cur = ndoms_new;
register_sched_domain_sysctl();
}
#if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT)
@@ -7101,25 +7142,25 @@ unsigned long sched_group_shares(struct task_group *tg)
#ifdef CONFIG_FAIR_CGROUP_SCHED
/* return corresponding task_group object of a cgroup */
static inline struct task_group *cgroup_tg(struct cgroup *cont)
static inline struct task_group *cgroup_tg(struct cgroup *cgrp)
{
return container_of(cgroup_subsys_state(cont, cpu_cgroup_subsys_id),
struct task_group, css);
return container_of(cgroup_subsys_state(cgrp, cpu_cgroup_subsys_id),
struct task_group, css);
}
static struct cgroup_subsys_state *
cpu_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont)
cpu_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cgrp)
{
struct task_group *tg;
if (!cont->parent) {
if (!cgrp->parent) {
/* This is early initialization for the top cgroup */
init_task_group.css.cgroup = cont;
init_task_group.css.cgroup = cgrp;
return &init_task_group.css;
}
/* we support only 1-level deep hierarchical scheduler atm */
if (cont->parent->parent)
if (cgrp->parent->parent)
return ERR_PTR(-EINVAL);
tg = sched_create_group();
@@ -7127,21 +7168,21 @@ cpu_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont)
return ERR_PTR(-ENOMEM);
/* Bind the cgroup to task_group object we just created */
tg->css.cgroup = cont;
tg->css.cgroup = cgrp;
return &tg->css;
}
static void cpu_cgroup_destroy(struct cgroup_subsys *ss,
struct cgroup *cont)
struct cgroup *cgrp)
{
struct task_group *tg = cgroup_tg(cont);
struct task_group *tg = cgroup_tg(cgrp);
sched_destroy_group(tg);
}
static int cpu_cgroup_can_attach(struct cgroup_subsys *ss,
struct cgroup *cont, struct task_struct *tsk)
struct cgroup *cgrp, struct task_struct *tsk)
{
/* We don't support RT-tasks being in separate groups */
if (tsk->sched_class != &fair_sched_class)
@@ -7151,38 +7192,21 @@ static int cpu_cgroup_can_attach(struct cgroup_subsys *ss,
}
static void
cpu_cgroup_attach(struct cgroup_subsys *ss, struct cgroup *cont,
cpu_cgroup_attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
struct cgroup *old_cont, struct task_struct *tsk)
{
sched_move_task(tsk);
}
static ssize_t cpu_shares_write(struct cgroup *cont, struct cftype *cftype,
struct file *file, const char __user *userbuf,
size_t nbytes, loff_t *ppos)
static int cpu_shares_write_uint(struct cgroup *cgrp, struct cftype *cftype,
u64 shareval)
{
unsigned long shareval;
struct task_group *tg = cgroup_tg(cont);
char buffer[2*sizeof(unsigned long) + 1];
int rc;
if (nbytes > 2*sizeof(unsigned long)) /* safety check */
return -E2BIG;
if (copy_from_user(buffer, userbuf, nbytes))
return -EFAULT;
buffer[nbytes] = 0; /* nul-terminate */
shareval = simple_strtoul(buffer, NULL, 10);
rc = sched_group_set_shares(tg, shareval);
return (rc < 0 ? rc : nbytes);
return sched_group_set_shares(cgroup_tg(cgrp), shareval);
}
static u64 cpu_shares_read_uint(struct cgroup *cont, struct cftype *cft)
static u64 cpu_shares_read_uint(struct cgroup *cgrp, struct cftype *cft)
{
struct task_group *tg = cgroup_tg(cont);
struct task_group *tg = cgroup_tg(cgrp);
return (u64) tg->shares;
}
@@ -7190,7 +7214,7 @@ static u64 cpu_shares_read_uint(struct cgroup *cont, struct cftype *cft)
static struct cftype cpu_shares = {
.name = "shares",
.read_uint = cpu_shares_read_uint,
.write = cpu_shares_write,
.write_uint = cpu_shares_write_uint,
};
static int cpu_cgroup_populate(struct cgroup_subsys *ss, struct cgroup *cont)
......
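Both balance_tasks() and the new iter_move_one_task() above are generic over struct rq_iterator, so each scheduling class only has to supply start()/next() walkers for its own runqueue layout. A stand-alone model of that callback pattern, with all names invented for illustration (this is not kernel code):

    /* A generic iterator: arg carries walker-private state,
     * start()/next() hide the container layout. */
    struct item {
            int weight;
            struct item *next;
    };

    struct iterator {
            void *arg;
            struct item *(*start)(void *);
            struct item *(*next)(void *);
    };

    struct cursor {
            struct item *pos;       /* current list position */
    };

    static struct item *walk_start(void *arg)
    {
            struct cursor *c = arg;
            return c->pos;
    }

    static struct item *walk_next(void *arg)
    {
            struct cursor *c = arg;
            if (c->pos)
                    c->pos = c->pos->next;
            return c->pos;
    }

    /* shaped like iter_move_one_task(): stop at the first movable item */
    static int move_one(struct iterator *it, int (*movable)(struct item *))
    {
            struct item *p;

            for (p = it->start(it->arg); p; p = it->next(it->arg))
                    if (movable(p))
                            return 1;   /* a real balancer pulls p here */
            return 0;
    }

This is why move_one_task_fair() and move_one_task_rt() later in the diff reduce to filling in three iterator fields and calling iter_move_one_task().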
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -876,6 +876,7 @@ static void put_prev_task_fair(struct rq *rq, struct task_struct *prev)
}
}
#ifdef CONFIG_SMP
/**************************************************
* Fair scheduling class load-balancing methods:
*/
@@ -936,12 +937,11 @@ static int cfs_rq_best_prio(struct cfs_rq *cfs_rq)
static unsigned long
load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest,
unsigned long max_nr_move, unsigned long max_load_move,
unsigned long max_load_move,
struct sched_domain *sd, enum cpu_idle_type idle,
int *all_pinned, int *this_best_prio)
{
struct cfs_rq *busy_cfs_rq;
unsigned long load_moved, total_nr_moved = 0, nr_moved;
long rem_load_move = max_load_move;
struct rq_iterator cfs_rq_iterator;
@@ -969,25 +969,48 @@ load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest,
#else
# define maxload rem_load_move
#endif
/* pass busy_cfs_rq argument into
/*
* pass busy_cfs_rq argument into
* load_balance_[start|next]_fair iterators
*/
cfs_rq_iterator.arg = busy_cfs_rq;
nr_moved = balance_tasks(this_rq, this_cpu, busiest,
max_nr_move, maxload, sd, idle, all_pinned,
&load_moved, this_best_prio, &cfs_rq_iterator);
total_nr_moved += nr_moved;
max_nr_move -= nr_moved;
rem_load_move -= load_moved;
rem_load_move -= balance_tasks(this_rq, this_cpu, busiest,
maxload, sd, idle, all_pinned,
this_best_prio,
&cfs_rq_iterator);
if (max_nr_move <= 0 || rem_load_move <= 0)
if (rem_load_move <= 0)
break;
}
return max_load_move - rem_load_move;
}
static int
move_one_task_fair(struct rq *this_rq, int this_cpu, struct rq *busiest,
struct sched_domain *sd, enum cpu_idle_type idle)
{
struct cfs_rq *busy_cfs_rq;
struct rq_iterator cfs_rq_iterator;
cfs_rq_iterator.start = load_balance_start_fair;
cfs_rq_iterator.next = load_balance_next_fair;
for_each_leaf_cfs_rq(busiest, busy_cfs_rq) {
/*
* pass busy_cfs_rq argument into
* load_balance_[start|next]_fair iterators
*/
cfs_rq_iterator.arg = busy_cfs_rq;
if (iter_move_one_task(this_rq, this_cpu, busiest, sd, idle,
&cfs_rq_iterator))
return 1;
}
return 0;
}
#endif
/*
* scheduler tick hitting a task of our scheduling class:
*/
@@ -1063,7 +1086,10 @@ static const struct sched_class fair_sched_class = {
.pick_next_task = pick_next_task_fair,
.put_prev_task = put_prev_task_fair,
#ifdef CONFIG_SMP
.load_balance = load_balance_fair,
.move_one_task = move_one_task_fair,
#endif
.set_curr_task = set_curr_task_fair,
.task_tick = task_tick_fair,
......
--- a/kernel/sched_idletask.c
+++ b/kernel/sched_idletask.c
@@ -37,15 +37,24 @@ static void put_prev_task_idle(struct rq *rq, struct task_struct *prev)
{
}
#ifdef CONFIG_SMP
static unsigned long
load_balance_idle(struct rq *this_rq, int this_cpu, struct rq *busiest,
unsigned long max_nr_move, unsigned long max_load_move,
struct sched_domain *sd, enum cpu_idle_type idle,
int *all_pinned, int *this_best_prio)
unsigned long max_load_move,
struct sched_domain *sd, enum cpu_idle_type idle,
int *all_pinned, int *this_best_prio)
{
return 0;
}
static int
move_one_task_idle(struct rq *this_rq, int this_cpu, struct rq *busiest,
struct sched_domain *sd, enum cpu_idle_type idle)
{
return 0;
}
#endif
static void task_tick_idle(struct rq *rq, struct task_struct *curr)
{
}
@@ -69,7 +78,10 @@ const struct sched_class idle_sched_class = {
.pick_next_task = pick_next_task_idle,
.put_prev_task = put_prev_task_idle,
#ifdef CONFIG_SMP
.load_balance = load_balance_idle,
.move_one_task = move_one_task_idle,
#endif
.set_curr_task = set_curr_task_idle,
.task_tick = task_tick_idle,
......
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -98,6 +98,7 @@ static void put_prev_task_rt(struct rq *rq, struct task_struct *p)
p->se.exec_start = 0;
}
#ifdef CONFIG_SMP
/*
* Load-balancing iterator. Note: while the runqueue stays locked
* during the whole iteration, the current task might be
@@ -172,13 +173,11 @@ static struct task_struct *load_balance_next_rt(void *arg)
static unsigned long
load_balance_rt(struct rq *this_rq, int this_cpu, struct rq *busiest,
unsigned long max_nr_move, unsigned long max_load_move,
struct sched_domain *sd, enum cpu_idle_type idle,
int *all_pinned, int *this_best_prio)
unsigned long max_load_move,
struct sched_domain *sd, enum cpu_idle_type idle,
int *all_pinned, int *this_best_prio)
{
int nr_moved;
struct rq_iterator rt_rq_iterator;
unsigned long load_moved;
rt_rq_iterator.start = load_balance_start_rt;
rt_rq_iterator.next = load_balance_next_rt;
@@ -187,12 +186,24 @@ load_balance_rt(struct rq *this_rq, int this_cpu, struct rq *busiest,
*/
rt_rq_iterator.arg = busiest;
nr_moved = balance_tasks(this_rq, this_cpu, busiest, max_nr_move,
max_load_move, sd, idle, all_pinned, &load_moved,
this_best_prio, &rt_rq_iterator);
return balance_tasks(this_rq, this_cpu, busiest, max_load_move, sd,
idle, all_pinned, this_best_prio, &rt_rq_iterator);
}
static int
move_one_task_rt(struct rq *this_rq, int this_cpu, struct rq *busiest,
struct sched_domain *sd, enum cpu_idle_type idle)
{
struct rq_iterator rt_rq_iterator;
rt_rq_iterator.start = load_balance_start_rt;
rt_rq_iterator.next = load_balance_next_rt;
rt_rq_iterator.arg = busiest;
return load_moved;
return iter_move_one_task(this_rq, this_cpu, busiest, sd, idle,
&rt_rq_iterator);
}
#endif
static void task_tick_rt(struct rq *rq, struct task_struct *p)
{
@@ -236,7 +247,10 @@ const struct sched_class rt_sched_class = {
.pick_next_task = pick_next_task_rt,
.put_prev_task = put_prev_task_rt,
#ifdef CONFIG_SMP
.load_balance = load_balance_rt,
.move_one_task = move_one_task_rt,
#endif
.set_curr_task = set_curr_task_rt,
.task_tick = task_tick_rt,
......
--- a/kernel/user.c
+++ b/kernel/user.c
@@ -129,7 +129,7 @@ static inline void uids_mutex_unlock(void)
}
/* return cpu shares held by the user */
ssize_t cpu_shares_show(struct kset *kset, char *buffer)
static ssize_t cpu_shares_show(struct kset *kset, char *buffer)
{
struct user_struct *up = container_of(kset, struct user_struct, kset);
@@ -137,7 +137,8 @@ ssize_t cpu_shares_show(struct kset *kset, char *buffer)
}
/* modify cpu shares held by the user */
ssize_t cpu_shares_store(struct kset *kset, const char *buffer, size_t size)
static ssize_t cpu_shares_store(struct kset *kset, const char *buffer,
size_t size)
{
struct user_struct *up = container_of(kset, struct user_struct, kset);
unsigned long shares;
......