Commit b11ce8a2 authored by Linus Torvalds


Merge branch 'sched-v28-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip

* 'sched-v28-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip: (38 commits)
  sched debug: add name to sched_domain sysctl entries
  sched: sync wakeups vs avg_overlap
  sched: remove redundant code in cpu_cgroup_create()
  sched_rt.c: resch needed in rt_rq_enqueue() for the root rt_rq
  cpusets: scan_for_empty_cpusets(), cpuset doesn't seem to be so const
  sched: minor optimizations in wake_affine and select_task_rq_fair
  sched: maintain only task entities in cfs_rq->tasks list
  sched: fixup buddy selection
  sched: more sanity checks on the bandwidth settings
  sched: add some comments to the bandwidth code
  sched: fixlet for group load balance
  sched: rework wakeup preemption
  CFS scheduler: documentation about scheduling policies
  sched: clarify ifdef tangle
  sched: fix list traversal to use _rcu variant
  sched: turn off WAKEUP_OVERLAP
  sched: wakeup preempt when small overlap
  kernel/cpu.c: create a CPU_STARTING cpu_chain notifier
  kernel/cpu.c: Move the CPU_DYING notifiers
  sched: fix __load_balance_iterator() for cfq with only one task
  ...
parents f6bccf69 a5d8c348
@@ -168,10 +168,10 @@ if ($#ARGV < 0) {
 mkdir $ARGV[0],0777;
 $state = 0;
 while (<STDIN>) {
-	if (/^\.TH \"[^\"]*\" 4 \"([^\"]*)\"/) {
+	if (/^\.TH \"[^\"]*\" 9 \"([^\"]*)\"/) {
 		if ($state == 1) { close OUT }
 		$state = 1;
-		$fn = "$ARGV[0]/$1.4";
+		$fn = "$ARGV[0]/$1.9";
 		print STDERR "Creating $fn\n";
 		open OUT, ">$fn" or die "can't open $fn: $!\n";
 		print OUT $_;
...
@@ -149,6 +149,9 @@ smp_callin(void)
 	atomic_inc(&init_mm.mm_count);
 	current->active_mm = &init_mm;
 
+	/* inform the notifiers about the new cpu */
+	notify_cpu_starting(cpuid);
+
 	/* Must have completely accurate bogos. */
 	local_irq_enable();
...
@@ -277,6 +277,7 @@ asmlinkage void __cpuinit secondary_start_kernel(void)
 	/*
 	 * Enable local interrupts.
 	 */
+	notify_cpu_starting(cpu);
 	local_irq_enable();
 	local_fiq_enable();
...
@@ -178,6 +178,7 @@ void __init smp_callin(void)
 	unmask_irq(IPI_INTR_VECT);
 	unmask_irq(TIMER0_INTR_VECT);
 	preempt_disable();
+	notify_cpu_starting(cpu);
 	local_irq_enable();
 
 	cpu_set(cpu, cpu_online_map);
...
@@ -401,6 +401,7 @@ smp_callin (void)
 	spin_lock(&vector_lock);
 	/* Setup the per cpu irq handling data structures */
 	__setup_vector_irq(cpuid);
+	notify_cpu_starting(cpuid);
 	cpu_set(cpuid, cpu_online_map);
 	per_cpu(cpu_state, cpuid) = CPU_ONLINE;
 	spin_unlock(&vector_lock);
...
@@ -498,6 +498,8 @@ static void __init smp_online(void)
 {
 	int cpu_id = smp_processor_id();
 
+	notify_cpu_starting(cpu_id);
+
 	local_irq_enable();
 
 	/* Get our bogomips. */
...
@@ -121,6 +121,8 @@ asmlinkage __cpuinit void start_secondary(void)
 	cpu = smp_processor_id();
 	cpu_data[cpu].udelay_val = loops_per_jiffy;
 
+	notify_cpu_starting(cpu);
+
 	mp_ops->smp_finish();
 	set_cpu_sibling_map(cpu);
...
@@ -453,6 +453,7 @@ int __devinit start_secondary(void *unused)
 	secondary_cpu_time_init();
 
 	ipi_call_lock();
+	notify_cpu_starting(cpu);
 	cpu_set(cpu, cpu_online_map);
 	/* Update sibling maps */
 	base = cpu_first_thread_in_core(cpu);
...
@@ -585,6 +585,8 @@ int __cpuinit start_secondary(void *cpuvoid)
 	/* Enable pfault pseudo page faults on this cpu. */
 	pfault_init();
 
+	/* call cpu notifiers */
+	notify_cpu_starting(smp_processor_id());
 	/* Mark this cpu as online */
 	spin_lock(&call_lock);
 	cpu_set(smp_processor_id(), cpu_online_map);
...
@@ -82,6 +82,8 @@ asmlinkage void __cpuinit start_secondary(void)
 	preempt_disable();
 
+	notify_cpu_starting(smp_processor_id());
+
 	local_irq_enable();
 
 	calibrate_delay();
...
@@ -88,6 +88,7 @@ void __init smp4d_callin(void)
 	local_flush_cache_all();
 	local_flush_tlb_all();
 
+	notify_cpu_starting(cpuid);
 	/*
 	 * Unblock the master CPU _only_ when the scheduler state
 	 * of all secondary CPUs will be up-to-date, so after
...
@@ -71,6 +71,8 @@ void __cpuinit smp4m_callin(void)
 	local_flush_cache_all();
 	local_flush_tlb_all();
 
+	notify_cpu_starting(cpuid);
+
 	/* Get our local ticker going. */
 	smp_setup_percpu_timer();
...
@@ -85,6 +85,7 @@ static int idle_proc(void *cpup)
 	while (!cpu_isset(cpu, smp_commenced_mask))
 		cpu_relax();
 
+	notify_cpu_starting(cpu);
 	cpu_set(cpu, cpu_online_map);
 	default_idle();
 	return 0;
...
@@ -257,6 +257,7 @@ static void __cpuinit smp_callin(void)
 	end_local_APIC_setup();
 	map_cpu_to_logical_apicid();
 
+	notify_cpu_starting(cpuid);
 	/*
 	 * Get our bogomips.
 	 *
...
@@ -448,6 +448,8 @@ static void __init start_secondary(void *unused)
 	VDEBUG(("VOYAGER SMP: CPU%d, stack at about %p\n", cpuid, &cpuid));
 
+	notify_cpu_starting(cpuid);
+
 	/* enable interrupts */
 	local_irq_enable();
...
@@ -10,6 +10,18 @@
 #include <linux/wait.h>
 
+/**
+ * struct completion - structure used to maintain state for a "completion"
+ *
+ * This is the opaque structure used to maintain the state for a "completion".
+ * Completions currently use a FIFO to queue threads that have to wait for
+ * the "completion" event.
+ *
+ * See also: complete(), wait_for_completion() (and friends _timeout,
+ * _interruptible, _interruptible_timeout, and _killable), init_completion(),
+ * and macros DECLARE_COMPLETION(), DECLARE_COMPLETION_ONSTACK(), and
+ * INIT_COMPLETION().
+ */
 struct completion {
 	unsigned int done;
 	wait_queue_head_t wait;
@@ -21,6 +33,14 @@ struct completion {
 #define COMPLETION_INITIALIZER_ONSTACK(work) \
 	({ init_completion(&work); work; })
 
+/**
+ * DECLARE_COMPLETION: - declare and initialize a completion structure
+ * @work: identifier for the completion structure
+ *
+ * This macro declares and initializes a completion structure. Generally used
+ * for static declarations. You should use the _ONSTACK variant for automatic
+ * variables.
+ */
 #define DECLARE_COMPLETION(work) \
 	struct completion work = COMPLETION_INITIALIZER(work)
 
@@ -29,6 +49,13 @@ struct completion {
  * completions - so we use the _ONSTACK() variant for those that
  * are on the kernel stack:
  */
+/**
+ * DECLARE_COMPLETION_ONSTACK: - declare and initialize a completion structure
+ * @work: identifier for the completion structure
+ *
+ * This macro declares and initializes a completion structure on the kernel
+ * stack.
+ */
 #ifdef CONFIG_LOCKDEP
 # define DECLARE_COMPLETION_ONSTACK(work) \
 	struct completion work = COMPLETION_INITIALIZER_ONSTACK(work)
@@ -36,6 +63,13 @@ struct completion {
 # define DECLARE_COMPLETION_ONSTACK(work) DECLARE_COMPLETION(work)
 #endif
 
+/**
+ * init_completion: - Initialize a dynamically allocated completion
+ * @x: completion structure that is to be initialized
+ *
+ * This inline function will initialize a dynamically created completion
+ * structure.
+ */
 static inline void init_completion(struct completion *x)
 {
 	x->done = 0;
@@ -55,6 +89,13 @@ extern bool completion_done(struct completion *x);
 extern void complete(struct completion *);
 extern void complete_all(struct completion *);
 
+/**
+ * INIT_COMPLETION: - reinitialize a completion structure
+ * @x: completion structure to be reinitialized
+ *
+ * This macro should be used to reinitialize a completion structure so it can
+ * be reused. This is especially important after complete_all() is used.
+ */
 #define INIT_COMPLETION(x)	((x).done = 0)
...
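As a reading aid for the kerneldoc added above, a minimal usage sketch of the completion API (illustrative driver-style code, not part of this merge; setup_done, worker_fn and example_init are made-up names):

/* Illustrative only: one context blocks on a completion until another
 * context signals it, as described by the kerneldoc above. */
#include <linux/completion.h>
#include <linux/kthread.h>
#include <linux/err.h>
#include <linux/init.h>

static DECLARE_COMPLETION(setup_done);		/* static declaration */

static int worker_fn(void *data)
{
	/* ... do the setup work ... */
	complete(&setup_done);			/* wake up one waiter */
	return 0;
}

static int __init example_init(void)
{
	struct task_struct *tsk = kthread_run(worker_fn, NULL, "example");

	if (IS_ERR(tsk))
		return PTR_ERR(tsk);

	wait_for_completion(&setup_done);	/* sleep until complete() runs */

	/* To reuse the same completion later (important after complete_all()),
	 * reset it first with INIT_COMPLETION(setup_done); */
	return 0;
}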
@@ -69,6 +69,7 @@ static inline void unregister_cpu_notifier(struct notifier_block *nb)
 #endif
 
 int cpu_up(unsigned int cpu);
+void notify_cpu_starting(unsigned int cpu);
 extern void cpu_hotplug_init(void);
 extern void cpu_maps_update_begin(void);
 extern void cpu_maps_update_done(void);
...
@@ -213,9 +213,16 @@ static inline int notifier_to_errno(int ret)
 #define CPU_DOWN_FAILED		0x0006 /* CPU (unsigned)v NOT going down */
 #define CPU_DEAD		0x0007 /* CPU (unsigned)v dead */
 #define CPU_DYING		0x0008 /* CPU (unsigned)v not running any task,
-					* not handling interrupts, soon dead */
+					* not handling interrupts, soon dead.
+					* Called on the dying cpu, interrupts
+					* are already disabled. Must not
+					* sleep, must not fail */
 #define CPU_POST_DEAD		0x0009 /* CPU (unsigned)v dead, cpu_hotplug
 					* lock is dropped */
+#define CPU_STARTING		0x000A /* CPU (unsigned)v soon running.
+					* Called on the new cpu, just before
+					* enabling interrupts. Must not sleep,
+					* must not fail */
 
 /* Used for CPU hotplug events occuring while tasks are frozen due to a suspend
  * operation in progress
@@ -229,6 +236,7 @@ static inline int notifier_to_errno(int ret)
 #define CPU_DOWN_FAILED_FROZEN	(CPU_DOWN_FAILED | CPU_TASKS_FROZEN)
 #define CPU_DEAD_FROZEN		(CPU_DEAD | CPU_TASKS_FROZEN)
 #define CPU_DYING_FROZEN	(CPU_DYING | CPU_TASKS_FROZEN)
+#define CPU_STARTING_FROZEN	(CPU_STARTING | CPU_TASKS_FROZEN)
 
 /* Hibernation and suspend events */
 #define PM_HIBERNATION_PREPARE	0x0001 /* Going to hibernate */
...
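For context, a sketch of how the new CPU_STARTING event would be consumed; the notifier block and callback are hypothetical, but register_cpu_notifier() and the callback signature follow the existing cpu-notifier convention:

/* Illustrative only: a cpu-notifier callback reacting to CPU_STARTING.
 * It runs on the starting cpu with interrupts still disabled, so it must
 * not sleep and must not fail, just like CPU_DYING. */
#include <linux/cpu.h>
#include <linux/notifier.h>
#include <linux/init.h>

static int __cpuinit example_cpu_callback(struct notifier_block *nfb,
					  unsigned long action, void *hcpu)
{
	unsigned int cpu = (unsigned long)hcpu;

	switch (action) {
	case CPU_STARTING:
	case CPU_STARTING_FROZEN:
		/* set up per-cpu state for 'cpu' before it goes online */
		break;
	case CPU_DYING:
	case CPU_DYING_FROZEN:
		/* tear down per-cpu state for 'cpu'; may not sleep either */
		break;
	}
	return NOTIFY_OK;
}

static struct notifier_block __cpuinitdata example_cpu_notifier = {
	.notifier_call = example_cpu_callback,
};

static int __init example_register(void)
{
	register_cpu_notifier(&example_cpu_notifier);
	return 0;
}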
@@ -104,8 +104,8 @@ struct prop_local_single {
 	 * snapshot of the last seen global state
 	 * and a lock protecting this state
 	 */
-	int shift;
 	unsigned long period;
+	int shift;
 	spinlock_t lock;		/* protect the snapshot state */
 };
...
@@ -451,8 +451,8 @@ struct signal_struct {
 	 * - everyone except group_exit_task is stopped during signal delivery
 	 *   of fatal signals, group_exit_task processes the signal.
 	 */
-	struct task_struct	*group_exit_task;
 	int			notify_count;
+	struct task_struct	*group_exit_task;
 
 	/* thread group stop support, overloads group_exit_code too */
 	int			group_stop_count;
@@ -824,6 +824,9 @@ struct sched_domain {
 	unsigned int ttwu_move_affine;
 	unsigned int ttwu_move_balance;
 #endif
+#ifdef CONFIG_SCHED_DEBUG
+	char *name;
+#endif
 };
 
 extern void partition_sched_domains(int ndoms_new, cpumask_t *doms_new,
@@ -897,7 +900,7 @@ struct sched_class {
 	void (*yield_task) (struct rq *rq);
 	int  (*select_task_rq)(struct task_struct *p, int sync);
 
-	void (*check_preempt_curr) (struct rq *rq, struct task_struct *p);
+	void (*check_preempt_curr) (struct rq *rq, struct task_struct *p, int sync);
 
 	struct task_struct * (*pick_next_task) (struct rq *rq);
 	void (*put_prev_task) (struct rq *rq, struct task_struct *p);
@@ -1010,8 +1013,8 @@ struct sched_entity {
 struct sched_rt_entity {
 	struct list_head run_list;
-	unsigned int time_slice;
 	unsigned long timeout;
+	unsigned int time_slice;
 	int nr_cpus_allowed;
 
 	struct sched_rt_entity *back;
...
@@ -199,13 +199,14 @@ static int __ref take_cpu_down(void *_param)
 	struct take_cpu_down_param *param = _param;
 	int err;
 
-	raw_notifier_call_chain(&cpu_chain, CPU_DYING | param->mod,
-				param->hcpu);
 	/* Ensure this CPU doesn't handle any more interrupts. */
 	err = __cpu_disable();
 	if (err < 0)
 		return err;
 
+	raw_notifier_call_chain(&cpu_chain, CPU_DYING | param->mod,
+				param->hcpu);
+
 	/* Force idle task to run as soon as we yield: it should
 	   immediately notice cpu is offline and die quickly. */
 	sched_idle_next();
@@ -453,6 +454,25 @@ void __ref enable_nonboot_cpus(void)
 }
 #endif /* CONFIG_PM_SLEEP_SMP */
 
+/**
+ * notify_cpu_starting(cpu) - call the CPU_STARTING notifiers
+ * @cpu: cpu that just started
+ *
+ * This function calls the cpu_chain notifiers with CPU_STARTING.
+ * It must be called by the arch code on the new cpu, before the new cpu
+ * enables interrupts and before the "boot" cpu returns from __cpu_up().
+ */
+void notify_cpu_starting(unsigned int cpu)
+{
+	unsigned long val = CPU_STARTING;
+
+#ifdef CONFIG_PM_SLEEP_SMP
+	if (cpu_isset(cpu, frozen_cpus))
+		val = CPU_STARTING_FROZEN;
+#endif /* CONFIG_PM_SLEEP_SMP */
+	raw_notifier_call_chain(&cpu_chain, val, (void *)(long)cpu);
+}
+
 #endif /* CONFIG_SMP */
 
 /*
...
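The ordering contract in the kerneldoc above is the one every architecture hunk earlier in this diff follows; schematically (illustrative pseudo-arch code with a made-up function name, not a real bringup path):

#include <linux/cpu.h>
#include <linux/cpumask.h>
#include <linux/irqflags.h>

/* Schematic only: the call order notify_cpu_starting() expects from
 * arch secondary-CPU bringup code. */
void example_secondary_bringup(unsigned int cpu)
{
	/* low-level per-arch init runs first, interrupts still disabled */

	notify_cpu_starting(cpu);	/* CPU_STARTING notifiers: may not sleep or fail */

	local_irq_enable();		/* interrupts only after the notifiers have run */
	cpu_set(cpu, cpu_online_map);	/* lets the boot cpu's __cpu_up() complete */
}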
@@ -1921,7 +1921,7 @@ static void remove_tasks_in_empty_cpuset(struct cpuset *cs)
  * that has tasks along with an empty 'mems'.  But if we did see such
  * a cpuset, we'd handle it just like we do if its 'cpus' was empty.
  */
-static void scan_for_empty_cpusets(const struct cpuset *root)
+static void scan_for_empty_cpusets(struct cpuset *root)
 {
 	LIST_HEAD(queue);
 	struct cpuset *cp;	/* scans cpusets being updated */
...
(Two larger file diffs are collapsed here and not shown.)
@@ -11,3 +11,4 @@ SCHED_FEAT(ASYM_GRAN, 1)
 SCHED_FEAT(LB_BIAS, 1)
 SCHED_FEAT(LB_WAKEUP_UPDATE, 1)
 SCHED_FEAT(ASYM_EFF_LOAD, 1)
+SCHED_FEAT(WAKEUP_OVERLAP, 0)
@@ -14,7 +14,7 @@ static int select_task_rq_idle(struct task_struct *p, int sync)
 /*
  * Idle tasks are unconditionally rescheduled:
  */
-static void check_preempt_curr_idle(struct rq *rq, struct task_struct *p)
+static void check_preempt_curr_idle(struct rq *rq, struct task_struct *p, int sync)
 {
 	resched_task(rq->idle);
 }
@@ -76,7 +76,7 @@ static void switched_to_idle(struct rq *rq, struct task_struct *p,
 	if (running)
 		resched_task(rq->curr);
 	else
-		check_preempt_curr(rq, p);
+		check_preempt_curr(rq, p, 0);
 }
 
 static void prio_changed_idle(struct rq *rq, struct task_struct *p,
@@ -93,7 +93,7 @@ static void prio_changed_idle(struct rq *rq, struct task_struct *p,
 		if (p->prio > oldprio)
 			resched_task(rq->curr);
 	} else
-		check_preempt_curr(rq, p);
+		check_preempt_curr(rq, p, 0);
 }
 
 /*
...
@@ -102,12 +102,12 @@ static void dequeue_rt_entity(struct sched_rt_entity *rt_se);
 
 static void sched_rt_rq_enqueue(struct rt_rq *rt_rq)
 {
+	struct task_struct *curr = rq_of_rt_rq(rt_rq)->curr;
 	struct sched_rt_entity *rt_se = rt_rq->rt_se;
 
-	if (rt_se && !on_rt_rq(rt_se) && rt_rq->rt_nr_running) {
-		struct task_struct *curr = rq_of_rt_rq(rt_rq)->curr;
-
-		enqueue_rt_entity(rt_se);
+	if (rt_rq->rt_nr_running) {
+		if (rt_se && !on_rt_rq(rt_se))
+			enqueue_rt_entity(rt_se);
+
 		if (rt_rq->highest_prio < curr->prio)
 			resched_task(curr);
 	}
@@ -231,6 +231,9 @@ static inline struct rt_bandwidth *sched_rt_bandwidth(struct rt_rq *rt_rq)
 #endif /* CONFIG_RT_GROUP_SCHED */
 
 #ifdef CONFIG_SMP
+/*
+ * We ran out of runtime, see if we can borrow some from our neighbours.
+ */
 static int do_balance_runtime(struct rt_rq *rt_rq)
 {
 	struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq);
@@ -250,9 +253,18 @@ static int do_balance_runtime(struct rt_rq *rt_rq)
 			continue;
 
 		spin_lock(&iter->rt_runtime_lock);
+		/*
+		 * Either all rqs have inf runtime and there's nothing to steal
+		 * or __disable_runtime() below sets a specific rq to inf to
+		 * indicate it's been disabled and disallow stealing.
+		 */
 		if (iter->rt_runtime == RUNTIME_INF)
 			goto next;
 
+		/*
+		 * From runqueues with spare time, take 1/n part of their
+		 * spare time, but no more than our period.
+		 */
 		diff = iter->rt_runtime - iter->rt_time;
 		if (diff > 0) {
 			diff = div_u64((u64)diff, weight);
@@ -274,6 +286,9 @@ static int do_balance_runtime(struct rt_rq *rt_rq)
 	return more;
 }
 
+/*
+ * Ensure this RQ takes back all the runtime it lent to its neighbours.
+ */
 static void __disable_runtime(struct rq *rq)
 {
 	struct root_domain *rd = rq->rd;
@@ -289,17 +304,33 @@ static void __disable_runtime(struct rq *rq)
 		spin_lock(&rt_b->rt_runtime_lock);
 		spin_lock(&rt_rq->rt_runtime_lock);
+		/*
+		 * Either we're all inf and nobody needs to borrow, or we're
+		 * already disabled and thus have nothing to do, or we have
+		 * exactly the right amount of runtime to take out.
+		 */
 		if (rt_rq->rt_runtime == RUNTIME_INF ||
 		    rt_rq->rt_runtime == rt_b->rt_runtime)
 			goto balanced;
 		spin_unlock(&rt_rq->rt_runtime_lock);
 
+		/*
+		 * Calculate the difference between what we started out with
+		 * and what we currently have, that's the amount of runtime
+		 * we lent out and now have to reclaim.
+		 */
 		want = rt_b->rt_runtime - rt_rq->rt_runtime;
 
+		/*
+		 * Greedy reclaim, take back as much as we can.
+		 */
 		for_each_cpu_mask(i, rd->span) {
 			struct rt_rq *iter = sched_rt_period_rt_rq(rt_b, i);
 			s64 diff;
 
+			/*
+			 * Can't reclaim from ourselves or disabled runqueues.
+			 */
 			if (iter == rt_rq || iter->rt_runtime == RUNTIME_INF)
 				continue;
 
@@ -319,8 +350,16 @@ static void __disable_runtime(struct rq *rq)
 		}
 
 		spin_lock(&rt_rq->rt_runtime_lock);
+		/*
+		 * We cannot be left wanting - that would mean some runtime
+		 * leaked out of the system.
+		 */
 		BUG_ON(want);
 balanced:
+		/*
+		 * Disable all the borrow logic by pretending we have inf
+		 * runtime - in which case borrowing doesn't make sense.
+		 */
 		rt_rq->rt_runtime = RUNTIME_INF;
 		spin_unlock(&rt_rq->rt_runtime_lock);
 		spin_unlock(&rt_b->rt_runtime_lock);
@@ -343,6 +382,9 @@ static void __enable_runtime(struct rq *rq)
 	if (unlikely(!scheduler_running))
 		return;
 
+	/*
+	 * Reset each runqueue's bandwidth settings.
+	 */
 	for_each_leaf_rt_rq(rt_rq, rq) {
 		struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq);
 
@@ -389,7 +431,7 @@ static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun)
 	int i, idle = 1;
 	cpumask_t span;
 
-	if (rt_b->rt_runtime == RUNTIME_INF)
+	if (!rt_bandwidth_enabled() || rt_b->rt_runtime == RUNTIME_INF)
 		return 1;
 
 	span = sched_rt_period_mask();
@@ -487,6 +529,9 @@ static void update_curr_rt(struct rq *rq)
 	curr->se.exec_start = rq->clock;
 	cpuacct_charge(curr, delta_exec);
 
+	if (!rt_bandwidth_enabled())
+		return;
+
 	for_each_sched_rt_entity(rt_se) {
 		rt_rq = rt_rq_of_se(rt_se);
 
@@ -784,7 +829,7 @@ static void check_preempt_equal_prio(struct rq *rq, struct task_struct *p)
 /*
  * Preempt the current task with a newly woken task if needed:
  */
-static void check_preempt_curr_rt(struct rq *rq, struct task_struct *p)
+static void check_preempt_curr_rt(struct rq *rq, struct task_struct *p, int sync)
 {
 	if (p->prio < rq->curr->prio) {
 		resched_task(rq->curr);
...
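The 1/n borrowing rule that the new comments in do_balance_runtime() describe can be made concrete with a small stand-alone sketch (plain user-space C with made-up numbers, not kernel code):

/* Illustration only: a starved runqueue takes at most 1/weight of each
 * neighbour's spare runtime, and never grows past its own period. */
#include <stdio.h>
#include <stdint.h>

int main(void)
{
	const int64_t period = 950000;			/* our runtime cap (us) */
	int64_t my_runtime = 300000;			/* what we currently hold */
	int64_t spare[] = { 400000, 0, 200000 };	/* neighbours' (runtime - time) */
	int weight = 4;					/* number of cpus in the domain */

	for (unsigned i = 0; i < sizeof(spare) / sizeof(spare[0]); i++) {
		int64_t diff = spare[i];

		if (diff <= 0)
			continue;
		diff /= weight;				/* take 1/n of the spare time... */
		if (my_runtime + diff > period)		/* ...but no more than our period */
			diff = period - my_runtime;
		my_runtime += diff;
		printf("borrowed %lld from cpu %u -> runtime %lld\n",
		       (long long)diff, i, (long long)my_runtime);
	}
	return 0;
}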
@@ -169,7 +169,7 @@ static ssize_t cpu_rt_runtime_show(struct kobject *kobj,
 {
 	struct user_struct *up = container_of(kobj, struct user_struct, kobj);
 
-	return sprintf(buf, "%lu\n", sched_group_rt_runtime(up->tg));
+	return sprintf(buf, "%ld\n", sched_group_rt_runtime(up->tg));
 }
 
 static ssize_t cpu_rt_runtime_store(struct kobject *kobj,
@@ -180,7 +180,7 @@ static ssize_t cpu_rt_runtime_store(struct kobject *kobj,
 	unsigned long rt_runtime;
 	int rc;
 
-	sscanf(buf, "%lu", &rt_runtime);
+	sscanf(buf, "%ld", &rt_runtime);
 
 	rc = sched_group_set_rt_runtime(up->tg, rt_runtime);
...