Commit d1ccc66d authored by Ingo Molnar

sched/core: Clean up comments

Refresh the comments in the core scheduler code:

 - Capitalize sentences consistently

 - Capitalize 'CPU' consistently

 - ... and other small details.

Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
parent 975e155e
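
For orientation: everything in the diff below is comment or line-wrapping cleanup, with no functional change intended. Two representative before/after pairs, copied verbatim from the hunks further down, illustrate the conventions being applied:

	/* cpus with isolated domains */            /* old */
	/* CPUs with isolated domains */            /* new: 'CPU' capitalized */

	/* task can safely be re-inserted now */    /* old */
	/* Task can safely be re-inserted now: */   /* new: sentence case, trailing colon */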
/* /*
* kernel/sched/core.c * kernel/sched/core.c
* *
* Kernel scheduler and related syscalls * Core kernel scheduler code and related syscalls
* *
* Copyright (C) 1991-2002 Linus Torvalds * Copyright (C) 1991-2002 Linus Torvalds
*
* 1996-12-23 Modified by Dave Grothe to fix bugs in semaphores and
* make semaphores SMP safe
* 1998-11-19 Implemented schedule_timeout() and related stuff
* by Andrea Arcangeli
* 2002-01-04 New ultra-scalable O(1) scheduler by Ingo Molnar:
* hybrid priority-list and round-robin design with
* an array-switch method of distributing timeslices
* and per-CPU runqueues. Cleanups and useful suggestions
* by Davide Libenzi, preemptible kernel bits by Robert Love.
* 2003-09-03 Interactivity tuning by Con Kolivas.
* 2004-04-02 Scheduler domains code by Nick Piggin
* 2007-04-15 Work begun on replacing all interactivity tuning with a
* fair scheduling design by Con Kolivas.
* 2007-05-05 Load balancing (smp-nice) and other improvements
* by Peter Williams
* 2007-05-06 Interactivity improvements to CFS by Mike Galbraith
* 2007-07-01 Group scheduling enhancements by Srivatsa Vaddagiri
* 2007-11-29 RT balancing improvements by Steven Rostedt, Gregory Haskins,
* Thomas Gleixner, Mike Kravetz
*/ */
#include <linux/kasan.h> #include <linux/kasan.h>
#include <linux/mm.h> #include <linux/mm.h>
#include <linux/module.h> #include <linux/module.h>
...@@ -143,7 +122,7 @@ const_debug unsigned int sysctl_sched_nr_migrate = 32; ...@@ -143,7 +122,7 @@ const_debug unsigned int sysctl_sched_nr_migrate = 32;
const_debug unsigned int sysctl_sched_time_avg = MSEC_PER_SEC; const_debug unsigned int sysctl_sched_time_avg = MSEC_PER_SEC;
/* /*
* period over which we measure -rt task cpu usage in us. * period over which we measure -rt task CPU usage in us.
* default: 1s * default: 1s
*/ */
unsigned int sysctl_sched_rt_period = 1000000; unsigned int sysctl_sched_rt_period = 1000000;
...@@ -156,7 +135,7 @@ __read_mostly int scheduler_running; ...@@ -156,7 +135,7 @@ __read_mostly int scheduler_running;
*/ */
int sysctl_sched_rt_runtime = 950000; int sysctl_sched_rt_runtime = 950000;
/* cpus with isolated domains */ /* CPUs with isolated domains */
cpumask_var_t cpu_isolated_map; cpumask_var_t cpu_isolated_map;
/* /*
...@@ -224,7 +203,7 @@ struct rq *task_rq_lock(struct task_struct *p, struct rq_flags *rf) ...@@ -224,7 +203,7 @@ struct rq *task_rq_lock(struct task_struct *p, struct rq_flags *rf)
* If we observe the old cpu in task_rq_lock, the acquire of * If we observe the old cpu in task_rq_lock, the acquire of
* the old rq->lock will fully serialize against the stores. * the old rq->lock will fully serialize against the stores.
* *
* If we observe the new cpu in task_rq_lock, the acquire will * If we observe the new CPU in task_rq_lock, the acquire will
* pair with the WMB to ensure we must then also see migrating. * pair with the WMB to ensure we must then also see migrating.
*/ */
if (likely(rq == task_rq(p) && !task_on_rq_migrating(p))) { if (likely(rq == task_rq(p) && !task_on_rq_migrating(p))) {
...@@ -461,7 +440,7 @@ void wake_up_q(struct wake_q_head *head) ...@@ -461,7 +440,7 @@ void wake_up_q(struct wake_q_head *head)
task = container_of(node, struct task_struct, wake_q); task = container_of(node, struct task_struct, wake_q);
BUG_ON(!task); BUG_ON(!task);
/* task can safely be re-inserted now */ /* Task can safely be re-inserted now: */
node = node->next; node = node->next;
task->wake_q.next = NULL; task->wake_q.next = NULL;
...@@ -519,12 +498,12 @@ void resched_cpu(int cpu) ...@@ -519,12 +498,12 @@ void resched_cpu(int cpu)
#ifdef CONFIG_SMP #ifdef CONFIG_SMP
#ifdef CONFIG_NO_HZ_COMMON #ifdef CONFIG_NO_HZ_COMMON
/* /*
* In the semi idle case, use the nearest busy cpu for migrating timers * In the semi idle case, use the nearest busy CPU for migrating timers
* from an idle cpu. This is good for power-savings. * from an idle CPU. This is good for power-savings.
* *
* We don't do similar optimization for completely idle system, as * We don't do similar optimization for completely idle system, as
* selecting an idle cpu will add more delays to the timers than intended * selecting an idle CPU will add more delays to the timers than intended
* (as that cpu's timer base may not be uptodate wrt jiffies etc). * (as that CPU's timer base may not be uptodate wrt jiffies etc).
*/ */
int get_nohz_timer_target(void) int get_nohz_timer_target(void)
{ {
...@@ -553,6 +532,7 @@ int get_nohz_timer_target(void) ...@@ -553,6 +532,7 @@ int get_nohz_timer_target(void)
rcu_read_unlock(); rcu_read_unlock();
return cpu; return cpu;
} }
/* /*
* When add_timer_on() enqueues a timer into the timer wheel of an * When add_timer_on() enqueues a timer into the timer wheel of an
* idle CPU then this timer might expire before the next timer event * idle CPU then this timer might expire before the next timer event
...@@ -1021,7 +1001,7 @@ struct migration_arg { ...@@ -1021,7 +1001,7 @@ struct migration_arg {
}; };
/* /*
* Move (not current) task off this cpu, onto dest cpu. We're doing * Move (not current) task off this CPU, onto the destination CPU. We're doing
* this because either it can't run here any more (set_cpus_allowed() * this because either it can't run here any more (set_cpus_allowed()
* away from this CPU, or CPU going down), or because we're * away from this CPU, or CPU going down), or because we're
* attempting to rebalance this task on exec (sched_exec). * attempting to rebalance this task on exec (sched_exec).
...@@ -1055,8 +1035,8 @@ static int migration_cpu_stop(void *data) ...@@ -1055,8 +1035,8 @@ static int migration_cpu_stop(void *data)
struct rq *rq = this_rq(); struct rq *rq = this_rq();
/* /*
* The original target cpu might have gone down and we might * The original target CPU might have gone down and we might
* be on another cpu but it doesn't matter. * be on another CPU but it doesn't matter.
*/ */
local_irq_disable(); local_irq_disable();
/* /*
...@@ -1174,7 +1154,7 @@ static int __set_cpus_allowed_ptr(struct task_struct *p, ...@@ -1174,7 +1154,7 @@ static int __set_cpus_allowed_ptr(struct task_struct *p,
if (p->flags & PF_KTHREAD) { if (p->flags & PF_KTHREAD) {
/* /*
* For kernel threads that do indeed end up on online && * For kernel threads that do indeed end up on online &&
* !active we want to ensure they are strict per-cpu threads. * !active we want to ensure they are strict per-CPU threads.
*/ */
WARN_ON(cpumask_intersects(new_mask, cpu_online_mask) && WARN_ON(cpumask_intersects(new_mask, cpu_online_mask) &&
!cpumask_intersects(new_mask, cpu_active_mask) && !cpumask_intersects(new_mask, cpu_active_mask) &&
...@@ -1279,7 +1259,7 @@ static void __migrate_swap_task(struct task_struct *p, int cpu) ...@@ -1279,7 +1259,7 @@ static void __migrate_swap_task(struct task_struct *p, int cpu)
/* /*
* Task isn't running anymore; make it appear like we migrated * Task isn't running anymore; make it appear like we migrated
* it before it went to sleep. This means on wakeup we make the * it before it went to sleep. This means on wakeup we make the
* previous cpu our target instead of where it really is. * previous CPU our target instead of where it really is.
*/ */
p->wake_cpu = cpu; p->wake_cpu = cpu;
} }
...@@ -1511,12 +1491,12 @@ EXPORT_SYMBOL_GPL(kick_process); ...@@ -1511,12 +1491,12 @@ EXPORT_SYMBOL_GPL(kick_process);
* *
* - on cpu-up we allow per-cpu kthreads on the online && !active cpu, * - on cpu-up we allow per-cpu kthreads on the online && !active cpu,
* see __set_cpus_allowed_ptr(). At this point the newly online * see __set_cpus_allowed_ptr(). At this point the newly online
* cpu isn't yet part of the sched domains, and balancing will not * CPU isn't yet part of the sched domains, and balancing will not
* see it. * see it.
* *
* - on cpu-down we clear cpu_active() to mask the sched domains and * - on CPU-down we clear cpu_active() to mask the sched domains and
* avoid the load balancer to place new tasks on the to be removed * avoid the load balancer to place new tasks on the to be removed
* cpu. Existing tasks will remain running there and will be taken * CPU. Existing tasks will remain running there and will be taken
* off. * off.
* *
* This means that fallback selection must not select !active CPUs. * This means that fallback selection must not select !active CPUs.
...@@ -1532,9 +1512,9 @@ static int select_fallback_rq(int cpu, struct task_struct *p) ...@@ -1532,9 +1512,9 @@ static int select_fallback_rq(int cpu, struct task_struct *p)
int dest_cpu; int dest_cpu;
/* /*
* If the node that the cpu is on has been offlined, cpu_to_node() * If the node that the CPU is on has been offlined, cpu_to_node()
* will return -1. There is no cpu on the node, and we should * will return -1. There is no CPU on the node, and we should
* select the cpu on the other node. * select the CPU on the other node.
*/ */
if (nid != -1) { if (nid != -1) {
nodemask = cpumask_of_node(nid); nodemask = cpumask_of_node(nid);
...@@ -1566,7 +1546,7 @@ static int select_fallback_rq(int cpu, struct task_struct *p) ...@@ -1566,7 +1546,7 @@ static int select_fallback_rq(int cpu, struct task_struct *p)
state = possible; state = possible;
break; break;
} }
/* fall-through */ /* Fall-through */
case possible: case possible:
do_set_cpus_allowed(p, cpu_possible_mask); do_set_cpus_allowed(p, cpu_possible_mask);
state = fail; state = fail;
...@@ -1610,7 +1590,7 @@ int select_task_rq(struct task_struct *p, int cpu, int sd_flags, int wake_flags) ...@@ -1610,7 +1590,7 @@ int select_task_rq(struct task_struct *p, int cpu, int sd_flags, int wake_flags)
/* /*
* In order not to call set_task_cpu() on a blocking task we need * In order not to call set_task_cpu() on a blocking task we need
* to rely on ttwu() to place the task on a valid ->cpus_allowed * to rely on ttwu() to place the task on a valid ->cpus_allowed
* cpu. * CPU.
* *
* Since this is common to all placement strategies, this lives here. * Since this is common to all placement strategies, this lives here.
* *
...@@ -1684,7 +1664,7 @@ static inline void ttwu_activate(struct rq *rq, struct task_struct *p, int en_fl ...@@ -1684,7 +1664,7 @@ static inline void ttwu_activate(struct rq *rq, struct task_struct *p, int en_fl
activate_task(rq, p, en_flags); activate_task(rq, p, en_flags);
p->on_rq = TASK_ON_RQ_QUEUED; p->on_rq = TASK_ON_RQ_QUEUED;
/* if a worker is waking up, notify workqueue */ /* If a worker is waking up, notify the workqueue: */
if (p->flags & PF_WQ_WORKER) if (p->flags & PF_WQ_WORKER)
wq_worker_waking_up(p, cpu_of(rq)); wq_worker_waking_up(p, cpu_of(rq));
} }
...@@ -1867,7 +1847,7 @@ void wake_up_if_idle(int cpu) ...@@ -1867,7 +1847,7 @@ void wake_up_if_idle(int cpu)
raw_spin_lock_irqsave(&rq->lock, flags); raw_spin_lock_irqsave(&rq->lock, flags);
if (is_idle_task(rq->curr)) if (is_idle_task(rq->curr))
smp_send_reschedule(cpu); smp_send_reschedule(cpu);
/* Else cpu is not in idle, do nothing here */ /* Else CPU is not idle, do nothing here: */
raw_spin_unlock_irqrestore(&rq->lock, flags); raw_spin_unlock_irqrestore(&rq->lock, flags);
} }
...@@ -1888,7 +1868,7 @@ static void ttwu_queue(struct task_struct *p, int cpu, int wake_flags) ...@@ -1888,7 +1868,7 @@ static void ttwu_queue(struct task_struct *p, int cpu, int wake_flags)
#if defined(CONFIG_SMP) #if defined(CONFIG_SMP)
if (sched_feat(TTWU_QUEUE) && !cpus_share_cache(smp_processor_id(), cpu)) { if (sched_feat(TTWU_QUEUE) && !cpus_share_cache(smp_processor_id(), cpu)) {
sched_clock_cpu(cpu); /* sync clocks x-cpu */ sched_clock_cpu(cpu); /* Sync clocks across CPUs */
ttwu_queue_remote(p, cpu, wake_flags); ttwu_queue_remote(p, cpu, wake_flags);
return; return;
} }
...@@ -1907,8 +1887,8 @@ static void ttwu_queue(struct task_struct *p, int cpu, int wake_flags) ...@@ -1907,8 +1887,8 @@ static void ttwu_queue(struct task_struct *p, int cpu, int wake_flags)
* MIGRATION * MIGRATION
* *
* The basic program-order guarantee on SMP systems is that when a task [t] * The basic program-order guarantee on SMP systems is that when a task [t]
* migrates, all its activity on its old cpu [c0] happens-before any subsequent * migrates, all its activity on its old CPU [c0] happens-before any subsequent
* execution on its new cpu [c1]. * execution on its new CPU [c1].
* *
* For migration (of runnable tasks) this is provided by the following means: * For migration (of runnable tasks) this is provided by the following means:
* *
...@@ -1919,7 +1899,7 @@ static void ttwu_queue(struct task_struct *p, int cpu, int wake_flags) ...@@ -1919,7 +1899,7 @@ static void ttwu_queue(struct task_struct *p, int cpu, int wake_flags)
* *
* Transitivity guarantees that B happens after A and C after B. * Transitivity guarantees that B happens after A and C after B.
* Note: we only require RCpc transitivity. * Note: we only require RCpc transitivity.
* Note: the cpu doing B need not be c0 or c1 * Note: the CPU doing B need not be c0 or c1
* *
* Example: * Example:
* *
...@@ -2027,7 +2007,8 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags) ...@@ -2027,7 +2007,8 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
trace_sched_waking(p); trace_sched_waking(p);
success = 1; /* we're going to change ->state */ /* We're going to change ->state: */
success = 1;
cpu = task_cpu(p); cpu = task_cpu(p);
/* /*
...@@ -2076,7 +2057,7 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags) ...@@ -2076,7 +2057,7 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
smp_rmb(); smp_rmb();
/* /*
* If the owning (remote) cpu is still in the middle of schedule() with * If the owning (remote) CPU is still in the middle of schedule() with
* this task as prev, wait until its done referencing the task. * this task as prev, wait until its done referencing the task.
* *
* Pairs with the smp_store_release() in finish_lock_switch(). * Pairs with the smp_store_release() in finish_lock_switch().
...@@ -2448,7 +2429,7 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p) ...@@ -2448,7 +2429,7 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p)
*/ */
raw_spin_lock_irqsave(&p->pi_lock, flags); raw_spin_lock_irqsave(&p->pi_lock, flags);
/* /*
* We're setting the cpu for the first time, we don't migrate, * We're setting the CPU for the first time, we don't migrate,
* so use __set_task_cpu(). * so use __set_task_cpu().
*/ */
__set_task_cpu(p, cpu); __set_task_cpu(p, cpu);
...@@ -2591,7 +2572,7 @@ void wake_up_new_task(struct task_struct *p) ...@@ -2591,7 +2572,7 @@ void wake_up_new_task(struct task_struct *p)
/* /*
* Fork balancing, do it here and not earlier because: * Fork balancing, do it here and not earlier because:
* - cpus_allowed can change in the fork path * - cpus_allowed can change in the fork path
* - any previously selected cpu might disappear through hotplug * - any previously selected CPU might disappear through hotplug
* *
* Use __set_task_cpu() to avoid calling sched_class::migrate_task_rq, * Use __set_task_cpu() to avoid calling sched_class::migrate_task_rq,
* as we're not fully set-up yet. * as we're not fully set-up yet.
...@@ -2945,7 +2926,7 @@ unsigned long nr_running(void) ...@@ -2945,7 +2926,7 @@ unsigned long nr_running(void)
} }
/* /*
* Check if only the current task is running on the cpu. * Check if only the current task is running on the CPU.
* *
* Caution: this function does not check that the caller has disabled * Caution: this function does not check that the caller has disabled
* preemption, thus the result might have a time-of-check-to-time-of-use * preemption, thus the result might have a time-of-check-to-time-of-use
...@@ -3104,8 +3085,8 @@ unsigned long long task_sched_runtime(struct task_struct *p) ...@@ -3104,8 +3085,8 @@ unsigned long long task_sched_runtime(struct task_struct *p)
* So we have a optimization chance when the task's delta_exec is 0. * So we have a optimization chance when the task's delta_exec is 0.
* Reading ->on_cpu is racy, but this is ok. * Reading ->on_cpu is racy, but this is ok.
* *
* If we race with it leaving cpu, we'll take a lock. So we're correct. * If we race with it leaving CPU, we'll take a lock. So we're correct.
* If we race with it entering cpu, unaccounted time is 0. This is * If we race with it entering CPU, unaccounted time is 0. This is
* indistinguishable from the read occurring a few cycles earlier. * indistinguishable from the read occurring a few cycles earlier.
* If we see ->on_cpu without ->on_rq, the task is leaving, and has * If we see ->on_cpu without ->on_rq, the task is leaving, and has
* been accounted, so we're correct here as well. * been accounted, so we're correct here as well.
...@@ -3333,7 +3314,7 @@ pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf) ...@@ -3333,7 +3314,7 @@ pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
if (unlikely(p == RETRY_TASK)) if (unlikely(p == RETRY_TASK))
goto again; goto again;
/* assumes fair_sched_class->next == idle_sched_class */ /* Assumes fair_sched_class->next == idle_sched_class */
if (unlikely(!p)) if (unlikely(!p))
p = idle_sched_class.pick_next_task(rq, prev, rf); p = idle_sched_class.pick_next_task(rq, prev, rf);
...@@ -3350,7 +3331,8 @@ pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf) ...@@ -3350,7 +3331,8 @@ pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
} }
} }
BUG(); /* the idle class will always have a runnable task */ /* The idle class should always have a runnable task: */
BUG();
} }
/* /*
...@@ -3421,7 +3403,8 @@ static void __sched notrace __schedule(bool preempt) ...@@ -3421,7 +3403,8 @@ static void __sched notrace __schedule(bool preempt)
raw_spin_lock(&rq->lock); raw_spin_lock(&rq->lock);
rq_pin_lock(rq, &rf); rq_pin_lock(rq, &rf);
rq->clock_update_flags <<= 1; /* promote REQ to ACT */ /* Promote REQ to ACT */
rq->clock_update_flags <<= 1;
switch_count = &prev->nivcsw; switch_count = &prev->nivcsw;
if (!preempt && prev->state) { if (!preempt && prev->state) {
...@@ -3465,7 +3448,9 @@ static void __sched notrace __schedule(bool preempt) ...@@ -3465,7 +3448,9 @@ static void __sched notrace __schedule(bool preempt)
++*switch_count; ++*switch_count;
trace_sched_switch(preempt, prev, next); trace_sched_switch(preempt, prev, next);
rq = context_switch(rq, prev, next, &rf); /* unlocks the rq */
/* Also unlocks the rq: */
rq = context_switch(rq, prev, next, &rf);
} else { } else {
rq->clock_update_flags &= ~(RQCF_ACT_SKIP|RQCF_REQ_SKIP); rq->clock_update_flags &= ~(RQCF_ACT_SKIP|RQCF_REQ_SKIP);
rq_unpin_lock(rq, &rf); rq_unpin_lock(rq, &rf);
...@@ -3492,14 +3477,18 @@ void __noreturn do_task_dead(void) ...@@ -3492,14 +3477,18 @@ void __noreturn do_task_dead(void)
smp_mb(); smp_mb();
raw_spin_unlock_wait(&current->pi_lock); raw_spin_unlock_wait(&current->pi_lock);
/* causes final put_task_struct in finish_task_switch(). */ /* Causes final put_task_struct in finish_task_switch(): */
__set_current_state(TASK_DEAD); __set_current_state(TASK_DEAD);
current->flags |= PF_NOFREEZE; /* tell freezer to ignore us */
/* Tell freezer to ignore us: */
current->flags |= PF_NOFREEZE;
__schedule(false); __schedule(false);
BUG(); BUG();
/* Avoid "noreturn function does return". */
/* Avoid "noreturn function does return" - but don't continue if BUG() is a NOP: */
for (;;) for (;;)
cpu_relax(); /* For when BUG is null */ cpu_relax();
} }
static inline void sched_submit_work(struct task_struct *tsk) static inline void sched_submit_work(struct task_struct *tsk)
...@@ -3792,7 +3781,8 @@ void rt_mutex_setprio(struct task_struct *p, int prio) ...@@ -3792,7 +3781,8 @@ void rt_mutex_setprio(struct task_struct *p, int prio)
check_class_changed(rq, p, prev_class, oldprio); check_class_changed(rq, p, prev_class, oldprio);
out_unlock: out_unlock:
preempt_disable(); /* avoid rq from going away on us */ /* Avoid rq from going away on us: */
preempt_disable();
__task_rq_unlock(rq, &rf); __task_rq_unlock(rq, &rf);
balance_callback(rq); balance_callback(rq);
...@@ -3862,7 +3852,7 @@ EXPORT_SYMBOL(set_user_nice); ...@@ -3862,7 +3852,7 @@ EXPORT_SYMBOL(set_user_nice);
*/ */
int can_nice(const struct task_struct *p, const int nice) int can_nice(const struct task_struct *p, const int nice)
{ {
/* convert nice value [19,-20] to rlimit style value [1,40] */ /* Convert nice value [19,-20] to rlimit style value [1,40]: */
int nice_rlim = nice_to_rlimit(nice); int nice_rlim = nice_to_rlimit(nice);
return (nice_rlim <= task_rlimit(p, RLIMIT_NICE) || return (nice_rlim <= task_rlimit(p, RLIMIT_NICE) ||
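
The [19,-20] -> [1,40] mapping mentioned in the comment above is a simple affine shift that follows directly from the two ranges. A stand-alone sketch for illustration only (the helper name here is made up; in the kernel this corresponds to nice_to_rlimit()):

#include <stdio.h>

#define MAX_NICE	19

/* Convert a nice value in [19, -20] to an RLIMIT_NICE-style value in [1, 40]: */
static int nice_to_rlimit_demo(int nice)
{
	return MAX_NICE - nice + 1;
}

int main(void)
{
	printf("nice  19 -> %d\n", nice_to_rlimit_demo(19));	/* prints 1  */
	printf("nice -20 -> %d\n", nice_to_rlimit_demo(-20));	/* prints 40 */
	return 0;
}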
...@@ -3918,7 +3908,7 @@ int task_prio(const struct task_struct *p) ...@@ -3918,7 +3908,7 @@ int task_prio(const struct task_struct *p)
} }
/** /**
* idle_cpu - is a given cpu idle currently? * idle_cpu - is a given CPU idle currently?
* @cpu: the processor in question. * @cpu: the processor in question.
* *
* Return: 1 if the CPU is currently idle. 0 otherwise. * Return: 1 if the CPU is currently idle. 0 otherwise.
...@@ -3942,10 +3932,10 @@ int idle_cpu(int cpu) ...@@ -3942,10 +3932,10 @@ int idle_cpu(int cpu)
} }
/** /**
* idle_task - return the idle task for a given cpu. * idle_task - return the idle task for a given CPU.
* @cpu: the processor in question. * @cpu: the processor in question.
* *
* Return: The idle task for the cpu @cpu. * Return: The idle task for the CPU @cpu.
*/ */
struct task_struct *idle_task(int cpu) struct task_struct *idle_task(int cpu)
{ {
...@@ -4111,7 +4101,7 @@ __checkparam_dl(const struct sched_attr *attr) ...@@ -4111,7 +4101,7 @@ __checkparam_dl(const struct sched_attr *attr)
} }
/* /*
* check the target process has a UID that matches the current process's * Check the target process has a UID that matches the current process's:
*/ */
static bool check_same_owner(struct task_struct *p) static bool check_same_owner(struct task_struct *p)
{ {
...@@ -4126,8 +4116,7 @@ static bool check_same_owner(struct task_struct *p) ...@@ -4126,8 +4116,7 @@ static bool check_same_owner(struct task_struct *p)
return match; return match;
} }
static bool dl_param_changed(struct task_struct *p, static bool dl_param_changed(struct task_struct *p, const struct sched_attr *attr)
const struct sched_attr *attr)
{ {
struct sched_dl_entity *dl_se = &p->dl; struct sched_dl_entity *dl_se = &p->dl;
...@@ -4154,10 +4143,10 @@ static int __sched_setscheduler(struct task_struct *p, ...@@ -4154,10 +4143,10 @@ static int __sched_setscheduler(struct task_struct *p,
int queue_flags = DEQUEUE_SAVE | DEQUEUE_MOVE; int queue_flags = DEQUEUE_SAVE | DEQUEUE_MOVE;
struct rq *rq; struct rq *rq;
/* may grab non-irq protected spin_locks */ /* May grab non-irq protected spin_locks: */
BUG_ON(in_interrupt()); BUG_ON(in_interrupt());
recheck: recheck:
/* double check policy once rq lock held */ /* Double check policy once rq lock held: */
if (policy < 0) { if (policy < 0) {
reset_on_fork = p->sched_reset_on_fork; reset_on_fork = p->sched_reset_on_fork;
policy = oldpolicy = p->policy; policy = oldpolicy = p->policy;
...@@ -4197,11 +4186,11 @@ static int __sched_setscheduler(struct task_struct *p, ...@@ -4197,11 +4186,11 @@ static int __sched_setscheduler(struct task_struct *p,
unsigned long rlim_rtprio = unsigned long rlim_rtprio =
task_rlimit(p, RLIMIT_RTPRIO); task_rlimit(p, RLIMIT_RTPRIO);
/* can't set/change the rt policy */ /* Can't set/change the rt policy: */
if (policy != p->policy && !rlim_rtprio) if (policy != p->policy && !rlim_rtprio)
return -EPERM; return -EPERM;
/* can't increase priority */ /* Can't increase priority: */
if (attr->sched_priority > p->rt_priority && if (attr->sched_priority > p->rt_priority &&
attr->sched_priority > rlim_rtprio) attr->sched_priority > rlim_rtprio)
return -EPERM; return -EPERM;
...@@ -4225,11 +4214,11 @@ static int __sched_setscheduler(struct task_struct *p, ...@@ -4225,11 +4214,11 @@ static int __sched_setscheduler(struct task_struct *p,
return -EPERM; return -EPERM;
} }
/* can't change other user's priorities */ /* Can't change other user's priorities: */
if (!check_same_owner(p)) if (!check_same_owner(p))
return -EPERM; return -EPERM;
/* Normal users shall not reset the sched_reset_on_fork flag */ /* Normal users shall not reset the sched_reset_on_fork flag: */
if (p->sched_reset_on_fork && !reset_on_fork) if (p->sched_reset_on_fork && !reset_on_fork)
return -EPERM; return -EPERM;
} }
...@@ -4241,7 +4230,7 @@ static int __sched_setscheduler(struct task_struct *p, ...@@ -4241,7 +4230,7 @@ static int __sched_setscheduler(struct task_struct *p,
} }
/* /*
* make sure no PI-waiters arrive (or leave) while we are * Make sure no PI-waiters arrive (or leave) while we are
* changing the priority of the task: * changing the priority of the task:
* *
* To be able to change p->policy safely, the appropriate * To be able to change p->policy safely, the appropriate
...@@ -4251,7 +4240,7 @@ static int __sched_setscheduler(struct task_struct *p, ...@@ -4251,7 +4240,7 @@ static int __sched_setscheduler(struct task_struct *p,
update_rq_clock(rq); update_rq_clock(rq);
/* /*
* Changing the policy of the stop threads its a very bad idea * Changing the policy of the stop threads its a very bad idea:
*/ */
if (p == rq->stop) { if (p == rq->stop) {
task_rq_unlock(rq, p, &rf); task_rq_unlock(rq, p, &rf);
...@@ -4307,7 +4296,7 @@ static int __sched_setscheduler(struct task_struct *p, ...@@ -4307,7 +4296,7 @@ static int __sched_setscheduler(struct task_struct *p,
#endif #endif
} }
/* recheck policy now with rq lock held */ /* Re-check policy now with rq lock held: */
if (unlikely(oldpolicy != -1 && oldpolicy != p->policy)) { if (unlikely(oldpolicy != -1 && oldpolicy != p->policy)) {
policy = oldpolicy = -1; policy = oldpolicy = -1;
task_rq_unlock(rq, p, &rf); task_rq_unlock(rq, p, &rf);
...@@ -4364,15 +4353,15 @@ static int __sched_setscheduler(struct task_struct *p, ...@@ -4364,15 +4353,15 @@ static int __sched_setscheduler(struct task_struct *p,
set_curr_task(rq, p); set_curr_task(rq, p);
check_class_changed(rq, p, prev_class, oldprio); check_class_changed(rq, p, prev_class, oldprio);
preempt_disable(); /* avoid rq from going away on us */
/* Avoid rq from going away on us: */
preempt_disable();
task_rq_unlock(rq, p, &rf); task_rq_unlock(rq, p, &rf);
if (pi) if (pi)
rt_mutex_adjust_pi(p); rt_mutex_adjust_pi(p);
/* /* Run balance callbacks after we've adjusted the PI chain: */
* Run balance callbacks after we've adjusted the PI chain.
*/
balance_callback(rq); balance_callback(rq);
preempt_enable(); preempt_enable();
...@@ -4465,8 +4454,7 @@ do_sched_setscheduler(pid_t pid, int policy, struct sched_param __user *param) ...@@ -4465,8 +4454,7 @@ do_sched_setscheduler(pid_t pid, int policy, struct sched_param __user *param)
/* /*
* Mimics kernel/events/core.c perf_copy_attr(). * Mimics kernel/events/core.c perf_copy_attr().
*/ */
static int sched_copy_attr(struct sched_attr __user *uattr, static int sched_copy_attr(struct sched_attr __user *uattr, struct sched_attr *attr)
struct sched_attr *attr)
{ {
u32 size; u32 size;
int ret; int ret;
...@@ -4474,19 +4462,19 @@ static int sched_copy_attr(struct sched_attr __user *uattr, ...@@ -4474,19 +4462,19 @@ static int sched_copy_attr(struct sched_attr __user *uattr,
if (!access_ok(VERIFY_WRITE, uattr, SCHED_ATTR_SIZE_VER0)) if (!access_ok(VERIFY_WRITE, uattr, SCHED_ATTR_SIZE_VER0))
return -EFAULT; return -EFAULT;
/* /* Zero the full structure, so that a short copy will be nice: */
* zero the full structure, so that a short copy will be nice.
*/
memset(attr, 0, sizeof(*attr)); memset(attr, 0, sizeof(*attr));
ret = get_user(size, &uattr->size); ret = get_user(size, &uattr->size);
if (ret) if (ret)
return ret; return ret;
if (size > PAGE_SIZE) /* silly large */ /* Bail out on silly large: */
if (size > PAGE_SIZE)
goto err_size; goto err_size;
if (!size) /* abi compat */ /* ABI compatibility quirk: */
if (!size)
size = SCHED_ATTR_SIZE_VER0; size = SCHED_ATTR_SIZE_VER0;
if (size < SCHED_ATTR_SIZE_VER0) if (size < SCHED_ATTR_SIZE_VER0)
...@@ -4521,7 +4509,7 @@ static int sched_copy_attr(struct sched_attr __user *uattr, ...@@ -4521,7 +4509,7 @@ static int sched_copy_attr(struct sched_attr __user *uattr,
return -EFAULT; return -EFAULT;
/* /*
* XXX: do we want to be lenient like existing syscalls; or do we want * XXX: Do we want to be lenient like existing syscalls; or do we want
* to be strict and return an error on out-of-bounds values? * to be strict and return an error on out-of-bounds values?
*/ */
attr->sched_nice = clamp(attr->sched_nice, MIN_NICE, MAX_NICE); attr->sched_nice = clamp(attr->sched_nice, MIN_NICE, MAX_NICE);
...@@ -4541,10 +4529,8 @@ static int sched_copy_attr(struct sched_attr __user *uattr, ...@@ -4541,10 +4529,8 @@ static int sched_copy_attr(struct sched_attr __user *uattr,
* *
* Return: 0 on success. An error code otherwise. * Return: 0 on success. An error code otherwise.
*/ */
SYSCALL_DEFINE3(sched_setscheduler, pid_t, pid, int, policy, SYSCALL_DEFINE3(sched_setscheduler, pid_t, pid, int, policy, struct sched_param __user *, param)
struct sched_param __user *, param)
{ {
/* negative values for policy are not valid */
if (policy < 0) if (policy < 0)
return -EINVAL; return -EINVAL;
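
User space reaches this entry point through the POSIX wrapper of the same name; a minimal usage sketch (illustration only, not part of the patch; the priority value is arbitrary and the call needs CAP_SYS_NICE or a suitable RLIMIT_RTPRIO):

#include <sched.h>
#include <stdio.h>

int main(void)
{
	struct sched_param sp = { .sched_priority = 10 };	/* arbitrary RT priority */

	/* pid 0 means the calling process; fails with EPERM when unprivileged: */
	if (sched_setscheduler(0, SCHED_FIFO, &sp))
		perror("sched_setscheduler");
	else
		printf("now SCHED_FIFO, priority %d\n", sp.sched_priority);

	return 0;
}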
...@@ -4854,10 +4840,10 @@ static int get_user_cpu_mask(unsigned long __user *user_mask_ptr, unsigned len, ...@@ -4854,10 +4840,10 @@ static int get_user_cpu_mask(unsigned long __user *user_mask_ptr, unsigned len,
} }
/** /**
* sys_sched_setaffinity - set the cpu affinity of a process * sys_sched_setaffinity - set the CPU affinity of a process
* @pid: pid of the process * @pid: pid of the process
* @len: length in bytes of the bitmask pointed to by user_mask_ptr * @len: length in bytes of the bitmask pointed to by user_mask_ptr
* @user_mask_ptr: user-space pointer to the new cpu mask * @user_mask_ptr: user-space pointer to the new CPU mask
* *
* Return: 0 on success. An error code otherwise. * Return: 0 on success. An error code otherwise.
*/ */
...@@ -4905,10 +4891,10 @@ long sched_getaffinity(pid_t pid, struct cpumask *mask) ...@@ -4905,10 +4891,10 @@ long sched_getaffinity(pid_t pid, struct cpumask *mask)
} }
/** /**
* sys_sched_getaffinity - get the cpu affinity of a process * sys_sched_getaffinity - get the CPU affinity of a process
* @pid: pid of the process * @pid: pid of the process
* @len: length in bytes of the bitmask pointed to by user_mask_ptr * @len: length in bytes of the bitmask pointed to by user_mask_ptr
* @user_mask_ptr: user-space pointer to hold the current cpu mask * @user_mask_ptr: user-space pointer to hold the current CPU mask
* *
* Return: size of CPU mask copied to user_mask_ptr on success. An * Return: size of CPU mask copied to user_mask_ptr on success. An
* error code otherwise. * error code otherwise.
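
And the matching user-space view of the two affinity syscalls documented above, via the glibc wrappers (again a sketch only, with error handling kept minimal):

#define _GNU_SOURCE
#include <sched.h>
#include <stdio.h>

int main(void)
{
	cpu_set_t set;

	CPU_ZERO(&set);
	CPU_SET(0, &set);				/* Allow only CPU 0 */
	if (sched_setaffinity(0, sizeof(set), &set))
		perror("sched_setaffinity");

	CPU_ZERO(&set);
	if (sched_getaffinity(0, sizeof(set), &set))	/* Read the mask back */
		perror("sched_getaffinity");
	else
		printf("CPU 0 allowed: %s\n", CPU_ISSET(0, &set) ? "yes" : "no");

	return 0;
}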
...@@ -5036,7 +5022,7 @@ EXPORT_SYMBOL(__cond_resched_softirq); ...@@ -5036,7 +5022,7 @@ EXPORT_SYMBOL(__cond_resched_softirq);
* Typical broken usage is: * Typical broken usage is:
* *
* while (!event) * while (!event)
* yield(); * yield();
* *
* where one assumes that yield() will let 'the other' process run that will * where one assumes that yield() will let 'the other' process run that will
* make event true. If the current task is a SCHED_FIFO task that will never * make event true. If the current task is a SCHED_FIFO task that will never
...@@ -5351,7 +5337,7 @@ void init_idle_bootup_task(struct task_struct *idle) ...@@ -5351,7 +5337,7 @@ void init_idle_bootup_task(struct task_struct *idle)
/** /**
* init_idle - set up an idle thread for a given CPU * init_idle - set up an idle thread for a given CPU
* @idle: task in question * @idle: task in question
* @cpu: cpu the idle task belongs to * @cpu: CPU the idle task belongs to
* *
* NOTE: this function does not set the idle thread's NEED_RESCHED * NOTE: this function does not set the idle thread's NEED_RESCHED
* flag, to make booting more robust. * flag, to make booting more robust.
...@@ -5382,7 +5368,7 @@ void init_idle(struct task_struct *idle, int cpu) ...@@ -5382,7 +5368,7 @@ void init_idle(struct task_struct *idle, int cpu)
#endif #endif
/* /*
* We're having a chicken and egg problem, even though we are * We're having a chicken and egg problem, even though we are
* holding rq->lock, the cpu isn't yet set to this cpu so the * holding rq->lock, the CPU isn't yet set to this CPU so the
* lockdep check in task_group() will fail. * lockdep check in task_group() will fail.
* *
* Similar case to sched_fork(). / Alternatively we could * Similar case to sched_fork(). / Alternatively we could
...@@ -5447,7 +5433,7 @@ int task_can_attach(struct task_struct *p, ...@@ -5447,7 +5433,7 @@ int task_can_attach(struct task_struct *p,
/* /*
* Kthreads which disallow setaffinity shouldn't be moved * Kthreads which disallow setaffinity shouldn't be moved
* to a new cpuset; we don't want to change their cpu * to a new cpuset; we don't want to change their CPU
* affinity and isolating such threads by their set of * affinity and isolating such threads by their set of
* allowed nodes is unnecessary. Thus, cpusets are not * allowed nodes is unnecessary. Thus, cpusets are not
* applicable for such threads. This prevents checking for * applicable for such threads. This prevents checking for
...@@ -5548,7 +5534,7 @@ void sched_setnuma(struct task_struct *p, int nid) ...@@ -5548,7 +5534,7 @@ void sched_setnuma(struct task_struct *p, int nid)
#ifdef CONFIG_HOTPLUG_CPU #ifdef CONFIG_HOTPLUG_CPU
/* /*
* Ensures that the idle task is using init_mm right before its cpu goes * Ensure that the idle task is using init_mm right before its CPU goes
* offline. * offline.
*/ */
void idle_task_exit(void) void idle_task_exit(void)
...@@ -5632,13 +5618,13 @@ static void migrate_tasks(struct rq *dead_rq) ...@@ -5632,13 +5618,13 @@ static void migrate_tasks(struct rq *dead_rq)
for (;;) { for (;;) {
/* /*
* There's this thread running, bail when that's the only * There's this thread running, bail when that's the only
* remaining thread. * remaining thread:
*/ */
if (rq->nr_running == 1) if (rq->nr_running == 1)
break; break;
/* /*
* pick_next_task assumes pinned rq->lock. * pick_next_task() assumes pinned rq->lock:
*/ */
rq_pin_lock(rq, &rf); rq_pin_lock(rq, &rf);
next = pick_next_task(rq, &fake_task, &rf); next = pick_next_task(rq, &fake_task, &rf);
...@@ -5730,7 +5716,8 @@ static void set_cpu_rq_start_time(unsigned int cpu) ...@@ -5730,7 +5716,8 @@ static void set_cpu_rq_start_time(unsigned int cpu)
rq->age_stamp = sched_clock_cpu(cpu); rq->age_stamp = sched_clock_cpu(cpu);
} }
static cpumask_var_t sched_domains_tmpmask; /* sched_domains_mutex */ /* Protected by sched_domains_mutex: */
static cpumask_var_t sched_domains_tmpmask;
#ifdef CONFIG_SCHED_DEBUG #ifdef CONFIG_SCHED_DEBUG
...@@ -5997,7 +5984,7 @@ static int init_rootdomain(struct root_domain *rd) ...@@ -5997,7 +5984,7 @@ static int init_rootdomain(struct root_domain *rd)
} }
/* /*
* By default the system creates a single root-domain with all cpus as * By default the system creates a single root-domain with all CPUs as
* members (mimicking the global state we have today). * members (mimicking the global state we have today).
*/ */
struct root_domain def_root_domain; struct root_domain def_root_domain;
...@@ -6083,9 +6070,9 @@ static void destroy_sched_domains(struct sched_domain *sd) ...@@ -6083,9 +6070,9 @@ static void destroy_sched_domains(struct sched_domain *sd)
* SD_SHARE_PKG_RESOURCE set (Last Level Cache Domain) for this * SD_SHARE_PKG_RESOURCE set (Last Level Cache Domain) for this
* allows us to avoid some pointer chasing select_idle_sibling(). * allows us to avoid some pointer chasing select_idle_sibling().
* *
* Also keep a unique ID per domain (we use the first cpu number in * Also keep a unique ID per domain (we use the first CPU number in
* the cpumask of the domain), this allows us to quickly tell if * the cpumask of the domain), this allows us to quickly tell if
* two cpus are in the same cache domain, see cpus_share_cache(). * two CPUs are in the same cache domain, see cpus_share_cache().
*/ */
DEFINE_PER_CPU(struct sched_domain *, sd_llc); DEFINE_PER_CPU(struct sched_domain *, sd_llc);
DEFINE_PER_CPU(int, sd_llc_size); DEFINE_PER_CPU(int, sd_llc_size);
...@@ -6170,7 +6157,7 @@ cpu_attach_domain(struct sched_domain *sd, struct root_domain *rd, int cpu) ...@@ -6170,7 +6157,7 @@ cpu_attach_domain(struct sched_domain *sd, struct root_domain *rd, int cpu)
update_top_cache_domain(cpu); update_top_cache_domain(cpu);
} }
/* Setup the mask of cpus configured for isolated domains */ /* Setup the mask of CPUs configured for isolated domains */
static int __init isolated_cpu_setup(char *str) static int __init isolated_cpu_setup(char *str)
{ {
int ret; int ret;
...@@ -6207,8 +6194,7 @@ enum s_alloc { ...@@ -6207,8 +6194,7 @@ enum s_alloc {
* *
* In that case build_sched_domains() will have terminated the iteration early * In that case build_sched_domains() will have terminated the iteration early
* and our sibling sd spans will be empty. Domains should always include the * and our sibling sd spans will be empty. Domains should always include the
* cpu they're built on, so check that. * CPU they're built on, so check that.
*
*/ */
static void build_group_mask(struct sched_domain *sd, struct sched_group *sg) static void build_group_mask(struct sched_domain *sd, struct sched_group *sg)
{ {
...@@ -6227,7 +6213,7 @@ static void build_group_mask(struct sched_domain *sd, struct sched_group *sg) ...@@ -6227,7 +6213,7 @@ static void build_group_mask(struct sched_domain *sd, struct sched_group *sg)
} }
/* /*
* Return the canonical balance cpu for this group, this is the first cpu * Return the canonical balance CPU for this group, this is the first CPU
* of this group that's also in the iteration mask. * of this group that's also in the iteration mask.
*/ */
int group_balance_cpu(struct sched_group *sg) int group_balance_cpu(struct sched_group *sg)
...@@ -6287,7 +6273,7 @@ build_overlap_sched_groups(struct sched_domain *sd, int cpu) ...@@ -6287,7 +6273,7 @@ build_overlap_sched_groups(struct sched_domain *sd, int cpu)
/* /*
* Make sure the first group of this domain contains the * Make sure the first group of this domain contains the
* canonical balance cpu. Otherwise the sched_domain iteration * canonical balance CPU. Otherwise the sched_domain iteration
* breaks. See update_sg_lb_stats(). * breaks. See update_sg_lb_stats().
*/ */
if ((!groups && cpumask_test_cpu(cpu, sg_span)) || if ((!groups && cpumask_test_cpu(cpu, sg_span)) ||
...@@ -6322,7 +6308,9 @@ static int get_group(int cpu, struct sd_data *sdd, struct sched_group **sg) ...@@ -6322,7 +6308,9 @@ static int get_group(int cpu, struct sd_data *sdd, struct sched_group **sg)
if (sg) { if (sg) {
*sg = *per_cpu_ptr(sdd->sg, cpu); *sg = *per_cpu_ptr(sdd->sg, cpu);
(*sg)->sgc = *per_cpu_ptr(sdd->sgc, cpu); (*sg)->sgc = *per_cpu_ptr(sdd->sgc, cpu);
atomic_set(&(*sg)->sgc->ref, 1); /* for claim_allocations */
/* For claim_allocations: */
atomic_set(&(*sg)->sgc->ref, 1);
} }
return cpu; return cpu;
...@@ -6456,10 +6444,10 @@ static void set_domain_attribute(struct sched_domain *sd, ...@@ -6456,10 +6444,10 @@ static void set_domain_attribute(struct sched_domain *sd,
} else } else
request = attr->relax_domain_level; request = attr->relax_domain_level;
if (request < sd->level) { if (request < sd->level) {
/* turn off idle balance on this domain */ /* Turn off idle balance on this domain: */
sd->flags &= ~(SD_BALANCE_WAKE|SD_BALANCE_NEWIDLE); sd->flags &= ~(SD_BALANCE_WAKE|SD_BALANCE_NEWIDLE);
} else { } else {
/* turn on idle balance on this domain */ /* Turn on idle balance on this domain: */
sd->flags |= (SD_BALANCE_WAKE|SD_BALANCE_NEWIDLE); sd->flags |= (SD_BALANCE_WAKE|SD_BALANCE_NEWIDLE);
} }
} }
...@@ -6473,18 +6461,21 @@ static void __free_domain_allocs(struct s_data *d, enum s_alloc what, ...@@ -6473,18 +6461,21 @@ static void __free_domain_allocs(struct s_data *d, enum s_alloc what,
switch (what) { switch (what) {
case sa_rootdomain: case sa_rootdomain:
if (!atomic_read(&d->rd->refcount)) if (!atomic_read(&d->rd->refcount))
free_rootdomain(&d->rd->rcu); /* fall through */ free_rootdomain(&d->rd->rcu);
/* Fall through */
case sa_sd: case sa_sd:
free_percpu(d->sd); /* fall through */ free_percpu(d->sd);
/* Fall through */
case sa_sd_storage: case sa_sd_storage:
__sdt_free(cpu_map); /* fall through */ __sdt_free(cpu_map);
/* Fall through */
case sa_none: case sa_none:
break; break;
} }
} }
static enum s_alloc __visit_domain_allocation_hell(struct s_data *d, static enum s_alloc
const struct cpumask *cpu_map) __visit_domain_allocation_hell(struct s_data *d, const struct cpumask *cpu_map)
{ {
memset(d, 0, sizeof(*d)); memset(d, 0, sizeof(*d));
...@@ -6883,7 +6874,7 @@ static void sched_init_numa(void) ...@@ -6883,7 +6874,7 @@ static void sched_init_numa(void)
/* /*
* Now for each level, construct a mask per node which contains all * Now for each level, construct a mask per node which contains all
* cpus of nodes that are that many hops away from us. * CPUs of nodes that are that many hops away from us.
*/ */
for (i = 0; i < level; i++) { for (i = 0; i < level; i++) {
sched_domains_numa_masks[i] = sched_domains_numa_masks[i] =
...@@ -7103,11 +7094,11 @@ struct sched_domain *build_sched_domain(struct sched_domain_topology_level *tl, ...@@ -7103,11 +7094,11 @@ struct sched_domain *build_sched_domain(struct sched_domain_topology_level *tl,
} }
/* /*
* Build sched domains for a given set of cpus and attach the sched domains * Build sched domains for a given set of CPUs and attach the sched domains
* to the individual cpus * to the individual CPUs
*/ */
static int build_sched_domains(const struct cpumask *cpu_map, static int
struct sched_domain_attr *attr) build_sched_domains(const struct cpumask *cpu_map, struct sched_domain_attr *attr)
{ {
enum s_alloc alloc_state; enum s_alloc alloc_state;
struct sched_domain *sd; struct sched_domain *sd;
...@@ -7119,7 +7110,7 @@ static int build_sched_domains(const struct cpumask *cpu_map, ...@@ -7119,7 +7110,7 @@ static int build_sched_domains(const struct cpumask *cpu_map,
if (alloc_state != sa_rootdomain) if (alloc_state != sa_rootdomain)
goto error; goto error;
/* Set up domains for cpus specified by the cpu_map. */ /* Set up domains for CPUs specified by the cpu_map: */
for_each_cpu(i, cpu_map) { for_each_cpu(i, cpu_map) {
struct sched_domain_topology_level *tl; struct sched_domain_topology_level *tl;
...@@ -7185,21 +7176,25 @@ static int build_sched_domains(const struct cpumask *cpu_map, ...@@ -7185,21 +7176,25 @@ static int build_sched_domains(const struct cpumask *cpu_map,
return ret; return ret;
} }
static cpumask_var_t *doms_cur; /* current sched domains */ /* Current sched domains: */
static int ndoms_cur; /* number of sched domains in 'doms_cur' */ static cpumask_var_t *doms_cur;
static struct sched_domain_attr *dattr_cur;
/* attribues of custom domains in 'doms_cur' */ /* Number of sched domains in 'doms_cur': */
static int ndoms_cur;
/* Attribues of custom domains in 'doms_cur' */
static struct sched_domain_attr *dattr_cur;
/* /*
* Special case: If a kmalloc of a doms_cur partition (array of * Special case: If a kmalloc() of a doms_cur partition (array of
* cpumask) fails, then fallback to a single sched domain, * cpumask) fails, then fallback to a single sched domain,
* as determined by the single cpumask fallback_doms. * as determined by the single cpumask fallback_doms.
*/ */
static cpumask_var_t fallback_doms; static cpumask_var_t fallback_doms;
/* /*
* arch_update_cpu_topology lets virtualized architectures update the * arch_update_cpu_topology lets virtualized architectures update the
* cpu core maps. It is supposed to return 1 if the topology changed * CPU core maps. It is supposed to return 1 if the topology changed
* or 0 if it stayed the same. * or 0 if it stayed the same.
*/ */
int __weak arch_update_cpu_topology(void) int __weak arch_update_cpu_topology(void)
...@@ -7234,7 +7229,7 @@ void free_sched_domains(cpumask_var_t doms[], unsigned int ndoms) ...@@ -7234,7 +7229,7 @@ void free_sched_domains(cpumask_var_t doms[], unsigned int ndoms)
/* /*
* Set up scheduler domains and groups. Callers must hold the hotplug lock. * Set up scheduler domains and groups. Callers must hold the hotplug lock.
* For now this just excludes isolated cpus, but could be used to * For now this just excludes isolated CPUs, but could be used to
* exclude other special cases in the future. * exclude other special cases in the future.
*/ */
static int init_sched_domains(const struct cpumask *cpu_map) static int init_sched_domains(const struct cpumask *cpu_map)
...@@ -7254,8 +7249,8 @@ static int init_sched_domains(const struct cpumask *cpu_map) ...@@ -7254,8 +7249,8 @@ static int init_sched_domains(const struct cpumask *cpu_map)
} }
/* /*
* Detach sched domains from a group of cpus specified in cpu_map * Detach sched domains from a group of CPUs specified in cpu_map
* These cpus will now be attached to the NULL domain * These CPUs will now be attached to the NULL domain
*/ */
static void detach_destroy_domains(const struct cpumask *cpu_map) static void detach_destroy_domains(const struct cpumask *cpu_map)
{ {
...@@ -7273,7 +7268,7 @@ static int dattrs_equal(struct sched_domain_attr *cur, int idx_cur, ...@@ -7273,7 +7268,7 @@ static int dattrs_equal(struct sched_domain_attr *cur, int idx_cur,
{ {
struct sched_domain_attr tmp; struct sched_domain_attr tmp;
/* fast path */ /* Fast path: */
if (!new && !cur) if (!new && !cur)
return 1; return 1;
...@@ -7317,22 +7312,22 @@ void partition_sched_domains(int ndoms_new, cpumask_var_t doms_new[], ...@@ -7317,22 +7312,22 @@ void partition_sched_domains(int ndoms_new, cpumask_var_t doms_new[],
mutex_lock(&sched_domains_mutex); mutex_lock(&sched_domains_mutex);
/* always unregister in case we don't destroy any domains */ /* Always unregister in case we don't destroy any domains: */
unregister_sched_domain_sysctl(); unregister_sched_domain_sysctl();
/* Let architecture update cpu core mappings. */ /* Let the architecture update CPU core mappings: */
new_topology = arch_update_cpu_topology(); new_topology = arch_update_cpu_topology();
n = doms_new ? ndoms_new : 0; n = doms_new ? ndoms_new : 0;
/* Destroy deleted domains */ /* Destroy deleted domains: */
for (i = 0; i < ndoms_cur; i++) { for (i = 0; i < ndoms_cur; i++) {
for (j = 0; j < n && !new_topology; j++) { for (j = 0; j < n && !new_topology; j++) {
if (cpumask_equal(doms_cur[i], doms_new[j]) if (cpumask_equal(doms_cur[i], doms_new[j])
&& dattrs_equal(dattr_cur, i, dattr_new, j)) && dattrs_equal(dattr_cur, i, dattr_new, j))
goto match1; goto match1;
} }
/* no match - a current sched domain not in new doms_new[] */ /* No match - a current sched domain not in new doms_new[] */
detach_destroy_domains(doms_cur[i]); detach_destroy_domains(doms_cur[i]);
match1: match1:
; ;
...@@ -7346,23 +7341,24 @@ void partition_sched_domains(int ndoms_new, cpumask_var_t doms_new[], ...@@ -7346,23 +7341,24 @@ void partition_sched_domains(int ndoms_new, cpumask_var_t doms_new[],
WARN_ON_ONCE(dattr_new); WARN_ON_ONCE(dattr_new);
} }
/* Build new domains */ /* Build new domains: */
for (i = 0; i < ndoms_new; i++) { for (i = 0; i < ndoms_new; i++) {
for (j = 0; j < n && !new_topology; j++) { for (j = 0; j < n && !new_topology; j++) {
if (cpumask_equal(doms_new[i], doms_cur[j]) if (cpumask_equal(doms_new[i], doms_cur[j])
&& dattrs_equal(dattr_new, i, dattr_cur, j)) && dattrs_equal(dattr_new, i, dattr_cur, j))
goto match2; goto match2;
} }
/* no match - add a new doms_new */ /* No match - add a new doms_new */
build_sched_domains(doms_new[i], dattr_new ? dattr_new + i : NULL); build_sched_domains(doms_new[i], dattr_new ? dattr_new + i : NULL);
match2: match2:
; ;
} }
/* Remember the new sched domains */ /* Remember the new sched domains: */
if (doms_cur != &fallback_doms) if (doms_cur != &fallback_doms)
free_sched_domains(doms_cur, ndoms_cur); free_sched_domains(doms_cur, ndoms_cur);
kfree(dattr_cur); /* kfree(NULL) is safe */
kfree(dattr_cur);
doms_cur = doms_new; doms_cur = doms_new;
dattr_cur = dattr_new; dattr_cur = dattr_new;
ndoms_cur = ndoms_new; ndoms_cur = ndoms_new;
...@@ -7372,7 +7368,10 @@ void partition_sched_domains(int ndoms_new, cpumask_var_t doms_new[], ...@@ -7372,7 +7368,10 @@ void partition_sched_domains(int ndoms_new, cpumask_var_t doms_new[],
mutex_unlock(&sched_domains_mutex); mutex_unlock(&sched_domains_mutex);
} }
static int num_cpus_frozen; /* used to mark begin/end of suspend/resume */ /*
* used to mark begin/end of suspend/resume:
*/
static int num_cpus_frozen;
/* /*
* Update cpusets according to cpu_active mask. If cpusets are * Update cpusets according to cpu_active mask. If cpusets are
...@@ -7449,7 +7448,7 @@ int sched_cpu_activate(unsigned int cpu) ...@@ -7449,7 +7448,7 @@ int sched_cpu_activate(unsigned int cpu)
* Put the rq online, if not already. This happens: * Put the rq online, if not already. This happens:
* *
* 1) In the early boot process, because we build the real domains * 1) In the early boot process, because we build the real domains
* after all cpus have been brought up. * after all CPUs have been brought up.
* *
* 2) At runtime, if cpuset_cpu_active() fails to rebuild the * 2) At runtime, if cpuset_cpu_active() fails to rebuild the
* domains. * domains.
...@@ -7564,7 +7563,7 @@ void __init sched_init_smp(void) ...@@ -7564,7 +7563,7 @@ void __init sched_init_smp(void)
/* /*
* There's no userspace yet to cause hotplug operations; hence all the * There's no userspace yet to cause hotplug operations; hence all the
* cpu masks are stable and all blatant races in the below code cannot * CPU masks are stable and all blatant races in the below code cannot
* happen. * happen.
*/ */
mutex_lock(&sched_domains_mutex); mutex_lock(&sched_domains_mutex);
...@@ -7684,10 +7683,8 @@ void __init sched_init(void) ...@@ -7684,10 +7683,8 @@ void __init sched_init(void)
} }
#endif /* CONFIG_CPUMASK_OFFSTACK */ #endif /* CONFIG_CPUMASK_OFFSTACK */
init_rt_bandwidth(&def_rt_bandwidth, init_rt_bandwidth(&def_rt_bandwidth, global_rt_period(), global_rt_runtime());
global_rt_period(), global_rt_runtime()); init_dl_bandwidth(&def_dl_bandwidth, global_rt_period(), global_rt_runtime());
init_dl_bandwidth(&def_dl_bandwidth,
global_rt_period(), global_rt_runtime());
#ifdef CONFIG_SMP #ifdef CONFIG_SMP
init_defrootdomain(); init_defrootdomain();
...@@ -7723,18 +7720,18 @@ void __init sched_init(void) ...@@ -7723,18 +7720,18 @@ void __init sched_init(void)
INIT_LIST_HEAD(&rq->leaf_cfs_rq_list); INIT_LIST_HEAD(&rq->leaf_cfs_rq_list);
rq->tmp_alone_branch = &rq->leaf_cfs_rq_list; rq->tmp_alone_branch = &rq->leaf_cfs_rq_list;
/* /*
* How much cpu bandwidth does root_task_group get? * How much CPU bandwidth does root_task_group get?
* *
* In case of task-groups formed thr' the cgroup filesystem, it * In case of task-groups formed thr' the cgroup filesystem, it
* gets 100% of the cpu resources in the system. This overall * gets 100% of the CPU resources in the system. This overall
* system cpu resource is divided among the tasks of * system CPU resource is divided among the tasks of
* root_task_group and its child task-groups in a fair manner, * root_task_group and its child task-groups in a fair manner,
* based on each entity's (task or task-group's) weight * based on each entity's (task or task-group's) weight
* (se->load.weight). * (se->load.weight).
* *
* In other words, if root_task_group has 10 tasks of weight * In other words, if root_task_group has 10 tasks of weight
* 1024) and two child groups A0 and A1 (of weight 1024 each), * 1024) and two child groups A0 and A1 (of weight 1024 each),
* then A0's share of the cpu resource is: * then A0's share of the CPU resource is:
* *
* A0's bandwidth = 1024 / (10*1024 + 1024 + 1024) = 8.33% * A0's bandwidth = 1024 / (10*1024 + 1024 + 1024) = 8.33%
* *
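
A quick arithmetic check of the bandwidth example in the comment above (an illustrative calculation, not kernel code):

#include <stdio.h>

int main(void)
{
	double a0    = 1024.0;				/* weight of group A0 */
	double total = 10 * 1024.0 + 1024.0 + 1024.0;	/* 10 tasks + A0 + A1 */

	printf("A0's bandwidth = %.2f%%\n", 100.0 * a0 / total);	/* prints 8.33% */
	return 0;
}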
...@@ -7843,10 +7840,14 @@ EXPORT_SYMBOL(__might_sleep); ...@@ -7843,10 +7840,14 @@ EXPORT_SYMBOL(__might_sleep);
void ___might_sleep(const char *file, int line, int preempt_offset) void ___might_sleep(const char *file, int line, int preempt_offset)
{ {
static unsigned long prev_jiffy; /* ratelimiting */ /* Ratelimiting timestamp: */
static unsigned long prev_jiffy;
unsigned long preempt_disable_ip; unsigned long preempt_disable_ip;
rcu_sleep_check(); /* WARN_ON_ONCE() by default, no rate limit reqd. */ /* WARN_ON_ONCE() by default, no rate limit required: */
rcu_sleep_check();
if ((preempt_count_equals(preempt_offset) && !irqs_disabled() && if ((preempt_count_equals(preempt_offset) && !irqs_disabled() &&
!is_idle_task(current)) || !is_idle_task(current)) ||
system_state != SYSTEM_RUNNING || oops_in_progress) system_state != SYSTEM_RUNNING || oops_in_progress)
...@@ -7855,7 +7856,7 @@ void ___might_sleep(const char *file, int line, int preempt_offset) ...@@ -7855,7 +7856,7 @@ void ___might_sleep(const char *file, int line, int preempt_offset)
return; return;
prev_jiffy = jiffies; prev_jiffy = jiffies;
/* Save this before calling printk(), since that will clobber it */ /* Save this before calling printk(), since that will clobber it: */
preempt_disable_ip = get_preempt_disable_ip(current); preempt_disable_ip = get_preempt_disable_ip(current);
printk(KERN_ERR printk(KERN_ERR
...@@ -7934,7 +7935,7 @@ void normalize_rt_tasks(void) ...@@ -7934,7 +7935,7 @@ void normalize_rt_tasks(void)
*/ */
/** /**
* curr_task - return the current task for a given cpu. * curr_task - return the current task for a given CPU.
* @cpu: the processor in question. * @cpu: the processor in question.
* *
* ONLY VALID WHEN THE WHOLE SYSTEM IS STOPPED! * ONLY VALID WHEN THE WHOLE SYSTEM IS STOPPED!
...@@ -7950,13 +7951,13 @@ struct task_struct *curr_task(int cpu) ...@@ -7950,13 +7951,13 @@ struct task_struct *curr_task(int cpu)
#ifdef CONFIG_IA64 #ifdef CONFIG_IA64
/** /**
* set_curr_task - set the current task for a given cpu. * set_curr_task - set the current task for a given CPU.
* @cpu: the processor in question. * @cpu: the processor in question.
* @p: the task pointer to set. * @p: the task pointer to set.
* *
* Description: This function must only be used when non-maskable interrupts * Description: This function must only be used when non-maskable interrupts
* are serviced on a separate stack. It allows the architecture to switch the * are serviced on a separate stack. It allows the architecture to switch the
* notion of the current task on a cpu in a non-blocking manner. This function * notion of the current task on a CPU in a non-blocking manner. This function
* must be called with all CPU's synchronized, and interrupts disabled, the * must be called with all CPU's synchronized, and interrupts disabled, the
* and caller must save the original value of the current task (see * and caller must save the original value of the current task (see
* curr_task() above) and restore that value before reenabling interrupts and * curr_task() above) and restore that value before reenabling interrupts and
...@@ -8012,7 +8013,8 @@ void sched_online_group(struct task_group *tg, struct task_group *parent) ...@@ -8012,7 +8013,8 @@ void sched_online_group(struct task_group *tg, struct task_group *parent)
spin_lock_irqsave(&task_group_lock, flags); spin_lock_irqsave(&task_group_lock, flags);
list_add_rcu(&tg->list, &task_groups); list_add_rcu(&tg->list, &task_groups);
WARN_ON(!parent); /* root should already exist */ /* Root should already exist: */
WARN_ON(!parent);
tg->parent = parent; tg->parent = parent;
INIT_LIST_HEAD(&tg->children); INIT_LIST_HEAD(&tg->children);
...@@ -8025,13 +8027,13 @@ void sched_online_group(struct task_group *tg, struct task_group *parent) ...@@ -8025,13 +8027,13 @@ void sched_online_group(struct task_group *tg, struct task_group *parent)
/* rcu callback to free various structures associated with a task group */ /* rcu callback to free various structures associated with a task group */
static void sched_free_group_rcu(struct rcu_head *rhp) static void sched_free_group_rcu(struct rcu_head *rhp)
{ {
/* now it should be safe to free those cfs_rqs */ /* Now it should be safe to free those cfs_rqs: */
sched_free_group(container_of(rhp, struct task_group, rcu)); sched_free_group(container_of(rhp, struct task_group, rcu));
} }
void sched_destroy_group(struct task_group *tg) void sched_destroy_group(struct task_group *tg)
{ {
/* wait for possible concurrent references to cfs_rqs complete */ /* Wait for possible concurrent references to cfs_rqs complete: */
call_rcu(&tg->rcu, sched_free_group_rcu); call_rcu(&tg->rcu, sched_free_group_rcu);
} }
...@@ -8039,7 +8041,7 @@ void sched_offline_group(struct task_group *tg) ...@@ -8039,7 +8041,7 @@ void sched_offline_group(struct task_group *tg)
{ {
unsigned long flags; unsigned long flags;
/* end participation in shares distribution */ /* End participation in shares distribution: */
unregister_fair_sched_group(tg); unregister_fair_sched_group(tg);
spin_lock_irqsave(&task_group_lock, flags); spin_lock_irqsave(&task_group_lock, flags);
...@@ -8468,8 +8470,10 @@ int sched_rr_handler(struct ctl_table *table, int write, ...@@ -8468,8 +8470,10 @@ int sched_rr_handler(struct ctl_table *table, int write,
mutex_lock(&mutex); mutex_lock(&mutex);
ret = proc_dointvec(table, write, buffer, lenp, ppos); ret = proc_dointvec(table, write, buffer, lenp, ppos);
/* make sure that internally we keep jiffies */ /*
/* also, writing zero resets timeslice to default */ * Make sure that internally we keep jiffies.
* Also, writing zero resets the timeslice to default:
*/
if (!ret && write) { if (!ret && write) {
sched_rr_timeslice = sched_rr_timeslice =
sysctl_sched_rr_timeslice <= 0 ? RR_TIMESLICE : sysctl_sched_rr_timeslice <= 0 ? RR_TIMESLICE :
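
For reference, the timeslice handled here is exposed as a sysctl; a hedged user-space sketch, assuming the usual /proc/sys/kernel/sched_rr_timeslice_ms path (writing 0 resets the timeslice to its default, as the comment above notes; requires root):

#include <stdio.h>

int main(void)
{
	/* Path assumed from the sysctl name; adjust if your kernel differs. */
	FILE *f = fopen("/proc/sys/kernel/sched_rr_timeslice_ms", "w");

	if (!f) {
		perror("fopen");
		return 1;
	}
	fprintf(f, "0\n");	/* 0 => reset the RR timeslice to its default */
	fclose(f);
	return 0;
}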
...@@ -8654,9 +8658,11 @@ static int tg_set_cfs_bandwidth(struct task_group *tg, u64 period, u64 quota) ...@@ -8654,9 +8658,11 @@ static int tg_set_cfs_bandwidth(struct task_group *tg, u64 period, u64 quota)
cfs_b->quota = quota; cfs_b->quota = quota;
__refill_cfs_bandwidth_runtime(cfs_b); __refill_cfs_bandwidth_runtime(cfs_b);
/* restart the period timer (if active) to handle new period expiry */
/* Restart the period timer (if active) to handle new period expiry: */
if (runtime_enabled) if (runtime_enabled)
start_cfs_bandwidth(cfs_b); start_cfs_bandwidth(cfs_b);
raw_spin_unlock_irq(&cfs_b->lock); raw_spin_unlock_irq(&cfs_b->lock);
for_each_online_cpu(i) { for_each_online_cpu(i) {
...@@ -8794,8 +8800,8 @@ static int tg_cfs_schedulable_down(struct task_group *tg, void *data) ...@@ -8794,8 +8800,8 @@ static int tg_cfs_schedulable_down(struct task_group *tg, void *data)
parent_quota = parent_b->hierarchical_quota; parent_quota = parent_b->hierarchical_quota;
/* /*
* ensure max(child_quota) <= parent_quota, inherit when no * Ensure max(child_quota) <= parent_quota, inherit when no
* limit is set * limit is set:
*/ */
if (quota == RUNTIME_INF) if (quota == RUNTIME_INF)
quota = parent_quota; quota = parent_quota;
...@@ -8904,7 +8910,7 @@ static struct cftype cpu_files[] = { ...@@ -8904,7 +8910,7 @@ static struct cftype cpu_files[] = {
.write_u64 = cpu_rt_period_write_uint, .write_u64 = cpu_rt_period_write_uint,
}, },
#endif #endif
{ } /* terminate */ { } /* Terminate */
}; };
struct cgroup_subsys cpu_cgrp_subsys = { struct cgroup_subsys cpu_cgrp_subsys = {