Commit 897e81be authored by Linus Torvalds


Merge branch 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip

* 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip: (35 commits)
  sched, cputime: Introduce thread_group_times()
  sched, cputime: Cleanups related to task_times()
  Revert "sched, x86: Optimize branch hint in __switch_to()"
  sched: Fix isolcpus boot option
  sched: Revert 498657a4
  sched, time: Define nsecs_to_jiffies()
  sched: Remove task_{u,s,g}time()
  sched: Introduce task_times() to replace task_{u,s}time() pair
  sched: Limit the number of scheduler debug messages
  sched.c: Call debug_show_all_locks() when dumping all tasks
  sched, x86: Optimize branch hint in __switch_to()
  sched: Optimize branch hint in context_switch()
  sched: Optimize branch hint in pick_next_task_fair()
  sched_feat_write(): Update ppos instead of file->f_pos
  sched: Sched_rt_periodic_timer vs cpu hotplug
  sched, kvm: Fix race condition involving sched_in_preempt_notifers
  sched: More generic WAKE_AFFINE vs select_idle_sibling()
  sched: Cleanup select_task_rq_fair()
  sched: Fix granularity of task_u/stime()
  sched: Fix/add missing update_rq_clock() calls
  ...
parents c3fa27d1 0cf55e1e
@@ -6,6 +6,21 @@ be removed from this file.
 ---------------------------
+What: USER_SCHED
+When: 2.6.34
+Why:  USER_SCHED was implemented as a proof of concept for group scheduling.
+      The effect of USER_SCHED can already be achieved from userspace with
+      the help of libcgroup. The removal of USER_SCHED will also simplify
+      the scheduler code with the removal of one major ifdef. There are also
+      issues USER_SCHED has with USER_NS. A decision was taken not to fix
+      those and instead remove USER_SCHED. Also new group scheduling
+      features will not be implemented for USER_SCHED.
+Who:  Dhaval Giani <dhaval@linux.vnet.ibm.com>
+
+---------------------------
 What: PRISM54
 When: 2.6.34
...
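As the USER_SCHED entry above notes, the same per-user grouping can be set up from userspace through cgroups, either with libcgroup or by driving the cgroup filesystem directly. A minimal sketch of the idea, assuming the cpu controller is mounted at /cgroup/cpu; the mount point and group naming are illustrative, not mandated by the kernel:

/* Sketch: place the current task into a per-UID CPU cgroup.
 * Assumes the cpu controller is mounted at /cgroup/cpu; adjust
 * the path for your system. Error handling is minimal on purpose. */
#include <stdio.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>

int main(void)
{
	char group[64], tasks[96];
	FILE *f;

	/* one group per UID, e.g. /cgroup/cpu/uid_1000 */
	snprintf(group, sizeof(group), "/cgroup/cpu/uid_%u", getuid());
	mkdir(group, 0755);		/* EEXIST is fine */

	/* writing a pid to the group's "tasks" file moves that task in */
	snprintf(tasks, sizeof(tasks), "%s/tasks", group);
	f = fopen(tasks, "w");
	if (!f)
		return 1;
	fprintf(f, "%d\n", getpid());
	fclose(f);
	return 0;
}

libcgroup performs essentially the same mkdir/write sequence behind its API.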
@@ -1072,7 +1072,8 @@ second). The meanings of the columns are as follows, from left to right:
 - irq: servicing interrupts
 - softirq: servicing softirqs
 - steal: involuntary wait
-- guest: running a guest
+- guest: running a normal guest
+- guest_nice: running a niced guest
 The "intr" line gives counts of interrupts serviced since boot time, for each
 of the possible system interrupts. The first column is the total of all
...
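With the documentation change above, the aggregate "cpu" line in /proc/stat gains a tenth column, guest_nice. A small illustrative reader (not part of this commit) that picks up the new field; the field order follows the column list above:

/* Sketch: read the aggregate cpu line from /proc/stat, including
 * the guest_nice column added by this series. */
#include <stdio.h>

int main(void)
{
	unsigned long long user, nice, system, idle, iowait,
			   irq, softirq, steal, guest, guest_nice;
	FILE *f = fopen("/proc/stat", "r");

	if (!f)
		return 1;
	if (fscanf(f, "cpu %llu %llu %llu %llu %llu %llu %llu %llu %llu %llu",
		   &user, &nice, &system, &idle, &iowait,
		   &irq, &softirq, &steal, &guest, &guest_nice) == 10)
		printf("guest=%llu guest_nice=%llu (USER_HZ ticks)\n",
		       guest, guest_nice);
	fclose(f);
	return 0;
}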
@@ -2186,6 +2186,8 @@ and is between 256 and 4096 characters. It is defined in the file
 	sbni=		[NET] Granch SBNI12 leased line adapter
+	sched_debug	[KNL] Enables verbose scheduler debug messages.
 	sc1200wdt=	[HW,WDT] SC1200 WDT (watchdog) driver
 			Format: <io>[,<timeout>[,<isapnp>]]
...
@@ -410,6 +410,16 @@ static void task_show_stack_usage(struct seq_file *m, struct task_struct *task)
 }
 #endif	/* CONFIG_MMU */
+static void task_cpus_allowed(struct seq_file *m, struct task_struct *task)
+{
+	seq_printf(m, "Cpus_allowed:\t");
+	seq_cpumask(m, &task->cpus_allowed);
+	seq_printf(m, "\n");
+	seq_printf(m, "Cpus_allowed_list:\t");
+	seq_cpumask_list(m, &task->cpus_allowed);
+	seq_printf(m, "\n");
+}
+
 int proc_pid_status(struct seq_file *m, struct pid_namespace *ns,
 			struct pid *pid, struct task_struct *task)
 {
@@ -424,6 +434,7 @@ int proc_pid_status(struct seq_file *m, struct pid_namespace *ns,
 	}
 	task_sig(m, task);
 	task_cap(m, task);
+	task_cpus_allowed(m, task);
 	cpuset_task_status_allowed(m, task);
 #if defined(CONFIG_S390)
 	task_show_regs(m, task);
@@ -495,20 +506,17 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
 	/* add up live thread stats at the group level */
 	if (whole) {
-		struct task_cputime cputime;
 		struct task_struct *t = task;
 		do {
 			min_flt += t->min_flt;
 			maj_flt += t->maj_flt;
-			gtime = cputime_add(gtime, task_gtime(t));
+			gtime = cputime_add(gtime, t->gtime);
 			t = next_thread(t);
 		} while (t != task);
 		min_flt += sig->min_flt;
 		maj_flt += sig->maj_flt;
-		thread_group_cputime(task, &cputime);
-		utime = cputime.utime;
-		stime = cputime.stime;
+		thread_group_times(task, &utime, &stime);
 		gtime = cputime_add(gtime, sig->gtime);
 	}
@@ -524,9 +532,8 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
 	if (!whole) {
 		min_flt = task->min_flt;
 		maj_flt = task->maj_flt;
-		utime = task_utime(task);
-		stime = task_stime(task);
-		gtime = task_gtime(task);
+		task_times(task, &utime, &stime);
+		gtime = task->gtime;
 	}
 	/* scale priority and nice values from timeslices to -20..20 */
...
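The new task_cpus_allowed() helper only moves where the Cpus_allowed and Cpus_allowed_list lines are emitted (out of cpuset.c, see the kernel/cpuset.c hunk further down); the /proc/<pid>/status format itself is unchanged. For reference, a small userspace sketch that reads those lines (illustrative only, not part of the patch):

/* Sketch: print the Cpus_allowed / Cpus_allowed_list lines that
 * task_cpus_allowed() emits into /proc/<pid>/status. */
#include <stdio.h>
#include <string.h>

int main(void)
{
	char line[256];
	FILE *f = fopen("/proc/self/status", "r");

	if (!f)
		return 1;
	while (fgets(line, sizeof(line), f)) {
		if (!strncmp(line, "Cpus_allowed", 12))
			fputs(line, stdout);
	}
	fclose(f);
	return 0;
}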
@@ -27,7 +27,7 @@ static int show_stat(struct seq_file *p, void *v)
 	int i, j;
 	unsigned long jif;
 	cputime64_t user, nice, system, idle, iowait, irq, softirq, steal;
-	cputime64_t guest;
+	cputime64_t guest, guest_nice;
 	u64 sum = 0;
 	u64 sum_softirq = 0;
 	unsigned int per_softirq_sums[NR_SOFTIRQS] = {0};
@@ -36,7 +36,7 @@ static int show_stat(struct seq_file *p, void *v)
 	user = nice = system = idle = iowait =
 		irq = softirq = steal = cputime64_zero;
-	guest = cputime64_zero;
+	guest = guest_nice = cputime64_zero;
 	getboottime(&boottime);
 	jif = boottime.tv_sec;
@@ -51,6 +51,8 @@ static int show_stat(struct seq_file *p, void *v)
 		softirq = cputime64_add(softirq, kstat_cpu(i).cpustat.softirq);
 		steal = cputime64_add(steal, kstat_cpu(i).cpustat.steal);
 		guest = cputime64_add(guest, kstat_cpu(i).cpustat.guest);
+		guest_nice = cputime64_add(guest_nice,
+			kstat_cpu(i).cpustat.guest_nice);
 		for_each_irq_nr(j) {
 			sum += kstat_irqs_cpu(j, i);
 		}
@@ -65,7 +67,8 @@ static int show_stat(struct seq_file *p, void *v)
 	}
 	sum += arch_irq_stat();
-	seq_printf(p, "cpu %llu %llu %llu %llu %llu %llu %llu %llu %llu\n",
+	seq_printf(p, "cpu %llu %llu %llu %llu %llu %llu %llu %llu %llu "
+		"%llu\n",
 		(unsigned long long)cputime64_to_clock_t(user),
 		(unsigned long long)cputime64_to_clock_t(nice),
 		(unsigned long long)cputime64_to_clock_t(system),
@@ -74,7 +77,8 @@ static int show_stat(struct seq_file *p, void *v)
 		(unsigned long long)cputime64_to_clock_t(irq),
 		(unsigned long long)cputime64_to_clock_t(softirq),
 		(unsigned long long)cputime64_to_clock_t(steal),
-		(unsigned long long)cputime64_to_clock_t(guest));
+		(unsigned long long)cputime64_to_clock_t(guest),
+		(unsigned long long)cputime64_to_clock_t(guest_nice));
 	for_each_online_cpu(i) {
 		/* Copy values here to work around gcc-2.95.3, gcc-2.96 */
@@ -88,8 +92,10 @@ static int show_stat(struct seq_file *p, void *v)
 		softirq = kstat_cpu(i).cpustat.softirq;
 		steal = kstat_cpu(i).cpustat.steal;
 		guest = kstat_cpu(i).cpustat.guest;
+		guest_nice = kstat_cpu(i).cpustat.guest_nice;
 		seq_printf(p,
-			"cpu%d %llu %llu %llu %llu %llu %llu %llu %llu %llu\n",
+			"cpu%d %llu %llu %llu %llu %llu %llu %llu %llu %llu "
+			"%llu\n",
 			i,
 			(unsigned long long)cputime64_to_clock_t(user),
 			(unsigned long long)cputime64_to_clock_t(nice),
@@ -99,7 +105,8 @@ static int show_stat(struct seq_file *p, void *v)
 			(unsigned long long)cputime64_to_clock_t(irq),
 			(unsigned long long)cputime64_to_clock_t(softirq),
 			(unsigned long long)cputime64_to_clock_t(steal),
-			(unsigned long long)cputime64_to_clock_t(guest));
+			(unsigned long long)cputime64_to_clock_t(guest),
+			(unsigned long long)cputime64_to_clock_t(guest_nice));
 	}
 	seq_printf(p, "intr %llu", (unsigned long long)sum);
...
@@ -307,6 +307,7 @@ extern clock_t jiffies_to_clock_t(long x);
 extern unsigned long clock_t_to_jiffies(unsigned long x);
 extern u64 jiffies_64_to_clock_t(u64 x);
 extern u64 nsec_to_clock_t(u64 x);
+extern unsigned long nsecs_to_jiffies(u64 n);
 #define TIMESTAMP_SIZE 30
...
@@ -25,6 +25,7 @@ struct cpu_usage_stat {
 	cputime64_t iowait;
 	cputime64_t steal;
 	cputime64_t guest;
+	cputime64_t guest_nice;
 };
 struct kernel_stat {
...
@@ -105,6 +105,11 @@ struct preempt_notifier;
  * @sched_out: we've just been preempted
  *    notifier: struct preempt_notifier for the task being preempted
  *    next: the task that's kicking us out
+ *
+ * Please note that sched_in and out are called under different
+ * contexts.  sched_out is called with rq lock held and irq disabled
+ * while sched_in is called without rq lock and irq enabled.  This
+ * difference is intentional and depended upon by its users.
  */
 struct preempt_ops {
 	void (*sched_in)(struct preempt_notifier *notifier, int cpu);
...
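The added comment documents the calling contexts of the two callbacks. For reference, a hedged sketch of how a user of this API (KVM is the main one) hooks in; the function and variable names below are illustrative, and CONFIG_PREEMPT_NOTIFIERS must be enabled:

/* Sketch: registering a preempt notifier for the current task.
 * Per the comment above, sched_out runs with the rq lock held and
 * IRQs disabled; sched_in runs without the rq lock and with IRQs
 * enabled. */
#include <linux/preempt.h>
#include <linux/sched.h>

static void my_sched_in(struct preempt_notifier *pn, int cpu)
{
	/* about to run again on @cpu: reload per-cpu state here */
}

static void my_sched_out(struct preempt_notifier *pn,
			 struct task_struct *next)
{
	/* being preempted in favour of @next: save state here */
}

static struct preempt_ops my_preempt_ops = {
	.sched_in  = my_sched_in,
	.sched_out = my_sched_out,
};

static struct preempt_notifier my_notifier;

static void my_attach_to_current(void)
{
	preempt_notifier_init(&my_notifier, &my_preempt_ops);
	preempt_notifier_register(&my_notifier);	/* affects current */
}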
@@ -145,7 +145,6 @@ extern unsigned long this_cpu_load(void);
 extern void calc_global_load(void);
-extern u64 cpu_nr_migrations(int cpu);
 extern unsigned long get_parent_ip(unsigned long addr);
@@ -171,8 +170,6 @@ print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
 }
 #endif
-extern unsigned long long time_sync_thresh;
 /*
  * Task state bitmask. NOTE! These bits are also
  * encoded in fs/proc/array.c: get_task_state().
@@ -349,7 +346,6 @@ extern signed long schedule_timeout(signed long timeout);
 extern signed long schedule_timeout_interruptible(signed long timeout);
 extern signed long schedule_timeout_killable(signed long timeout);
 extern signed long schedule_timeout_uninterruptible(signed long timeout);
-asmlinkage void __schedule(void);
 asmlinkage void schedule(void);
 extern int mutex_spin_on_owner(struct mutex *lock, struct thread_info *owner);
@@ -628,6 +624,9 @@ struct signal_struct {
 	cputime_t utime, stime, cutime, cstime;
 	cputime_t gtime;
 	cputime_t cgtime;
+#ifndef CONFIG_VIRT_CPU_ACCOUNTING
+	cputime_t prev_utime, prev_stime;
+#endif
 	unsigned long nvcsw, nivcsw, cnvcsw, cnivcsw;
 	unsigned long min_flt, maj_flt, cmin_flt, cmaj_flt;
 	unsigned long inblock, oublock, cinblock, coublock;
@@ -1013,9 +1012,13 @@ static inline struct cpumask *sched_domain_span(struct sched_domain *sd)
 	return to_cpumask(sd->span);
 }
-extern void partition_sched_domains(int ndoms_new, struct cpumask *doms_new,
+extern void partition_sched_domains(int ndoms_new, cpumask_var_t doms_new[],
 				    struct sched_domain_attr *dattr_new);
+/* Allocate an array of sched domains, for partition_sched_domains(). */
+cpumask_var_t *alloc_sched_domains(unsigned int ndoms);
+void free_sched_domains(cpumask_var_t doms[], unsigned int ndoms);
+
 /* Test a flag in parent sched domain */
 static inline int test_sd_parent(struct sched_domain *sd, int flag)
 {
@@ -1033,7 +1036,7 @@ unsigned long default_scale_smt_power(struct sched_domain *sd, int cpu);
 struct sched_domain_attr;
 static inline void
-partition_sched_domains(int ndoms_new, struct cpumask *doms_new,
+partition_sched_domains(int ndoms_new, cpumask_var_t doms_new[],
 			struct sched_domain_attr *dattr_new)
 {
 }
@@ -1331,7 +1334,9 @@ struct task_struct {
 	cputime_t utime, stime, utimescaled, stimescaled;
 	cputime_t gtime;
+#ifndef CONFIG_VIRT_CPU_ACCOUNTING
 	cputime_t prev_utime, prev_stime;
+#endif
 	unsigned long nvcsw, nivcsw; /* context switch counts */
 	struct timespec start_time;		/* monotonic time */
 	struct timespec real_start_time;	/* boot based time */
@@ -1720,9 +1725,8 @@ static inline void put_task_struct(struct task_struct *t)
 		__put_task_struct(t);
 }
-extern cputime_t task_utime(struct task_struct *p);
-extern cputime_t task_stime(struct task_struct *p);
-extern cputime_t task_gtime(struct task_struct *p);
+extern void task_times(struct task_struct *p, cputime_t *ut, cputime_t *st);
+extern void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t *st);
 /*
  * Per process flags
...
@@ -537,8 +537,7 @@ update_domain_attr_tree(struct sched_domain_attr *dattr, struct cpuset *c)
  *	element of the partition (one sched domain) to be passed to
  *	partition_sched_domains().
  */
-/* FIXME: see the FIXME in partition_sched_domains() */
-static int generate_sched_domains(struct cpumask **domains,
+static int generate_sched_domains(cpumask_var_t **domains,
 			struct sched_domain_attr **attributes)
 {
 	LIST_HEAD(q);		/* queue of cpusets to be scanned */
@@ -546,7 +545,7 @@ static int generate_sched_domains(struct cpumask **domains,
 	struct cpuset **csa;	/* array of all cpuset ptrs */
 	int csn;		/* how many cpuset ptrs in csa so far */
 	int i, j, k;		/* indices for partition finding loops */
-	struct cpumask *doms;	/* resulting partition; i.e. sched domains */
+	cpumask_var_t *doms;	/* resulting partition; i.e. sched domains */
 	struct sched_domain_attr *dattr;  /* attributes for custom domains */
 	int ndoms = 0;		/* number of sched domains in result */
 	int nslot;		/* next empty doms[] struct cpumask slot */
@@ -557,7 +556,8 @@ static int generate_sched_domains(struct cpumask **domains,
 	/* Special case for the 99% of systems with one, full, sched domain */
 	if (is_sched_load_balance(&top_cpuset)) {
-		doms = kmalloc(cpumask_size(), GFP_KERNEL);
+		ndoms = 1;
+		doms = alloc_sched_domains(ndoms);
 		if (!doms)
 			goto done;
@@ -566,9 +566,8 @@ static int generate_sched_domains(struct cpumask **domains,
 			*dattr = SD_ATTR_INIT;
 			update_domain_attr_tree(dattr, &top_cpuset);
 		}
-		cpumask_copy(doms, top_cpuset.cpus_allowed);
+		cpumask_copy(doms[0], top_cpuset.cpus_allowed);
-		ndoms = 1;
 		goto done;
 	}
@@ -636,7 +635,7 @@ static int generate_sched_domains(struct cpumask **domains,
 	 * Now we know how many domains to create.
 	 * Convert <csn, csa> to <ndoms, doms> and populate cpu masks.
 	 */
-	doms = kmalloc(ndoms * cpumask_size(), GFP_KERNEL);
+	doms = alloc_sched_domains(ndoms);
 	if (!doms)
 		goto done;
@@ -656,7 +655,7 @@ static int generate_sched_domains(struct cpumask **domains,
 			continue;
 		}
-		dp = doms + nslot;
+		dp = doms[nslot];
 		if (nslot == ndoms) {
 			static int warnings = 10;
@@ -718,7 +717,7 @@ static int generate_sched_domains(struct cpumask **domains,
 static void do_rebuild_sched_domains(struct work_struct *unused)
 {
 	struct sched_domain_attr *attr;
-	struct cpumask *doms;
+	cpumask_var_t *doms;
 	int ndoms;
 	get_online_cpus();
@@ -2052,7 +2051,7 @@ static int cpuset_track_online_cpus(struct notifier_block *unused_nb,
 				unsigned long phase, void *unused_cpu)
 {
 	struct sched_domain_attr *attr;
-	struct cpumask *doms;
+	cpumask_var_t *doms;
 	int ndoms;
 	switch (phase) {
@@ -2537,15 +2536,9 @@ const struct file_operations proc_cpuset_operations = {
 };
 #endif /* CONFIG_PROC_PID_CPUSET */
-/* Display task cpus_allowed, mems_allowed in /proc/<pid>/status file. */
+/* Display task mems_allowed in /proc/<pid>/status file. */
 void cpuset_task_status_allowed(struct seq_file *m, struct task_struct *task)
 {
-	seq_printf(m, "Cpus_allowed:\t");
-	seq_cpumask(m, &task->cpus_allowed);
-	seq_printf(m, "\n");
-	seq_printf(m, "Cpus_allowed_list:\t");
-	seq_cpumask_list(m, &task->cpus_allowed);
-	seq_printf(m, "\n");
 	seq_printf(m, "Mems_allowed:\t");
 	seq_nodemask(m, &task->mems_allowed);
 	seq_printf(m, "\n");
...
@@ -111,9 +111,9 @@ static void __exit_signal(struct task_struct *tsk)
 		 * We won't ever get here for the group leader, since it
 		 * will have been the last reference on the signal_struct.
 		 */
-		sig->utime = cputime_add(sig->utime, task_utime(tsk));
-		sig->stime = cputime_add(sig->stime, task_stime(tsk));
-		sig->gtime = cputime_add(sig->gtime, task_gtime(tsk));
+		sig->utime = cputime_add(sig->utime, tsk->utime);
+		sig->stime = cputime_add(sig->stime, tsk->stime);
+		sig->gtime = cputime_add(sig->gtime, tsk->gtime);
 		sig->min_flt += tsk->min_flt;
 		sig->maj_flt += tsk->maj_flt;
 		sig->nvcsw += tsk->nvcsw;
@@ -1210,6 +1210,7 @@ static int wait_task_zombie(struct wait_opts *wo, struct task_struct *p)
 		struct signal_struct *psig;
 		struct signal_struct *sig;
 		unsigned long maxrss;
+		cputime_t tgutime, tgstime;
 		/*
 		 * The resource counters for the group leader are in its
@@ -1225,20 +1226,23 @@ static int wait_task_zombie(struct wait_opts *wo, struct task_struct *p)
 		 * need to protect the access to parent->signal fields,
 		 * as other threads in the parent group can be right
 		 * here reaping other children at the same time.
+		 *
+		 * We use thread_group_times() to get times for the thread
+		 * group, which consolidates times for all threads in the
+		 * group including the group leader.
 		 */
+		thread_group_times(p, &tgutime, &tgstime);
 		spin_lock_irq(&p->real_parent->sighand->siglock);
 		psig = p->real_parent->signal;
 		sig = p->signal;
 		psig->cutime =
 			cputime_add(psig->cutime,
-			cputime_add(p->utime,
-			cputime_add(sig->utime,
-				    sig->cutime)));
+			cputime_add(tgutime,
+				    sig->cutime));
 		psig->cstime =
 			cputime_add(psig->cstime,
-			cputime_add(p->stime,
-			cputime_add(sig->stime,
-				    sig->cstime)));
+			cputime_add(tgstime,
+				    sig->cstime));
 		psig->cgtime =
 			cputime_add(psig->cgtime,
 			cputime_add(p->gtime,
...
@@ -884,6 +884,9 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
 	sig->utime = sig->stime = sig->cutime = sig->cstime = cputime_zero;
 	sig->gtime = cputime_zero;
 	sig->cgtime = cputime_zero;
+#ifndef CONFIG_VIRT_CPU_ACCOUNTING
+	sig->prev_utime = sig->prev_stime = cputime_zero;
+#endif
 	sig->nvcsw = sig->nivcsw = sig->cnvcsw = sig->cnivcsw = 0;
 	sig->min_flt = sig->maj_flt = sig->cmin_flt = sig->cmaj_flt = 0;
 	sig->inblock = sig->oublock = sig->cinblock = sig->coublock = 0;
@@ -1066,8 +1069,10 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 	p->gtime = cputime_zero;
 	p->utimescaled = cputime_zero;
 	p->stimescaled = cputime_zero;
+#ifndef CONFIG_VIRT_CPU_ACCOUNTING
 	p->prev_utime = cputime_zero;
 	p->prev_stime = cputime_zero;
+#endif
 	p->default_timer_slack_ns = current->timer_slack_ns;
...
@@ -870,7 +870,7 @@ static void gdb_cmd_getregs(struct kgdb_state *ks)
 	/*
 	 * All threads that don't have debuggerinfo should be
-	 * in __schedule() sleeping, since all other CPUs
+	 * in schedule() sleeping, since all other CPUs
 	 * are in kgdb_wait, and thus have debuggerinfo.
 	 */
 	if (local_debuggerinfo) {
...
@@ -285,12 +285,16 @@ static void print_cpu(struct seq_file *m, int cpu)
 #ifdef CONFIG_SCHEDSTATS
 #define P(n) SEQ_printf(m, "  .%-30s: %d\n", #n, rq->n);
+#define P64(n) SEQ_printf(m, "  .%-30s: %Ld\n", #n, rq->n);
 	P(yld_count);
 	P(sched_switch);
 	P(sched_count);
 	P(sched_goidle);
+#ifdef CONFIG_SMP
+	P64(avg_idle);
+#endif
 	P(ttwu_count);
 	P(ttwu_local);
...
@@ -1344,6 +1344,37 @@ find_idlest_cpu(struct sched_group *group, struct task_struct *p, int this_cpu)
 	return idlest;
 }
+/*
+ * Try and locate an idle CPU in the sched_domain.
+ */
+static int
+select_idle_sibling(struct task_struct *p, struct sched_domain *sd, int target)
+{
+	int cpu = smp_processor_id();
+	int prev_cpu = task_cpu(p);
+	int i;
+
+	/*
+	 * If this domain spans both cpu and prev_cpu (see the SD_WAKE_AFFINE
+	 * test in select_task_rq_fair) and the prev_cpu is idle then that's
+	 * always a better target than the current cpu.
+	 */
+	if (target == cpu && !cpu_rq(prev_cpu)->cfs.nr_running)
+		return prev_cpu;
+
+	/*
+	 * Otherwise, iterate the domain and find an elegible idle cpu.
+	 */
+	for_each_cpu_and(i, sched_domain_span(sd), &p->cpus_allowed) {
+		if (!cpu_rq(i)->cfs.nr_running) {
+			target = i;
+			break;
+		}
+	}
+
+	return target;
+}
+
 /*
  * sched_balance_self: balance the current task (running on cpu) in domains
  * that have the 'flag' flag set. In practice, this is SD_BALANCE_FORK and
@@ -1398,11 +1429,35 @@ static int select_task_rq_fair(struct task_struct *p, int sd_flag, int wake_flag
 			want_sd = 0;
 		}
-		if (want_affine && (tmp->flags & SD_WAKE_AFFINE) &&
-		    cpumask_test_cpu(prev_cpu, sched_domain_span(tmp))) {
-			affine_sd = tmp;
-			want_affine = 0;
+		/*
+		 * While iterating the domains looking for a spanning
+		 * WAKE_AFFINE domain, adjust the affine target to any idle cpu
+		 * in cache sharing domains along the way.
+		 */
+		if (want_affine) {
+			int target = -1;
+
+			/*
+			 * If both cpu and prev_cpu are part of this domain,
+			 * cpu is a valid SD_WAKE_AFFINE target.
+			 */
+			if (cpumask_test_cpu(prev_cpu, sched_domain_span(tmp)))
+				target = cpu;
+
+			/*
+			 * If there's an idle sibling in this domain, make that
+			 * the wake_affine target instead of the current cpu.
+			 */
+			if (tmp->flags & SD_PREFER_SIBLING)
+				target = select_idle_sibling(p, tmp, target);
+
+			if (target >= 0) {
+				if (tmp->flags & SD_WAKE_AFFINE) {
+					affine_sd = tmp;
+					want_affine = 0;
+				}
+				cpu = target;
+			}
 		}
 		if (!want_sd && !want_affine)
@@ -1679,7 +1734,7 @@ static struct task_struct *pick_next_task_fair(struct rq *rq)
 	struct cfs_rq *cfs_rq = &rq->cfs;
 	struct sched_entity *se;
-	if (unlikely(!cfs_rq->nr_running))
+	if (!cfs_rq->nr_running)
 		return NULL;
 	do {
...
@@ -1153,29 +1153,12 @@ static struct task_struct *pick_next_highest_task_rt(struct rq *rq, int cpu)
 static DEFINE_PER_CPU(cpumask_var_t, local_cpu_mask);
-static inline int pick_optimal_cpu(int this_cpu,
-				   const struct cpumask *mask)
-{
-	int first;
-
-	/* "this_cpu" is cheaper to preempt than a remote processor */
-	if ((this_cpu != -1) && cpumask_test_cpu(this_cpu, mask))
-		return this_cpu;
-
-	first = cpumask_first(mask);
-	if (first < nr_cpu_ids)
-		return first;
-
-	return -1;
-}
-
 static int find_lowest_rq(struct task_struct *task)
 {
 	struct sched_domain *sd;
 	struct cpumask *lowest_mask = __get_cpu_var(local_cpu_mask);
 	int this_cpu = smp_processor_id();
 	int cpu      = task_cpu(task);
-	cpumask_var_t domain_mask;
 	if (task->rt.nr_cpus_allowed == 1)
 		return -1; /* No other targets possible */
@@ -1198,28 +1181,26 @@ static int find_lowest_rq(struct task_struct *task)
 	 * Otherwise, we consult the sched_domains span maps to figure
 	 * out which cpu is logically closest to our hot cache data.
 	 */
-	if (this_cpu == cpu)
-		this_cpu = -1;	/* Skip this_cpu opt if the same */
-
-	if (alloc_cpumask_var(&domain_mask, GFP_ATOMIC)) {
-		for_each_domain(cpu, sd) {
-			if (sd->flags & SD_WAKE_AFFINE) {
-				int best_cpu;
-
-				cpumask_and(domain_mask,
-					    sched_domain_span(sd),
-					    lowest_mask);
-
-				best_cpu = pick_optimal_cpu(this_cpu,
-							    domain_mask);
-
-				if (best_cpu != -1) {
-					free_cpumask_var(domain_mask);
-					return best_cpu;
-				}
-			}
-		}
-		free_cpumask_var(domain_mask);
-	}
+	if (!cpumask_test_cpu(this_cpu, lowest_mask))
+		this_cpu = -1;	/* Skip this_cpu opt if not among lowest */
+
+	for_each_domain(cpu, sd) {
+		if (sd->flags & SD_WAKE_AFFINE) {
+			int best_cpu;
+
+			/*
+			 * "this_cpu" is cheaper to preempt than a
+			 * remote processor.
+			 */
+			if (this_cpu != -1 &&
+			    cpumask_test_cpu(this_cpu, sched_domain_span(sd)))
+				return this_cpu;
+
+			best_cpu = cpumask_first_and(lowest_mask,
+						     sched_domain_span(sd));
+			if (best_cpu < nr_cpu_ids)
+				return best_cpu;
+		}
+	}
@@ -1227,7 +1208,13 @@ static int find_lowest_rq(struct task_struct *task)
 	 * just give the caller *something* to work with from the compatible
 	 * locations.
 	 */
-	return pick_optimal_cpu(this_cpu, lowest_mask);
+	if (this_cpu != -1)
+		return this_cpu;
+
+	cpu = cpumask_any(lowest_mask);
+	if (cpu < nr_cpu_ids)
+		return cpu;
+	return -1;
 }
 /* Will lock the rq it finds */
...
@@ -911,16 +911,15 @@ SYSCALL_DEFINE1(setfsgid, gid_t, gid)
 void do_sys_times(struct tms *tms)
 {
-	struct task_cputime cputime;
-	cputime_t cutime, cstime;
+	cputime_t tgutime, tgstime, cutime, cstime;
-	thread_group_cputime(current, &cputime);
 	spin_lock_irq(&current->sighand->siglock);
+	thread_group_times(current, &tgutime, &tgstime);
 	cutime = current->signal->cutime;
 	cstime = current->signal->cstime;
 	spin_unlock_irq(&current->sighand->siglock);
-	tms->tms_utime = cputime_to_clock_t(cputime.utime);
-	tms->tms_stime = cputime_to_clock_t(cputime.stime);
+	tms->tms_utime = cputime_to_clock_t(tgutime);
+	tms->tms_stime = cputime_to_clock_t(tgstime);
 	tms->tms_cutime = cputime_to_clock_t(cutime);
 	tms->tms_cstime = cputime_to_clock_t(cstime);
 }
@@ -1338,16 +1337,14 @@ static void k_getrusage(struct task_struct *p, int who, struct rusage *r)
 {
 	struct task_struct *t;
 	unsigned long flags;
-	cputime_t utime, stime;
-	struct task_cputime cputime;
+	cputime_t tgutime, tgstime, utime, stime;
 	unsigned long maxrss = 0;
 	memset((char *) r, 0, sizeof *r);
 	utime = stime = cputime_zero;
 	if (who == RUSAGE_THREAD) {
-		utime = task_utime(current);
-		stime = task_stime(current);
+		task_times(current, &utime, &stime);
 		accumulate_thread_rusage(p, r);
 		maxrss = p->signal->maxrss;
 		goto out;
@@ -1373,9 +1370,9 @@ static void k_getrusage(struct task_struct *p, int who, struct rusage *r)
 			break;
 	case RUSAGE_SELF:
-		thread_group_cputime(p, &cputime);
-		utime = cputime_add(utime, cputime.utime);
-		stime = cputime_add(stime, cputime.stime);
+		thread_group_times(p, &tgutime, &tgstime);
+		utime = cputime_add(utime, tgutime);
+		stime = cputime_add(stime, tgstime);
 		r->ru_nvcsw += p->signal->nvcsw;
 		r->ru_nivcsw += p->signal->nivcsw;
 		r->ru_minflt += p->signal->min_flt;
...
@@ -662,6 +662,36 @@ u64 nsec_to_clock_t(u64 x)
 #endif
 }
+/**
+ * nsecs_to_jiffies - Convert nsecs in u64 to jiffies
+ *
+ * @n:	nsecs in u64
+ *
+ * Unlike {m,u}secs_to_jiffies, type of input is not unsigned int but u64.
+ * And this doesn't return MAX_JIFFY_OFFSET since this function is designed
+ * for scheduler, not for use in device drivers to calculate timeout value.
+ *
+ * note:
+ *   NSEC_PER_SEC = 10^9 = (5^9 * 2^9) = (1953125 * 512)
+ *   ULLONG_MAX ns = 18446744073.709551615 secs = about 584 years
+ */
+unsigned long nsecs_to_jiffies(u64 n)
+{
+#if (NSEC_PER_SEC % HZ) == 0
+	/* Common case, HZ = 100, 128, 200, 250, 256, 500, 512, 1000 etc. */
+	return div_u64(n, NSEC_PER_SEC / HZ);
+#elif (HZ % 512) == 0
+	/* overflow after 292 years if HZ = 1024 */
+	return div_u64(n * HZ / 512, NSEC_PER_SEC / 512);
+#else
+	/*
+	 * Generic case - optimized for cases where HZ is a multiple of 3.
+	 * overflow after 64.99 years, exact for HZ = 60, 72, 90, 120 etc.
+	 */
+	return div_u64(n * 9, (9ull * NSEC_PER_SEC + HZ / 2) / HZ);
+#endif
+}
+
 #if (BITS_PER_LONG < 64)
 u64 get_jiffies_64(void)
 {
...
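nsecs_to_jiffies() above is meant for scheduler-internal conversions of nanosecond sums, after which the existing jiffies helpers such as jiffies_to_clock_t() apply. A hedged sketch of that call pattern (the function and variable names below are illustrative, not taken from this series):

/* Sketch: convert a nanosecond-resolution scheduler sum to clock_t
 * for /proc-style output. rtime_ns stands in for a summed ns runtime;
 * it is not a real field name. */
#include <linux/jiffies.h>

static clock_t ns_runtime_to_clock_t(u64 rtime_ns)
{
	/* ns -> jiffies, using the scheduler-only helper added above ... */
	unsigned long j = nsecs_to_jiffies(rtime_ns);

	/* ... then jiffies -> USER_HZ clock ticks as usual */
	return jiffies_to_clock_t(j);
}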