Commit cb152ff2, authored by Nathan Zimmer, committed by Ingo Molnar

sched: Fix /proc/sched_stat failure on very very large systems

On systems with 4096 cores doing a cat /proc/sched_stat fails,
because we are trying to push all the data into a single kmalloc
buffer.

The issue is that on these very large machines the data does not
fit in 4 MB.

A better solution is to not use the single_open() mechanism but
to provide our own seq_operations.

The output should be identical to the previous version, so the
schedstat version number should not need to change.
Reported-by: Dave Jones <davej@redhat.com>
Signed-off-by: Nathan Zimmer <nzimmer@sgi.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Wu Fengguang <fengguang.wu@intel.com>
[ Fix memleak]
[ Fix spello in comment]
[ Fix warnings]
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
parent 1c3e8264
...@@ -21,14 +21,17 @@ static int show_schedstat(struct seq_file *seq, void *v) ...@@ -21,14 +21,17 @@ static int show_schedstat(struct seq_file *seq, void *v)
if (mask_str == NULL) if (mask_str == NULL)
return -ENOMEM; return -ENOMEM;
seq_printf(seq, "version %d\n", SCHEDSTAT_VERSION); if (v == (void *)1) {
seq_printf(seq, "timestamp %lu\n", jiffies); seq_printf(seq, "version %d\n", SCHEDSTAT_VERSION);
for_each_online_cpu(cpu) { seq_printf(seq, "timestamp %lu\n", jiffies);
struct rq *rq = cpu_rq(cpu); } else {
struct rq *rq;
#ifdef CONFIG_SMP #ifdef CONFIG_SMP
struct sched_domain *sd; struct sched_domain *sd;
int dcount = 0; int dcount = 0;
#endif #endif
cpu = (unsigned long)(v - 2);
rq = cpu_rq(cpu);
/* runqueue-specific stats */ /* runqueue-specific stats */
seq_printf(seq, seq_printf(seq,
...@@ -77,30 +80,66 @@ static int show_schedstat(struct seq_file *seq, void *v) ...@@ -77,30 +80,66 @@ static int show_schedstat(struct seq_file *seq, void *v)
return 0; return 0;
} }
static int schedstat_open(struct inode *inode, struct file *file) /*
* This iterator needs some explanation.
* It returns 1 for the header position.
* This means 2 is cpu 0.
* In a hotplugged system some cpus, including cpu 0, may be missing so we have
* to use cpumask_* to iterate over the cpus.
*/
/*
 * seq_file ->start callback for schedstat.
 * NOTE: in this diff view several lines below carry removed (left) and added
 * (right) text on the same line; the comments describe the added code only.
 */
static void *schedstat_start(struct seq_file *file, loff_t *offset)
{ {
unsigned int size = PAGE_SIZE * (1 + num_online_cpus() / 32); unsigned long n = *offset;
char *buf = kmalloc(size, GFP_KERNEL);
struct seq_file *m;
int res;
/* Position 0 is the header record, returned as the cookie (void *)1. */
if (!buf) if (n == 0)
return -ENOMEM; return (void *) 1;
res = single_open(file, show_schedstat, NULL);
/* Positions >= 1 stand for cpus: position p is a request for cpu p - 1. */
if (!res) { n--;
m = file->private_data;
/*
 * Resolve the requested cpu against cpu_online_mask; presumably this
 * yields the first online cpu >= n (cpumask_next() searches after its
 * first argument) -- confirm against the cpumask API.
 */
m->buf = buf; if (n > 0)
m->size = size; n = cpumask_next(n - 1, cpu_online_mask);
} else else
kfree(buf); n = cpumask_first(cpu_online_mask);
return res;
/* Write back the position of the cpu actually found (cpu number + 1). */
*offset = n + 1;
/* A valid cpu is returned as cookie cpu + 2; otherwise return NULL. */
if (n < nr_cpu_ids)
return (void *)(unsigned long)(n + 2);
return NULL;
}
/*
 * seq_file ->next callback: step the position forward by one and let
 * schedstat_start() translate the new position into a record cookie.
 */
static void *schedstat_next(struct seq_file *file, void *data, loff_t *offset)
{
	void *cookie;

	*offset += 1;
	cookie = schedstat_start(file, offset);

	return cookie;
}
/* seq_file ->stop callback: intentionally empty, it performs no cleanup. */
static void schedstat_stop(struct seq_file *file, void *data)
{
}
/*
 * Iterator callbacks handed to seq_open(): start/next produce one cookie per
 * record and show_schedstat() formats the record for that cookie.
 */
static const struct seq_operations schedstat_sops = {
	.start	= schedstat_start,
	.stop	= schedstat_stop,
	.next	= schedstat_next,
	.show	= show_schedstat,
};
/*
 * Open callback: attaches the schedstat_sops iterator to the file through
 * seq_open() and returns seq_open()'s result.
 */
static int schedstat_open(struct inode *inode, struct file *file)
{
return seq_open(file, &schedstat_sops);
} }
/*
 * Release callback for the schedstat file.
 *
 * schedstat_open() sets the file up with seq_open(), which allocates the
 * struct seq_file kept in file->private_data (and, once read, its output
 * buffer). That state has to be handed back through seq_release(); simply
 * returning 0 here would leak it on every open/close of the file.
 *
 * Returns whatever seq_release() returns.
 */
static int schedstat_release(struct inode *inode, struct file *file)
{
	return seq_release(inode, file);
}
/*
 * File operations for the schedstat proc file: opened through
 * schedstat_open(), read and seeked through the generic seq_file helpers.
 * NOTE: each line below shows the removed (left) and added (right) diff text.
 */
static const struct file_operations proc_schedstat_operations = { static const struct file_operations proc_schedstat_operations = {
.open = schedstat_open, .open = schedstat_open,
.read = seq_read, .read = seq_read,
.llseek = seq_lseek, .llseek = seq_lseek,
.release = single_release, .release = schedstat_release,
}; };
static int __init proc_schedstat_init(void) static int __init proc_schedstat_init(void)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment