Commit bbbfeac9 authored by Nathan Zimmer, committed by Ingo Molnar

sched: Fix /proc/sched_debug failure on very very large systems

On systems with 4096 cores, attempting to read /proc/sched_debug
fails because we are trying to push all the data into a single
kmalloc buffer.

The issue is that on these very large machines all the data will
not fit in 4 MB.

A better solution is to not use the single_open mechanism but to
provide our own seq_operations and treat each cpu as an
individual record.

The output should be identical to the previous version.
Reported-by: Dave Jones <davej@redhat.com>
Signed-off-by: Nathan Zimmer <nzimmer@sgi.com>
Cc: Peter Zijlstra <peterz@infradead.org>
[ Whitespace fixlet]
[ Fix spello in comment]
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
parent cb152ff2
...@@ -269,11 +269,11 @@ static void print_cpu(struct seq_file *m, int cpu) ...@@ -269,11 +269,11 @@ static void print_cpu(struct seq_file *m, int cpu)
{ {
unsigned int freq = cpu_khz ? : 1; unsigned int freq = cpu_khz ? : 1;
SEQ_printf(m, "\ncpu#%d, %u.%03u MHz\n", SEQ_printf(m, "cpu#%d, %u.%03u MHz\n",
cpu, freq / 1000, (freq % 1000)); cpu, freq / 1000, (freq % 1000));
} }
#else #else
SEQ_printf(m, "\ncpu#%d\n", cpu); SEQ_printf(m, "cpu#%d\n", cpu);
#endif #endif
#define P(x) \ #define P(x) \
...@@ -330,6 +330,7 @@ do { \ ...@@ -330,6 +330,7 @@ do { \
print_rq(m, rq, cpu); print_rq(m, rq, cpu);
rcu_read_unlock(); rcu_read_unlock();
spin_unlock_irqrestore(&sched_debug_lock, flags); spin_unlock_irqrestore(&sched_debug_lock, flags);
SEQ_printf(m, "\n");
} }
static const char *sched_tunable_scaling_names[] = { static const char *sched_tunable_scaling_names[] = {
...@@ -338,11 +339,10 @@ static const char *sched_tunable_scaling_names[] = { ...@@ -338,11 +339,10 @@ static const char *sched_tunable_scaling_names[] = {
"linear" "linear"
}; };
static int sched_debug_show(struct seq_file *m, void *v) static void sched_debug_header(struct seq_file *m)
{ {
u64 ktime, sched_clk, cpu_clk; u64 ktime, sched_clk, cpu_clk;
unsigned long flags; unsigned long flags;
int cpu;
local_irq_save(flags); local_irq_save(flags);
ktime = ktime_to_ns(ktime_get()); ktime = ktime_to_ns(ktime_get());
...@@ -384,33 +384,101 @@ static int sched_debug_show(struct seq_file *m, void *v) ...@@ -384,33 +384,101 @@ static int sched_debug_show(struct seq_file *m, void *v)
#undef PN #undef PN
#undef P #undef P
SEQ_printf(m, " .%-40s: %d (%s)\n", "sysctl_sched_tunable_scaling", SEQ_printf(m, " .%-40s: %d (%s)\n",
"sysctl_sched_tunable_scaling",
sysctl_sched_tunable_scaling, sysctl_sched_tunable_scaling,
sched_tunable_scaling_names[sysctl_sched_tunable_scaling]); sched_tunable_scaling_names[sysctl_sched_tunable_scaling]);
SEQ_printf(m, "\n");
}
for_each_online_cpu(cpu) static int sched_debug_show(struct seq_file *m, void *v)
print_cpu(m, cpu); {
int cpu = (unsigned long)(v - 2);
SEQ_printf(m, "\n"); if (cpu != -1)
print_cpu(m, cpu);
else
sched_debug_header(m);
return 0; return 0;
} }
void sysrq_sched_debug_show(void) void sysrq_sched_debug_show(void)
{ {
sched_debug_show(NULL, NULL); int cpu;
sched_debug_header(NULL);
for_each_online_cpu(cpu)
print_cpu(NULL, cpu);
}
/*
* This iterator needs some explanation.
* It returns 1 for the header position.
* This means 2 is cpu 0.
* In a hotplugged system some cpus, including cpu 0, may be missing so we have
* to use cpumask_* to iterate over the cpus.
*/
static void *sched_debug_start(struct seq_file *file, loff_t *offset)
{
unsigned long n = *offset;
if (n == 0)
return (void *) 1;
n--;
if (n > 0)
n = cpumask_next(n - 1, cpu_online_mask);
else
n = cpumask_first(cpu_online_mask);
*offset = n + 1;
if (n < nr_cpu_ids)
return (void *)(unsigned long)(n + 2);
return NULL;
}
static void *sched_debug_next(struct seq_file *file, void *data, loff_t *offset)
{
(*offset)++;
return sched_debug_start(file, offset);
}
static void sched_debug_stop(struct seq_file *file, void *data)
{
}
static const struct seq_operations sched_debug_sops = {
.start = sched_debug_start,
.next = sched_debug_next,
.stop = sched_debug_stop,
.show = sched_debug_show,
};
static int sched_debug_release(struct inode *inode, struct file *file)
{
seq_release(inode, file);
return 0;
} }
static int sched_debug_open(struct inode *inode, struct file *filp) static int sched_debug_open(struct inode *inode, struct file *filp)
{ {
return single_open(filp, sched_debug_show, NULL); int ret = 0;
ret = seq_open(filp, &sched_debug_sops);
return ret;
} }
static const struct file_operations sched_debug_fops = { static const struct file_operations sched_debug_fops = {
.open = sched_debug_open, .open = sched_debug_open,
.read = seq_read, .read = seq_read,
.llseek = seq_lseek, .llseek = seq_lseek,
.release = single_release, .release = sched_debug_release,
}; };
static int __init init_sched_debug_procfs(void) static int __init init_sched_debug_procfs(void)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment