Commit dd02d423 authored by Andrey Ryabinin, committed by Peter Zijlstra

sched/cpuacct: Fix user/system in shown cpuacct.usage*

cpuacct has 2 different ways of accounting and showing user
and system times.

The first one uses cpuacct_account_field() to account times
and cpuacct.stat file to expose them. And this one seems to work ok.

The second one uses the cpuacct_charge() function for accounting and
a set of cpuacct.usage* files to show times. Despite some attempts to
fix it in the past, it still doesn't work. Sometimes, while running a KVM
guest, cpuacct_charge() accounts most of the guest time as
system time. This doesn't match the user and system times shown in
cpuacct.stat or /proc/<pid>/stat.

Demonstration:
 # git clone https://github.com/aryabinin/kvmsample
 # make
 # mkdir /sys/fs/cgroup/cpuacct/test
 # echo $$ > /sys/fs/cgroup/cpuacct/test/tasks
 # ./kvmsample &
 # for i in {1..5}; do cat /sys/fs/cgroup/cpuacct/test/cpuacct.usage_sys; sleep 1; done
 1976535645
 2979839428
 3979832704
 4983603153
 5983604157

Use the cpustats accounted in cpuacct_account_field() as the source
of user/sys times for the cpuacct.usage* files. Make cpuacct_charge()
account only the total execution time.

Fixes: d740037f ("sched/cpuacct: Split usage accounting into user_usage and sys_usage")
Signed-off-by: Andrey Ryabinin <arbn@yandex-team.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Daniel Jordan <daniel.m.jordan@oracle.com>
Acked-by: Tejun Heo <tj@kernel.org>
Cc: <stable@vger.kernel.org>
Link: https://lore.kernel.org/r/20211115164607.23784-3-arbn@yandex-team.com
parent c7ccbf4b
...@@ -21,15 +21,11 @@ static const char * const cpuacct_stat_desc[] = { ...@@ -21,15 +21,11 @@ static const char * const cpuacct_stat_desc[] = {
[CPUACCT_STAT_SYSTEM] = "system", [CPUACCT_STAT_SYSTEM] = "system",
}; };
struct cpuacct_usage {
u64 usages[CPUACCT_STAT_NSTATS];
};
/* track CPU usage of a group of tasks and its child groups */ /* track CPU usage of a group of tasks and its child groups */
struct cpuacct { struct cpuacct {
struct cgroup_subsys_state css; struct cgroup_subsys_state css;
/* cpuusage holds pointer to a u64-type object on every CPU */ /* cpuusage holds pointer to a u64-type object on every CPU */
struct cpuacct_usage __percpu *cpuusage; u64 __percpu *cpuusage;
struct kernel_cpustat __percpu *cpustat; struct kernel_cpustat __percpu *cpustat;
}; };
...@@ -49,7 +45,7 @@ static inline struct cpuacct *parent_ca(struct cpuacct *ca) ...@@ -49,7 +45,7 @@ static inline struct cpuacct *parent_ca(struct cpuacct *ca)
return css_ca(ca->css.parent); return css_ca(ca->css.parent);
} }
static DEFINE_PER_CPU(struct cpuacct_usage, root_cpuacct_cpuusage); static DEFINE_PER_CPU(u64, root_cpuacct_cpuusage);
static struct cpuacct root_cpuacct = { static struct cpuacct root_cpuacct = {
.cpustat = &kernel_cpustat, .cpustat = &kernel_cpustat,
.cpuusage = &root_cpuacct_cpuusage, .cpuusage = &root_cpuacct_cpuusage,
...@@ -68,7 +64,7 @@ cpuacct_css_alloc(struct cgroup_subsys_state *parent_css) ...@@ -68,7 +64,7 @@ cpuacct_css_alloc(struct cgroup_subsys_state *parent_css)
if (!ca) if (!ca)
goto out; goto out;
ca->cpuusage = alloc_percpu(struct cpuacct_usage); ca->cpuusage = alloc_percpu(u64);
if (!ca->cpuusage) if (!ca->cpuusage)
goto out_free_ca; goto out_free_ca;
...@@ -99,7 +95,8 @@ static void cpuacct_css_free(struct cgroup_subsys_state *css) ...@@ -99,7 +95,8 @@ static void cpuacct_css_free(struct cgroup_subsys_state *css)
static u64 cpuacct_cpuusage_read(struct cpuacct *ca, int cpu, static u64 cpuacct_cpuusage_read(struct cpuacct *ca, int cpu,
enum cpuacct_stat_index index) enum cpuacct_stat_index index)
{ {
struct cpuacct_usage *cpuusage = per_cpu_ptr(ca->cpuusage, cpu); u64 *cpuusage = per_cpu_ptr(ca->cpuusage, cpu);
u64 *cpustat = per_cpu_ptr(ca->cpustat, cpu)->cpustat;
u64 data; u64 data;
/* /*
...@@ -116,14 +113,17 @@ static u64 cpuacct_cpuusage_read(struct cpuacct *ca, int cpu, ...@@ -116,14 +113,17 @@ static u64 cpuacct_cpuusage_read(struct cpuacct *ca, int cpu,
raw_spin_rq_lock_irq(cpu_rq(cpu)); raw_spin_rq_lock_irq(cpu_rq(cpu));
#endif #endif
if (index == CPUACCT_STAT_NSTATS) { switch (index) {
int i = 0; case CPUACCT_STAT_USER:
data = cpustat[CPUTIME_USER] + cpustat[CPUTIME_NICE];
data = 0; break;
for (i = 0; i < CPUACCT_STAT_NSTATS; i++) case CPUACCT_STAT_SYSTEM:
data += cpuusage->usages[i]; data = cpustat[CPUTIME_SYSTEM] + cpustat[CPUTIME_IRQ] +
} else { cpustat[CPUTIME_SOFTIRQ];
data = cpuusage->usages[index]; break;
case CPUACCT_STAT_NSTATS:
data = *cpuusage;
break;
} }
#ifndef CONFIG_64BIT #ifndef CONFIG_64BIT
...@@ -133,10 +133,14 @@ static u64 cpuacct_cpuusage_read(struct cpuacct *ca, int cpu, ...@@ -133,10 +133,14 @@ static u64 cpuacct_cpuusage_read(struct cpuacct *ca, int cpu,
return data; return data;
} }
static void cpuacct_cpuusage_write(struct cpuacct *ca, int cpu, u64 val) static void cpuacct_cpuusage_write(struct cpuacct *ca, int cpu)
{ {
struct cpuacct_usage *cpuusage = per_cpu_ptr(ca->cpuusage, cpu); u64 *cpuusage = per_cpu_ptr(ca->cpuusage, cpu);
int i; u64 *cpustat = per_cpu_ptr(ca->cpustat, cpu)->cpustat;
/* Don't allow to reset global kernel_cpustat */
if (ca == &root_cpuacct)
return;
#ifndef CONFIG_64BIT #ifndef CONFIG_64BIT
/* /*
...@@ -144,9 +148,10 @@ static void cpuacct_cpuusage_write(struct cpuacct *ca, int cpu, u64 val) ...@@ -144,9 +148,10 @@ static void cpuacct_cpuusage_write(struct cpuacct *ca, int cpu, u64 val)
*/ */
raw_spin_rq_lock_irq(cpu_rq(cpu)); raw_spin_rq_lock_irq(cpu_rq(cpu));
#endif #endif
*cpuusage = 0;
for (i = 0; i < CPUACCT_STAT_NSTATS; i++) cpustat[CPUTIME_USER] = cpustat[CPUTIME_NICE] = 0;
cpuusage->usages[i] = val; cpustat[CPUTIME_SYSTEM] = cpustat[CPUTIME_IRQ] = 0;
cpustat[CPUTIME_SOFTIRQ] = 0;
#ifndef CONFIG_64BIT #ifndef CONFIG_64BIT
raw_spin_rq_unlock_irq(cpu_rq(cpu)); raw_spin_rq_unlock_irq(cpu_rq(cpu));
...@@ -197,7 +202,7 @@ static int cpuusage_write(struct cgroup_subsys_state *css, struct cftype *cft, ...@@ -197,7 +202,7 @@ static int cpuusage_write(struct cgroup_subsys_state *css, struct cftype *cft,
return -EINVAL; return -EINVAL;
for_each_possible_cpu(cpu) for_each_possible_cpu(cpu)
cpuacct_cpuusage_write(ca, cpu, 0); cpuacct_cpuusage_write(ca, cpu);
return 0; return 0;
} }
...@@ -244,25 +249,10 @@ static int cpuacct_all_seq_show(struct seq_file *m, void *V) ...@@ -244,25 +249,10 @@ static int cpuacct_all_seq_show(struct seq_file *m, void *V)
seq_puts(m, "\n"); seq_puts(m, "\n");
for_each_possible_cpu(cpu) { for_each_possible_cpu(cpu) {
struct cpuacct_usage *cpuusage = per_cpu_ptr(ca->cpuusage, cpu);
seq_printf(m, "%d", cpu); seq_printf(m, "%d", cpu);
for (index = 0; index < CPUACCT_STAT_NSTATS; index++)
for (index = 0; index < CPUACCT_STAT_NSTATS; index++) { seq_printf(m, " %llu",
#ifndef CONFIG_64BIT cpuacct_cpuusage_read(ca, cpu, index));
/*
* Take rq->lock to make 64-bit read safe on 32-bit
* platforms.
*/
raw_spin_rq_lock_irq(cpu_rq(cpu));
#endif
seq_printf(m, " %llu", cpuusage->usages[index]);
#ifndef CONFIG_64BIT
raw_spin_rq_unlock_irq(cpu_rq(cpu));
#endif
}
seq_puts(m, "\n"); seq_puts(m, "\n");
} }
return 0; return 0;
...@@ -340,16 +330,11 @@ static struct cftype files[] = { ...@@ -340,16 +330,11 @@ static struct cftype files[] = {
void cpuacct_charge(struct task_struct *tsk, u64 cputime) void cpuacct_charge(struct task_struct *tsk, u64 cputime)
{ {
struct cpuacct *ca; struct cpuacct *ca;
int index = CPUACCT_STAT_SYSTEM;
struct pt_regs *regs = get_irq_regs() ? : task_pt_regs(tsk);
if (regs && user_mode(regs))
index = CPUACCT_STAT_USER;
rcu_read_lock(); rcu_read_lock();
for (ca = task_ca(tsk); ca; ca = parent_ca(ca)) for (ca = task_ca(tsk); ca; ca = parent_ca(ca))
__this_cpu_add(ca->cpuusage->usages[index], cputime); __this_cpu_add(*ca->cpuusage, cputime);
rcu_read_unlock(); rcu_read_unlock();
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment