Commit df774306, authored by Shakeel Butt, committed by Peter Zijlstra

psi: Reduce calls to sched_clock() in psi

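We noticed that the cost of psi grows with the depth of the cgroup
hierarchy. In particular, the cost of cpu_clock() stands out, because
the kernel calls it once for every group as it traverses up the cgroup
tree. This patch reduces the calls to cpu_clock(): the clock is read
once per psi_task_change() or psi_task_switch() and the resulting
timestamp is passed down to every group update.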

Performed perf bench on Intel Broadwell with 3 levels of cgroup.

Before the patch:

$ perf bench sched all
 # Running sched/messaging benchmark...
 # 20 sender and receiver processes per group
 # 10 groups == 400 processes run

     Total time: 0.747 [sec]

 # Running sched/pipe benchmark...
 # Executed 1000000 pipe operations between two processes

     Total time: 3.516 [sec]

       3.516689 usecs/op
         284358 ops/sec

After the patch:

$ perf bench sched all
 # Running sched/messaging benchmark...
 # 20 sender and receiver processes per group
 # 10 groups == 400 processes run

     Total time: 0.640 [sec]

 # Running sched/pipe benchmark...
 # Executed 1000000 pipe operations between two processes

     Total time: 3.329 [sec]

       3.329820 usecs/op
         300316 ops/sec

Signed-off-by: Shakeel Butt <shakeelb@google.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Link: https://lkml.kernel.org/r/20210321205156.4186483-1-shakeelb@google.com
parent 2a2f80ff
...@@ -644,12 +644,10 @@ static void poll_timer_fn(struct timer_list *t) ...@@ -644,12 +644,10 @@ static void poll_timer_fn(struct timer_list *t)
wake_up_interruptible(&group->poll_wait); wake_up_interruptible(&group->poll_wait);
} }
static void record_times(struct psi_group_cpu *groupc, int cpu) static void record_times(struct psi_group_cpu *groupc, u64 now)
{ {
u32 delta; u32 delta;
u64 now;
now = cpu_clock(cpu);
delta = now - groupc->state_start; delta = now - groupc->state_start;
groupc->state_start = now; groupc->state_start = now;
...@@ -676,7 +674,7 @@ static void record_times(struct psi_group_cpu *groupc, int cpu) ...@@ -676,7 +674,7 @@ static void record_times(struct psi_group_cpu *groupc, int cpu)
} }
static void psi_group_change(struct psi_group *group, int cpu, static void psi_group_change(struct psi_group *group, int cpu,
unsigned int clear, unsigned int set, unsigned int clear, unsigned int set, u64 now,
bool wake_clock) bool wake_clock)
{ {
struct psi_group_cpu *groupc; struct psi_group_cpu *groupc;
...@@ -696,7 +694,7 @@ static void psi_group_change(struct psi_group *group, int cpu, ...@@ -696,7 +694,7 @@ static void psi_group_change(struct psi_group *group, int cpu,
*/ */
write_seqcount_begin(&groupc->seq); write_seqcount_begin(&groupc->seq);
record_times(groupc, cpu); record_times(groupc, now);
for (t = 0, m = clear; m; m &= ~(1 << t), t++) { for (t = 0, m = clear; m; m &= ~(1 << t), t++) {
if (!(m & (1 << t))) if (!(m & (1 << t)))
...@@ -788,12 +786,14 @@ void psi_task_change(struct task_struct *task, int clear, int set) ...@@ -788,12 +786,14 @@ void psi_task_change(struct task_struct *task, int clear, int set)
struct psi_group *group; struct psi_group *group;
bool wake_clock = true; bool wake_clock = true;
void *iter = NULL; void *iter = NULL;
u64 now;
if (!task->pid) if (!task->pid)
return; return;
psi_flags_change(task, clear, set); psi_flags_change(task, clear, set);
now = cpu_clock(cpu);
/* /*
* Periodic aggregation shuts off if there is a period of no * Periodic aggregation shuts off if there is a period of no
* task changes, so we wake it back up if necessary. However, * task changes, so we wake it back up if necessary. However,
...@@ -806,7 +806,7 @@ void psi_task_change(struct task_struct *task, int clear, int set) ...@@ -806,7 +806,7 @@ void psi_task_change(struct task_struct *task, int clear, int set)
wake_clock = false; wake_clock = false;
while ((group = iterate_groups(task, &iter))) while ((group = iterate_groups(task, &iter)))
psi_group_change(group, cpu, clear, set, wake_clock); psi_group_change(group, cpu, clear, set, now, wake_clock);
} }
void psi_task_switch(struct task_struct *prev, struct task_struct *next, void psi_task_switch(struct task_struct *prev, struct task_struct *next,
...@@ -815,6 +815,7 @@ void psi_task_switch(struct task_struct *prev, struct task_struct *next, ...@@ -815,6 +815,7 @@ void psi_task_switch(struct task_struct *prev, struct task_struct *next,
struct psi_group *group, *common = NULL; struct psi_group *group, *common = NULL;
int cpu = task_cpu(prev); int cpu = task_cpu(prev);
void *iter; void *iter;
u64 now = cpu_clock(cpu);
if (next->pid) { if (next->pid) {
bool identical_state; bool identical_state;
...@@ -836,7 +837,7 @@ void psi_task_switch(struct task_struct *prev, struct task_struct *next, ...@@ -836,7 +837,7 @@ void psi_task_switch(struct task_struct *prev, struct task_struct *next,
break; break;
} }
psi_group_change(group, cpu, 0, TSK_ONCPU, true); psi_group_change(group, cpu, 0, TSK_ONCPU, now, true);
} }
} }
...@@ -858,7 +859,7 @@ void psi_task_switch(struct task_struct *prev, struct task_struct *next, ...@@ -858,7 +859,7 @@ void psi_task_switch(struct task_struct *prev, struct task_struct *next,
iter = NULL; iter = NULL;
while ((group = iterate_groups(prev, &iter)) && group != common) while ((group = iterate_groups(prev, &iter)) && group != common)
psi_group_change(group, cpu, clear, set, true); psi_group_change(group, cpu, clear, set, now, true);
/* /*
* TSK_ONCPU is handled up to the common ancestor. If we're tasked * TSK_ONCPU is handled up to the common ancestor. If we're tasked
...@@ -867,7 +868,7 @@ void psi_task_switch(struct task_struct *prev, struct task_struct *next, ...@@ -867,7 +868,7 @@ void psi_task_switch(struct task_struct *prev, struct task_struct *next,
if (sleep) { if (sleep) {
clear &= ~TSK_ONCPU; clear &= ~TSK_ONCPU;
for (; group; group = iterate_groups(prev, &iter)) for (; group; group = iterate_groups(prev, &iter))
psi_group_change(group, cpu, clear, set, true); psi_group_change(group, cpu, clear, set, now, true);
} }
} }
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment