Commit f5a4c4b7 authored by Peter Zijlstra, committed by Greg Kroah-Hartman

sched/nohz: Fix rq->cpu_load calculations some more

commit 5aaa0b7a upstream.

Follow-up to commit 556061b0 ("sched/nohz: Fix rq->cpu_load[]
calculations"): while that fixed the busy case, it regressed the
mostly-idle case.

Add a callback from the nohz exit to also age the rq->cpu_load[]
array. This closes the hole where either there was no nohz load-balance
pass during the nohz period, or there was a 'significant' amount of
idle time between the last nohz balance and the nohz exit.

So we update unconditionally from the tick so as not to insert any
accidental zero-load periods while busy, and we try to catch up from
the nohz idle balance and the nohz exit. Both of these are still prone
to missing a jiffy, but that has always been the case.
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: pjt@google.com
Cc: Venkatesh Pallipadi <venki@google.com>
Link: http://lkml.kernel.org/n/tip-kt0trz0apodbf84ucjfdbr1a@git.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Cc: Li Zefan <lizefan@huawei.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
parent e2d51f27
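
As an illustration of what the catch-up does, here is a minimal userspace sketch (not the kernel code) of how a cpu_load[]-style array is aged when a cpu returns from nohz: every jiffy missed while idle is treated as a zero-load tick, so each index decays geometrically toward zero. The array size, the helper names age_one_tick() and catch_up(), and the plain loop are assumptions made for illustration; the real __update_cpu_load() folds the repeated idle ticks into a single step using a precomputed decay table rather than looping.

#include <stdio.h>

#define CPU_LOAD_IDX_MAX 5

static unsigned long cpu_load[CPU_LOAD_IDX_MAX];

/* One tick of averaging: cpu_load[i] = (old * (2^i - 1) + new) / 2^i. */
static void age_one_tick(unsigned long this_load)
{
        int i;

        cpu_load[0] = this_load;        /* index 0 tracks the instantaneous load */
        for (i = 1; i < CPU_LOAD_IDX_MAX; i++) {
                unsigned long scale = 1UL << i;
                unsigned long old_load = cpu_load[i];
                unsigned long new_load = this_load;

                /* Round up when load increases so we don't get stuck below it. */
                if (new_load > old_load)
                        new_load += scale - 1;

                cpu_load[i] = (old_load * (scale - 1) + new_load) >> i;
        }
}

/* Catch-up after nohz: every jiffy we slept through saw load 0. */
static void catch_up(unsigned long pending_updates)
{
        while (pending_updates--)
                age_one_tick(0);
}

int main(void)
{
        int i;

        for (i = 0; i < CPU_LOAD_IDX_MAX; i++)
                cpu_load[i] = 1024;     /* pretend we were busy before idling */

        catch_up(10);                   /* e.g. 10 jiffies spent in nohz idle */

        for (i = 0; i < CPU_LOAD_IDX_MAX; i++)
                printf("cpu_load[%d] = %lu\n", i, cpu_load[i]);

        return 0;
}

With 10 missed jiffies the short-term indices drop to (almost) zero while the longer-term indices still retain some weight, which is the aging the commit message asks for on nohz exit.
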
@@ -144,6 +144,7 @@ extern unsigned long this_cpu_load(void);
 extern void calc_global_load(unsigned long ticks);
+extern void update_cpu_load_nohz(void);
 extern unsigned long get_parent_ip(unsigned long addr);
...
@@ -2649,25 +2649,32 @@ static void __update_cpu_load(struct rq *this_rq, unsigned long this_load,
         sched_avg_update(this_rq);
 }
 
+#ifdef CONFIG_NO_HZ
+/*
+ * There is no sane way to deal with nohz on smp when using jiffies because the
+ * cpu doing the jiffies update might drift wrt the cpu doing the jiffy reading
+ * causing off-by-one errors in observed deltas; {0,2} instead of {1,1}.
+ *
+ * Therefore we cannot use the delta approach from the regular tick since that
+ * would seriously skew the load calculation. However we'll make do for those
+ * updates happening while idle (nohz_idle_balance) or coming out of idle
+ * (tick_nohz_idle_exit).
+ *
+ * This means we might still be one tick off for nohz periods.
+ */
+
 /*
  * Called from nohz_idle_balance() to update the load ratings before doing the
  * idle balance.
  */
 void update_idle_cpu_load(struct rq *this_rq)
 {
-        unsigned long curr_jiffies = jiffies;
+        unsigned long curr_jiffies = ACCESS_ONCE(jiffies);
         unsigned long load = this_rq->load.weight;
         unsigned long pending_updates;
 
         /*
-         * Bloody broken means of dealing with nohz, but better than nothing..
-         * jiffies is updated by one cpu, another cpu can drift wrt the jiffy
-         * update and see 0 difference the one time and 2 the next, even though
-         * we ticked at roughtly the same rate.
-         *
-         * Hence we only use this from nohz_idle_balance() and skip this
-         * nonsense when called from the scheduler_tick() since that's
-         * guaranteed a stable rate.
+         * bail if there's load or we're actually up-to-date.
         */
         if (load || curr_jiffies == this_rq->last_load_update_tick)
                 return;
@@ -2678,13 +2685,39 @@ void update_idle_cpu_load(struct rq *this_rq)
         __update_cpu_load(this_rq, load, pending_updates);
 }
 
+/*
+ * Called from tick_nohz_idle_exit() -- try and fix up the ticks we missed.
+ */
+void update_cpu_load_nohz(void)
+{
+        struct rq *this_rq = this_rq();
+        unsigned long curr_jiffies = ACCESS_ONCE(jiffies);
+        unsigned long pending_updates;
+
+        if (curr_jiffies == this_rq->last_load_update_tick)
+                return;
+
+        raw_spin_lock(&this_rq->lock);
+        pending_updates = curr_jiffies - this_rq->last_load_update_tick;
+        if (pending_updates) {
+                this_rq->last_load_update_tick = curr_jiffies;
+                /*
+                 * We were idle, this means load 0, the current load might be
+                 * !0 due to remote wakeups and the sort.
+                 */
+                __update_cpu_load(this_rq, 0, pending_updates);
+        }
+        raw_spin_unlock(&this_rq->lock);
+}
+#endif /* CONFIG_NO_HZ */
+
 /*
  * Called from scheduler_tick()
  */
 static void update_cpu_load_active(struct rq *this_rq)
 {
         /*
-         * See the mess in update_idle_cpu_load().
+         * See the mess around update_idle_cpu_load() / update_cpu_load_nohz().
          */
         this_rq->last_load_update_tick = jiffies;
         __update_cpu_load(this_rq, this_rq->load.weight, 1);
...
@@ -582,6 +582,7 @@ void tick_nohz_idle_exit(void)
         /* Update jiffies first */
         select_nohz_load_balancer(0);
         tick_do_update_jiffies64(now);
+        update_cpu_load_nohz();
 
 #ifndef CONFIG_VIRT_CPU_ACCOUNTING
         /*
...
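
The comment block added in the patch explains why raw jiffies deltas cannot be trusted from an idle cpu. Below is a tiny self-contained sketch (assumed jitter numbers, purely illustrative) of how a cpu that samples jiffies slightly out of phase with the jiffies-updating cpu can observe deltas of {0,2} where {1,1} was expected:

#include <stdio.h>

int main(void)
{
        /*
         * jiffies is advanced by the tick-owning cpu at t = 1, 2, 3, ...,
         * i.e. jiffies(t) = floor(t).  Another cpu samples it once per
         * period, but its sampling point wobbles by +/- 0.1 of a tick
         * relative to the updater (assumed numbers, for illustration).
         */
        double jitter[] = { -0.1, -0.1, +0.1, +0.1, -0.1, -0.1 };
        unsigned long prev = 0;
        int k;

        for (k = 1; k <= 6; k++) {
                double t = (double)k + jitter[k - 1];
                unsigned long jiffies_seen = (unsigned long)t;  /* floor(t) */

                if (k > 1)
                        printf("period %d: observed jiffies delta = %lu (expected 1)\n",
                               k, jiffies_seen - prev);
                prev = jiffies_seen;
        }

        return 0;
}

This off-by-one behaviour is what makes the per-tick delta approach unusable while idle, and why the patch instead catches up from nohz_idle_balance() and tick_nohz_idle_exit(), accepting that nohz periods may still be one tick off.
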