Commit 2aaf709a authored by Rafael J. Wysocki's avatar Rafael J. Wysocki

sched: idle: Do not stop the tick upfront in the idle loop

Push the decision whether or not to stop the tick somewhat deeper
into the idle loop.

Stopping the tick upfront leads to unpleasant outcomes in case the
idle governor doesn't agree with the nohz code on the duration of the
upcoming idle period.  Specifically, if the tick has been stopped and
the idle governor predicts short idle, the situation is bad regardless
of whether or not the prediction is accurate.  If it is accurate, the
tick has been stopped unnecessarily which means excessive overhead.
If it is not accurate, the CPU is likely to spend too much time in
the (shallow, because short idle has been predicted) idle state
selected by the governor [1].

As the first step towards addressing this problem, change the code
to make the tick stopping decision inside of the loop in do_idle().
In particular, do not stop the tick in the cpu_idle_poll() code path.
Also don't do that in tick_nohz_irq_exit() which doesn't really have
enough information on whether or not to stop the tick.

Link: https://marc.info/?l=linux-pm&m=150116085925208&w=2 # [1]
Link: https://tu-dresden.de/zih/forschung/ressourcen/dateien/projekte/haec/powernightmares.pdfSuggested-by: default avatarFrederic Weisbecker <frederic@kernel.org>
Signed-off-by: default avatarRafael J. Wysocki <rafael.j.wysocki@intel.com>
Reviewed-by: default avatarFrederic Weisbecker <frederic@kernel.org>
Acked-by: default avatarPeter Zijlstra (Intel) <peterz@infradead.org>
parent 0e776768
...@@ -116,6 +116,7 @@ extern bool tick_nohz_enabled; ...@@ -116,6 +116,7 @@ extern bool tick_nohz_enabled;
extern bool tick_nohz_tick_stopped(void); extern bool tick_nohz_tick_stopped(void);
extern bool tick_nohz_tick_stopped_cpu(int cpu); extern bool tick_nohz_tick_stopped_cpu(int cpu);
extern void tick_nohz_idle_stop_tick(void); extern void tick_nohz_idle_stop_tick(void);
extern void tick_nohz_idle_restart_tick(void);
extern void tick_nohz_idle_enter(void); extern void tick_nohz_idle_enter(void);
extern void tick_nohz_idle_exit(void); extern void tick_nohz_idle_exit(void);
extern void tick_nohz_irq_exit(void); extern void tick_nohz_irq_exit(void);
...@@ -137,6 +138,7 @@ static inline void tick_nohz_idle_stop_tick_protected(void) ...@@ -137,6 +138,7 @@ static inline void tick_nohz_idle_stop_tick_protected(void)
static inline int tick_nohz_tick_stopped(void) { return 0; } static inline int tick_nohz_tick_stopped(void) { return 0; }
static inline int tick_nohz_tick_stopped_cpu(int cpu) { return 0; } static inline int tick_nohz_tick_stopped_cpu(int cpu) { return 0; }
static inline void tick_nohz_idle_stop_tick(void) { } static inline void tick_nohz_idle_stop_tick(void) { }
static inline void tick_nohz_idle_restart_tick(void) { }
static inline void tick_nohz_idle_enter(void) { } static inline void tick_nohz_idle_enter(void) { }
static inline void tick_nohz_idle_exit(void) { } static inline void tick_nohz_idle_exit(void) { }
......
...@@ -216,13 +216,13 @@ static void do_idle(void) ...@@ -216,13 +216,13 @@ static void do_idle(void)
__current_set_polling(); __current_set_polling();
tick_nohz_idle_enter(); tick_nohz_idle_enter();
tick_nohz_idle_stop_tick_protected();
while (!need_resched()) { while (!need_resched()) {
check_pgt_cache(); check_pgt_cache();
rmb(); rmb();
if (cpu_is_offline(cpu)) { if (cpu_is_offline(cpu)) {
tick_nohz_idle_stop_tick_protected();
cpuhp_report_idle_dead(); cpuhp_report_idle_dead();
arch_cpu_idle_dead(); arch_cpu_idle_dead();
} }
...@@ -236,10 +236,13 @@ static void do_idle(void) ...@@ -236,10 +236,13 @@ static void do_idle(void)
* broadcast device expired for us, we don't want to go deep * broadcast device expired for us, we don't want to go deep
* idle as we know that the IPI is going to arrive right away. * idle as we know that the IPI is going to arrive right away.
*/ */
if (cpu_idle_force_poll || tick_check_broadcast_expired()) if (cpu_idle_force_poll || tick_check_broadcast_expired()) {
tick_nohz_idle_restart_tick();
cpu_idle_poll(); cpu_idle_poll();
else } else {
tick_nohz_idle_stop_tick();
cpuidle_idle_call(); cpuidle_idle_call();
}
arch_cpu_idle_exit(); arch_cpu_idle_exit();
} }
......
...@@ -960,12 +960,10 @@ void tick_nohz_irq_exit(void) ...@@ -960,12 +960,10 @@ void tick_nohz_irq_exit(void)
{ {
struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched); struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched);
if (ts->inidle) { if (ts->inidle)
tick_nohz_start_idle(ts); tick_nohz_start_idle(ts);
__tick_nohz_idle_stop_tick(ts); else
} else {
tick_nohz_full_update_tick(ts); tick_nohz_full_update_tick(ts);
}
} }
/** /**
...@@ -1026,6 +1024,20 @@ static void tick_nohz_account_idle_ticks(struct tick_sched *ts) ...@@ -1026,6 +1024,20 @@ static void tick_nohz_account_idle_ticks(struct tick_sched *ts)
#endif #endif
} }
static void __tick_nohz_idle_restart_tick(struct tick_sched *ts, ktime_t now)
{
tick_nohz_restart_sched_tick(ts, now);
tick_nohz_account_idle_ticks(ts);
}
void tick_nohz_idle_restart_tick(void)
{
struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched);
if (ts->tick_stopped)
__tick_nohz_idle_restart_tick(ts, ktime_get());
}
/** /**
* tick_nohz_idle_exit - restart the idle tick from the idle task * tick_nohz_idle_exit - restart the idle tick from the idle task
* *
...@@ -1050,10 +1062,8 @@ void tick_nohz_idle_exit(void) ...@@ -1050,10 +1062,8 @@ void tick_nohz_idle_exit(void)
if (ts->idle_active) if (ts->idle_active)
tick_nohz_stop_idle(ts, now); tick_nohz_stop_idle(ts, now);
if (ts->tick_stopped) { if (ts->tick_stopped)
tick_nohz_restart_sched_tick(ts, now); __tick_nohz_idle_restart_tick(ts, now);
tick_nohz_account_idle_ticks(ts);
}
local_irq_enable(); local_irq_enable();
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment