Commit a01353cf authored by Peter Zijlstra, committed by Ingo Molnar

cpuidle: Fix ct_idle_*() usage

The whole disable-RCU, enable-IRQS dance is very intricate since
changing IRQ state is traced, which depends on RCU.

Add two helpers for the cpuidle case that mirror the entry code:

  ct_cpuidle_enter()
  ct_cpuidle_exit()

And fix all the cases where the enter/exit dance was buggy.
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Tested-by: Tony Lindgren <tony@atomide.com>
Tested-by: Ulf Hansson <ulf.hansson@linaro.org>
Acked-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Acked-by: Frederic Weisbecker <frederic@kernel.org>
Link: https://lore.kernel.org/r/20230112195540.130014793@infradead.org
parent 0c5ffc3d
...@@ -25,9 +25,9 @@ static int imx6q_enter_wait(struct cpuidle_device *dev, ...@@ -25,9 +25,9 @@ static int imx6q_enter_wait(struct cpuidle_device *dev,
imx6_set_lpm(WAIT_UNCLOCKED); imx6_set_lpm(WAIT_UNCLOCKED);
raw_spin_unlock(&cpuidle_lock); raw_spin_unlock(&cpuidle_lock);
ct_idle_enter(); ct_cpuidle_enter();
cpu_do_idle(); cpu_do_idle();
ct_idle_exit(); ct_cpuidle_exit();
raw_spin_lock(&cpuidle_lock); raw_spin_lock(&cpuidle_lock);
if (num_idle_cpus-- == num_online_cpus()) if (num_idle_cpus-- == num_online_cpus())
......
...@@ -47,9 +47,9 @@ static int imx6sx_enter_wait(struct cpuidle_device *dev, ...@@ -47,9 +47,9 @@ static int imx6sx_enter_wait(struct cpuidle_device *dev,
cpu_pm_enter(); cpu_pm_enter();
cpu_cluster_pm_enter(); cpu_cluster_pm_enter();
ct_idle_enter(); ct_cpuidle_enter();
cpu_suspend(0, imx6sx_idle_finish); cpu_suspend(0, imx6sx_idle_finish);
ct_idle_exit(); ct_cpuidle_exit();
cpu_cluster_pm_exit(); cpu_cluster_pm_exit();
cpu_pm_exit(); cpu_pm_exit();
......
...@@ -133,9 +133,9 @@ static int omap3_enter_idle(struct cpuidle_device *dev, ...@@ -133,9 +133,9 @@ static int omap3_enter_idle(struct cpuidle_device *dev,
} }
/* Execute ARM wfi */ /* Execute ARM wfi */
ct_idle_enter(); ct_cpuidle_enter();
omap_sram_idle(); omap_sram_idle();
ct_idle_exit(); ct_cpuidle_exit();
/* /*
* Call idle CPU PM enter notifier chain to restore * Call idle CPU PM enter notifier chain to restore
......
...@@ -105,9 +105,9 @@ static int omap_enter_idle_smp(struct cpuidle_device *dev, ...@@ -105,9 +105,9 @@ static int omap_enter_idle_smp(struct cpuidle_device *dev,
} }
raw_spin_unlock_irqrestore(&mpu_lock, flag); raw_spin_unlock_irqrestore(&mpu_lock, flag);
ct_idle_enter(); ct_cpuidle_enter();
omap4_enter_lowpower(dev->cpu, cx->cpu_state); omap4_enter_lowpower(dev->cpu, cx->cpu_state);
ct_idle_exit(); ct_cpuidle_exit();
raw_spin_lock_irqsave(&mpu_lock, flag); raw_spin_lock_irqsave(&mpu_lock, flag);
if (cx->mpu_state_vote == num_online_cpus()) if (cx->mpu_state_vote == num_online_cpus())
...@@ -186,10 +186,10 @@ static int omap_enter_idle_coupled(struct cpuidle_device *dev, ...@@ -186,10 +186,10 @@ static int omap_enter_idle_coupled(struct cpuidle_device *dev,
} }
} }
ct_idle_enter(); ct_cpuidle_enter();
omap4_enter_lowpower(dev->cpu, cx->cpu_state); omap4_enter_lowpower(dev->cpu, cx->cpu_state);
cpu_done[dev->cpu] = true; cpu_done[dev->cpu] = true;
ct_idle_exit(); ct_cpuidle_exit();
/* Wakeup CPU1 only if it is not offlined */ /* Wakeup CPU1 only if it is not offlined */
if (dev->cpu == 0 && cpumask_test_cpu(1, cpu_online_mask)) { if (dev->cpu == 0 && cpumask_test_cpu(1, cpu_online_mask)) {
......
...@@ -642,6 +642,8 @@ static int __cpuidle acpi_idle_enter_bm(struct cpuidle_driver *drv, ...@@ -642,6 +642,8 @@ static int __cpuidle acpi_idle_enter_bm(struct cpuidle_driver *drv,
*/ */
bool dis_bm = pr->flags.bm_control; bool dis_bm = pr->flags.bm_control;
instrumentation_begin();
/* If we can skip BM, demote to a safe state. */ /* If we can skip BM, demote to a safe state. */
if (!cx->bm_sts_skip && acpi_idle_bm_check()) { if (!cx->bm_sts_skip && acpi_idle_bm_check()) {
dis_bm = false; dis_bm = false;
...@@ -663,11 +665,11 @@ static int __cpuidle acpi_idle_enter_bm(struct cpuidle_driver *drv, ...@@ -663,11 +665,11 @@ static int __cpuidle acpi_idle_enter_bm(struct cpuidle_driver *drv,
raw_spin_unlock(&c3_lock); raw_spin_unlock(&c3_lock);
} }
ct_idle_enter(); ct_cpuidle_enter();
acpi_idle_do_entry(cx); acpi_idle_do_entry(cx);
ct_idle_exit(); ct_cpuidle_exit();
/* Re-enable bus master arbitration */ /* Re-enable bus master arbitration */
if (dis_bm) { if (dis_bm) {
...@@ -677,6 +679,8 @@ static int __cpuidle acpi_idle_enter_bm(struct cpuidle_driver *drv, ...@@ -677,6 +679,8 @@ static int __cpuidle acpi_idle_enter_bm(struct cpuidle_driver *drv,
raw_spin_unlock(&c3_lock); raw_spin_unlock(&c3_lock);
} }
instrumentation_end();
return index; return index;
} }
......
...@@ -126,13 +126,13 @@ static int bl_enter_powerdown(struct cpuidle_device *dev, ...@@ -126,13 +126,13 @@ static int bl_enter_powerdown(struct cpuidle_device *dev,
struct cpuidle_driver *drv, int idx) struct cpuidle_driver *drv, int idx)
{ {
cpu_pm_enter(); cpu_pm_enter();
ct_idle_enter(); ct_cpuidle_enter();
cpu_suspend(0, bl_powerdown_finisher); cpu_suspend(0, bl_powerdown_finisher);
/* signals the MCPM core that CPU is out of low power state */ /* signals the MCPM core that CPU is out of low power state */
mcpm_cpu_powered_up(); mcpm_cpu_powered_up();
ct_idle_exit(); ct_cpuidle_exit();
cpu_pm_exit(); cpu_pm_exit();
......
...@@ -36,9 +36,9 @@ static int mvebu_v7_enter_idle(struct cpuidle_device *dev, ...@@ -36,9 +36,9 @@ static int mvebu_v7_enter_idle(struct cpuidle_device *dev,
if (drv->states[index].flags & MVEBU_V7_FLAG_DEEP_IDLE) if (drv->states[index].flags & MVEBU_V7_FLAG_DEEP_IDLE)
deepidle = true; deepidle = true;
ct_idle_enter(); ct_cpuidle_enter();
ret = mvebu_v7_cpu_suspend(deepidle); ret = mvebu_v7_cpu_suspend(deepidle);
ct_idle_exit(); ct_cpuidle_exit();
cpu_pm_exit(); cpu_pm_exit();
......
...@@ -74,7 +74,7 @@ static int __psci_enter_domain_idle_state(struct cpuidle_device *dev, ...@@ -74,7 +74,7 @@ static int __psci_enter_domain_idle_state(struct cpuidle_device *dev,
else else
pm_runtime_put_sync_suspend(pd_dev); pm_runtime_put_sync_suspend(pd_dev);
ct_idle_enter(); ct_cpuidle_enter();
state = psci_get_domain_state(); state = psci_get_domain_state();
if (!state) if (!state)
...@@ -82,7 +82,7 @@ static int __psci_enter_domain_idle_state(struct cpuidle_device *dev, ...@@ -82,7 +82,7 @@ static int __psci_enter_domain_idle_state(struct cpuidle_device *dev,
ret = psci_cpu_suspend_enter(state) ? -1 : idx; ret = psci_cpu_suspend_enter(state) ? -1 : idx;
ct_idle_exit(); ct_cpuidle_exit();
if (s2idle) if (s2idle)
dev_pm_genpd_resume(pd_dev); dev_pm_genpd_resume(pd_dev);
......
...@@ -126,7 +126,7 @@ static int __sbi_enter_domain_idle_state(struct cpuidle_device *dev, ...@@ -126,7 +126,7 @@ static int __sbi_enter_domain_idle_state(struct cpuidle_device *dev,
else else
pm_runtime_put_sync_suspend(pd_dev); pm_runtime_put_sync_suspend(pd_dev);
ct_idle_enter(); ct_cpuidle_enter();
if (sbi_is_domain_state_available()) if (sbi_is_domain_state_available())
state = sbi_get_domain_state(); state = sbi_get_domain_state();
...@@ -135,7 +135,7 @@ static int __sbi_enter_domain_idle_state(struct cpuidle_device *dev, ...@@ -135,7 +135,7 @@ static int __sbi_enter_domain_idle_state(struct cpuidle_device *dev,
ret = sbi_suspend(state) ? -1 : idx; ret = sbi_suspend(state) ? -1 : idx;
ct_idle_exit(); ct_cpuidle_exit();
if (s2idle) if (s2idle)
dev_pm_genpd_resume(pd_dev); dev_pm_genpd_resume(pd_dev);
......
...@@ -183,7 +183,7 @@ static int tegra_cpuidle_state_enter(struct cpuidle_device *dev, ...@@ -183,7 +183,7 @@ static int tegra_cpuidle_state_enter(struct cpuidle_device *dev,
tegra_pm_set_cpu_in_lp2(); tegra_pm_set_cpu_in_lp2();
cpu_pm_enter(); cpu_pm_enter();
ct_idle_enter(); ct_cpuidle_enter();
switch (index) { switch (index) {
case TEGRA_C7: case TEGRA_C7:
...@@ -199,7 +199,7 @@ static int tegra_cpuidle_state_enter(struct cpuidle_device *dev, ...@@ -199,7 +199,7 @@ static int tegra_cpuidle_state_enter(struct cpuidle_device *dev,
break; break;
} }
ct_idle_exit(); ct_cpuidle_exit();
cpu_pm_exit(); cpu_pm_exit();
tegra_pm_clear_cpu_in_lp2(); tegra_pm_clear_cpu_in_lp2();
...@@ -240,10 +240,10 @@ static int tegra_cpuidle_enter(struct cpuidle_device *dev, ...@@ -240,10 +240,10 @@ static int tegra_cpuidle_enter(struct cpuidle_device *dev,
if (index == TEGRA_C1) { if (index == TEGRA_C1) {
if (do_rcu) if (do_rcu)
ct_idle_enter(); ct_cpuidle_enter();
ret = arm_cpuidle_simple_enter(dev, drv, index); ret = arm_cpuidle_simple_enter(dev, drv, index);
if (do_rcu) if (do_rcu)
ct_idle_exit(); ct_cpuidle_exit();
} else } else
ret = tegra_cpuidle_state_enter(dev, index, cpu); ret = tegra_cpuidle_state_enter(dev, index, cpu);
......
...@@ -14,6 +14,7 @@ ...@@ -14,6 +14,7 @@
#include <linux/mutex.h> #include <linux/mutex.h>
#include <linux/sched.h> #include <linux/sched.h>
#include <linux/sched/clock.h> #include <linux/sched/clock.h>
#include <linux/sched/idle.h>
#include <linux/notifier.h> #include <linux/notifier.h>
#include <linux/pm_qos.h> #include <linux/pm_qos.h>
#include <linux/cpu.h> #include <linux/cpu.h>
...@@ -152,12 +153,12 @@ static void enter_s2idle_proper(struct cpuidle_driver *drv, ...@@ -152,12 +153,12 @@ static void enter_s2idle_proper(struct cpuidle_driver *drv,
*/ */
stop_critical_timings(); stop_critical_timings();
if (!(target_state->flags & CPUIDLE_FLAG_RCU_IDLE)) if (!(target_state->flags & CPUIDLE_FLAG_RCU_IDLE))
ct_idle_enter(); ct_cpuidle_enter();
target_state->enter_s2idle(dev, drv, index); target_state->enter_s2idle(dev, drv, index);
if (WARN_ON_ONCE(!irqs_disabled())) if (WARN_ON_ONCE(!irqs_disabled()))
local_irq_disable(); raw_local_irq_disable();
if (!(target_state->flags & CPUIDLE_FLAG_RCU_IDLE)) if (!(target_state->flags & CPUIDLE_FLAG_RCU_IDLE))
ct_idle_exit(); ct_cpuidle_exit();
tick_unfreeze(); tick_unfreeze();
start_critical_timings(); start_critical_timings();
...@@ -235,14 +236,14 @@ int cpuidle_enter_state(struct cpuidle_device *dev, struct cpuidle_driver *drv, ...@@ -235,14 +236,14 @@ int cpuidle_enter_state(struct cpuidle_device *dev, struct cpuidle_driver *drv,
stop_critical_timings(); stop_critical_timings();
if (!(target_state->flags & CPUIDLE_FLAG_RCU_IDLE)) if (!(target_state->flags & CPUIDLE_FLAG_RCU_IDLE))
ct_idle_enter(); ct_cpuidle_enter();
entered_state = target_state->enter(dev, drv, index); entered_state = target_state->enter(dev, drv, index);
if (WARN_ONCE(!irqs_disabled(), "%ps leaked IRQ state", target_state->enter)) if (WARN_ONCE(!irqs_disabled(), "%ps leaked IRQ state", target_state->enter))
raw_local_irq_disable(); raw_local_irq_disable();
if (!(target_state->flags & CPUIDLE_FLAG_RCU_IDLE)) if (!(target_state->flags & CPUIDLE_FLAG_RCU_IDLE))
ct_idle_exit(); ct_cpuidle_exit();
start_critical_timings(); start_critical_timings();
sched_clock_idle_wakeup_event(); sched_clock_idle_wakeup_event();
......
...@@ -211,7 +211,7 @@ extern int tick_receive_broadcast(void); ...@@ -211,7 +211,7 @@ extern int tick_receive_broadcast(void);
extern void tick_setup_hrtimer_broadcast(void); extern void tick_setup_hrtimer_broadcast(void);
extern int tick_check_broadcast_expired(void); extern int tick_check_broadcast_expired(void);
# else # else
static inline int tick_check_broadcast_expired(void) { return 0; } static __always_inline int tick_check_broadcast_expired(void) { return 0; }
static inline void tick_setup_hrtimer_broadcast(void) { } static inline void tick_setup_hrtimer_broadcast(void) { }
# endif # endif
...@@ -219,7 +219,7 @@ static inline void tick_setup_hrtimer_broadcast(void) { } ...@@ -219,7 +219,7 @@ static inline void tick_setup_hrtimer_broadcast(void) { }
static inline void clockevents_suspend(void) { } static inline void clockevents_suspend(void) { }
static inline void clockevents_resume(void) { } static inline void clockevents_resume(void) { }
static inline int tick_check_broadcast_expired(void) { return 0; } static __always_inline int tick_check_broadcast_expired(void) { return 0; }
static inline void tick_setup_hrtimer_broadcast(void) { } static inline void tick_setup_hrtimer_broadcast(void) { }
#endif /* !CONFIG_GENERIC_CLOCKEVENTS */ #endif /* !CONFIG_GENERIC_CLOCKEVENTS */
......
...@@ -14,6 +14,7 @@ ...@@ -14,6 +14,7 @@
#include <linux/percpu.h> #include <linux/percpu.h>
#include <linux/list.h> #include <linux/list.h>
#include <linux/hrtimer.h> #include <linux/hrtimer.h>
#include <linux/context_tracking.h>
#define CPUIDLE_STATE_MAX 10 #define CPUIDLE_STATE_MAX 10
#define CPUIDLE_NAME_LEN 16 #define CPUIDLE_NAME_LEN 16
...@@ -115,6 +116,35 @@ struct cpuidle_device { ...@@ -115,6 +116,35 @@ struct cpuidle_device {
DECLARE_PER_CPU(struct cpuidle_device *, cpuidle_devices); DECLARE_PER_CPU(struct cpuidle_device *, cpuidle_devices);
DECLARE_PER_CPU(struct cpuidle_device, cpuidle_dev); DECLARE_PER_CPU(struct cpuidle_device, cpuidle_dev);
/*
* ct_cpuidle_enter - do the "IRQs on" tracing and lockdep bookkeeping and
* call ct_idle_enter(), in the order the cpuidle path requires.
*
* Must be called with IRQs disabled; this is asserted on entry. No
* raw_local_irq_*() call is made directly in this helper, so actually
* enabling IRQs while idle is left to whoever calls it.
*
* Pair every call with ct_cpuidle_exit().
*/
static __always_inline void ct_cpuidle_enter(void)
{
lockdep_assert_irqs_disabled();
/*
* Idle is allowed to (temporarily) enable IRQs. It
* will return with IRQs disabled.
*
* Trace IRQs enable here, then switch off RCU, and have
* arch_cpu_idle() use raw_local_irq_enable(). Note that
* ct_idle_enter() relies on lockdep IRQ state, so switch that
* last -- this is very similar to the entry code.
*/
trace_hardirqs_on_prepare();
lockdep_hardirqs_on_prepare();
/* Close the instrumentable section; ct_cpuidle_exit() reopens it with instrumentation_begin(). */
instrumentation_end();
ct_idle_enter();
/* Lockdep's hardirq state is flipped only after ct_idle_enter(), per the note above. */
lockdep_hardirqs_on(_RET_IP_);
}
/*
* ct_cpuidle_exit - counterpart of ct_cpuidle_enter(); performs the matching
* steps in the opposite order: lockdep hardirq state off, ct_idle_exit(),
* then instrumentation_begin().
*
* NOTE(review): nothing in this helper asserts the IRQ state. The callers
* visible in this change that enabled IRQs while idle do
* raw_local_irq_disable() right before calling it -- presumably IRQs must
* be disabled on entry; confirm against the remaining callers.
*/
static __always_inline void ct_cpuidle_exit(void)
{
/*
* Carefully undo the above.
*/
lockdep_hardirqs_off(_RET_IP_);
ct_idle_exit();
/* Reopen the instrumentable section that ct_cpuidle_enter() closed with instrumentation_end(). */
instrumentation_begin();
}
/**************************** /****************************
* CPUIDLE DRIVER INTERFACE * * CPUIDLE DRIVER INTERFACE *
****************************/ ****************************/
...@@ -289,9 +319,9 @@ extern s64 cpuidle_governor_latency_req(unsigned int cpu); ...@@ -289,9 +319,9 @@ extern s64 cpuidle_governor_latency_req(unsigned int cpu);
if (!is_retention) \ if (!is_retention) \
__ret = cpu_pm_enter(); \ __ret = cpu_pm_enter(); \
if (!__ret) { \ if (!__ret) { \
ct_idle_enter(); \ ct_cpuidle_enter(); \
__ret = low_level_idle_enter(state); \ __ret = low_level_idle_enter(state); \
ct_idle_exit(); \ ct_cpuidle_exit(); \
if (!is_retention) \ if (!is_retention) \
cpu_pm_exit(); \ cpu_pm_exit(); \
} \ } \
......
...@@ -51,18 +51,22 @@ __setup("hlt", cpu_idle_nopoll_setup); ...@@ -51,18 +51,22 @@ __setup("hlt", cpu_idle_nopoll_setup);
static noinline int __cpuidle cpu_idle_poll(void) static noinline int __cpuidle cpu_idle_poll(void)
{ {
instrumentation_begin();
trace_cpu_idle(0, smp_processor_id()); trace_cpu_idle(0, smp_processor_id());
stop_critical_timings(); stop_critical_timings();
ct_idle_enter(); ct_cpuidle_enter();
local_irq_enable();
raw_local_irq_enable();
while (!tif_need_resched() && while (!tif_need_resched() &&
(cpu_idle_force_poll || tick_check_broadcast_expired())) (cpu_idle_force_poll || tick_check_broadcast_expired()))
cpu_relax(); cpu_relax();
raw_local_irq_disable();
ct_idle_exit(); ct_cpuidle_exit();
start_critical_timings(); start_critical_timings();
trace_cpu_idle(PWR_EVENT_EXIT, smp_processor_id()); trace_cpu_idle(PWR_EVENT_EXIT, smp_processor_id());
local_irq_enable();
instrumentation_end();
return 1; return 1;
} }
...@@ -85,44 +89,21 @@ void __weak arch_cpu_idle(void) ...@@ -85,44 +89,21 @@ void __weak arch_cpu_idle(void)
*/ */
void __cpuidle default_idle_call(void) void __cpuidle default_idle_call(void)
{ {
if (current_clr_polling_and_test()) { instrumentation_begin();
local_irq_enable(); if (!current_clr_polling_and_test()) {
} else {
trace_cpu_idle(1, smp_processor_id()); trace_cpu_idle(1, smp_processor_id());
stop_critical_timings(); stop_critical_timings();
/* ct_cpuidle_enter();
* arch_cpu_idle() is supposed to enable IRQs, however
* we can't do that because of RCU and tracing.
*
* Trace IRQs enable here, then switch off RCU, and have
* arch_cpu_idle() use raw_local_irq_enable(). Note that
* ct_idle_enter() relies on lockdep IRQ state, so switch that
* last -- this is very similar to the entry code.
*/
trace_hardirqs_on_prepare();
lockdep_hardirqs_on_prepare();
ct_idle_enter();
lockdep_hardirqs_on(_THIS_IP_);
arch_cpu_idle(); arch_cpu_idle();
/*
* OK, so IRQs are enabled here, but RCU needs them disabled to
* turn itself back on.. funny thing is that disabling IRQs
* will cause tracing, which needs RCU. Jump through hoops to
* make it 'work'.
*/
raw_local_irq_disable(); raw_local_irq_disable();
lockdep_hardirqs_off(_THIS_IP_); ct_cpuidle_exit();
ct_idle_exit();
lockdep_hardirqs_on(_THIS_IP_);
raw_local_irq_enable();
start_critical_timings(); start_critical_timings();
trace_cpu_idle(PWR_EVENT_EXIT, smp_processor_id()); trace_cpu_idle(PWR_EVENT_EXIT, smp_processor_id());
} }
local_irq_enable();
instrumentation_end();
} }
static int call_cpuidle_s2idle(struct cpuidle_driver *drv, static int call_cpuidle_s2idle(struct cpuidle_driver *drv,
......
...@@ -622,9 +622,13 @@ struct cpumask *tick_get_broadcast_oneshot_mask(void) ...@@ -622,9 +622,13 @@ struct cpumask *tick_get_broadcast_oneshot_mask(void)
* to avoid a deep idle transition as we are about to get the * to avoid a deep idle transition as we are about to get the
* broadcast IPI right away. * broadcast IPI right away.
*/ */
int tick_check_broadcast_expired(void) noinstr int tick_check_broadcast_expired(void)
{ {
#ifdef _ASM_GENERIC_BITOPS_INSTRUMENTED_NON_ATOMIC_H
return arch_test_bit(smp_processor_id(), cpumask_bits(tick_broadcast_force_mask));
#else
return cpumask_test_cpu(smp_processor_id(), tick_broadcast_force_mask); return cpumask_test_cpu(smp_processor_id(), tick_broadcast_force_mask);
#endif
} }
/* /*
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment