Commit 58c644ba authored by Peter Zijlstra's avatar Peter Zijlstra

sched/idle: Fix arch_cpu_idle() vs tracing

We call arch_cpu_idle() with RCU disabled, but then use
local_irq_{en,dis}able(), which invokes tracing, which relies on RCU.

Switch all arch_cpu_idle() implementations to use
raw_local_irq_{en,dis}able() and carefully manage the
lockdep,rcu,tracing state like we do in entry.

(XXX: we really should change arch_cpu_idle() to not return with
interrupts enabled)
Reported-by: Sven Schnelle <svens@linux.ibm.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Mark Rutland <mark.rutland@arm.com>
Tested-by: Mark Rutland <mark.rutland@arm.com>
Link: https://lkml.kernel.org/r/20201120114925.594122626@infradead.org
parent 43be4388
...@@ -57,7 +57,7 @@ EXPORT_SYMBOL(pm_power_off); ...@@ -57,7 +57,7 @@ EXPORT_SYMBOL(pm_power_off);
void arch_cpu_idle(void) void arch_cpu_idle(void)
{ {
wtint(0); wtint(0);
local_irq_enable(); raw_local_irq_enable();
} }
void arch_cpu_idle_dead(void) void arch_cpu_idle_dead(void)
......
...@@ -71,7 +71,7 @@ void arch_cpu_idle(void) ...@@ -71,7 +71,7 @@ void arch_cpu_idle(void)
arm_pm_idle(); arm_pm_idle();
else else
cpu_do_idle(); cpu_do_idle();
local_irq_enable(); raw_local_irq_enable();
} }
void arch_cpu_idle_prepare(void) void arch_cpu_idle_prepare(void)
......
...@@ -126,7 +126,7 @@ void arch_cpu_idle(void) ...@@ -126,7 +126,7 @@ void arch_cpu_idle(void)
* tricks * tricks
*/ */
cpu_do_idle(); cpu_do_idle();
local_irq_enable(); raw_local_irq_enable();
} }
#ifdef CONFIG_HOTPLUG_CPU #ifdef CONFIG_HOTPLUG_CPU
......
...@@ -102,6 +102,6 @@ void arch_cpu_idle(void) ...@@ -102,6 +102,6 @@ void arch_cpu_idle(void)
#ifdef CONFIG_CPU_PM_STOP #ifdef CONFIG_CPU_PM_STOP
asm volatile("stop\n"); asm volatile("stop\n");
#endif #endif
local_irq_enable(); raw_local_irq_enable();
} }
#endif #endif
...@@ -57,7 +57,7 @@ asmlinkage void ret_from_kernel_thread(void); ...@@ -57,7 +57,7 @@ asmlinkage void ret_from_kernel_thread(void);
*/ */
void arch_cpu_idle(void) void arch_cpu_idle(void)
{ {
local_irq_enable(); raw_local_irq_enable();
__asm__("sleep"); __asm__("sleep");
} }
......
...@@ -44,7 +44,7 @@ void arch_cpu_idle(void) ...@@ -44,7 +44,7 @@ void arch_cpu_idle(void)
{ {
__vmwait(); __vmwait();
/* interrupts wake us up, but irqs are still disabled */ /* interrupts wake us up, but irqs are still disabled */
local_irq_enable(); raw_local_irq_enable();
} }
/* /*
......
...@@ -239,7 +239,7 @@ void arch_cpu_idle(void) ...@@ -239,7 +239,7 @@ void arch_cpu_idle(void)
if (mark_idle) if (mark_idle)
(*mark_idle)(1); (*mark_idle)(1);
safe_halt(); raw_safe_halt();
if (mark_idle) if (mark_idle)
(*mark_idle)(0); (*mark_idle)(0);
......
...@@ -149,5 +149,5 @@ int dump_fpu(struct pt_regs *regs, elf_fpregset_t *fpregs) ...@@ -149,5 +149,5 @@ int dump_fpu(struct pt_regs *regs, elf_fpregset_t *fpregs)
void arch_cpu_idle(void) void arch_cpu_idle(void)
{ {
local_irq_enable(); raw_local_irq_enable();
} }
...@@ -33,19 +33,19 @@ static void __cpuidle r3081_wait(void) ...@@ -33,19 +33,19 @@ static void __cpuidle r3081_wait(void)
{ {
unsigned long cfg = read_c0_conf(); unsigned long cfg = read_c0_conf();
write_c0_conf(cfg | R30XX_CONF_HALT); write_c0_conf(cfg | R30XX_CONF_HALT);
local_irq_enable(); raw_local_irq_enable();
} }
static void __cpuidle r39xx_wait(void) static void __cpuidle r39xx_wait(void)
{ {
if (!need_resched()) if (!need_resched())
write_c0_conf(read_c0_conf() | TX39_CONF_HALT); write_c0_conf(read_c0_conf() | TX39_CONF_HALT);
local_irq_enable(); raw_local_irq_enable();
} }
void __cpuidle r4k_wait(void) void __cpuidle r4k_wait(void)
{ {
local_irq_enable(); raw_local_irq_enable();
__r4k_wait(); __r4k_wait();
} }
...@@ -64,7 +64,7 @@ void __cpuidle r4k_wait_irqoff(void) ...@@ -64,7 +64,7 @@ void __cpuidle r4k_wait_irqoff(void)
" .set arch=r4000 \n" " .set arch=r4000 \n"
" wait \n" " wait \n"
" .set pop \n"); " .set pop \n");
local_irq_enable(); raw_local_irq_enable();
} }
/* /*
...@@ -84,7 +84,7 @@ static void __cpuidle rm7k_wait_irqoff(void) ...@@ -84,7 +84,7 @@ static void __cpuidle rm7k_wait_irqoff(void)
" wait \n" " wait \n"
" mtc0 $1, $12 # stalls until W stage \n" " mtc0 $1, $12 # stalls until W stage \n"
" .set pop \n"); " .set pop \n");
local_irq_enable(); raw_local_irq_enable();
} }
/* /*
...@@ -257,7 +257,7 @@ void arch_cpu_idle(void) ...@@ -257,7 +257,7 @@ void arch_cpu_idle(void)
if (cpu_wait) if (cpu_wait)
cpu_wait(); cpu_wait();
else else
local_irq_enable(); raw_local_irq_enable();
} }
#ifdef CONFIG_CPU_IDLE #ifdef CONFIG_CPU_IDLE
......
...@@ -33,7 +33,7 @@ EXPORT_SYMBOL(pm_power_off); ...@@ -33,7 +33,7 @@ EXPORT_SYMBOL(pm_power_off);
void arch_cpu_idle(void) void arch_cpu_idle(void)
{ {
local_irq_enable(); raw_local_irq_enable();
} }
/* /*
......
...@@ -79,7 +79,7 @@ void machine_power_off(void) ...@@ -79,7 +79,7 @@ void machine_power_off(void)
*/ */
void arch_cpu_idle(void) void arch_cpu_idle(void)
{ {
local_irq_enable(); raw_local_irq_enable();
if (mfspr(SPR_UPR) & SPR_UPR_PMP) if (mfspr(SPR_UPR) & SPR_UPR_PMP)
mtspr(SPR_PMR, mfspr(SPR_PMR) | SPR_PMR_DME); mtspr(SPR_PMR, mfspr(SPR_PMR) | SPR_PMR_DME);
} }
......
...@@ -169,7 +169,7 @@ void __cpuidle arch_cpu_idle_dead(void) ...@@ -169,7 +169,7 @@ void __cpuidle arch_cpu_idle_dead(void)
void __cpuidle arch_cpu_idle(void) void __cpuidle arch_cpu_idle(void)
{ {
local_irq_enable(); raw_local_irq_enable();
/* nop on real hardware, qemu will idle sleep. */ /* nop on real hardware, qemu will idle sleep. */
asm volatile("or %%r10,%%r10,%%r10\n":::); asm volatile("or %%r10,%%r10,%%r10\n":::);
......
...@@ -52,9 +52,9 @@ void arch_cpu_idle(void) ...@@ -52,9 +52,9 @@ void arch_cpu_idle(void)
* interrupts enabled, some don't. * interrupts enabled, some don't.
*/ */
if (irqs_disabled()) if (irqs_disabled())
local_irq_enable(); raw_local_irq_enable();
} else { } else {
local_irq_enable(); raw_local_irq_enable();
/* /*
* Go into low thread priority and possibly * Go into low thread priority and possibly
* low power mode. * low power mode.
......
...@@ -36,7 +36,7 @@ extern asmlinkage void ret_from_kernel_thread(void); ...@@ -36,7 +36,7 @@ extern asmlinkage void ret_from_kernel_thread(void);
void arch_cpu_idle(void) void arch_cpu_idle(void)
{ {
wait_for_interrupt(); wait_for_interrupt();
local_irq_enable(); raw_local_irq_enable();
} }
void show_regs(struct pt_regs *regs) void show_regs(struct pt_regs *regs)
......
...@@ -33,10 +33,10 @@ void enabled_wait(void) ...@@ -33,10 +33,10 @@ void enabled_wait(void)
PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK; PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK;
clear_cpu_flag(CIF_NOHZ_DELAY); clear_cpu_flag(CIF_NOHZ_DELAY);
local_irq_save(flags); raw_local_irq_save(flags);
/* Call the assembler magic in entry.S */ /* Call the assembler magic in entry.S */
psw_idle(idle, psw_mask); psw_idle(idle, psw_mask);
local_irq_restore(flags); raw_local_irq_restore(flags);
/* Account time spent with enabled wait psw loaded as idle time. */ /* Account time spent with enabled wait psw loaded as idle time. */
raw_write_seqcount_begin(&idle->seqcount); raw_write_seqcount_begin(&idle->seqcount);
...@@ -123,7 +123,7 @@ void arch_cpu_idle_enter(void) ...@@ -123,7 +123,7 @@ void arch_cpu_idle_enter(void)
void arch_cpu_idle(void) void arch_cpu_idle(void)
{ {
enabled_wait(); enabled_wait();
local_irq_enable(); raw_local_irq_enable();
} }
void arch_cpu_idle_exit(void) void arch_cpu_idle_exit(void)
......
...@@ -22,7 +22,7 @@ static void (*sh_idle)(void); ...@@ -22,7 +22,7 @@ static void (*sh_idle)(void);
void default_idle(void) void default_idle(void)
{ {
set_bl_bit(); set_bl_bit();
local_irq_enable(); raw_local_irq_enable();
/* Isn't this racy ? */ /* Isn't this racy ? */
cpu_sleep(); cpu_sleep();
clear_bl_bit(); clear_bl_bit();
......
...@@ -50,7 +50,7 @@ static void pmc_leon_idle_fixup(void) ...@@ -50,7 +50,7 @@ static void pmc_leon_idle_fixup(void)
register unsigned int address = (unsigned int)leon3_irqctrl_regs; register unsigned int address = (unsigned int)leon3_irqctrl_regs;
/* Interrupts need to be enabled to not hang the CPU */ /* Interrupts need to be enabled to not hang the CPU */
local_irq_enable(); raw_local_irq_enable();
__asm__ __volatile__ ( __asm__ __volatile__ (
"wr %%g0, %%asr19\n" "wr %%g0, %%asr19\n"
...@@ -66,7 +66,7 @@ static void pmc_leon_idle_fixup(void) ...@@ -66,7 +66,7 @@ static void pmc_leon_idle_fixup(void)
static void pmc_leon_idle(void) static void pmc_leon_idle(void)
{ {
/* Interrupts need to be enabled to not hang the CPU */ /* Interrupts need to be enabled to not hang the CPU */
local_irq_enable(); raw_local_irq_enable();
/* For systems without power-down, this will be no-op */ /* For systems without power-down, this will be no-op */
__asm__ __volatile__ ("wr %g0, %asr19\n\t"); __asm__ __volatile__ ("wr %g0, %asr19\n\t");
......
...@@ -74,7 +74,7 @@ void arch_cpu_idle(void) ...@@ -74,7 +74,7 @@ void arch_cpu_idle(void)
{ {
if (sparc_idle) if (sparc_idle)
(*sparc_idle)(); (*sparc_idle)();
local_irq_enable(); raw_local_irq_enable();
} }
/* XXX cli/sti -> local_irq_xxx here, check this works once SMP is fixed. */ /* XXX cli/sti -> local_irq_xxx here, check this works once SMP is fixed. */
......
...@@ -62,11 +62,11 @@ void arch_cpu_idle(void) ...@@ -62,11 +62,11 @@ void arch_cpu_idle(void)
{ {
if (tlb_type != hypervisor) { if (tlb_type != hypervisor) {
touch_nmi_watchdog(); touch_nmi_watchdog();
local_irq_enable(); raw_local_irq_enable();
} else { } else {
unsigned long pstate; unsigned long pstate;
local_irq_enable(); raw_local_irq_enable();
/* The sun4v sleeping code requires that we have PSTATE.IE cleared over /* The sun4v sleeping code requires that we have PSTATE.IE cleared over
* the cpu sleep hypervisor call. * the cpu sleep hypervisor call.
......
...@@ -217,7 +217,7 @@ void arch_cpu_idle(void) ...@@ -217,7 +217,7 @@ void arch_cpu_idle(void)
{ {
cpu_tasks[current_thread_info()->cpu].pid = os_getpid(); cpu_tasks[current_thread_info()->cpu].pid = os_getpid();
um_idle_sleep(); um_idle_sleep();
local_irq_enable(); raw_local_irq_enable();
} }
int __cant_sleep(void) { int __cant_sleep(void) {
......
...@@ -88,8 +88,6 @@ static inline void __mwaitx(unsigned long eax, unsigned long ebx, ...@@ -88,8 +88,6 @@ static inline void __mwaitx(unsigned long eax, unsigned long ebx,
static inline void __sti_mwait(unsigned long eax, unsigned long ecx) static inline void __sti_mwait(unsigned long eax, unsigned long ecx)
{ {
trace_hardirqs_on();
mds_idle_clear_cpu_buffers(); mds_idle_clear_cpu_buffers();
/* "mwait %eax, %ecx;" */ /* "mwait %eax, %ecx;" */
asm volatile("sti; .byte 0x0f, 0x01, 0xc9;" asm volatile("sti; .byte 0x0f, 0x01, 0xc9;"
......
...@@ -685,7 +685,7 @@ void arch_cpu_idle(void) ...@@ -685,7 +685,7 @@ void arch_cpu_idle(void)
*/ */
void __cpuidle default_idle(void) void __cpuidle default_idle(void)
{ {
safe_halt(); raw_safe_halt();
} }
#if defined(CONFIG_APM_MODULE) || defined(CONFIG_HALTPOLL_CPUIDLE_MODULE) #if defined(CONFIG_APM_MODULE) || defined(CONFIG_HALTPOLL_CPUIDLE_MODULE)
EXPORT_SYMBOL(default_idle); EXPORT_SYMBOL(default_idle);
...@@ -736,6 +736,8 @@ void stop_this_cpu(void *dummy) ...@@ -736,6 +736,8 @@ void stop_this_cpu(void *dummy)
/* /*
* AMD Erratum 400 aware idle routine. We handle it the same way as C3 power * AMD Erratum 400 aware idle routine. We handle it the same way as C3 power
* states (local apic timer and TSC stop). * states (local apic timer and TSC stop).
*
* XXX this function is completely buggered vs RCU and tracing.
*/ */
static void amd_e400_idle(void) static void amd_e400_idle(void)
{ {
...@@ -757,9 +759,9 @@ static void amd_e400_idle(void) ...@@ -757,9 +759,9 @@ static void amd_e400_idle(void)
* The switch back from broadcast mode needs to be called with * The switch back from broadcast mode needs to be called with
* interrupts disabled. * interrupts disabled.
*/ */
local_irq_disable(); raw_local_irq_disable();
tick_broadcast_exit(); tick_broadcast_exit();
local_irq_enable(); raw_local_irq_enable();
} }
/* /*
...@@ -801,9 +803,9 @@ static __cpuidle void mwait_idle(void) ...@@ -801,9 +803,9 @@ static __cpuidle void mwait_idle(void)
if (!need_resched()) if (!need_resched())
__sti_mwait(0, 0); __sti_mwait(0, 0);
else else
local_irq_enable(); raw_local_irq_enable();
} else { } else {
local_irq_enable(); raw_local_irq_enable();
} }
__current_clr_polling(); __current_clr_polling();
} }
......
...@@ -78,7 +78,7 @@ void __weak arch_cpu_idle_dead(void) { } ...@@ -78,7 +78,7 @@ void __weak arch_cpu_idle_dead(void) { }
void __weak arch_cpu_idle(void) void __weak arch_cpu_idle(void)
{ {
cpu_idle_force_poll = 1; cpu_idle_force_poll = 1;
local_irq_enable(); raw_local_irq_enable();
} }
/** /**
...@@ -94,9 +94,35 @@ void __cpuidle default_idle_call(void) ...@@ -94,9 +94,35 @@ void __cpuidle default_idle_call(void)
trace_cpu_idle(1, smp_processor_id()); trace_cpu_idle(1, smp_processor_id());
stop_critical_timings(); stop_critical_timings();
/*
* arch_cpu_idle() is supposed to enable IRQs, however
* we can't do that because of RCU and tracing.
*
* Trace IRQs enable here, then switch off RCU, and have
* arch_cpu_idle() use raw_local_irq_enable(). Note that
* rcu_idle_enter() relies on lockdep IRQ state, so switch that
* last -- this is very similar to the entry code.
*/
trace_hardirqs_on_prepare();
lockdep_hardirqs_on_prepare(_THIS_IP_);
rcu_idle_enter(); rcu_idle_enter();
lockdep_hardirqs_on(_THIS_IP_);
arch_cpu_idle(); arch_cpu_idle();
/*
* OK, so IRQs are enabled here, but RCU needs them disabled to
* turn itself back on.. funny thing is that disabling IRQs
* will cause tracing, which needs RCU. Jump through hoops to
* make it 'work'.
*/
raw_local_irq_disable();
lockdep_hardirqs_off(_THIS_IP_);
rcu_idle_exit(); rcu_idle_exit();
lockdep_hardirqs_on(_THIS_IP_);
raw_local_irq_enable();
start_critical_timings(); start_critical_timings();
trace_cpu_idle(PWR_EVENT_EXIT, smp_processor_id()); trace_cpu_idle(PWR_EVENT_EXIT, smp_processor_id());
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment