Commit 3eeec385 authored by Thomas Gleixner

x86/entry: Provide idtentry_entry/exit_cond_rcu()

After a lengthy discussion [1] it turned out that RCU does not need a full
rcu_irq_enter/exit() when RCU is already watching. All it needs if
NOHZ_FULL is active is to check whether the tick needs to be restarted.

This avoids the need for a separate variant for the pagefault handler, which
cannot invoke rcu_irq_enter() on a kernel pagefault which might sleep.

The cond_rcu argument is only temporary and will be removed once the
existing users of idtentry_enter/exit() have been cleaned up. After that
the code can be significantly simplified.

[ mingo: Simplified the control flow ]
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Acked-by: "Paul E. McKenney" <paulmck@kernel.org>
Acked-by: Andy Lutomirski <luto@kernel.org>
Link: [1] https://lkml.kernel.org/r/20200515235125.628629605@linutronix.de
Link: https://lore.kernel.org/r/20200521202117.181397835@linutronix.de
parent 2ab70319
...@@ -512,8 +512,10 @@ SYSCALL_DEFINE0(ni_syscall) ...@@ -512,8 +512,10 @@ SYSCALL_DEFINE0(ni_syscall)
} }
/** /**
* idtentry_enter - Handle state tracking on idtentry * idtentry_enter_cond_rcu - Handle state tracking on idtentry with conditional
* RCU handling
* @regs: Pointer to pt_regs of interrupted context * @regs: Pointer to pt_regs of interrupted context
* @cond_rcu: Invoke rcu_irq_enter() only if RCU is not watching
* *
* Invokes: * Invokes:
* - lockdep irqflag state tracking as low level ASM entry disabled * - lockdep irqflag state tracking as low level ASM entry disabled
...@@ -521,40 +523,84 @@ SYSCALL_DEFINE0(ni_syscall) ...@@ -521,40 +523,84 @@ SYSCALL_DEFINE0(ni_syscall)
* *
* - Context tracking if the exception hit user mode. * - Context tracking if the exception hit user mode.
* *
* - RCU notification if the exception hit kernel mode.
*
* - The hardirq tracer to keep the state consistent as low level ASM * - The hardirq tracer to keep the state consistent as low level ASM
* entry disabled interrupts. * entry disabled interrupts.
*
* For kernel mode entries RCU handling is done conditionally. If RCU is
* watching then the only RCU requirement is to check whether the tick has
* to be restarted. If RCU is not watching then rcu_irq_enter() has to be
* invoked on entry and rcu_irq_exit() on exit.
*
* Avoiding the rcu_irq_enter/exit() calls is an optimization but also
* solves the problem of kernel mode pagefaults which can schedule, which
* is not possible after invoking rcu_irq_enter() without undoing it.
*
* For user mode entries enter_from_user_mode() must be invoked to
* establish the proper context for NOHZ_FULL. Otherwise scheduling on exit
* would not be possible.
*
* Returns: True if RCU has been adjusted on a kernel entry
* False otherwise
*
* The return value must be fed into the rcu_exit argument of
* idtentry_exit_cond_rcu().
*/ */
void noinstr idtentry_enter(struct pt_regs *regs) bool noinstr idtentry_enter_cond_rcu(struct pt_regs *regs, bool cond_rcu)
{ {
if (user_mode(regs)) { if (user_mode(regs)) {
enter_from_user_mode(); enter_from_user_mode();
} else { return false;
}
if (!cond_rcu || !__rcu_is_watching()) {
/*
* If RCU is not watching then the same careful
* sequence vs. lockdep and tracing is required
* as in enter_from_user_mode().
*
* This only happens for IRQs that hit the idle
* loop, i.e. if idle is not using MWAIT.
*/
lockdep_hardirqs_off(CALLER_ADDR0); lockdep_hardirqs_off(CALLER_ADDR0);
rcu_irq_enter(); rcu_irq_enter();
instrumentation_begin(); instrumentation_begin();
trace_hardirqs_off_prepare(); trace_hardirqs_off_prepare();
instrumentation_end(); instrumentation_end();
return true;
} }
/*
* If RCU is watching then RCU only wants to check
* whether it needs to restart the tick in NOHZ
* mode.
*/
instrumentation_begin();
rcu_irq_enter_check_tick();
/* Use the combo lockdep/tracing function */
trace_hardirqs_off();
instrumentation_end();
return false;
} }
/** /**
* idtentry_exit - Common code to handle return from exceptions * idtentry_exit_cond_rcu - Handle return from exception with conditional RCU
* handling
* @regs: Pointer to pt_regs (exception entry regs) * @regs: Pointer to pt_regs (exception entry regs)
* @rcu_exit: Invoke rcu_irq_exit() if true
* *
* Depending on the return target (kernel/user) this runs the necessary * Depending on the return target (kernel/user) this runs the necessary
* preemption and work checks if possible and required and returns to * preemption and work checks if possible and required and returns to
* the caller with interrupts disabled and no further work pending. * the caller with interrupts disabled and no further work pending.
* *
* This is the last action before returning to the low level ASM code which * This is the last action before returning to the low level ASM code which
* just needs to return to the appropriate context. * just needs to return to the appropriate context.
* *
* Invoked by all exception/interrupt IDTENTRY handlers which are not * Counterpart to idtentry_enter_cond_rcu(). The return value of the entry
* returning through the paranoid exit path (all except NMI, #DF and the IST * function must be fed into the @rcu_exit argument.
* variants of #MC and #DB) and are therefore on the thread stack.
*/ */
void noinstr idtentry_exit(struct pt_regs *regs) void noinstr idtentry_exit_cond_rcu(struct pt_regs *regs, bool rcu_exit)
{ {
lockdep_assert_irqs_disabled(); lockdep_assert_irqs_disabled();
...@@ -580,7 +626,8 @@ void noinstr idtentry_exit(struct pt_regs *regs) ...@@ -580,7 +626,8 @@ void noinstr idtentry_exit(struct pt_regs *regs)
if (IS_ENABLED(CONFIG_DEBUG_ENTRY)) if (IS_ENABLED(CONFIG_DEBUG_ENTRY))
WARN_ON_ONCE(!on_thread_stack()); WARN_ON_ONCE(!on_thread_stack());
instrumentation_begin(); instrumentation_begin();
rcu_irq_exit_preempt(); if (rcu_exit)
rcu_irq_exit_preempt();
if (need_resched()) if (need_resched())
preempt_schedule_irq(); preempt_schedule_irq();
/* Covers both tracing and lockdep */ /* Covers both tracing and lockdep */
...@@ -602,10 +649,12 @@ void noinstr idtentry_exit(struct pt_regs *regs) ...@@ -602,10 +649,12 @@ void noinstr idtentry_exit(struct pt_regs *regs)
trace_hardirqs_on_prepare(); trace_hardirqs_on_prepare();
lockdep_hardirqs_on_prepare(CALLER_ADDR0); lockdep_hardirqs_on_prepare(CALLER_ADDR0);
instrumentation_end(); instrumentation_end();
rcu_irq_exit(); if (rcu_exit)
rcu_irq_exit();
lockdep_hardirqs_on(CALLER_ADDR0); lockdep_hardirqs_on(CALLER_ADDR0);
} else { } else {
/* IRQ flags state is correct already. Just tell RCU */ /* IRQ flags state is correct already. Just tell RCU. */
rcu_irq_exit(); if (rcu_exit)
rcu_irq_exit();
} }
} }
...@@ -7,8 +7,18 @@ ...@@ -7,8 +7,18 @@
#ifndef __ASSEMBLY__ #ifndef __ASSEMBLY__
void idtentry_enter(struct pt_regs *regs); bool idtentry_enter_cond_rcu(struct pt_regs *regs, bool cond_rcu);
void idtentry_exit(struct pt_regs *regs); void idtentry_exit_cond_rcu(struct pt_regs *regs, bool rcu_exit);
static __always_inline void idtentry_enter(struct pt_regs *regs)
{
idtentry_enter_cond_rcu(regs, false);
}
static __always_inline void idtentry_exit(struct pt_regs *regs)
{
idtentry_exit_cond_rcu(regs, true);
}
/** /**
* DECLARE_IDTENTRY - Declare functions for simple IDT entry points * DECLARE_IDTENTRY - Declare functions for simple IDT entry points
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment