Commit 306e0604, authored by Mathieu Desnoyers and committed by Ingo Molnar

membarrier: Document scheduler barrier requirements

Document the membarrier requirement on having a full memory barrier in
__schedule() after coming from user-space, before storing to rq->curr.
It is provided by smp_mb__after_spinlock() in __schedule().

Document that membarrier requires a full barrier on transition from
kernel thread to userspace thread. We currently have an implicit barrier
from atomic_dec_and_test() in mmdrop() that ensures this.

The x86 switch_mm_irqs_off() full barrier is currently provided by many
cpumask update operations as well as write_cr3(). Document that
write_cr3() provides this barrier.

Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Andrea Parri <parri.andrea@gmail.com>
Cc: Andrew Hunter <ahh@google.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Avi Kivity <avi@scylladb.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Boqun Feng <boqun.feng@gmail.com>
Cc: Dave Watson <davejwatson@fb.com>
Cc: David Sehr <sehr@google.com>
Cc: Greg Hackmann <ghackmann@google.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Maged Michael <maged.michael@gmail.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Russell King <linux@armlinux.org.uk>
Cc: Will Deacon <will.deacon@arm.com>
Cc: linux-api@vger.kernel.org
Link: http://lkml.kernel.org/r/20180129202020.8515-4-mathieu.desnoyers@efficios.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
parent 3ccfebed
...@@ -228,6 +228,11 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next, ...@@ -228,6 +228,11 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
#endif #endif
this_cpu_write(cpu_tlbstate.is_lazy, false); this_cpu_write(cpu_tlbstate.is_lazy, false);
/*
* The membarrier system call requires a full memory barrier
* before returning to user-space, after storing to rq->curr.
* Writing to CR3 provides that full memory barrier.
*/
if (real_prev == next) { if (real_prev == next) {
VM_WARN_ON(this_cpu_read(cpu_tlbstate.ctxs[prev_asid].ctx_id) != VM_WARN_ON(this_cpu_read(cpu_tlbstate.ctxs[prev_asid].ctx_id) !=
next->context.ctx_id); next->context.ctx_id);
......
...@@ -39,6 +39,11 @@ static inline void mmgrab(struct mm_struct *mm) ...@@ -39,6 +39,11 @@ static inline void mmgrab(struct mm_struct *mm)
extern void __mmdrop(struct mm_struct *); extern void __mmdrop(struct mm_struct *);
static inline void mmdrop(struct mm_struct *mm) static inline void mmdrop(struct mm_struct *mm)
{ {
/*
* The implicit full barrier implied by atomic_dec_and_test() is
* required by the membarrier system call before returning to
* user-space, after storing to rq->curr.
*/
if (unlikely(atomic_dec_and_test(&mm->mm_count))) if (unlikely(atomic_dec_and_test(&mm->mm_count)))
__mmdrop(mm); __mmdrop(mm);
} }
......
...@@ -2703,6 +2703,12 @@ static struct rq *finish_task_switch(struct task_struct *prev) ...@@ -2703,6 +2703,12 @@ static struct rq *finish_task_switch(struct task_struct *prev)
finish_arch_post_lock_switch(); finish_arch_post_lock_switch();
fire_sched_in_preempt_notifiers(current); fire_sched_in_preempt_notifiers(current);
/*
* When transitioning from a kernel thread to a userspace
* thread, mmdrop()'s implicit full barrier is required by the
* membarrier system call, because the current ->active_mm can
* become the current mm without going through switch_mm().
*/
if (mm) if (mm)
mmdrop(mm); mmdrop(mm);
if (unlikely(prev_state == TASK_DEAD)) { if (unlikely(prev_state == TASK_DEAD)) {
...@@ -2808,6 +2814,13 @@ context_switch(struct rq *rq, struct task_struct *prev, ...@@ -2808,6 +2814,13 @@ context_switch(struct rq *rq, struct task_struct *prev,
*/ */
arch_start_context_switch(prev); arch_start_context_switch(prev);
/*
* If mm is non-NULL, we pass through switch_mm(). If mm is
* NULL, we will pass through mmdrop() in finish_task_switch().
* Both of these contain the full memory barrier required by
* membarrier after storing to rq->curr, before returning to
* user-space.
*/
if (!mm) { if (!mm) {
next->active_mm = oldmm; next->active_mm = oldmm;
mmgrab(oldmm); mmgrab(oldmm);
...@@ -3344,6 +3357,9 @@ static void __sched notrace __schedule(bool preempt) ...@@ -3344,6 +3357,9 @@ static void __sched notrace __schedule(bool preempt)
* Make sure that signal_pending_state()->signal_pending() below * Make sure that signal_pending_state()->signal_pending() below
* can't be reordered with __set_current_state(TASK_INTERRUPTIBLE) * can't be reordered with __set_current_state(TASK_INTERRUPTIBLE)
* done by the caller to avoid the race with signal_wake_up(). * done by the caller to avoid the race with signal_wake_up().
*
* The membarrier system call requires a full memory barrier
* after coming from user-space, before storing to rq->curr.
*/ */
rq_lock(rq, &rf); rq_lock(rq, &rf);
smp_mb__after_spinlock(); smp_mb__after_spinlock();
...@@ -3391,17 +3407,16 @@ static void __sched notrace __schedule(bool preempt) ...@@ -3391,17 +3407,16 @@ static void __sched notrace __schedule(bool preempt)
/* /*
* The membarrier system call requires each architecture * The membarrier system call requires each architecture
* to have a full memory barrier after updating * to have a full memory barrier after updating
* rq->curr, before returning to user-space. For TSO * rq->curr, before returning to user-space.
* (e.g. x86), the architecture must provide its own *
* barrier in switch_mm(). For weakly ordered machines * Here are the schemes providing that barrier on the
* for which spin_unlock() acts as a full memory * various architectures:
* barrier, finish_lock_switch() in common code takes * - mm ? switch_mm() : mmdrop() for x86, s390, sparc, PowerPC.
* care of this barrier. For weakly ordered machines for * switch_mm() rely on membarrier_arch_switch_mm() on PowerPC.
* which spin_unlock() acts as a RELEASE barrier (only * - finish_lock_switch() for weakly-ordered
* arm64 and PowerPC), arm64 has a full barrier in * architectures where spin_unlock is a full barrier,
* switch_to(), and PowerPC has * - switch_to() for arm64 (weakly-ordered, spin_unlock
* smp_mb__after_unlock_lock() before * is a RELEASE barrier),
* finish_lock_switch().
*/ */
++*switch_count; ++*switch_count;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment