Commit c5f58bd5, authored by Mathieu Desnoyers, committed by Ingo Molnar

membarrier: Provide GLOBAL_EXPEDITED command

Allow expedited membarrier to be used for data shared between processes
through shared memory.

Processes wishing to receive the membarriers register with
MEMBARRIER_CMD_REGISTER_GLOBAL_EXPEDITED. Those which want to issue
membarrier invoke MEMBARRIER_CMD_GLOBAL_EXPEDITED.

This allows an extremely simple kernel-level implementation: we have almost
everything we need with the PRIVATE_EXPEDITED barrier code. All we need
to do is to add a flag in the mm_struct that will be used to check
whether we need to send the IPI to the current thread of each CPU.

There is a slight downside to this approach compared to targeting
specific shared memory users: when performing a membarrier operation,
all registered "global" receivers will get the barrier, even if they
don't share a memory mapping with the sender issuing
MEMBARRIER_CMD_GLOBAL_EXPEDITED.

This registration approach seems to fit the requirement of not
disturbing processes that really deeply care about real-time: they
simply should not register with MEMBARRIER_CMD_REGISTER_GLOBAL_EXPEDITED.

In order to align the membarrier command names, the "MEMBARRIER_CMD_SHARED"
command is renamed to "MEMBARRIER_CMD_GLOBAL", keeping an alias of
MEMBARRIER_CMD_SHARED to MEMBARRIER_CMD_GLOBAL for UAPI header backward
compatibility.
Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Andrea Parri <parri.andrea@gmail.com>
Cc: Andrew Hunter <ahh@google.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Avi Kivity <avi@scylladb.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Boqun Feng <boqun.feng@gmail.com>
Cc: Dave Watson <davejwatson@fb.com>
Cc: David Sehr <sehr@google.com>
Cc: Greg Hackmann <ghackmann@google.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Maged Michael <maged.michael@gmail.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Russell King <linux@armlinux.org.uk>
Cc: Will Deacon <will.deacon@arm.com>
Cc: linux-api@vger.kernel.org
Link: http://lkml.kernel.org/r/20180129202020.8515-5-mathieu.desnoyers@efficios.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
parent 306e0604
...@@ -13,7 +13,8 @@ static inline void membarrier_arch_switch_mm(struct mm_struct *prev, ...@@ -13,7 +13,8 @@ static inline void membarrier_arch_switch_mm(struct mm_struct *prev,
* store to rq->curr. * store to rq->curr.
*/ */
if (likely(!(atomic_read(&next->membarrier_state) & if (likely(!(atomic_read(&next->membarrier_state) &
MEMBARRIER_STATE_PRIVATE_EXPEDITED) || !prev)) (MEMBARRIER_STATE_PRIVATE_EXPEDITED |
MEMBARRIER_STATE_GLOBAL_EXPEDITED)) || !prev))
return; return;
/* /*
......
...@@ -221,6 +221,8 @@ static inline void memalloc_noreclaim_restore(unsigned int flags) ...@@ -221,6 +221,8 @@ static inline void memalloc_noreclaim_restore(unsigned int flags)
enum { enum {
MEMBARRIER_STATE_PRIVATE_EXPEDITED_READY = (1U << 0), MEMBARRIER_STATE_PRIVATE_EXPEDITED_READY = (1U << 0),
MEMBARRIER_STATE_PRIVATE_EXPEDITED = (1U << 1), MEMBARRIER_STATE_PRIVATE_EXPEDITED = (1U << 1),
MEMBARRIER_STATE_GLOBAL_EXPEDITED_READY = (1U << 2),
MEMBARRIER_STATE_GLOBAL_EXPEDITED = (1U << 3),
}; };
#ifdef CONFIG_ARCH_HAS_MEMBARRIER_CALLBACKS #ifdef CONFIG_ARCH_HAS_MEMBARRIER_CALLBACKS
......
...@@ -31,7 +31,7 @@ ...@@ -31,7 +31,7 @@
* enum membarrier_cmd - membarrier system call command * enum membarrier_cmd - membarrier system call command
* @MEMBARRIER_CMD_QUERY: Query the set of supported commands. It returns * @MEMBARRIER_CMD_QUERY: Query the set of supported commands. It returns
* a bitmask of valid commands. * a bitmask of valid commands.
* @MEMBARRIER_CMD_SHARED: Execute a memory barrier on all running threads. * @MEMBARRIER_CMD_GLOBAL: Execute a memory barrier on all running threads.
* Upon return from system call, the caller thread * Upon return from system call, the caller thread
* is ensured that all running threads have passed * is ensured that all running threads have passed
* through a state where all memory accesses to * through a state where all memory accesses to
...@@ -40,6 +40,28 @@ ...@@ -40,6 +40,28 @@
* (non-running threads are de facto in such a * (non-running threads are de facto in such a
* state). This covers threads from all processes * state). This covers threads from all processes
* running on the system. This command returns 0. * running on the system. This command returns 0.
* @MEMBARRIER_CMD_GLOBAL_EXPEDITED:
* Execute a memory barrier on all running threads
* of all processes which previously registered
* with MEMBARRIER_CMD_REGISTER_GLOBAL_EXPEDITED.
* Upon return from system call, the caller thread
* is ensured that all running threads have passed
* through a state where all memory accesses to
* user-space addresses match program order between
* entry to and return from the system call
* (non-running threads are de facto in such a
* state). This only covers threads from processes
* which registered with
* MEMBARRIER_CMD_REGISTER_GLOBAL_EXPEDITED.
* This command returns 0. Given that
* registration is about the intent to receive
* the barriers, it is valid to invoke
* MEMBARRIER_CMD_GLOBAL_EXPEDITED from a
* non-registered process.
* @MEMBARRIER_CMD_REGISTER_GLOBAL_EXPEDITED:
* Register the process intent to receive
* MEMBARRIER_CMD_GLOBAL_EXPEDITED memory
* barriers. Always returns 0.
* @MEMBARRIER_CMD_PRIVATE_EXPEDITED: * @MEMBARRIER_CMD_PRIVATE_EXPEDITED:
* Execute a memory barrier on each running * Execute a memory barrier on each running
* thread belonging to the same process as the current * thread belonging to the same process as the current
...@@ -64,6 +86,9 @@ ...@@ -64,6 +86,9 @@
* Register the process intent to use * Register the process intent to use
* MEMBARRIER_CMD_PRIVATE_EXPEDITED. Always * MEMBARRIER_CMD_PRIVATE_EXPEDITED. Always
* returns 0. * returns 0.
* @MEMBARRIER_CMD_SHARED:
* Alias to MEMBARRIER_CMD_GLOBAL. Provided for
* header backward compatibility.
* *
* Command to be passed to the membarrier system call. The commands need to * Command to be passed to the membarrier system call. The commands need to
* be a single bit each, except for MEMBARRIER_CMD_QUERY which is assigned to * be a single bit each, except for MEMBARRIER_CMD_QUERY which is assigned to
...@@ -71,11 +96,14 @@ ...@@ -71,11 +96,14 @@
*/ */
enum membarrier_cmd { enum membarrier_cmd {
MEMBARRIER_CMD_QUERY = 0, MEMBARRIER_CMD_QUERY = 0,
MEMBARRIER_CMD_SHARED = (1 << 0), MEMBARRIER_CMD_GLOBAL = (1 << 0),
/* reserved for MEMBARRIER_CMD_SHARED_EXPEDITED (1 << 1) */ MEMBARRIER_CMD_GLOBAL_EXPEDITED = (1 << 1),
/* reserved for MEMBARRIER_CMD_PRIVATE (1 << 2) */ MEMBARRIER_CMD_REGISTER_GLOBAL_EXPEDITED = (1 << 2),
MEMBARRIER_CMD_PRIVATE_EXPEDITED = (1 << 3), MEMBARRIER_CMD_PRIVATE_EXPEDITED = (1 << 3),
MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED = (1 << 4), MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED = (1 << 4),
/* Alias for header backward compatibility. */
MEMBARRIER_CMD_SHARED = MEMBARRIER_CMD_GLOBAL,
}; };
#endif /* _UAPI_LINUX_MEMBARRIER_H */ #endif /* _UAPI_LINUX_MEMBARRIER_H */
...@@ -27,7 +27,9 @@ ...@@ -27,7 +27,9 @@
* except MEMBARRIER_CMD_QUERY. * except MEMBARRIER_CMD_QUERY.
*/ */
#define MEMBARRIER_CMD_BITMASK \ #define MEMBARRIER_CMD_BITMASK \
(MEMBARRIER_CMD_SHARED | MEMBARRIER_CMD_PRIVATE_EXPEDITED \ (MEMBARRIER_CMD_GLOBAL | MEMBARRIER_CMD_GLOBAL_EXPEDITED \
| MEMBARRIER_CMD_REGISTER_GLOBAL_EXPEDITED \
| MEMBARRIER_CMD_PRIVATE_EXPEDITED \
| MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED) | MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED)
static void ipi_mb(void *info) static void ipi_mb(void *info)
...@@ -35,6 +37,73 @@ static void ipi_mb(void *info) ...@@ -35,6 +37,73 @@ static void ipi_mb(void *info)
smp_mb(); /* IPIs should be serializing but paranoid. */ smp_mb(); /* IPIs should be serializing but paranoid. */
} }
/*
* membarrier_global_expedited - implement MEMBARRIER_CMD_GLOBAL_EXPEDITED.
*
* Sends an ipi_mb() IPI to every other online CPU whose currently
* running task has an mm with MEMBARRIER_STATE_GLOBAL_EXPEDITED set in
* mm->membarrier_state. The caller itself does not need to be
* registered. The target CPUs are normally collected in a temporary
* cpumask and signalled in one smp_call_function_many() call; if the
* cpumask cannot be allocated, each matching CPU is signalled
* individually with smp_call_function_single() instead.
*
* Always returns 0.
*/
static int membarrier_global_expedited(void)
{
int cpu;
bool fallback = false;
cpumask_var_t tmpmask;
/* With a single online CPU there is no other CPU to send an IPI to. */
if (num_online_cpus() == 1)
return 0;
/*
* Matches memory barriers around rq->curr modification in
* scheduler.
*/
smp_mb(); /* system call entry is not a mb. */
/*
* Expedited membarrier commands guarantee that they won't
* block, hence the GFP_NOWAIT allocation flag and fallback
* implementation.
*/
if (!zalloc_cpumask_var(&tmpmask, GFP_NOWAIT)) {
/* Fallback for OOM. */
fallback = true;
}
/*
* cpus_read_lock() is held from before the online-CPU scan until
* after the IPIs have been issued.
*/
cpus_read_lock();
for_each_online_cpu(cpu) {
struct task_struct *p;
/*
* Skipping the current CPU is OK even though we can be
* migrated at any point. The current CPU, at the point
* where we read raw_smp_processor_id(), is ensured to
* be in program order with respect to the caller
* thread. Therefore, we can skip this CPU from the
* iteration.
*/
if (cpu == raw_smp_processor_id())
continue;
/*
* The remote CPU's current task pointer is only dereferenced
* inside this RCU read-side section.
*/
rcu_read_lock();
p = task_rcu_dereference(&cpu_rq(cpu)->curr);
/*
* Only target CPUs whose current task has an mm flagged
* MEMBARRIER_STATE_GLOBAL_EXPEDITED; tasks without an mm are
* skipped.
*/
if (p && p->mm && (atomic_read(&p->mm->membarrier_state) &
MEMBARRIER_STATE_GLOBAL_EXPEDITED)) {
if (!fallback)
__cpumask_set_cpu(cpu, tmpmask);
else
smp_call_function_single(cpu, ipi_mb, NULL, 1);
}
rcu_read_unlock();
}
if (!fallback) {
/*
* Signal all collected CPUs at once, then release the mask.
* NOTE(review): preemption is presumably disabled because
* smp_call_function_many() requires it -- confirm against
* kernel/smp.c.
*/
preempt_disable();
smp_call_function_many(tmpmask, ipi_mb, NULL, 1);
preempt_enable();
free_cpumask_var(tmpmask);
}
cpus_read_unlock();
/*
* Memory barrier on the caller thread _after_ we finished
* waiting for the last IPI. Matches memory barriers around
* rq->curr modification in scheduler.
*/
smp_mb(); /* exit from system call is not a mb */
return 0;
}
static int membarrier_private_expedited(void) static int membarrier_private_expedited(void)
{ {
int cpu; int cpu;
...@@ -105,7 +174,38 @@ static int membarrier_private_expedited(void) ...@@ -105,7 +174,38 @@ static int membarrier_private_expedited(void)
return 0; return 0;
} }
/*
* membarrier_register_global_expedited - implement
* MEMBARRIER_CMD_REGISTER_GLOBAL_EXPEDITED for the calling process.
*
* Sets MEMBARRIER_STATE_GLOBAL_EXPEDITED in current->mm->membarrier_state,
* then either issues smp_mb() (single mm user, single thread) or waits
* in synchronize_sched(), and finally sets
* MEMBARRIER_STATE_GLOBAL_EXPEDITED_READY. If the READY bit is already
* set on entry, nothing is done.
*
* Always returns 0.
*/
static void membarrier_register_private_expedited(void) static int membarrier_register_global_expedited(void)
{
struct task_struct *p = current;
struct mm_struct *mm = p->mm;
/*
* The READY bit is set as the last step below, so finding it set
* means registration already completed for this mm.
*/
if (atomic_read(&mm->membarrier_state) &
MEMBARRIER_STATE_GLOBAL_EXPEDITED_READY)
return 0;
/* Set the flag tested by the GLOBAL_EXPEDITED sender and the arch mm-switch hook. */
atomic_or(MEMBARRIER_STATE_GLOBAL_EXPEDITED, &mm->membarrier_state);
if (atomic_read(&mm->mm_users) == 1 && get_nr_threads(p) == 1) {
/*
* For single mm user, single threaded process, we can
* simply issue a memory barrier after setting
* MEMBARRIER_STATE_GLOBAL_EXPEDITED to guarantee that
* no memory access following registration is reordered
* before registration.
*/
smp_mb();
} else {
/*
* For multi-mm user threads, we need to ensure all
* future scheduler executions will observe the new
* thread flag state for this mm.
*/
synchronize_sched();
}
/* Publish READY only after the barrier / grace period above. */
atomic_or(MEMBARRIER_STATE_GLOBAL_EXPEDITED_READY,
&mm->membarrier_state);
return 0;
}
static int membarrier_register_private_expedited(void)
{ {
struct task_struct *p = current; struct task_struct *p = current;
struct mm_struct *mm = p->mm; struct mm_struct *mm = p->mm;
...@@ -117,7 +217,7 @@ static void membarrier_register_private_expedited(void) ...@@ -117,7 +217,7 @@ static void membarrier_register_private_expedited(void)
*/ */
if (atomic_read(&mm->membarrier_state) if (atomic_read(&mm->membarrier_state)
& MEMBARRIER_STATE_PRIVATE_EXPEDITED_READY) & MEMBARRIER_STATE_PRIVATE_EXPEDITED_READY)
return; return 0;
atomic_or(MEMBARRIER_STATE_PRIVATE_EXPEDITED, &mm->membarrier_state); atomic_or(MEMBARRIER_STATE_PRIVATE_EXPEDITED, &mm->membarrier_state);
if (!(atomic_read(&mm->mm_users) == 1 && get_nr_threads(p) == 1)) { if (!(atomic_read(&mm->mm_users) == 1 && get_nr_threads(p) == 1)) {
/* /*
...@@ -128,6 +228,7 @@ static void membarrier_register_private_expedited(void) ...@@ -128,6 +228,7 @@ static void membarrier_register_private_expedited(void)
} }
atomic_or(MEMBARRIER_STATE_PRIVATE_EXPEDITED_READY, atomic_or(MEMBARRIER_STATE_PRIVATE_EXPEDITED_READY,
&mm->membarrier_state); &mm->membarrier_state);
return 0;
} }
/** /**
...@@ -167,21 +268,24 @@ SYSCALL_DEFINE2(membarrier, int, cmd, int, flags) ...@@ -167,21 +268,24 @@ SYSCALL_DEFINE2(membarrier, int, cmd, int, flags)
int cmd_mask = MEMBARRIER_CMD_BITMASK; int cmd_mask = MEMBARRIER_CMD_BITMASK;
if (tick_nohz_full_enabled()) if (tick_nohz_full_enabled())
cmd_mask &= ~MEMBARRIER_CMD_SHARED; cmd_mask &= ~MEMBARRIER_CMD_GLOBAL;
return cmd_mask; return cmd_mask;
} }
case MEMBARRIER_CMD_SHARED: case MEMBARRIER_CMD_GLOBAL:
/* MEMBARRIER_CMD_SHARED is not compatible with nohz_full. */ /* MEMBARRIER_CMD_GLOBAL is not compatible with nohz_full. */
if (tick_nohz_full_enabled()) if (tick_nohz_full_enabled())
return -EINVAL; return -EINVAL;
if (num_online_cpus() > 1) if (num_online_cpus() > 1)
synchronize_sched(); synchronize_sched();
return 0; return 0;
case MEMBARRIER_CMD_GLOBAL_EXPEDITED:
return membarrier_global_expedited();
case MEMBARRIER_CMD_REGISTER_GLOBAL_EXPEDITED:
return membarrier_register_global_expedited();
case MEMBARRIER_CMD_PRIVATE_EXPEDITED: case MEMBARRIER_CMD_PRIVATE_EXPEDITED:
return membarrier_private_expedited(); return membarrier_private_expedited();
case MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED: case MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED:
membarrier_register_private_expedited(); return membarrier_register_private_expedited();
return 0;
default: default:
return -EINVAL; return -EINVAL;
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment