Commit ad3b8fca authored by Peter Zijlstra's avatar Peter Zijlstra Committed by Jiri Slaby

arch: Introduce smp_load_acquire(), smp_store_release()

commit 47933ad4 upstream.

A number of situations currently require the heavyweight smp_mb(),
even though there is no need to order prior stores against later
loads.  Many architectures have much cheaper ways to handle these
situations, but the Linux kernel currently has no portable way
to make use of them.

This commit therefore supplies smp_load_acquire() and
smp_store_release() to remedy this situation.  The new
smp_load_acquire() primitive orders the specified load against
any subsequent reads or writes, while the new smp_store_release()
primitive orders the specifed store against any prior reads or
writes.  These primitives allow array-based circular FIFOs to be
implemented without an smp_mb(), and also allow a theoretical
hole in rcu_assign_pointer() to be closed at no additional
expense on most architectures.

In addition, the RCU experience transitioning from explicit
smp_read_barrier_depends() and smp_wmb() to rcu_dereference()
and rcu_assign_pointer(), respectively resulted in substantial
improvements in readability.  It therefore seems likely that
replacing other explicit barriers with smp_load_acquire() and
smp_store_release() will provide similar benefits.  It appears
that roughly half of the explicit barriers in core kernel code
might be so replaced.

[Changelog by PaulMck]
Reviewed-by: default avatar"Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Signed-off-by: default avatarPeter Zijlstra <peterz@infradead.org>
Acked-by: default avatarWill Deacon <will.deacon@arm.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
Cc: Michael Ellerman <michael@ellerman.id.au>
Cc: Michael Neuling <mikey@neuling.org>
Cc: Russell King <linux@arm.linux.org.uk>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Victor Kaplansky <VICTORK@il.ibm.com>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Link: http://lkml.kernel.org/r/20131213150640.908486364@infradead.orgSigned-off-by: default avatarIngo Molnar <mingo@kernel.org>
Signed-off-by: default avatarJiri Slaby <jslaby@suse.cz>
parent e0de15fc
...@@ -59,6 +59,21 @@ ...@@ -59,6 +59,21 @@
#define smp_wmb() dmb(ishst) #define smp_wmb() dmb(ishst)
#endif #endif
#define smp_store_release(p, v) \
do { \
compiletime_assert_atomic_type(*p); \
smp_mb(); \
ACCESS_ONCE(*p) = (v); \
} while (0)
#define smp_load_acquire(p) \
({ \
typeof(*p) ___p1 = ACCESS_ONCE(*p); \
compiletime_assert_atomic_type(*p); \
smp_mb(); \
___p1; \
})
#define read_barrier_depends() do { } while(0) #define read_barrier_depends() do { } while(0)
#define smp_read_barrier_depends() do { } while(0) #define smp_read_barrier_depends() do { } while(0)
......
...@@ -35,10 +35,60 @@ ...@@ -35,10 +35,60 @@
#define smp_mb() barrier() #define smp_mb() barrier()
#define smp_rmb() barrier() #define smp_rmb() barrier()
#define smp_wmb() barrier() #define smp_wmb() barrier()
#define smp_store_release(p, v) \
do { \
compiletime_assert_atomic_type(*p); \
smp_mb(); \
ACCESS_ONCE(*p) = (v); \
} while (0)
#define smp_load_acquire(p) \
({ \
typeof(*p) ___p1 = ACCESS_ONCE(*p); \
compiletime_assert_atomic_type(*p); \
smp_mb(); \
___p1; \
})
#else #else
#define smp_mb() asm volatile("dmb ish" : : : "memory") #define smp_mb() asm volatile("dmb ish" : : : "memory")
#define smp_rmb() asm volatile("dmb ishld" : : : "memory") #define smp_rmb() asm volatile("dmb ishld" : : : "memory")
#define smp_wmb() asm volatile("dmb ishst" : : : "memory") #define smp_wmb() asm volatile("dmb ishst" : : : "memory")
#define smp_store_release(p, v) \
do { \
compiletime_assert_atomic_type(*p); \
switch (sizeof(*p)) { \
case 4: \
asm volatile ("stlr %w1, %0" \
: "=Q" (*p) : "r" (v) : "memory"); \
break; \
case 8: \
asm volatile ("stlr %1, %0" \
: "=Q" (*p) : "r" (v) : "memory"); \
break; \
} \
} while (0)
#define smp_load_acquire(p) \
({ \
typeof(*p) ___p1; \
compiletime_assert_atomic_type(*p); \
switch (sizeof(*p)) { \
case 4: \
asm volatile ("ldar %w0, %1" \
: "=r" (___p1) : "Q" (*p) : "memory"); \
break; \
case 8: \
asm volatile ("ldar %0, %1" \
: "=r" (___p1) : "Q" (*p) : "memory"); \
break; \
} \
___p1; \
})
#endif #endif
#define read_barrier_depends() do { } while(0) #define read_barrier_depends() do { } while(0)
......
...@@ -45,13 +45,36 @@ ...@@ -45,13 +45,36 @@
# define smp_rmb() rmb() # define smp_rmb() rmb()
# define smp_wmb() wmb() # define smp_wmb() wmb()
# define smp_read_barrier_depends() read_barrier_depends() # define smp_read_barrier_depends() read_barrier_depends()
#else #else
# define smp_mb() barrier() # define smp_mb() barrier()
# define smp_rmb() barrier() # define smp_rmb() barrier()
# define smp_wmb() barrier() # define smp_wmb() barrier()
# define smp_read_barrier_depends() do { } while(0) # define smp_read_barrier_depends() do { } while(0)
#endif #endif
/*
* IA64 GCC turns volatile stores into st.rel and volatile loads into ld.acq no
* need for asm trickery!
*/
#define smp_store_release(p, v) \
do { \
compiletime_assert_atomic_type(*p); \
barrier(); \
ACCESS_ONCE(*p) = (v); \
} while (0)
#define smp_load_acquire(p) \
({ \
typeof(*p) ___p1 = ACCESS_ONCE(*p); \
compiletime_assert_atomic_type(*p); \
barrier(); \
___p1; \
})
/* /*
* XXX check on this ---I suspect what Linus really wants here is * XXX check on this ---I suspect what Linus really wants here is
* acquire vs release semantics but we can't discuss this stuff with * acquire vs release semantics but we can't discuss this stuff with
......
...@@ -85,4 +85,19 @@ static inline void fence(void) ...@@ -85,4 +85,19 @@ static inline void fence(void)
#define smp_read_barrier_depends() do { } while (0) #define smp_read_barrier_depends() do { } while (0)
#define set_mb(var, value) do { var = value; smp_mb(); } while (0) #define set_mb(var, value) do { var = value; smp_mb(); } while (0)
#define smp_store_release(p, v) \
do { \
compiletime_assert_atomic_type(*p); \
smp_mb(); \
ACCESS_ONCE(*p) = (v); \
} while (0)
#define smp_load_acquire(p) \
({ \
typeof(*p) ___p1 = ACCESS_ONCE(*p); \
compiletime_assert_atomic_type(*p); \
smp_mb(); \
___p1; \
})
#endif /* _ASM_METAG_BARRIER_H */ #endif /* _ASM_METAG_BARRIER_H */
...@@ -180,4 +180,19 @@ ...@@ -180,4 +180,19 @@
#define nudge_writes() mb() #define nudge_writes() mb()
#endif #endif
#define smp_store_release(p, v) \
do { \
compiletime_assert_atomic_type(*p); \
smp_mb(); \
ACCESS_ONCE(*p) = (v); \
} while (0)
#define smp_load_acquire(p) \
({ \
typeof(*p) ___p1 = ACCESS_ONCE(*p); \
compiletime_assert_atomic_type(*p); \
smp_mb(); \
___p1; \
})
#endif /* __ASM_BARRIER_H */ #endif /* __ASM_BARRIER_H */
...@@ -45,11 +45,15 @@ ...@@ -45,11 +45,15 @@
# define SMPWMB eieio # define SMPWMB eieio
#endif #endif
#define __lwsync() __asm__ __volatile__ (stringify_in_c(LWSYNC) : : :"memory")
#define smp_mb() mb() #define smp_mb() mb()
#define smp_rmb() __asm__ __volatile__ (stringify_in_c(LWSYNC) : : :"memory") #define smp_rmb() __lwsync()
#define smp_wmb() __asm__ __volatile__ (stringify_in_c(SMPWMB) : : :"memory") #define smp_wmb() __asm__ __volatile__ (stringify_in_c(SMPWMB) : : :"memory")
#define smp_read_barrier_depends() read_barrier_depends() #define smp_read_barrier_depends() read_barrier_depends()
#else #else
#define __lwsync() barrier()
#define smp_mb() barrier() #define smp_mb() barrier()
#define smp_rmb() barrier() #define smp_rmb() barrier()
#define smp_wmb() barrier() #define smp_wmb() barrier()
...@@ -65,4 +69,19 @@ ...@@ -65,4 +69,19 @@
#define data_barrier(x) \ #define data_barrier(x) \
asm volatile("twi 0,%0,0; isync" : : "r" (x) : "memory"); asm volatile("twi 0,%0,0; isync" : : "r" (x) : "memory");
#define smp_store_release(p, v) \
do { \
compiletime_assert_atomic_type(*p); \
__lwsync(); \
ACCESS_ONCE(*p) = (v); \
} while (0)
#define smp_load_acquire(p) \
({ \
typeof(*p) ___p1 = ACCESS_ONCE(*p); \
compiletime_assert_atomic_type(*p); \
__lwsync(); \
___p1; \
})
#endif /* _ASM_POWERPC_BARRIER_H */ #endif /* _ASM_POWERPC_BARRIER_H */
...@@ -32,4 +32,19 @@ ...@@ -32,4 +32,19 @@
#define set_mb(var, value) do { var = value; mb(); } while (0) #define set_mb(var, value) do { var = value; mb(); } while (0)
#define smp_store_release(p, v) \
do { \
compiletime_assert_atomic_type(*p); \
barrier(); \
ACCESS_ONCE(*p) = (v); \
} while (0)
#define smp_load_acquire(p) \
({ \
typeof(*p) ___p1 = ACCESS_ONCE(*p); \
compiletime_assert_atomic_type(*p); \
barrier(); \
___p1; \
})
#endif /* __ASM_BARRIER_H */ #endif /* __ASM_BARRIER_H */
...@@ -53,4 +53,19 @@ do { __asm__ __volatile__("ba,pt %%xcc, 1f\n\t" \ ...@@ -53,4 +53,19 @@ do { __asm__ __volatile__("ba,pt %%xcc, 1f\n\t" \
#define smp_read_barrier_depends() do { } while(0) #define smp_read_barrier_depends() do { } while(0)
#define smp_store_release(p, v) \
do { \
compiletime_assert_atomic_type(*p); \
barrier(); \
ACCESS_ONCE(*p) = (v); \
} while (0)
#define smp_load_acquire(p) \
({ \
typeof(*p) ___p1 = ACCESS_ONCE(*p); \
compiletime_assert_atomic_type(*p); \
barrier(); \
___p1; \
})
#endif /* !(__SPARC64_BARRIER_H) */ #endif /* !(__SPARC64_BARRIER_H) */
...@@ -92,12 +92,53 @@ ...@@ -92,12 +92,53 @@
#endif #endif
#define smp_read_barrier_depends() read_barrier_depends() #define smp_read_barrier_depends() read_barrier_depends()
#define set_mb(var, value) do { (void)xchg(&var, value); } while (0) #define set_mb(var, value) do { (void)xchg(&var, value); } while (0)
#else #else /* !SMP */
#define smp_mb() barrier() #define smp_mb() barrier()
#define smp_rmb() barrier() #define smp_rmb() barrier()
#define smp_wmb() barrier() #define smp_wmb() barrier()
#define smp_read_barrier_depends() do { } while (0) #define smp_read_barrier_depends() do { } while (0)
#define set_mb(var, value) do { var = value; barrier(); } while (0) #define set_mb(var, value) do { var = value; barrier(); } while (0)
#endif /* SMP */
#if defined(CONFIG_X86_OOSTORE) || defined(CONFIG_X86_PPRO_FENCE)
/*
* For either of these options x86 doesn't have a strong TSO memory
* model and we should fall back to full barriers.
*/
#define smp_store_release(p, v) \
do { \
compiletime_assert_atomic_type(*p); \
smp_mb(); \
ACCESS_ONCE(*p) = (v); \
} while (0)
#define smp_load_acquire(p) \
({ \
typeof(*p) ___p1 = ACCESS_ONCE(*p); \
compiletime_assert_atomic_type(*p); \
smp_mb(); \
___p1; \
})
#else /* regular x86 TSO memory ordering */
#define smp_store_release(p, v) \
do { \
compiletime_assert_atomic_type(*p); \
barrier(); \
ACCESS_ONCE(*p) = (v); \
} while (0)
#define smp_load_acquire(p) \
({ \
typeof(*p) ___p1 = ACCESS_ONCE(*p); \
compiletime_assert_atomic_type(*p); \
barrier(); \
___p1; \
})
#endif #endif
/* /*
......
...@@ -46,5 +46,20 @@ ...@@ -46,5 +46,20 @@
#define read_barrier_depends() do {} while (0) #define read_barrier_depends() do {} while (0)
#define smp_read_barrier_depends() do {} while (0) #define smp_read_barrier_depends() do {} while (0)
#define smp_store_release(p, v) \
do { \
compiletime_assert_atomic_type(*p); \
smp_mb(); \
ACCESS_ONCE(*p) = (v); \
} while (0)
#define smp_load_acquire(p) \
({ \
typeof(*p) ___p1 = ACCESS_ONCE(*p); \
compiletime_assert_atomic_type(*p); \
smp_mb(); \
___p1; \
})
#endif /* !__ASSEMBLY__ */ #endif /* !__ASSEMBLY__ */
#endif /* __ASM_GENERIC_BARRIER_H */ #endif /* __ASM_GENERIC_BARRIER_H */
...@@ -302,6 +302,11 @@ void ftrace_likely_update(struct ftrace_branch_data *f, int val, int expect); ...@@ -302,6 +302,11 @@ void ftrace_likely_update(struct ftrace_branch_data *f, int val, int expect);
# define __same_type(a, b) __builtin_types_compatible_p(typeof(a), typeof(b)) # define __same_type(a, b) __builtin_types_compatible_p(typeof(a), typeof(b))
#endif #endif
/* Is this type a native word size -- useful for atomic operations */
#ifndef __native_word
# define __native_word(t) (sizeof(t) == sizeof(int) || sizeof(t) == sizeof(long))
#endif
/* Compile time object size, -1 for unknown */ /* Compile time object size, -1 for unknown */
#ifndef __compiletime_object_size #ifndef __compiletime_object_size
# define __compiletime_object_size(obj) -1 # define __compiletime_object_size(obj) -1
...@@ -341,6 +346,10 @@ void ftrace_likely_update(struct ftrace_branch_data *f, int val, int expect); ...@@ -341,6 +346,10 @@ void ftrace_likely_update(struct ftrace_branch_data *f, int val, int expect);
#define compiletime_assert(condition, msg) \ #define compiletime_assert(condition, msg) \
_compiletime_assert(condition, msg, __compiletime_assert_, __LINE__) _compiletime_assert(condition, msg, __compiletime_assert_, __LINE__)
#define compiletime_assert_atomic_type(t) \
compiletime_assert(__native_word(t), \
"Need native word sized stores/loads for atomicity.")
/* /*
* Prevent the compiler from merging or refetching accesses. The compiler * Prevent the compiler from merging or refetching accesses. The compiler
* is also forbidden from reordering successive instances of ACCESS_ONCE(), * is also forbidden from reordering successive instances of ACCESS_ONCE(),
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment