Commit 2c9046b7 authored by Palmer Dabbelt

RISC-V: Assorted memory model fixes

These fixes fall into three categories

* The definition of __smp_{store_release,load_acquire}, which allows us to
  avoid emitting a full fence when one is unnecessary.
* Fixes to avoid relying on the behavior of "*.aqrl" atomics, as those
  are specified in the currently released RISC-V memory model draft in
  a way that makes them useless for Linux.  This might change in the
  future, but now the code matches the memory model spec as it's written
  so at least we're getting closer to something sane.  The actual fix is
  to delete the RISC-V specific atomics and drop back to generic
  versions that use the new fences from above.
* Cleanups to our atomic macros, which are mostly non-functional
  changes.

Unfortunately I haven't tested these as thoroughly as I probably should
have, but I've poked through the code and they seem generally OK.
parents 1cead2d7 5ce6c1f3
This diff is collapsed.
...@@ -38,6 +38,21 @@ ...@@ -38,6 +38,21 @@
#define __smp_rmb() RISCV_FENCE(r,r) #define __smp_rmb() RISCV_FENCE(r,r)
#define __smp_wmb() RISCV_FENCE(w,w) #define __smp_wmb() RISCV_FENCE(w,w)
/*
 * __smp_store_release(p, v) - store v to *p with release ordering.
 *
 * Checks at compile time that *p has an atomic-capable type, then issues
 * RISCV_FENCE(rw,w) before performing the store with WRITE_ONCE().  The
 * fence must stay ahead of the store.
 */
#define __smp_store_release(p, v) \
do { \
compiletime_assert_atomic_type(*p); \
RISCV_FENCE(rw,w); \
WRITE_ONCE(*p, v); \
} while (0)
/*
 * __smp_load_acquire(p) - load *p with acquire ordering.
 *
 * Reads *p with READ_ONCE() into a temporary, checks at compile time that
 * *p has an atomic-capable type, issues RISCV_FENCE(r,rw) after the load,
 * and evaluates to the value that was read.  The fence must stay behind
 * the load.
 */
#define __smp_load_acquire(p) \
({ \
typeof(*p) ___p1 = READ_ONCE(*p); \
compiletime_assert_atomic_type(*p); \
RISCV_FENCE(r,rw); \
___p1; \
})
/* /*
* This is a very specific barrier: it's currently only used in two places in * This is a very specific barrier: it's currently only used in two places in
* the kernel, both in the scheduler. See include/linux/spinlock.h for the two * the kernel, both in the scheduler. See include/linux/spinlock.h for the two
......
...@@ -17,8 +17,9 @@ ...@@ -17,8 +17,9 @@
#include <linux/bug.h> #include <linux/bug.h>
#include <asm/barrier.h> #include <asm/barrier.h>
#include <asm/fence.h>
#define __xchg(new, ptr, size, asm_or) \ #define __xchg_relaxed(ptr, new, size) \
({ \ ({ \
__typeof__(ptr) __ptr = (ptr); \ __typeof__(ptr) __ptr = (ptr); \
__typeof__(new) __new = (new); \ __typeof__(new) __new = (new); \
...@@ -26,14 +27,14 @@ ...@@ -26,14 +27,14 @@
switch (size) { \ switch (size) { \
case 4: \ case 4: \
__asm__ __volatile__ ( \ __asm__ __volatile__ ( \
"amoswap.w" #asm_or " %0, %2, %1" \ " amoswap.w %0, %2, %1\n" \
: "=r" (__ret), "+A" (*__ptr) \ : "=r" (__ret), "+A" (*__ptr) \
: "r" (__new) \ : "r" (__new) \
: "memory"); \ : "memory"); \
break; \ break; \
case 8: \ case 8: \
__asm__ __volatile__ ( \ __asm__ __volatile__ ( \
"amoswap.d" #asm_or " %0, %2, %1" \ " amoswap.d %0, %2, %1\n" \
: "=r" (__ret), "+A" (*__ptr) \ : "=r" (__ret), "+A" (*__ptr) \
: "r" (__new) \ : "r" (__new) \
: "memory"); \ : "memory"); \
...@@ -44,7 +45,114 @@ ...@@ -44,7 +45,114 @@
__ret; \ __ret; \
}) })
#define xchg(ptr, x) (__xchg((x), (ptr), sizeof(*(ptr)), .aqrl)) #define xchg_relaxed(ptr, x) \
({ \
__typeof__(*(ptr)) _x_ = (x); \
(__typeof__(*(ptr))) __xchg_relaxed((ptr), \
_x_, sizeof(*(ptr))); \
})
/*
 * __xchg_acquire(ptr, new, size) - atomic exchange followed by an acquire
 * barrier.
 *
 * For size 4 an amoswap.w is emitted, for size 8 an amoswap.d; in both
 * cases the AMO is followed by RISCV_ACQUIRE_BARRIER inside the same asm
 * statement.  Any other size triggers BUILD_BUG().  Evaluates to __ret,
 * the output register of the AMO (the value previously held at *ptr).
 */
#define __xchg_acquire(ptr, new, size) \
({ \
__typeof__(ptr) __ptr = (ptr); \
__typeof__(new) __new = (new); \
__typeof__(*(ptr)) __ret; \
switch (size) { \
case 4: \
__asm__ __volatile__ ( \
" amoswap.w %0, %2, %1\n" \
RISCV_ACQUIRE_BARRIER \
: "=r" (__ret), "+A" (*__ptr) \
: "r" (__new) \
: "memory"); \
break; \
case 8: \
__asm__ __volatile__ ( \
" amoswap.d %0, %2, %1\n" \
RISCV_ACQUIRE_BARRIER \
: "=r" (__ret), "+A" (*__ptr) \
: "r" (__new) \
: "memory"); \
break; \
default: \
BUILD_BUG(); \
} \
__ret; \
})
/*
 * xchg_acquire(ptr, x) - typed front end for __xchg_acquire().
 *
 * x is converted to the type of *ptr before being handed on, the operand
 * size is taken from sizeof(*ptr), and the result is cast back to the
 * pointed-to type.
 */
#define xchg_acquire(ptr, x) \
({ \
	__typeof__(*(ptr)) _val_ = (x); \
	(__typeof__(*(ptr))) \
		__xchg_acquire((ptr), _val_, sizeof(*(ptr))); \
})
/*
 * __xchg_release(ptr, new, size) - atomic exchange preceded by a release
 * barrier.
 *
 * RISCV_RELEASE_BARRIER is emitted first, then an amoswap.w (size 4) or
 * amoswap.d (size 8) in the same asm statement.  Any other size triggers
 * BUILD_BUG().  Evaluates to __ret, the output register of the AMO (the
 * value previously held at *ptr).
 */
#define __xchg_release(ptr, new, size) \
({ \
__typeof__(ptr) __ptr = (ptr); \
__typeof__(new) __new = (new); \
__typeof__(*(ptr)) __ret; \
switch (size) { \
case 4: \
__asm__ __volatile__ ( \
RISCV_RELEASE_BARRIER \
" amoswap.w %0, %2, %1\n" \
: "=r" (__ret), "+A" (*__ptr) \
: "r" (__new) \
: "memory"); \
break; \
case 8: \
__asm__ __volatile__ ( \
RISCV_RELEASE_BARRIER \
" amoswap.d %0, %2, %1\n" \
: "=r" (__ret), "+A" (*__ptr) \
: "r" (__new) \
: "memory"); \
break; \
default: \
BUILD_BUG(); \
} \
__ret; \
})
/*
 * xchg_release(ptr, x) - typed front end for __xchg_release().
 *
 * x is converted to the type of *ptr before being handed on, the operand
 * size is taken from sizeof(*ptr), and the result is cast back to the
 * pointed-to type.
 */
#define xchg_release(ptr, x) \
({ \
	__typeof__(*(ptr)) _val_ = (x); \
	(__typeof__(*(ptr))) \
		__xchg_release((ptr), _val_, sizeof(*(ptr))); \
})
/*
 * __xchg(ptr, new, size) - atomic exchange using the .aqrl AMO form.
 *
 * For size 4 an amoswap.w.aqrl is emitted, for size 8 an amoswap.d.aqrl;
 * no separate fence instruction is added.  Any other size triggers
 * BUILD_BUG().  Evaluates to __ret, the output register of the AMO (the
 * value previously held at *ptr).
 *
 * NOTE(review): the commit message says reliance on "*.aqrl" atomics is
 * being avoided, yet this variant still uses .aqrl - confirm that this is
 * the intended full-ordering implementation.
 */
#define __xchg(ptr, new, size) \
({ \
__typeof__(ptr) __ptr = (ptr); \
__typeof__(new) __new = (new); \
__typeof__(*(ptr)) __ret; \
switch (size) { \
case 4: \
__asm__ __volatile__ ( \
" amoswap.w.aqrl %0, %2, %1\n" \
: "=r" (__ret), "+A" (*__ptr) \
: "r" (__new) \
: "memory"); \
break; \
case 8: \
__asm__ __volatile__ ( \
" amoswap.d.aqrl %0, %2, %1\n" \
: "=r" (__ret), "+A" (*__ptr) \
: "r" (__new) \
: "memory"); \
break; \
default: \
BUILD_BUG(); \
} \
__ret; \
})
/*
 * xchg(ptr, x) - typed front end for __xchg().
 *
 * x is converted to the type of *ptr before being handed on, the operand
 * size is taken from sizeof(*ptr), and the result is cast back to the
 * pointed-to type.
 */
#define xchg(ptr, x) \
({ \
	__typeof__(*(ptr)) _val_ = (x); \
	(__typeof__(*(ptr))) \
		__xchg((ptr), _val_, sizeof(*(ptr))); \
})
#define xchg32(ptr, x) \ #define xchg32(ptr, x) \
({ \ ({ \
...@@ -63,7 +171,51 @@ ...@@ -63,7 +171,51 @@
* store NEW in MEM. Return the initial value in MEM. Success is * store NEW in MEM. Return the initial value in MEM. Success is
* indicated by comparing RETURN with OLD. * indicated by comparing RETURN with OLD.
*/ */
#define __cmpxchg(ptr, old, new, size, lrb, scb) \ #define __cmpxchg_relaxed(ptr, old, new, size) \
({ \
__typeof__(ptr) __ptr = (ptr); \
__typeof__(*(ptr)) __old = (old); \
__typeof__(*(ptr)) __new = (new); \
__typeof__(*(ptr)) __ret; \
register unsigned int __rc; \
switch (size) { \
case 4: \
__asm__ __volatile__ ( \
"0: lr.w %0, %2\n" \
" bne %0, %z3, 1f\n" \
" sc.w %1, %z4, %2\n" \
" bnez %1, 0b\n" \
"1:\n" \
: "=&r" (__ret), "=&r" (__rc), "+A" (*__ptr) \
: "rJ" (__old), "rJ" (__new) \
: "memory"); \
break; \
case 8: \
__asm__ __volatile__ ( \
"0: lr.d %0, %2\n" \
" bne %0, %z3, 1f\n" \
" sc.d %1, %z4, %2\n" \
" bnez %1, 0b\n" \
"1:\n" \
: "=&r" (__ret), "=&r" (__rc), "+A" (*__ptr) \
: "rJ" (__old), "rJ" (__new) \
: "memory"); \
break; \
default: \
BUILD_BUG(); \
} \
__ret; \
})
/*
 * cmpxchg_relaxed(ptr, o, n) - typed front end for __cmpxchg_relaxed().
 *
 * Both the expected value o and the replacement n are converted to the
 * type of *ptr, the operand size is taken from sizeof(*ptr), and the
 * result is cast back to the pointed-to type.
 */
#define cmpxchg_relaxed(ptr, o, n) \
({ \
	__typeof__(*(ptr)) _expect_ = (o); \
	__typeof__(*(ptr)) _desire_ = (n); \
	(__typeof__(*(ptr))) \
		__cmpxchg_relaxed((ptr), _expect_, _desire_, \
				  sizeof(*(ptr))); \
})
#define __cmpxchg_acquire(ptr, old, new, size) \
({ \ ({ \
__typeof__(ptr) __ptr = (ptr); \ __typeof__(ptr) __ptr = (ptr); \
__typeof__(*(ptr)) __old = (old); \ __typeof__(*(ptr)) __old = (old); \
...@@ -73,24 +225,116 @@ ...@@ -73,24 +225,116 @@
switch (size) { \ switch (size) { \
case 4: \ case 4: \
__asm__ __volatile__ ( \ __asm__ __volatile__ ( \
"0:" \ "0: lr.w %0, %2\n" \
"lr.w" #scb " %0, %2\n" \ " bne %0, %z3, 1f\n" \
"bne %0, %z3, 1f\n" \ " sc.w %1, %z4, %2\n" \
"sc.w" #lrb " %1, %z4, %2\n" \ " bnez %1, 0b\n" \
"bnez %1, 0b\n" \ RISCV_ACQUIRE_BARRIER \
"1:" \ "1:\n" \
: "=&r" (__ret), "=&r" (__rc), "+A" (*__ptr) \ : "=&r" (__ret), "=&r" (__rc), "+A" (*__ptr) \
: "rJ" (__old), "rJ" (__new) \ : "rJ" (__old), "rJ" (__new) \
: "memory"); \ : "memory"); \
break; \ break; \
case 8: \ case 8: \
__asm__ __volatile__ ( \ __asm__ __volatile__ ( \
"0:" \ "0: lr.d %0, %2\n" \
"lr.d" #scb " %0, %2\n" \ " bne %0, %z3, 1f\n" \
"bne %0, %z3, 1f\n" \ " sc.d %1, %z4, %2\n" \
"sc.d" #lrb " %1, %z4, %2\n" \ " bnez %1, 0b\n" \
"bnez %1, 0b\n" \ RISCV_ACQUIRE_BARRIER \
"1:" \ "1:\n" \
: "=&r" (__ret), "=&r" (__rc), "+A" (*__ptr) \
: "rJ" (__old), "rJ" (__new) \
: "memory"); \
break; \
default: \
BUILD_BUG(); \
} \
__ret; \
})
/*
 * cmpxchg_acquire(ptr, o, n) - typed front end for __cmpxchg_acquire().
 *
 * Both the expected value o and the replacement n are converted to the
 * type of *ptr, the operand size is taken from sizeof(*ptr), and the
 * result is cast back to the pointed-to type.
 */
#define cmpxchg_acquire(ptr, o, n) \
({ \
	__typeof__(*(ptr)) _expect_ = (o); \
	__typeof__(*(ptr)) _desire_ = (n); \
	(__typeof__(*(ptr))) \
		__cmpxchg_acquire((ptr), _expect_, _desire_, \
				  sizeof(*(ptr))); \
})
/*
 * __cmpxchg_release(ptr, old, new, size) - compare-and-exchange preceded
 * by a release barrier.
 *
 * RISCV_RELEASE_BARRIER is emitted once, ahead of the retry loop.  The
 * loop (label 0) load-reserves *ptr with lr.w/lr.d, branches to label 1
 * if the loaded value differs from old, otherwise attempts the store of
 * new with sc.w/sc.d and retries from label 0 while the status written
 * to __rc is non-zero.  Sizes other than 4 and 8 trigger BUILD_BUG().
 * Evaluates to __ret, the value loaded by the last lr; the caller
 * compares it with old to detect success.
 */
#define __cmpxchg_release(ptr, old, new, size) \
({ \
__typeof__(ptr) __ptr = (ptr); \
__typeof__(*(ptr)) __old = (old); \
__typeof__(*(ptr)) __new = (new); \
__typeof__(*(ptr)) __ret; \
register unsigned int __rc; \
switch (size) { \
case 4: \
__asm__ __volatile__ ( \
RISCV_RELEASE_BARRIER \
"0: lr.w %0, %2\n" \
" bne %0, %z3, 1f\n" \
" sc.w %1, %z4, %2\n" \
" bnez %1, 0b\n" \
"1:\n" \
: "=&r" (__ret), "=&r" (__rc), "+A" (*__ptr) \
: "rJ" (__old), "rJ" (__new) \
: "memory"); \
break; \
case 8: \
__asm__ __volatile__ ( \
RISCV_RELEASE_BARRIER \
"0: lr.d %0, %2\n" \
" bne %0, %z3, 1f\n" \
" sc.d %1, %z4, %2\n" \
" bnez %1, 0b\n" \
"1:\n" \
: "=&r" (__ret), "=&r" (__rc), "+A" (*__ptr) \
: "rJ" (__old), "rJ" (__new) \
: "memory"); \
break; \
default: \
BUILD_BUG(); \
} \
__ret; \
})
/*
 * cmpxchg_release(ptr, o, n) - typed front end for __cmpxchg_release().
 *
 * Both the expected value o and the replacement n are converted to the
 * type of *ptr, the operand size is taken from sizeof(*ptr), and the
 * result is cast back to the pointed-to type.
 */
#define cmpxchg_release(ptr, o, n) \
({ \
	__typeof__(*(ptr)) _expect_ = (o); \
	__typeof__(*(ptr)) _desire_ = (n); \
	(__typeof__(*(ptr))) \
		__cmpxchg_release((ptr), _expect_, _desire_, \
				  sizeof(*(ptr))); \
})
#define __cmpxchg(ptr, old, new, size) \
({ \
__typeof__(ptr) __ptr = (ptr); \
__typeof__(*(ptr)) __old = (old); \
__typeof__(*(ptr)) __new = (new); \
__typeof__(*(ptr)) __ret; \
register unsigned int __rc; \
switch (size) { \
case 4: \
__asm__ __volatile__ ( \
"0: lr.w %0, %2\n" \
" bne %0, %z3, 1f\n" \
" sc.w.rl %1, %z4, %2\n" \
" bnez %1, 0b\n" \
" fence rw, rw\n" \
"1:\n" \
: "=&r" (__ret), "=&r" (__rc), "+A" (*__ptr) \
: "rJ" (__old), "rJ" (__new) \
: "memory"); \
break; \
case 8: \
__asm__ __volatile__ ( \
"0: lr.d %0, %2\n" \
" bne %0, %z3, 1f\n" \
" sc.d.rl %1, %z4, %2\n" \
" bnez %1, 0b\n" \
" fence rw, rw\n" \
"1:\n" \
: "=&r" (__ret), "=&r" (__rc), "+A" (*__ptr) \ : "=&r" (__ret), "=&r" (__rc), "+A" (*__ptr) \
: "rJ" (__old), "rJ" (__new) \ : "rJ" (__old), "rJ" (__new) \
: "memory"); \ : "memory"); \
...@@ -102,10 +346,15 @@ ...@@ -102,10 +346,15 @@
}) })
#define cmpxchg(ptr, o, n) \ #define cmpxchg(ptr, o, n) \
(__cmpxchg((ptr), (o), (n), sizeof(*(ptr)), .aqrl, .aqrl)) ({ \
__typeof__(*(ptr)) _o_ = (o); \
__typeof__(*(ptr)) _n_ = (n); \
(__typeof__(*(ptr))) __cmpxchg((ptr), \
_o_, _n_, sizeof(*(ptr))); \
})
#define cmpxchg_local(ptr, o, n) \ #define cmpxchg_local(ptr, o, n) \
(__cmpxchg((ptr), (o), (n), sizeof(*(ptr)), , )) (__cmpxchg_relaxed((ptr), (o), (n), sizeof(*(ptr))))
#define cmpxchg32(ptr, o, n) \ #define cmpxchg32(ptr, o, n) \
({ \ ({ \
...@@ -116,7 +365,7 @@ ...@@ -116,7 +365,7 @@
#define cmpxchg32_local(ptr, o, n) \ #define cmpxchg32_local(ptr, o, n) \
({ \ ({ \
BUILD_BUG_ON(sizeof(*(ptr)) != 4); \ BUILD_BUG_ON(sizeof(*(ptr)) != 4); \
cmpxchg_local((ptr), (o), (n)); \ cmpxchg_relaxed((ptr), (o), (n)); \
}) })
#define cmpxchg64(ptr, o, n) \ #define cmpxchg64(ptr, o, n) \
...@@ -128,7 +377,7 @@ ...@@ -128,7 +377,7 @@
#define cmpxchg64_local(ptr, o, n) \ #define cmpxchg64_local(ptr, o, n) \
({ \ ({ \
BUILD_BUG_ON(sizeof(*(ptr)) != 8); \ BUILD_BUG_ON(sizeof(*(ptr)) != 8); \
cmpxchg_local((ptr), (o), (n)); \ cmpxchg_relaxed((ptr), (o), (n)); \
}) })
#endif /* _ASM_RISCV_CMPXCHG_H */ #endif /* _ASM_RISCV_CMPXCHG_H */
/*
 * Assembly-string fragments for acquire/release ordering, meant to be
 * pasted into inline asm templates next to an atomic instruction.
 */
#ifndef _ASM_RISCV_FENCE_H
#define _ASM_RISCV_FENCE_H
#ifdef CONFIG_SMP
/* SMP: real fence instructions, each terminated with a newline. */
#define RISCV_ACQUIRE_BARRIER "\tfence r , rw\n"
#define RISCV_RELEASE_BARRIER "\tfence rw, w\n"
#else
/* !SMP: expand to nothing, so no fence is added to the asm template. */
#define RISCV_ACQUIRE_BARRIER
#define RISCV_RELEASE_BARRIER
#endif
#endif /* _ASM_RISCV_FENCE_H */
...@@ -17,6 +17,7 @@ ...@@ -17,6 +17,7 @@
#include <linux/kernel.h> #include <linux/kernel.h>
#include <asm/current.h> #include <asm/current.h>
#include <asm/fence.h>
/* /*
* Simple spin lock operations. These provide no fairness guarantees. * Simple spin lock operations. These provide no fairness guarantees.
...@@ -28,10 +29,7 @@ ...@@ -28,10 +29,7 @@
static inline void arch_spin_unlock(arch_spinlock_t *lock) static inline void arch_spin_unlock(arch_spinlock_t *lock)
{ {
__asm__ __volatile__ ( smp_store_release(&lock->lock, 0);
"amoswap.w.rl x0, x0, %0"
: "=A" (lock->lock)
:: "memory");
} }
static inline int arch_spin_trylock(arch_spinlock_t *lock) static inline int arch_spin_trylock(arch_spinlock_t *lock)
...@@ -39,7 +37,8 @@ static inline int arch_spin_trylock(arch_spinlock_t *lock) ...@@ -39,7 +37,8 @@ static inline int arch_spin_trylock(arch_spinlock_t *lock)
int tmp = 1, busy; int tmp = 1, busy;
__asm__ __volatile__ ( __asm__ __volatile__ (
"amoswap.w.aq %0, %2, %1" " amoswap.w %0, %2, %1\n"
RISCV_ACQUIRE_BARRIER
: "=r" (busy), "+A" (lock->lock) : "=r" (busy), "+A" (lock->lock)
: "r" (tmp) : "r" (tmp)
: "memory"); : "memory");
...@@ -68,8 +67,9 @@ static inline void arch_read_lock(arch_rwlock_t *lock) ...@@ -68,8 +67,9 @@ static inline void arch_read_lock(arch_rwlock_t *lock)
"1: lr.w %1, %0\n" "1: lr.w %1, %0\n"
" bltz %1, 1b\n" " bltz %1, 1b\n"
" addi %1, %1, 1\n" " addi %1, %1, 1\n"
" sc.w.aq %1, %1, %0\n" " sc.w %1, %1, %0\n"
" bnez %1, 1b\n" " bnez %1, 1b\n"
RISCV_ACQUIRE_BARRIER
: "+A" (lock->lock), "=&r" (tmp) : "+A" (lock->lock), "=&r" (tmp)
:: "memory"); :: "memory");
} }
...@@ -82,8 +82,9 @@ static inline void arch_write_lock(arch_rwlock_t *lock) ...@@ -82,8 +82,9 @@ static inline void arch_write_lock(arch_rwlock_t *lock)
"1: lr.w %1, %0\n" "1: lr.w %1, %0\n"
" bnez %1, 1b\n" " bnez %1, 1b\n"
" li %1, -1\n" " li %1, -1\n"
" sc.w.aq %1, %1, %0\n" " sc.w %1, %1, %0\n"
" bnez %1, 1b\n" " bnez %1, 1b\n"
RISCV_ACQUIRE_BARRIER
: "+A" (lock->lock), "=&r" (tmp) : "+A" (lock->lock), "=&r" (tmp)
:: "memory"); :: "memory");
} }
...@@ -96,8 +97,9 @@ static inline int arch_read_trylock(arch_rwlock_t *lock) ...@@ -96,8 +97,9 @@ static inline int arch_read_trylock(arch_rwlock_t *lock)
"1: lr.w %1, %0\n" "1: lr.w %1, %0\n"
" bltz %1, 1f\n" " bltz %1, 1f\n"
" addi %1, %1, 1\n" " addi %1, %1, 1\n"
" sc.w.aq %1, %1, %0\n" " sc.w %1, %1, %0\n"
" bnez %1, 1b\n" " bnez %1, 1b\n"
RISCV_ACQUIRE_BARRIER
"1:\n" "1:\n"
: "+A" (lock->lock), "=&r" (busy) : "+A" (lock->lock), "=&r" (busy)
:: "memory"); :: "memory");
...@@ -113,8 +115,9 @@ static inline int arch_write_trylock(arch_rwlock_t *lock) ...@@ -113,8 +115,9 @@ static inline int arch_write_trylock(arch_rwlock_t *lock)
"1: lr.w %1, %0\n" "1: lr.w %1, %0\n"
" bnez %1, 1f\n" " bnez %1, 1f\n"
" li %1, -1\n" " li %1, -1\n"
" sc.w.aq %1, %1, %0\n" " sc.w %1, %1, %0\n"
" bnez %1, 1b\n" " bnez %1, 1b\n"
RISCV_ACQUIRE_BARRIER
"1:\n" "1:\n"
: "+A" (lock->lock), "=&r" (busy) : "+A" (lock->lock), "=&r" (busy)
:: "memory"); :: "memory");
...@@ -125,7 +128,8 @@ static inline int arch_write_trylock(arch_rwlock_t *lock) ...@@ -125,7 +128,8 @@ static inline int arch_write_trylock(arch_rwlock_t *lock)
static inline void arch_read_unlock(arch_rwlock_t *lock) static inline void arch_read_unlock(arch_rwlock_t *lock)
{ {
__asm__ __volatile__( __asm__ __volatile__(
"amoadd.w.rl x0, %1, %0" RISCV_RELEASE_BARRIER
" amoadd.w x0, %1, %0\n"
: "+A" (lock->lock) : "+A" (lock->lock)
: "r" (-1) : "r" (-1)
: "memory"); : "memory");
...@@ -133,10 +137,7 @@ static inline void arch_read_unlock(arch_rwlock_t *lock) ...@@ -133,10 +137,7 @@ static inline void arch_read_unlock(arch_rwlock_t *lock)
static inline void arch_write_unlock(arch_rwlock_t *lock) static inline void arch_write_unlock(arch_rwlock_t *lock)
{ {
__asm__ __volatile__ ( smp_store_release(&lock->lock, 0);
"amoswap.w.rl x0, x0, %0"
: "=A" (lock->lock)
:: "memory");
} }
#endif /* _ASM_RISCV_SPINLOCK_H */ #endif /* _ASM_RISCV_SPINLOCK_H */
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment