Commit 1d84afaf, authored by Alexandre Ghiti, committed by Palmer Dabbelt

riscv: Fix fully ordered LR/SC xchg[8|16]() implementations

The fully ordered versions of xchg[8|16]() using LR/SC lack the
necessary memory barriers to guarantee the order.

Fix this by matching what is already implemented in the fully ordered
versions of cmpxchg() using LR/SC.
Suggested-by: Andrea Parri <parri.andrea@gmail.com>
Reported-by: Andrea Parri <parri.andrea@gmail.com>
Closes: https://lore.kernel.org/linux-riscv/ZlYbupL5XgzgA0MX@andrea/T/#u
Fixes: a8ed2b7a ("riscv/cmpxchg: Implement xchg for variables of size 1 and 2")
Signed-off-by: Alexandre Ghiti <alexghiti@rivosinc.com>
Reviewed-by: Andrea Parri <parri.andrea@gmail.com>
Link: https://lore.kernel.org/r/20240530145546.394248-1-alexghiti@rivosinc.com
Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
parent 982a7eb9
@@ -10,7 +10,7 @@
 #include <asm/fence.h>

-#define __arch_xchg_masked(prepend, append, r, p, n)			\
+#define __arch_xchg_masked(sc_sfx, prepend, append, r, p, n)		\
 ({									\
 	u32 *__ptr32b = (u32 *)((ulong)(p) & ~0x3);			\
 	ulong __s = ((ulong)(p) & (0x4 - sizeof(*p))) * BITS_PER_BYTE;	\
@@ -25,7 +25,7 @@
 		"0:	lr.w %0, %2\n"					\
 		"	and  %1, %0, %z4\n"				\
 		"	or   %1, %1, %z3\n"				\
-		"	sc.w %1, %1, %2\n"				\
+		"	sc.w" sc_sfx " %1, %1, %2\n"			\
 		"	bnez %1, 0b\n"					\
 		append							\
 		: "=&r" (__retx), "=&r" (__rc), "+A" (*(__ptr32b))	\
@@ -46,7 +46,8 @@
 		: "memory");						\
 })

-#define _arch_xchg(ptr, new, sfx, prepend, append)			\
+#define _arch_xchg(ptr, new, sc_sfx, swap_sfx, prepend,		\
+		   sc_append, swap_append)				\
 ({									\
 	__typeof__(ptr) __ptr = (ptr);					\
 	__typeof__(*(__ptr)) __new = (new);				\
@@ -55,15 +56,15 @@
 	switch (sizeof(*__ptr)) {					\
 	case 1:								\
 	case 2:								\
-		__arch_xchg_masked(prepend, append,			\
+		__arch_xchg_masked(sc_sfx, prepend, sc_append,		\
 				   __ret, __ptr, __new);		\
 		break;							\
 	case 4:								\
-		__arch_xchg(".w" sfx, prepend, append,			\
+		__arch_xchg(".w" swap_sfx, prepend, swap_append,	\
 			    __ret, __ptr, __new);			\
 		break;							\
 	case 8:								\
-		__arch_xchg(".d" sfx, prepend, append,			\
+		__arch_xchg(".d" swap_sfx, prepend, swap_append,	\
 			    __ret, __ptr, __new);			\
 		break;							\
 	default:							\
@@ -73,16 +74,17 @@
 })

 #define arch_xchg_relaxed(ptr, x)					\
-	_arch_xchg(ptr, x, "", "", "")
+	_arch_xchg(ptr, x, "", "", "", "", "")

 #define arch_xchg_acquire(ptr, x)					\
-	_arch_xchg(ptr, x, "", "", RISCV_ACQUIRE_BARRIER)
+	_arch_xchg(ptr, x, "", "", "",					\
+		   RISCV_ACQUIRE_BARRIER, RISCV_ACQUIRE_BARRIER)

 #define arch_xchg_release(ptr, x)					\
-	_arch_xchg(ptr, x, "", RISCV_RELEASE_BARRIER, "")
+	_arch_xchg(ptr, x, "", "", RISCV_RELEASE_BARRIER, "", "")

 #define arch_xchg(ptr, x)						\
-	_arch_xchg(ptr, x, ".aqrl", "", "")
+	_arch_xchg(ptr, x, ".rl", ".aqrl", "", RISCV_FULL_BARRIER, "")

 #define xchg32(ptr, x)							\
 ({									\
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment