Commit 0ea366f5 authored by Will Deacon

arm64: atomics: prefetch the destination word for write prior to stxr

The cost of changing a cacheline from shared to exclusive state can be
significant, especially when this is triggered by an exclusive store,
since it may result in having to retry the transaction.

This patch makes use of prfm to prefetch cachelines for write prior to
ldxr/stxr loops when using the ll/sc atomic routines.
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
parent a82e6238
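
For illustration, below is a minimal sketch of the loop shape this patch produces, written as a standalone C function rather than the kernel's ATOMIC_OP macro; the function name is hypothetical and an AArch64 GCC/Clang toolchain is assumed. The prfm pstl1strm hint requests the cacheline for write (streaming, into L1) ahead of the exclusive pair, so the stxr is less likely to fail and force a retry.

/*
 * Illustrative sketch only (not the kernel macro verbatim): an LL/SC
 * atomic add with the write-stream prefetch ahead of the exclusive pair.
 * Assumes an AArch64 compiler; the function name is hypothetical.
 */
static inline void llsc_atomic_add(int i, int *counter)
{
	unsigned long tmp;	/* holds the stxr status flag */
	int result;		/* holds the updated value    */

	asm volatile(
	"	prfm	pstl1strm, %2\n"	/* prefetch for store, streaming, to L1 */
	"1:	ldxr	%w0, %2\n"		/* load-exclusive the counter           */
	"	add	%w0, %w0, %w3\n"	/* apply the operation                  */
	"	stxr	%w1, %w0, %2\n"		/* store-exclusive; %w1 == 0 on success */
	"	cbnz	%w1, 1b"		/* exclusivity lost: retry from 1:      */
	: "=&r" (result), "=&r" (tmp), "+Q" (*counter)
	: "Ir" (i));
}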
@@ -45,6 +45,7 @@ __LL_SC_PREFIX(atomic_##op(int i, atomic_t *v)) \
 int result; \
 \
 asm volatile("// atomic_" #op "\n" \
+" prfm pstl1strm, %2\n" \
 "1: ldxr %w0, %2\n" \
 " " #asm_op " %w0, %w0, %w3\n" \
 " stxr %w1, %w0, %2\n" \
@@ -62,6 +63,7 @@ __LL_SC_PREFIX(atomic_##op##_return(int i, atomic_t *v)) \
 int result; \
 \
 asm volatile("// atomic_" #op "_return\n" \
+" prfm pstl1strm, %2\n" \
 "1: ldxr %w0, %2\n" \
 " " #asm_op " %w0, %w0, %w3\n" \
 " stlxr %w1, %w0, %2\n" \
@@ -98,6 +100,7 @@ __LL_SC_PREFIX(atomic_cmpxchg(atomic_t *ptr, int old, int new))
 int oldval;
 
 asm volatile("// atomic_cmpxchg\n"
+" prfm pstl1strm, %2\n"
 "1: ldxr %w1, %2\n"
 " eor %w0, %w1, %w3\n"
 " cbnz %w0, 2f\n"
@@ -121,6 +124,7 @@ __LL_SC_PREFIX(atomic64_##op(long i, atomic64_t *v)) \
 unsigned long tmp; \
 \
 asm volatile("// atomic64_" #op "\n" \
+" prfm pstl1strm, %2\n" \
 "1: ldxr %0, %2\n" \
 " " #asm_op " %0, %0, %3\n" \
 " stxr %w1, %0, %2\n" \
@@ -138,6 +142,7 @@ __LL_SC_PREFIX(atomic64_##op##_return(long i, atomic64_t *v)) \
 unsigned long tmp; \
 \
 asm volatile("// atomic64_" #op "_return\n" \
+" prfm pstl1strm, %2\n" \
 "1: ldxr %0, %2\n" \
 " " #asm_op " %0, %0, %3\n" \
 " stlxr %w1, %0, %2\n" \
@@ -174,6 +179,7 @@ __LL_SC_PREFIX(atomic64_cmpxchg(atomic64_t *ptr, long old, long new))
 unsigned long res;
 
 asm volatile("// atomic64_cmpxchg\n"
+" prfm pstl1strm, %2\n"
 "1: ldxr %1, %2\n"
 " eor %0, %1, %3\n"
 " cbnz %w0, 2f\n"
@@ -196,6 +202,7 @@ __LL_SC_PREFIX(atomic64_dec_if_positive(atomic64_t *v))
 unsigned long tmp;
 
 asm volatile("// atomic64_dec_if_positive\n"
+" prfm pstl1strm, %2\n"
 "1: ldxr %0, %2\n"
 " subs %0, %0, #1\n"
 " b.mi 2f\n"
@@ -220,6 +227,7 @@ __LL_SC_PREFIX(__cmpxchg_case_##name(volatile void *ptr, \
 unsigned long tmp, oldval; \
 \
 asm volatile( \
+" prfm pstl1strm, %2\n" \
 "1: ldxr" #sz "\t%" #w "[oldval], %[v]\n" \
 " eor %" #w "[tmp], %" #w "[oldval], %" #w "[old]\n" \
 " cbnz %" #w "[tmp], 2f\n" \
@@ -259,6 +267,7 @@ __LL_SC_PREFIX(__cmpxchg_double##name(unsigned long old1, \
 unsigned long tmp, ret; \
 \
 asm volatile("// __cmpxchg_double" #name "\n" \
+" prfm pstl1strm, %2\n" \
 "1: ldxp %0, %1, %2\n" \
 " eor %0, %0, %3\n" \
 " eor %1, %1, %4\n" \
...
@@ -33,12 +33,14 @@ static inline unsigned long __xchg(unsigned long x, volatile void *ptr, int size
 case 1:
 asm volatile(ARM64_LSE_ATOMIC_INSN(
 /* LL/SC */
+" prfm pstl1strm, %2\n"
 "1: ldxrb %w0, %2\n"
 " stlxrb %w1, %w3, %2\n"
 " cbnz %w1, 1b\n"
 " dmb ish",
 /* LSE atomics */
 " nop\n"
+" nop\n"
 " swpalb %w3, %w0, %2\n"
 " nop\n"
 " nop")
@@ -49,12 +51,14 @@ static inline unsigned long __xchg(unsigned long x, volatile void *ptr, int size
 case 2:
 asm volatile(ARM64_LSE_ATOMIC_INSN(
 /* LL/SC */
+" prfm pstl1strm, %2\n"
 "1: ldxrh %w0, %2\n"
 " stlxrh %w1, %w3, %2\n"
 " cbnz %w1, 1b\n"
 " dmb ish",
 /* LSE atomics */
 " nop\n"
+" nop\n"
 " swpalh %w3, %w0, %2\n"
 " nop\n"
 " nop")
@@ -65,12 +69,14 @@ static inline unsigned long __xchg(unsigned long x, volatile void *ptr, int size
 case 4:
 asm volatile(ARM64_LSE_ATOMIC_INSN(
 /* LL/SC */
+" prfm pstl1strm, %2\n"
 "1: ldxr %w0, %2\n"
 " stlxr %w1, %w3, %2\n"
 " cbnz %w1, 1b\n"
 " dmb ish",
 /* LSE atomics */
 " nop\n"
+" nop\n"
 " swpal %w3, %w0, %2\n"
 " nop\n"
 " nop")
@@ -81,12 +87,14 @@ static inline unsigned long __xchg(unsigned long x, volatile void *ptr, int size
 case 8:
 asm volatile(ARM64_LSE_ATOMIC_INSN(
 /* LL/SC */
+" prfm pstl1strm, %2\n"
 "1: ldxr %0, %2\n"
 " stlxr %w1, %3, %2\n"
 " cbnz %w1, 1b\n"
 " dmb ish",
 /* LSE atomics */
 " nop\n"
+" nop\n"
 " swpal %3, %0, %2\n"
 " nop\n"
 " nop")
...
@@ -30,6 +30,7 @@
 asm volatile( \
 ALTERNATIVE("nop", SET_PSTATE_PAN(0), ARM64_HAS_PAN, \
 CONFIG_ARM64_PAN) \
+" prfm pstl1strm, %2\n" \
 "1: ldxr %w1, %2\n" \
 insn "\n" \
 "2: stlxr %w3, %w0, %2\n" \
@@ -120,6 +121,7 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
 return -EFAULT;
 
 asm volatile("// futex_atomic_cmpxchg_inatomic\n"
+" prfm pstl1strm, %2\n"
 "1: ldxr %w1, %2\n"
 " sub %w3, %w1, %w4\n"
 " cbnz %w3, 3f\n"
...
@@ -31,6 +31,7 @@ ENTRY( \name )
 eor w0, w0, w3 // Clear low bits
 mov x2, #1
 add x1, x1, x0, lsr #3 // Get word offset
+alt_lse " prfm pstl1strm, [x1]", "nop"
 lsl x3, x2, x3 // Create mask
 alt_lse "1: ldxr x2, [x1]", "\lse x3, [x1]"
@@ -48,6 +49,7 @@ ENTRY( \name )
 eor w0, w0, w3 // Clear low bits
 mov x2, #1
 add x1, x1, x0, lsr #3 // Get word offset
+alt_lse " prfm pstl1strm, [x1]", "nop"
 lsl x4, x2, x3 // Create mask
 alt_lse "1: ldxr x2, [x1]", "\lse x4, x2, [x1]"
...