Commit c427f695 authored by Peter Zijlstra's avatar Peter Zijlstra Committed by Ingo Molnar

locking/mutex: Optimize __mutex_trylock_fast()

Use try_cmpxchg to avoid the pointless TEST instruction..
And add the (missing) atomic_long_try_cmpxchg*() wrappery.

On x86_64 this gives:

0000000000000710 <mutex_lock>:						0000000000000710 <mutex_lock>:
 710:   65 48 8b 14 25 00 00    mov    %gs:0x0,%rdx                      710:   65 48 8b 14 25 00 00    mov    %gs:0x0,%rdx
 717:   00 00                                                            717:   00 00
                        715: R_X86_64_32S       current_task                                    715: R_X86_64_32S       current_task
 719:   31 c0                   xor    %eax,%eax                         719:   31 c0                   xor    %eax,%eax
 71b:   f0 48 0f b1 17          lock cmpxchg %rdx,(%rdi)                 71b:   f0 48 0f b1 17          lock cmpxchg %rdx,(%rdi)
 720:   48 85 c0                test   %rax,%rax                         720:   75 02                   jne    724 <mutex_lock+0x14>
 723:   75 02                   jne    727 <mutex_lock+0x17>             722:   f3 c3                   repz retq
 725:   f3 c3                   repz retq                                724:   eb da                   jmp    700 <__mutex_lock_slowpath>
 727:   eb d7                   jmp    700 <__mutex_lock_slowpath>       726:   66 2e 0f 1f 84 00 00    nopw   %cs:0x0(%rax,%rax,1)
 729:   0f 1f 80 00 00 00 00    nopl   0x0(%rax)                         72d:   00 00 00

On ARM64 this gives:

000000000000638 <mutex_lock>:						0000000000000638 <mutex_lock>:
     638:       d5384101        mrs     x1, sp_el0                           638:       d5384101        mrs     x1, sp_el0
     63c:       d2800002        mov     x2, #0x0                             63c:       d2800002        mov     x2, #0x0
     640:       f9800011        prfm    pstl1strm, [x0]                      640:       f9800011        prfm    pstl1strm, [x0]
     644:       c85ffc03        ldaxr   x3, [x0]                             644:       c85ffc03        ldaxr   x3, [x0]
     648:       ca020064        eor     x4, x3, x2                           648:       ca020064        eor     x4, x3, x2
     64c:       b5000064        cbnz    x4, 658 <mutex_lock+0x20>            64c:       b5000064        cbnz    x4, 658 <mutex_lock+0x20>
     650:       c8047c01        stxr    w4, x1, [x0]                         650:       c8047c01        stxr    w4, x1, [x0]
     654:       35ffff84        cbnz    w4, 644 <mutex_lock+0xc>             654:       35ffff84        cbnz    w4, 644 <mutex_lock+0xc>
     658:       b40000c3        cbz     x3, 670 <mutex_lock+0x38>            658:       b5000043        cbnz    x3, 660 <mutex_lock+0x28>
     65c:       a9bf7bfd        stp     x29, x30, [sp,#-16]!                 65c:       d65f03c0        ret
     660:       910003fd        mov     x29, sp                              660:       a9bf7bfd        stp     x29, x30, [sp,#-16]!
     664:       97ffffef        bl      620 <__mutex_lock_slowpath>          664:       910003fd        mov     x29, sp
     668:       a8c17bfd        ldp     x29, x30, [sp],#16                   668:       97ffffee        bl      620 <__mutex_lock_slowpath>
     66c:       d65f03c0        ret                                          66c:       a8c17bfd        ldp     x29, x30, [sp],#16
     670:       d65f03c0        ret                                          670:       d65f03c0        ret
Reported-by: default avatarMatthew Wilcox <mawilcox@microsoft.com>
Acked-by: default avatarWill Deacon <will.deacon@arm.com>
Signed-off-by: default avatarPeter Zijlstra (Intel) <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: default avatarIngo Molnar <mingo@kernel.org>
parent de5b55c1
...@@ -25,6 +25,7 @@ typedef atomic64_t atomic_long_t; ...@@ -25,6 +25,7 @@ typedef atomic64_t atomic_long_t;
#define ATOMIC_LONG_INIT(i) ATOMIC64_INIT(i) #define ATOMIC_LONG_INIT(i) ATOMIC64_INIT(i)
#define ATOMIC_LONG_PFX(x) atomic64 ## x #define ATOMIC_LONG_PFX(x) atomic64 ## x
#define ATOMIC_LONG_TYPE s64
#else #else
...@@ -32,6 +33,7 @@ typedef atomic_t atomic_long_t; ...@@ -32,6 +33,7 @@ typedef atomic_t atomic_long_t;
#define ATOMIC_LONG_INIT(i) ATOMIC_INIT(i) #define ATOMIC_LONG_INIT(i) ATOMIC_INIT(i)
#define ATOMIC_LONG_PFX(x) atomic ## x #define ATOMIC_LONG_PFX(x) atomic ## x
#define ATOMIC_LONG_TYPE int
#endif #endif
...@@ -90,6 +92,21 @@ ATOMIC_LONG_ADD_SUB_OP(sub, _release) ...@@ -90,6 +92,21 @@ ATOMIC_LONG_ADD_SUB_OP(sub, _release)
#define atomic_long_cmpxchg(l, old, new) \ #define atomic_long_cmpxchg(l, old, new) \
(ATOMIC_LONG_PFX(_cmpxchg)((ATOMIC_LONG_PFX(_t) *)(l), (old), (new))) (ATOMIC_LONG_PFX(_cmpxchg)((ATOMIC_LONG_PFX(_t) *)(l), (old), (new)))
#define atomic_long_try_cmpxchg_relaxed(l, old, new) \
(ATOMIC_LONG_PFX(_try_cmpxchg_relaxed)((ATOMIC_LONG_PFX(_t) *)(l), \
(ATOMIC_LONG_TYPE *)(old), (ATOMIC_LONG_TYPE)(new)))
#define atomic_long_try_cmpxchg_acquire(l, old, new) \
(ATOMIC_LONG_PFX(_try_cmpxchg_acquire)((ATOMIC_LONG_PFX(_t) *)(l), \
(ATOMIC_LONG_TYPE *)(old), (ATOMIC_LONG_TYPE)(new)))
#define atomic_long_try_cmpxchg_release(l, old, new) \
(ATOMIC_LONG_PFX(_try_cmpxchg_release)((ATOMIC_LONG_PFX(_t) *)(l), \
(ATOMIC_LONG_TYPE *)(old), (ATOMIC_LONG_TYPE)(new)))
#define atomic_long_try_cmpxchg(l, old, new) \
(ATOMIC_LONG_PFX(_try_cmpxchg)((ATOMIC_LONG_PFX(_t) *)(l), \
(ATOMIC_LONG_TYPE *)(old), (ATOMIC_LONG_TYPE)(new)))
#define atomic_long_xchg_relaxed(v, new) \ #define atomic_long_xchg_relaxed(v, new) \
(ATOMIC_LONG_PFX(_xchg_relaxed)((ATOMIC_LONG_PFX(_t) *)(v), (new))) (ATOMIC_LONG_PFX(_xchg_relaxed)((ATOMIC_LONG_PFX(_t) *)(v), (new)))
#define atomic_long_xchg_acquire(v, new) \ #define atomic_long_xchg_acquire(v, new) \
......
...@@ -139,8 +139,9 @@ static inline bool __mutex_trylock(struct mutex *lock) ...@@ -139,8 +139,9 @@ static inline bool __mutex_trylock(struct mutex *lock)
static __always_inline bool __mutex_trylock_fast(struct mutex *lock) static __always_inline bool __mutex_trylock_fast(struct mutex *lock)
{ {
unsigned long curr = (unsigned long)current; unsigned long curr = (unsigned long)current;
unsigned long zero = 0UL;
if (!atomic_long_cmpxchg_acquire(&lock->owner, 0UL, curr)) if (atomic_long_try_cmpxchg_acquire(&lock->owner, &zero, curr))
return true; return true;
return false; return false;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment