Commit 48fc82c4 authored by Linus Torvalds

Merge tag 'locking-core-2024-05-13' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull locking updates from Ingo Molnar:

 - Over a dozen code generation micro-optimizations for the atomic
   and spinlock code

 - Add more __ro_after_init attributes

 - Robustify the lockevent_*() macros

* tag 'locking-core-2024-05-13' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  locking/pvqspinlock/x86: Use _Q_LOCKED_VAL in PV_UNLOCK_ASM macro
  locking/qspinlock/x86: Micro-optimize virt_spin_lock()
  locking/atomic/x86: Merge __arch{,_try}_cmpxchg64_emu_local() with __arch{,_try}_cmpxchg64_emu()
  locking/atomic/x86: Introduce arch_try_cmpxchg64_local()
  locking/pvqspinlock/x86: Remove redundant CMP after CMPXCHG in __raw_callee_save___pv_queued_spin_unlock()
  locking/pvqspinlock: Use try_cmpxchg() in qspinlock_paravirt.h
  locking/pvqspinlock: Use try_cmpxchg_acquire() in trylock_clear_pending()
  locking/qspinlock: Use atomic_try_cmpxchg_relaxed() in xchg_tail()
  locking/atomic/x86: Define arch_atomic_sub() family using arch_atomic_add() functions
  locking/atomic/x86: Rewrite x86_32 arch_atomic64_{,fetch}_{and,or,xor}() functions
  locking/atomic/x86: Introduce arch_atomic64_read_nonatomic() to x86_32
  locking/atomic/x86: Introduce arch_atomic64_try_cmpxchg() to x86_32
  locking/atomic/x86: Introduce arch_try_cmpxchg64() for !CONFIG_X86_CMPXCHG64
  locking/atomic/x86: Modernize x86_32 arch_{,try_}_cmpxchg64{,_local}()
  locking/atomic/x86: Correct the definition of __arch_try_cmpxchg128()
  x86/tsc: Make __use_tsc __ro_after_init
  x86/kvm: Make kvm_async_pf_enabled __ro_after_init
  context_tracking: Make context_tracking_key __ro_after_init
  jump_label,module: Don't alloc static_key_mod for __ro_after_init keys
  locking/qspinlock: Always evaluate lockevent* non-event parameter once
parents a7c840ba 532453e7
......@@ -86,11 +86,7 @@ static __always_inline int arch_atomic_add_return(int i, atomic_t *v)
}
#define arch_atomic_add_return arch_atomic_add_return
static __always_inline int arch_atomic_sub_return(int i, atomic_t *v)
{
return arch_atomic_add_return(-i, v);
}
#define arch_atomic_sub_return arch_atomic_sub_return
#define arch_atomic_sub_return(i, v) arch_atomic_add_return(-(i), v)
static __always_inline int arch_atomic_fetch_add(int i, atomic_t *v)
{
......@@ -98,11 +94,7 @@ static __always_inline int arch_atomic_fetch_add(int i, atomic_t *v)
}
#define arch_atomic_fetch_add arch_atomic_fetch_add
static __always_inline int arch_atomic_fetch_sub(int i, atomic_t *v)
{
return xadd(&v->counter, -i);
}
#define arch_atomic_fetch_sub arch_atomic_fetch_sub
#define arch_atomic_fetch_sub(i, v) arch_atomic_fetch_add(-(i), v)
static __always_inline int arch_atomic_cmpxchg(atomic_t *v, int old, int new)
{
......
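Both subtraction helpers are now thin macro wrappers that forward to the add variants with a negated argument. The parentheses in -(i) matter because the argument can be an arbitrary expression; a minimal sketch (illustrative macros, not the kernel's) of what goes wrong without them:

    #include <stdio.h>

    #define NEG_BAD(i)  (-i)    /* hypothetical: negation without parentheses */
    #define NEG_GOOD(i) (-(i))  /* the form the patch uses */

    int main(void)
    {
        int a = 5, b = 3;

        /* NEG_BAD(a - b) expands to (-a - b) == -8, not -(a - b) == -2 */
        printf("bad:  %d\n", NEG_BAD(a - b));
        printf("good: %d\n", NEG_GOOD(a - b));
        return 0;
    }

The same reasoning covers arch_atomic_fetch_sub(): xadd() with a negated addend is exactly fetch_add(-(i)), so a separate inline function buys nothing.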
......@@ -14,6 +14,32 @@ typedef struct {
#define ATOMIC64_INIT(val) { (val) }
/*
* Read an atomic64_t non-atomically.
*
* This is intended to be used in cases where a subsequent atomic operation
* will handle the torn value, and can be used to prime the first iteration
* of unconditional try_cmpxchg() loops, e.g.:
*
* s64 val = arch_atomic64_read_nonatomic(v);
* do { } while (!arch_atomic64_try_cmpxchg(v, &val, val OP i));
*
* This is NOT safe to use where the value is not always checked by a
* subsequent atomic operation, such as in conditional try_cmpxchg() loops
* that can break before the atomic operation, e.g.:
*
* s64 val = arch_atomic64_read_nonatomic(v);
* do {
* if (condition(val))
* break;
* } while (!arch_atomic64_try_cmpxchg(v, &val, val OP i));
*/
static __always_inline s64 arch_atomic64_read_nonatomic(const atomic64_t *v)
{
/* See comment in arch_atomic_read(). */
return __READ_ONCE(v->counter);
}
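For illustration, the pattern this comment describes looks as follows in portable C11 atomics (a userspace sketch, not the kernel API; on x86-32 the initial plain load may additionally be torn, which the loop corrects the same way):

    #include <stdatomic.h>
    #include <stdio.h>

    /* Prime the loop with a cheap relaxed read; on failure,
     * compare_exchange_weak() rewrites 'val' with the freshly observed
     * value, so a stale (or torn) first read costs at most one extra
     * iteration. */
    static long long fetch_or64(_Atomic long long *v, long long i)
    {
        long long val = atomic_load_explicit(v, memory_order_relaxed);

        while (!atomic_compare_exchange_weak(v, &val, val | i))
            ;
        return val;    /* value observed just before our update */
    }

    int main(void)
    {
        _Atomic long long v = 0x0f;

        printf("old=%llx new=%llx\n", fetch_or64(&v, 0xf0), (long long)v);
        return 0;
    }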
#define __ATOMIC64_DECL(sym) void atomic64_##sym(atomic64_t *, ...)
#ifndef ATOMIC64_EXPORT
#define ATOMIC64_DECL_ONE __ATOMIC64_DECL
......@@ -61,12 +87,18 @@ ATOMIC64_DECL(add_unless);
#undef __ATOMIC64_DECL
#undef ATOMIC64_EXPORT
static __always_inline s64 arch_atomic64_cmpxchg(atomic64_t *v, s64 o, s64 n)
static __always_inline s64 arch_atomic64_cmpxchg(atomic64_t *v, s64 old, s64 new)
{
return arch_cmpxchg64(&v->counter, o, n);
return arch_cmpxchg64(&v->counter, old, new);
}
#define arch_atomic64_cmpxchg arch_atomic64_cmpxchg
static __always_inline bool arch_atomic64_try_cmpxchg(atomic64_t *v, s64 *old, s64 new)
{
return arch_try_cmpxchg64(&v->counter, old, new);
}
#define arch_atomic64_try_cmpxchg arch_atomic64_try_cmpxchg
static __always_inline s64 arch_atomic64_xchg(atomic64_t *v, s64 n)
{
s64 o;
......@@ -195,69 +227,62 @@ static __always_inline s64 arch_atomic64_dec_if_positive(atomic64_t *v)
static __always_inline void arch_atomic64_and(s64 i, atomic64_t *v)
{
s64 old, c = 0;
s64 val = arch_atomic64_read_nonatomic(v);
while ((old = arch_atomic64_cmpxchg(v, c, c & i)) != c)
c = old;
do { } while (!arch_atomic64_try_cmpxchg(v, &val, val & i));
}
static __always_inline s64 arch_atomic64_fetch_and(s64 i, atomic64_t *v)
{
s64 old, c = 0;
s64 val = arch_atomic64_read_nonatomic(v);
while ((old = arch_atomic64_cmpxchg(v, c, c & i)) != c)
c = old;
do { } while (!arch_atomic64_try_cmpxchg(v, &val, val & i));
return old;
return val;
}
#define arch_atomic64_fetch_and arch_atomic64_fetch_and
static __always_inline void arch_atomic64_or(s64 i, atomic64_t *v)
{
s64 old, c = 0;
s64 val = arch_atomic64_read_nonatomic(v);
while ((old = arch_atomic64_cmpxchg(v, c, c | i)) != c)
c = old;
do { } while (!arch_atomic64_try_cmpxchg(v, &val, val | i));
}
static __always_inline s64 arch_atomic64_fetch_or(s64 i, atomic64_t *v)
{
s64 old, c = 0;
s64 val = arch_atomic64_read_nonatomic(v);
while ((old = arch_atomic64_cmpxchg(v, c, c | i)) != c)
c = old;
do { } while (!arch_atomic64_try_cmpxchg(v, &val, val | i));
return old;
return val;
}
#define arch_atomic64_fetch_or arch_atomic64_fetch_or
static __always_inline void arch_atomic64_xor(s64 i, atomic64_t *v)
{
s64 old, c = 0;
s64 val = arch_atomic64_read_nonatomic(v);
while ((old = arch_atomic64_cmpxchg(v, c, c ^ i)) != c)
c = old;
do { } while (!arch_atomic64_try_cmpxchg(v, &val, val ^ i));
}
static __always_inline s64 arch_atomic64_fetch_xor(s64 i, atomic64_t *v)
{
s64 old, c = 0;
s64 val = arch_atomic64_read_nonatomic(v);
while ((old = arch_atomic64_cmpxchg(v, c, c ^ i)) != c)
c = old;
do { } while (!arch_atomic64_try_cmpxchg(v, &val, val ^ i));
return old;
return val;
}
#define arch_atomic64_fetch_xor arch_atomic64_fetch_xor
static __always_inline s64 arch_atomic64_fetch_add(s64 i, atomic64_t *v)
{
s64 old, c = 0;
s64 val = arch_atomic64_read_nonatomic(v);
while ((old = arch_atomic64_cmpxchg(v, c, c + i)) != c)
c = old;
do { } while (!arch_atomic64_try_cmpxchg(v, &val, val + i));
return old;
return val;
}
#define arch_atomic64_fetch_add arch_atomic64_fetch_add
......
......@@ -80,11 +80,7 @@ static __always_inline s64 arch_atomic64_add_return(s64 i, atomic64_t *v)
}
#define arch_atomic64_add_return arch_atomic64_add_return
static __always_inline s64 arch_atomic64_sub_return(s64 i, atomic64_t *v)
{
return arch_atomic64_add_return(-i, v);
}
#define arch_atomic64_sub_return arch_atomic64_sub_return
#define arch_atomic64_sub_return(i, v) arch_atomic64_add_return(-(i), v)
static __always_inline s64 arch_atomic64_fetch_add(s64 i, atomic64_t *v)
{
......@@ -92,11 +88,7 @@ static __always_inline s64 arch_atomic64_fetch_add(s64 i, atomic64_t *v)
}
#define arch_atomic64_fetch_add arch_atomic64_fetch_add
static __always_inline s64 arch_atomic64_fetch_sub(s64 i, atomic64_t *v)
{
return xadd(&v->counter, -i);
}
#define arch_atomic64_fetch_sub arch_atomic64_fetch_sub
#define arch_atomic64_fetch_sub(i, v) arch_atomic64_fetch_add(-(i), v)
static __always_inline s64 arch_atomic64_cmpxchg(atomic64_t *v, s64 old, s64 new)
{
......
......@@ -3,103 +3,150 @@
#define _ASM_X86_CMPXCHG_32_H
/*
* Note: if you use set64_bit(), __cmpxchg64(), or their variants,
* Note: if you use __cmpxchg64() or its variants,
* you need to test for the feature in boot_cpu_data.
*/
#ifdef CONFIG_X86_CMPXCHG64
#define arch_cmpxchg64(ptr, o, n) \
((__typeof__(*(ptr)))__cmpxchg64((ptr), (unsigned long long)(o), \
(unsigned long long)(n)))
#define arch_cmpxchg64_local(ptr, o, n) \
((__typeof__(*(ptr)))__cmpxchg64_local((ptr), (unsigned long long)(o), \
(unsigned long long)(n)))
#define arch_try_cmpxchg64(ptr, po, n) \
__try_cmpxchg64((ptr), (unsigned long long *)(po), \
(unsigned long long)(n))
#endif
union __u64_halves {
u64 full;
struct {
u32 low, high;
};
};
#define __arch_cmpxchg64(_ptr, _old, _new, _lock) \
({ \
union __u64_halves o = { .full = (_old), }, \
n = { .full = (_new), }; \
\
asm volatile(_lock "cmpxchg8b %[ptr]" \
: [ptr] "+m" (*(_ptr)), \
"+a" (o.low), "+d" (o.high) \
: "b" (n.low), "c" (n.high) \
: "memory"); \
\
o.full; \
})
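The union gives the asm constraints named access to the two 32-bit halves: CMPXCHG8B wants the old value in EDX:EAX and the new value in ECX:EBX, and o.low/o.high map onto those register pairs without manual shifts and casts. A small sketch of the layout (assumes little-endian byte order, which x86 guarantees):

    #include <stdint.h>
    #include <stdio.h>

    union u64_halves {            /* mirrors __u64_halves above */
        uint64_t full;
        struct {
            uint32_t low, high;   /* little-endian: low bits first */
        };
    };

    int main(void)
    {
        union u64_halves o = { .full = 0x1122334455667788ULL };

        /* prints low=55667788 high=11223344 */
        printf("low=%08x high=%08x\n", o.low, o.high);
        return 0;
    }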
static __always_inline u64 __cmpxchg64(volatile u64 *ptr, u64 old, u64 new)
{
return __arch_cmpxchg64(ptr, old, new, LOCK_PREFIX);
}
static inline u64 __cmpxchg64(volatile u64 *ptr, u64 old, u64 new)
static __always_inline u64 __cmpxchg64_local(volatile u64 *ptr, u64 old, u64 new)
{
u64 prev;
asm volatile(LOCK_PREFIX "cmpxchg8b %1"
: "=A" (prev),
"+m" (*ptr)
: "b" ((u32)new),
"c" ((u32)(new >> 32)),
"0" (old)
: "memory");
return prev;
return __arch_cmpxchg64(ptr, old, new,);
}
static inline u64 __cmpxchg64_local(volatile u64 *ptr, u64 old, u64 new)
#define __arch_try_cmpxchg64(_ptr, _oldp, _new, _lock) \
({ \
union __u64_halves o = { .full = *(_oldp), }, \
n = { .full = (_new), }; \
bool ret; \
\
asm volatile(_lock "cmpxchg8b %[ptr]" \
CC_SET(e) \
: CC_OUT(e) (ret), \
[ptr] "+m" (*(_ptr)), \
"+a" (o.low), "+d" (o.high) \
: "b" (n.low), "c" (n.high) \
: "memory"); \
\
if (unlikely(!ret)) \
*(_oldp) = o.full; \
\
likely(ret); \
})
static __always_inline bool __try_cmpxchg64(volatile u64 *ptr, u64 *oldp, u64 new)
{
u64 prev;
asm volatile("cmpxchg8b %1"
: "=A" (prev),
"+m" (*ptr)
: "b" ((u32)new),
"c" ((u32)(new >> 32)),
"0" (old)
: "memory");
return prev;
return __arch_try_cmpxchg64(ptr, oldp, new, LOCK_PREFIX);
}
static inline bool __try_cmpxchg64(volatile u64 *ptr, u64 *pold, u64 new)
static __always_inline bool __try_cmpxchg64_local(volatile u64 *ptr, u64 *oldp, u64 new)
{
bool success;
u64 old = *pold;
asm volatile(LOCK_PREFIX "cmpxchg8b %[ptr]"
CC_SET(z)
: CC_OUT(z) (success),
[ptr] "+m" (*ptr),
"+A" (old)
: "b" ((u32)new),
"c" ((u32)(new >> 32))
: "memory");
if (unlikely(!success))
*pold = old;
return success;
return __arch_try_cmpxchg64(ptr, oldp, new,);
}
#ifndef CONFIG_X86_CMPXCHG64
#ifdef CONFIG_X86_CMPXCHG64
#define arch_cmpxchg64 __cmpxchg64
#define arch_cmpxchg64_local __cmpxchg64_local
#define arch_try_cmpxchg64 __try_cmpxchg64
#define arch_try_cmpxchg64_local __try_cmpxchg64_local
#else
/*
* When building a kernel capable of running on the 80386 and 80486, it may
* be necessary to emulate cmpxchg8b, since those CPUs lack the instruction.
*/
#define arch_cmpxchg64(ptr, o, n) \
#define __arch_cmpxchg64_emu(_ptr, _old, _new, _lock_loc, _lock) \
({ \
__typeof__(*(ptr)) __ret; \
__typeof__(*(ptr)) __old = (o); \
__typeof__(*(ptr)) __new = (n); \
alternative_io(LOCK_PREFIX_HERE \
union __u64_halves o = { .full = (_old), }, \
n = { .full = (_new), }; \
\
asm volatile(ALTERNATIVE(_lock_loc \
"call cmpxchg8b_emu", \
"lock; cmpxchg8b (%%esi)" , \
X86_FEATURE_CX8, \
"=A" (__ret), \
"S" ((ptr)), "0" (__old), \
"b" ((unsigned int)__new), \
"c" ((unsigned int)(__new>>32)) \
_lock "cmpxchg8b %[ptr]", X86_FEATURE_CX8) \
: [ptr] "+m" (*(_ptr)), \
"+a" (o.low), "+d" (o.high) \
: "b" (n.low), "c" (n.high), "S" (_ptr) \
: "memory"); \
__ret; })
\
o.full; \
})
static __always_inline u64 arch_cmpxchg64(volatile u64 *ptr, u64 old, u64 new)
{
return __arch_cmpxchg64_emu(ptr, old, new, LOCK_PREFIX_HERE, "lock; ");
}
#define arch_cmpxchg64 arch_cmpxchg64
static __always_inline u64 arch_cmpxchg64_local(volatile u64 *ptr, u64 old, u64 new)
{
return __arch_cmpxchg64_emu(ptr, old, new, ,);
}
#define arch_cmpxchg64_local arch_cmpxchg64_local
#define arch_cmpxchg64_local(ptr, o, n) \
#define __arch_try_cmpxchg64_emu(_ptr, _oldp, _new, _lock_loc, _lock) \
({ \
__typeof__(*(ptr)) __ret; \
__typeof__(*(ptr)) __old = (o); \
__typeof__(*(ptr)) __new = (n); \
alternative_io("call cmpxchg8b_emu", \
"cmpxchg8b (%%esi)" , \
X86_FEATURE_CX8, \
"=A" (__ret), \
"S" ((ptr)), "0" (__old), \
"b" ((unsigned int)__new), \
"c" ((unsigned int)(__new>>32)) \
union __u64_halves o = { .full = *(_oldp), }, \
n = { .full = (_new), }; \
bool ret; \
\
asm volatile(ALTERNATIVE(_lock_loc \
"call cmpxchg8b_emu", \
_lock "cmpxchg8b %[ptr]", X86_FEATURE_CX8) \
CC_SET(e) \
: CC_OUT(e) (ret), \
[ptr] "+m" (*(_ptr)), \
"+a" (o.low), "+d" (o.high) \
: "b" (n.low), "c" (n.high), "S" (_ptr) \
: "memory"); \
__ret; })
\
if (unlikely(!ret)) \
*(_oldp) = o.full; \
\
likely(ret); \
})
static __always_inline bool arch_try_cmpxchg64(volatile u64 *ptr, u64 *oldp, u64 new)
{
return __arch_try_cmpxchg64_emu(ptr, oldp, new, LOCK_PREFIX_HERE, "lock; ");
}
#define arch_try_cmpxchg64 arch_try_cmpxchg64
static __always_inline bool arch_try_cmpxchg64_local(volatile u64 *ptr, u64 *oldp, u64 new)
{
return __arch_try_cmpxchg64_emu(ptr, oldp, new, ,);
}
#define arch_try_cmpxchg64_local arch_try_cmpxchg64_local
#endif
......
......@@ -20,6 +20,12 @@
arch_try_cmpxchg((ptr), (po), (n)); \
})
#define arch_try_cmpxchg64_local(ptr, po, n) \
({ \
BUILD_BUG_ON(sizeof(*(ptr)) != 8); \
arch_try_cmpxchg_local((ptr), (po), (n)); \
})
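BUILD_BUG_ON() makes the wrapper refuse, at compile time, any pointer whose target is not exactly 8 bytes. A userspace analogue (GNU C statement expression with _Static_assert; the macro name is hypothetical):

    #include <stdint.h>
    #include <stdio.h>

    #define check_is_u64(ptr) ({                                \
            _Static_assert(sizeof(*(ptr)) == 8,                 \
                           "needs a 64-bit object");            \
            (ptr);                                              \
    })

    int main(void)
    {
        uint64_t x = 7;

        (void)check_is_u64(&x);    /* compiles */
        /* uint32_t y; check_is_u64(&y);  -- rejected at compile time */
        printf("%llu\n", (unsigned long long)*check_is_u64(&x));
        return 0;
    }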
union __u128_halves {
u128 full;
struct {
......@@ -62,7 +68,7 @@ static __always_inline u128 arch_cmpxchg128_local(volatile u128 *ptr, u128 old,
asm volatile(_lock "cmpxchg16b %[ptr]" \
CC_SET(e) \
: CC_OUT(e) (ret), \
[ptr] "+m" (*ptr), \
[ptr] "+m" (*(_ptr)), \
"+a" (o.low), "+d" (o.high) \
: "b" (n.low), "c" (n.high) \
: "memory"); \
......
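The one-character fix above matters for macro hygiene: the body referenced *ptr while the parameter is _ptr, so it compiled only because every expansion site happened to have a variable named ptr in scope, silently binding to it. A minimal demonstration (illustrative macros, not the kernel's):

    #include <stdio.h>

    #define STORE_BAD(_p, v)  do { *p = (v); } while (0)    /* typo: p, not _p */
    #define STORE_GOOD(_p, v) do { *(_p) = (v); } while (0)

    int main(void)
    {
        int a = 0, b = 0;
        int *p = &a;        /* the accidental capture target */

        STORE_BAD(&b, 1);   /* compiles, but writes a through 'p'! */
        STORE_GOOD(&b, 2);  /* writes b, as intended */
        printf("a=%d b=%d\n", a, b);    /* a=1 b=2 */
        return 0;
    }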
......@@ -85,6 +85,8 @@ DECLARE_STATIC_KEY_TRUE(virt_spin_lock_key);
#define virt_spin_lock virt_spin_lock
static inline bool virt_spin_lock(struct qspinlock *lock)
{
int val;
if (!static_branch_likely(&virt_spin_lock_key))
return false;
......@@ -94,10 +96,13 @@ static inline bool virt_spin_lock(struct qspinlock *lock)
* horrible lock 'holder' preemption issues.
*/
do {
while (atomic_read(&lock->val) != 0)
__retry:
val = atomic_read(&lock->val);
if (val || !atomic_try_cmpxchg(&lock->val, &val, _Q_LOCKED_VAL)) {
cpu_relax();
} while (atomic_cmpxchg(&lock->val, 0, _Q_LOCKED_VAL) != 0);
goto __retry;
}
return true;
}
......
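The rewritten loop folds the spin-read and the cmpxchg into one retry path: atomic_try_cmpxchg() returns a boolean and reuses the comparison the hardware already did, instead of doing a cmpxchg and then comparing its result. A userspace sketch of the same shape in C11 atomics (not the kernel API; sched_yield() stands in for cpu_relax()):

    #include <stdatomic.h>
    #include <sched.h>

    static void virt_lock_sketch(atomic_int *lock)
    {
        int val;

    retry:
        val = atomic_load(lock);
        if (val || !atomic_compare_exchange_strong(lock, &val, 1)) {
            sched_yield();
            goto retry;
        }
        /* lock acquired: observed 0 and swapped in 1 atomically */
    }

    int main(void)
    {
        atomic_int lock = 0;

        virt_lock_sketch(&lock);
        atomic_store(&lock, 0);    /* unlock */
        return 0;
    }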
......@@ -25,9 +25,9 @@ __PV_CALLEE_SAVE_REGS_THUNK(__pv_queued_spin_unlock_slowpath, ".spinlock.text");
*
* void __lockfunc __pv_queued_spin_unlock(struct qspinlock *lock)
* {
* u8 lockval = cmpxchg(&lock->locked, _Q_LOCKED_VAL, 0);
* u8 lockval = _Q_LOCKED_VAL;
*
* if (likely(lockval == _Q_LOCKED_VAL))
* if (try_cmpxchg(&lock->locked, &lockval, 0))
* return;
* pv_queued_spin_unlock_slowpath(lock, lockval);
* }
......@@ -40,10 +40,9 @@ __PV_CALLEE_SAVE_REGS_THUNK(__pv_queued_spin_unlock_slowpath, ".spinlock.text");
#define PV_UNLOCK_ASM \
FRAME_BEGIN \
"push %rdx\n\t" \
"mov $0x1,%eax\n\t" \
"mov $" __stringify(_Q_LOCKED_VAL) ",%eax\n\t" \
"xor %edx,%edx\n\t" \
LOCK_PREFIX "cmpxchg %dl,(%rdi)\n\t" \
"cmp $0x1,%al\n\t" \
"jne .slowpath\n\t" \
"pop %rdx\n\t" \
FRAME_END \
......
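The dropped "cmp $0x1,%al" was pure redundancy: CMPXCHG itself leaves ZF set exactly when the exchange happened, so "jne .slowpath" can consume that flag directly. A userspace sketch of the mechanism (x86, GCC/Clang flag-output constraints, which is what the kernel's CC_SET()/CC_OUT() macros emit; the helper name is hypothetical):

    #include <stdbool.h>
    #include <stdio.h>

    static bool try_cmpxchg_u8(volatile unsigned char *ptr,
                               unsigned char *oldp, unsigned char new)
    {
        bool ok;

        /* "=@cce" hands ZF straight to the compiler: no separate CMP */
        asm volatile("lock cmpxchgb %[new], %[ptr]"
                     : "=@cce" (ok), [ptr] "+m" (*ptr), "+a" (*oldp)
                     : [new] "q" (new)
                     : "memory");
        return ok;
    }

    int main(void)
    {
        unsigned char lock = 1, old = 1;

        printf("unlocked: %d\n", try_cmpxchg_u8(&lock, &old, 0));  /* 1 */
        return 0;
    }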
......@@ -44,7 +44,7 @@
#include <asm/svm.h>
#include <asm/e820/api.h>
DEFINE_STATIC_KEY_FALSE(kvm_async_pf_enabled);
DEFINE_STATIC_KEY_FALSE_RO(kvm_async_pf_enabled);
static int kvmapf = 1;
......
......@@ -44,7 +44,7 @@ EXPORT_SYMBOL(tsc_khz);
static int __read_mostly tsc_unstable;
static unsigned int __initdata tsc_early_khz;
static DEFINE_STATIC_KEY_FALSE(__use_tsc);
static DEFINE_STATIC_KEY_FALSE_RO(__use_tsc);
int tsc_clocksource_reliable;
......
......@@ -180,6 +180,11 @@ static inline bool is_kernel_rodata(unsigned long addr)
addr < (unsigned long)__end_rodata;
}
static inline bool is_kernel_ro_after_init(unsigned long addr)
{
return addr >= (unsigned long)__start_ro_after_init &&
addr < (unsigned long)__end_ro_after_init;
}
/**
* is_kernel_inittext - checks if the pointer address is located in the
* .init.text section
......
......@@ -216,6 +216,7 @@ extern struct jump_entry __start___jump_table[];
extern struct jump_entry __stop___jump_table[];
extern void jump_label_init(void);
extern void jump_label_init_ro(void);
extern void jump_label_lock(void);
extern void jump_label_unlock(void);
extern void arch_jump_label_transform(struct jump_entry *entry,
......@@ -265,6 +266,8 @@ static __always_inline void jump_label_init(void)
static_key_initialized = true;
}
static __always_inline void jump_label_init_ro(void) { }
static __always_inline bool static_key_false(struct static_key *key)
{
if (unlikely_notrace(static_key_count(key) > 0))
......
......@@ -1415,6 +1415,7 @@ static void mark_readonly(void)
* insecure pages which are W+X.
*/
flush_module_init_free_work();
jump_label_init_ro();
mark_rodata_ro();
debug_checkwx();
rodata_test();
......
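jump_label_init_ro() has to run here because mark_rodata_ro() is the point of no return for anything in .data..ro_after_init. A userspace model of that life cycle (Linux, illustrative only): the data is writable while "booting", then the page is sealed read-only:

    #include <stdio.h>
    #include <sys/mman.h>

    int main(void)
    {
        int *key = mmap(NULL, 4096, PROT_READ | PROT_WRITE,
                        MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
        if (key == MAP_FAILED)
            return 1;

        *key = 1;                           /* init-time write: fine */
        mprotect(key, 4096, PROT_READ);     /* the mark_rodata_ro() moment */
        /* *key = 2;  -- would now fault with SIGSEGV */
        printf("sealed value: %d\n", *key);
        return 0;
    }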
......@@ -432,7 +432,7 @@ static __always_inline void ct_kernel_enter(bool user, int offset) { }
#define CREATE_TRACE_POINTS
#include <trace/events/context_tracking.h>
DEFINE_STATIC_KEY_FALSE(context_tracking_key);
DEFINE_STATIC_KEY_FALSE_RO(context_tracking_key);
EXPORT_SYMBOL_GPL(context_tracking_key);
static noinstr bool context_tracking_recursion_enter(void)
......
......@@ -530,6 +530,45 @@ void __init jump_label_init(void)
cpus_read_unlock();
}
static inline bool static_key_sealed(struct static_key *key)
{
return (key->type & JUMP_TYPE_LINKED) && !(key->type & ~JUMP_TYPE_MASK);
}
static inline void static_key_seal(struct static_key *key)
{
unsigned long type = key->type & JUMP_TYPE_TRUE;
key->type = JUMP_TYPE_LINKED | type;
}
void jump_label_init_ro(void)
{
struct jump_entry *iter_start = __start___jump_table;
struct jump_entry *iter_stop = __stop___jump_table;
struct jump_entry *iter;
if (WARN_ON_ONCE(!static_key_initialized))
return;
cpus_read_lock();
jump_label_lock();
for (iter = iter_start; iter < iter_stop; iter++) {
struct static_key *iterk = jump_entry_key(iter);
if (!is_kernel_ro_after_init((unsigned long)iterk))
continue;
if (static_key_sealed(iterk))
continue;
static_key_seal(iterk);
}
jump_label_unlock();
cpus_read_unlock();
}
#ifdef CONFIG_MODULES
enum jump_label_type jump_label_init_type(struct jump_entry *entry)
......@@ -650,6 +689,15 @@ static int jump_label_add_module(struct module *mod)
static_key_set_entries(key, iter);
continue;
}
/*
* If the key was sealed at init, then there's no need to keep a
* reference to its module entries - just patch them now and be
* done with it.
*/
if (static_key_sealed(key))
goto do_poke;
jlm = kzalloc(sizeof(struct static_key_mod), GFP_KERNEL);
if (!jlm)
return -ENOMEM;
......@@ -675,6 +723,7 @@ static int jump_label_add_module(struct module *mod)
static_key_set_linked(key);
/* Only update if we've changed from our initial state */
do_poke:
if (jump_label_type(iter) != jump_label_init_type(iter))
__jump_label_update(key, iter, iter_stop, true);
}
......@@ -699,6 +748,10 @@ static void jump_label_del_module(struct module *mod)
if (within_module((unsigned long)key, mod))
continue;
/* No @jlm allocated because key was sealed at init. */
if (static_key_sealed(key))
continue;
/* No memory during module load */
if (WARN_ON(!static_key_linked(key)))
continue;
......
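The sealing trick works because key->type multiplexes an aligned pointer with flag bits in its low two bits, so JUMP_TYPE_LINKED set while every pointer bit is zero is a state that cannot arise naturally; static_key_sealed() claims it. A sketch of that encoding (flag values mirror the kernel's JUMP_TYPE_*):

    #include <stdio.h>

    #define TYPE_TRUE   1UL    /* key currently true */
    #define TYPE_LINKED 2UL    /* upper bits hold a list, not an array */
    #define TYPE_MASK   3UL

    static int is_sealed(unsigned long type)
    {
        return (type & TYPE_LINKED) && !(type & ~TYPE_MASK);
    }

    int main(void)
    {
        unsigned long entries = 0x1000;    /* stand-in aligned pointer */

        printf("%d %d %d\n",
               is_sealed(TYPE_LINKED),             /* 1: sealed */
               is_sealed(entries | TYPE_LINKED),   /* 0: genuinely linked */
               is_sealed(TYPE_TRUE));              /* 0: ordinary key */
        return 0;
    }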
......@@ -53,8 +53,8 @@ static inline void __lockevent_add(enum lock_events event, int inc)
#else /* CONFIG_LOCK_EVENT_COUNTS */
#define lockevent_inc(ev)
#define lockevent_add(ev, c)
#define lockevent_cond_inc(ev, c)
#define lockevent_add(ev, c) do { (void)(c); } while (0)
#define lockevent_cond_inc(ev, c) do { (void)(c); } while (0)
#endif /* CONFIG_LOCK_EVENT_COUNTS */
......
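The point of the new stubs: the old empty macros never evaluated their second argument, so side effects silently disappeared and variables used only there drew unused warnings when CONFIG_LOCK_EVENT_COUNTS was off. do { (void)(c); } while (0) evaluates it exactly once in both configurations. A minimal demonstration (illustrative names):

    #include <stdio.h>

    #define lockevent_add_old(ev, c)
    #define lockevent_add_new(ev, c) do { (void)(c); } while (0)

    int main(void)
    {
        int n = 0;

        lockevent_add_old(ev, n++);   /* argument dropped: n stays 0 */
        lockevent_add_new(ev, n++);   /* evaluated exactly once */
        printf("n=%d\n", n);          /* prints n=1 */
        return 0;
    }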
......@@ -220,21 +220,18 @@ static __always_inline void clear_pending_set_locked(struct qspinlock *lock)
*/
static __always_inline u32 xchg_tail(struct qspinlock *lock, u32 tail)
{
u32 old, new, val = atomic_read(&lock->val);
u32 old, new;
for (;;) {
new = (val & _Q_LOCKED_PENDING_MASK) | tail;
old = atomic_read(&lock->val);
do {
new = (old & _Q_LOCKED_PENDING_MASK) | tail;
/*
* We can use relaxed semantics since the caller ensures that
* the MCS node is properly initialized before updating the
* tail.
*/
old = atomic_cmpxchg_relaxed(&lock->val, val, new);
if (old == val)
break;
} while (!atomic_try_cmpxchg_relaxed(&lock->val, &old, new));
val = old;
}
return old;
}
#endif /* _Q_PENDING_BITS == 8 */
......
......@@ -86,9 +86,10 @@ static inline bool pv_hybrid_queued_unfair_trylock(struct qspinlock *lock)
*/
for (;;) {
int val = atomic_read(&lock->val);
u8 old = 0;
if (!(val & _Q_LOCKED_PENDING_MASK) &&
(cmpxchg_acquire(&lock->locked, 0, _Q_LOCKED_VAL) == 0)) {
try_cmpxchg_acquire(&lock->locked, &old, _Q_LOCKED_VAL)) {
lockevent_inc(pv_lock_stealing);
return true;
}
......@@ -116,11 +117,12 @@ static __always_inline void set_pending(struct qspinlock *lock)
* barrier. Therefore, an atomic cmpxchg_acquire() is used to acquire the
* lock just to be sure that it will get it.
*/
static __always_inline int trylock_clear_pending(struct qspinlock *lock)
static __always_inline bool trylock_clear_pending(struct qspinlock *lock)
{
u16 old = _Q_PENDING_VAL;
return !READ_ONCE(lock->locked) &&
(cmpxchg_acquire(&lock->locked_pending, _Q_PENDING_VAL,
_Q_LOCKED_VAL) == _Q_PENDING_VAL);
try_cmpxchg_acquire(&lock->locked_pending, &old, _Q_LOCKED_VAL);
}
#else /* _Q_PENDING_BITS == 8 */
static __always_inline void set_pending(struct qspinlock *lock)
......@@ -128,27 +130,21 @@ static __always_inline void set_pending(struct qspinlock *lock)
atomic_or(_Q_PENDING_VAL, &lock->val);
}
static __always_inline int trylock_clear_pending(struct qspinlock *lock)
static __always_inline bool trylock_clear_pending(struct qspinlock *lock)
{
int val = atomic_read(&lock->val);
for (;;) {
int old, new;
if (val & _Q_LOCKED_MASK)
break;
old = atomic_read(&lock->val);
do {
if (old & _Q_LOCKED_MASK)
return false;
/*
* Try to clear pending bit & set locked bit
*/
old = val;
new = (val & ~_Q_PENDING_MASK) | _Q_LOCKED_VAL;
val = atomic_cmpxchg_acquire(&lock->val, old, new);
new = (old & ~_Q_PENDING_MASK) | _Q_LOCKED_VAL;
} while (!atomic_try_cmpxchg_acquire(&lock->val, &old, new));
if (val == old)
return 1;
}
return 0;
return true;
}
#endif /* _Q_PENDING_BITS == 8 */
......@@ -216,8 +212,9 @@ static struct qspinlock **pv_hash(struct qspinlock *lock, struct pv_node *node)
int hopcnt = 0;
for_each_hash_entry(he, offset, hash) {
struct qspinlock *old = NULL;
hopcnt++;
if (!cmpxchg(&he->lock, NULL, lock)) {
if (try_cmpxchg(&he->lock, &old, lock)) {
WRITE_ONCE(he->node, node);
lockevent_pv_hop(hopcnt);
return &he->lock;
......@@ -294,7 +291,7 @@ static void pv_wait_node(struct mcs_spinlock *node, struct mcs_spinlock *prev)
{
struct pv_node *pn = (struct pv_node *)node;
struct pv_node *pp = (struct pv_node *)prev;
bool __maybe_unused wait_early;
bool wait_early;
int loop;
for (;;) {
......@@ -360,7 +357,7 @@ static void pv_wait_node(struct mcs_spinlock *node, struct mcs_spinlock *prev)
static void pv_kick_node(struct qspinlock *lock, struct mcs_spinlock *node)
{
struct pv_node *pn = (struct pv_node *)node;
u8 old = vcpu_halted;
/*
* If the vCPU is indeed halted, advance its state to match that of
* pv_wait_node(). If OTOH this fails, the vCPU was running and will
......@@ -377,8 +374,7 @@ static void pv_kick_node(struct qspinlock *lock, struct mcs_spinlock *node)
* subsequent writes.
*/
smp_mb__before_atomic();
if (cmpxchg_relaxed(&pn->state, vcpu_halted, vcpu_hashed)
!= vcpu_halted)
if (!try_cmpxchg_relaxed(&pn->state, &old, vcpu_hashed))
return;
/*
......@@ -546,15 +542,14 @@ __pv_queued_spin_unlock_slowpath(struct qspinlock *lock, u8 locked)
#ifndef __pv_queued_spin_unlock
__visible __lockfunc void __pv_queued_spin_unlock(struct qspinlock *lock)
{
u8 locked;
u8 locked = _Q_LOCKED_VAL;
/*
* We must not unlock if SLOW, because in that case we must first
* unhash. Otherwise it would be possible to have multiple @lock
* entries, which would be BAD.
*/
locked = cmpxchg_release(&lock->locked, _Q_LOCKED_VAL, 0);
if (likely(locked == _Q_LOCKED_VAL))
if (try_cmpxchg_release(&lock->locked, &locked, 0))
return;
__pv_queued_spin_unlock_slowpath(lock, locked);
......