Commit 8308756f authored by Linus Torvalds's avatar Linus Torvalds

Merge branch 'locking-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull core locking updates from Ingo Molnar:
 "The main changes are:

   - mutex, completions and rtmutex micro-optimizations
   - lock debugging fix
   - various cleanups in the MCS and the futex code"

* 'locking-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  locking/rtmutex: Optimize setting task running after being blocked
  locking/rwsem: Use task->state helpers
  sched/completion: Add lock-free checking of the blocking case
  sched/completion: Remove unnecessary ->wait.lock serialization when reading completion state
  locking/mutex: Explicitly mark task as running after wakeup
  futex: Fix argument handling in futex_lock_pi() calls
  doc: Fix misnamed FUTEX_CMP_REQUEUE_PI op constants
  locking/Documentation: Update code path
  softirq/preempt: Add missing current->preempt_disable_ip update
  locking/osq: No need for load/acquire when acquire-polling
  locking/mcs: Better differentiate between MCS variants
  locking/mutex: Introduce ww_mutex_set_context_slowpath()
  locking/mutex: Move MCS related comments to proper location
  locking/mutex: Checking the stamp is WW only
parents 23e8fe2e afffc6c1
......@@ -98,7 +98,7 @@ rt_mutex_start_proxy_lock() and rt_mutex_finish_proxy_lock(), which
allow the requeue code to acquire an uncontended rt_mutex on behalf
of the waiter and to enqueue the waiter on a contended rt_mutex.
Two new system calls provide the kernel<->user interface to
requeue_pi: FUTEX_WAIT_REQUEUE_PI and FUTEX_REQUEUE_CMP_PI.
requeue_pi: FUTEX_WAIT_REQUEUE_PI and FUTEX_CMP_REQUEUE_PI.
FUTEX_WAIT_REQUEUE_PI is called by the waiter (pthread_cond_wait()
and pthread_cond_timedwait()) to block on the initial futex and wait
......@@ -107,7 +107,7 @@ result of a high-speed collision between futex_wait() and
futex_lock_pi(), with some extra logic to check for the additional
wake-up scenarios.
FUTEX_REQUEUE_CMP_PI is called by the waker
FUTEX_CMP_REQUEUE_PI is called by the waker
(pthread_cond_broadcast() and pthread_cond_signal()) to requeue and
possibly wake the waiting tasks. Internally, this system call is
still handled by futex_requeue (by passing requeue_pi=1). Before
......@@ -120,12 +120,12 @@ task as a waiter on the underlying rt_mutex. It is possible that
the lock can be acquired at this stage as well, if so, the next
waiter is woken to finish the acquisition of the lock.
FUTEX_REQUEUE_PI accepts nr_wake and nr_requeue as arguments, but
FUTEX_CMP_REQUEUE_PI accepts nr_wake and nr_requeue as arguments, but
their sum is all that really matters. futex_requeue() will wake or
requeue up to nr_wake + nr_requeue tasks. It will wake only as many
tasks as it can acquire the lock for, which in the majority of cases
should be 0 as good programming practice dictates that the caller of
either pthread_cond_broadcast() or pthread_cond_signal() acquire the
mutex prior to making the call. FUTEX_REQUEUE_PI requires that
mutex prior to making the call. FUTEX_CMP_REQUEUE_PI requires that
nr_wake=1. nr_requeue should be INT_MAX for broadcast and 0 for
signal.
......@@ -34,7 +34,7 @@ The validator tracks lock-class usage history into 4n + 1 separate state bits:
- 'ever held with STATE enabled'
- 'ever held as readlock with STATE enabled'
Where STATE can be either one of (kernel/lockdep_states.h)
Where STATE can be either one of (kernel/locking/lockdep_states.h)
- hardirq
- softirq
- reclaim_fs
......
......@@ -5,8 +5,11 @@
* An MCS like lock especially tailored for optimistic spinning for sleeping
* lock implementations (mutex, rwsem, etc).
*/
#define OSQ_UNLOCKED_VAL (0)
struct optimistic_spin_node {
struct optimistic_spin_node *next, *prev;
int locked; /* 1 if lock acquired */
int cpu; /* encoded CPU # + 1 value */
};
struct optimistic_spin_queue {
/*
......@@ -16,6 +19,8 @@ struct optimistic_spin_queue {
atomic_t tail;
};
#define OSQ_UNLOCKED_VAL (0)
/* Init macro and function. */
#define OSQ_LOCK_UNLOCKED { ATOMIC_INIT(OSQ_UNLOCKED_VAL) }
......@@ -24,4 +29,7 @@ static inline void osq_lock_init(struct optimistic_spin_queue *lock)
atomic_set(&lock->tail, OSQ_UNLOCKED_VAL);
}
extern bool osq_lock(struct optimistic_spin_queue *lock);
extern void osq_unlock(struct optimistic_spin_queue *lock);
#endif
......@@ -231,6 +231,10 @@ config RWSEM_SPIN_ON_OWNER
def_bool y
depends on SMP && RWSEM_XCHGADD_ALGORITHM && ARCH_SUPPORTS_ATOMIC_RMW
config LOCK_SPIN_ON_OWNER
def_bool y
depends on MUTEX_SPIN_ON_OWNER || RWSEM_SPIN_ON_OWNER
config ARCH_USE_QUEUE_RWLOCK
bool
......
......@@ -2258,7 +2258,7 @@ static long futex_wait_restart(struct restart_block *restart)
* if there are waiters then it will block, it does PI, etc. (Due to
* races the kernel might see a 0 value of the futex too.)
*/
static int futex_lock_pi(u32 __user *uaddr, unsigned int flags, int detect,
static int futex_lock_pi(u32 __user *uaddr, unsigned int flags,
ktime_t *time, int trylock)
{
struct hrtimer_sleeper timeout, *to = NULL;
......@@ -2953,11 +2953,11 @@ long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout,
case FUTEX_WAKE_OP:
return futex_wake_op(uaddr, flags, uaddr2, val, val2, val3);
case FUTEX_LOCK_PI:
return futex_lock_pi(uaddr, flags, val, timeout, 0);
return futex_lock_pi(uaddr, flags, timeout, 0);
case FUTEX_UNLOCK_PI:
return futex_unlock_pi(uaddr, flags);
case FUTEX_TRYLOCK_PI:
return futex_lock_pi(uaddr, flags, 0, timeout, 1);
return futex_lock_pi(uaddr, flags, NULL, 1);
case FUTEX_WAIT_REQUEUE_PI:
val3 = FUTEX_BITSET_MATCH_ANY;
return futex_wait_requeue_pi(uaddr, flags, val, timeout, val3,
......
obj-y += mutex.o semaphore.o rwsem.o mcs_spinlock.o
obj-y += mutex.o semaphore.o rwsem.o
ifdef CONFIG_FUNCTION_TRACER
CFLAGS_REMOVE_lockdep.o = -pg
......@@ -14,6 +14,7 @@ ifeq ($(CONFIG_PROC_FS),y)
obj-$(CONFIG_LOCKDEP) += lockdep_proc.o
endif
obj-$(CONFIG_SMP) += spinlock.o
obj-$(CONFIG_LOCK_SPIN_ON_OWNER) += osq_lock.o
obj-$(CONFIG_SMP) += lglock.o
obj-$(CONFIG_PROVE_LOCKING) += spinlock.o
obj-$(CONFIG_RT_MUTEXES) += rtmutex.o
......
......@@ -108,20 +108,4 @@ void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *node)
arch_mcs_spin_unlock_contended(&next->locked);
}
/*
* Cancellable version of the MCS lock above.
*
* Intended for adaptive spinning of sleeping locks:
* mutex_lock()/rwsem_down_{read,write}() etc.
*/
struct optimistic_spin_node {
struct optimistic_spin_node *next, *prev;
int locked; /* 1 if lock acquired */
int cpu; /* encoded CPU # value */
};
extern bool osq_lock(struct optimistic_spin_queue *lock);
extern void osq_unlock(struct optimistic_spin_queue *lock);
#endif /* __LINUX_MCS_SPINLOCK_H */
......@@ -147,7 +147,7 @@ static __always_inline void ww_mutex_lock_acquired(struct ww_mutex *ww,
}
/*
* after acquiring lock with fastpath or when we lost out in contested
* After acquiring lock with fastpath or when we lost out in contested
* slowpath, set ctx and wake up any waiters so they can recheck.
*
* This function is never called when CONFIG_DEBUG_LOCK_ALLOC is set,
......@@ -191,19 +191,32 @@ ww_mutex_set_context_fastpath(struct ww_mutex *lock,
spin_unlock_mutex(&lock->base.wait_lock, flags);
}
#ifdef CONFIG_MUTEX_SPIN_ON_OWNER
/*
* In order to avoid a stampede of mutex spinners from acquiring the mutex
* more or less simultaneously, the spinners need to acquire a MCS lock
* first before spinning on the owner field.
* After acquiring lock in the slowpath set ctx and wake up any
* waiters so they can recheck.
*
* Callers must hold the mutex wait_lock.
*/
static __always_inline void
ww_mutex_set_context_slowpath(struct ww_mutex *lock,
struct ww_acquire_ctx *ctx)
{
struct mutex_waiter *cur;
/*
* Mutex spinning code migrated from kernel/sched/core.c
*/
ww_mutex_lock_acquired(lock, ctx);
lock->ctx = ctx;
/*
* Give any possible sleeping processes the chance to wake up,
* so they can recheck if they have to back off.
*/
list_for_each_entry(cur, &lock->base.wait_list, list) {
debug_mutex_wake_waiter(&lock->base, cur);
wake_up_process(cur->task);
}
}
#ifdef CONFIG_MUTEX_SPIN_ON_OWNER
static inline bool owner_running(struct mutex *lock, struct task_struct *owner)
{
if (lock->owner != owner)
......@@ -307,6 +320,11 @@ static bool mutex_optimistic_spin(struct mutex *lock,
if (!mutex_can_spin_on_owner(lock))
goto done;
/*
* In order to avoid a stampede of mutex spinners trying to
* acquire the mutex all at once, the spinners need to take a
* MCS (queued) lock first before spinning on the owner field.
*/
if (!osq_lock(&lock->osq))
goto done;
......@@ -469,7 +487,7 @@ void __sched ww_mutex_unlock(struct ww_mutex *lock)
EXPORT_SYMBOL(ww_mutex_unlock);
static inline int __sched
__mutex_lock_check_stamp(struct mutex *lock, struct ww_acquire_ctx *ctx)
__ww_mutex_lock_check_stamp(struct mutex *lock, struct ww_acquire_ctx *ctx)
{
struct ww_mutex *ww = container_of(lock, struct ww_mutex, base);
struct ww_acquire_ctx *hold_ctx = ACCESS_ONCE(ww->ctx);
......@@ -557,7 +575,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
}
if (use_ww_ctx && ww_ctx->acquired > 0) {
ret = __mutex_lock_check_stamp(lock, ww_ctx);
ret = __ww_mutex_lock_check_stamp(lock, ww_ctx);
if (ret)
goto err;
}
......@@ -569,6 +587,8 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
schedule_preempt_disabled();
spin_lock_mutex(&lock->wait_lock, flags);
}
__set_task_state(task, TASK_RUNNING);
mutex_remove_waiter(lock, &waiter, current_thread_info());
/* set it to 0 if there are no waiters left: */
if (likely(list_empty(&lock->wait_list)))
......@@ -582,23 +602,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
if (use_ww_ctx) {
struct ww_mutex *ww = container_of(lock, struct ww_mutex, base);
struct mutex_waiter *cur;
/*
* This branch gets optimized out for the common case,
* and is only important for ww_mutex_lock.
*/
ww_mutex_lock_acquired(ww, ww_ctx);
ww->ctx = ww_ctx;
/*
* Give any possible sleeping processes the chance to wake up,
* so they can recheck if they have to back off.
*/
list_for_each_entry(cur, &lock->wait_list, list) {
debug_mutex_wake_waiter(lock, cur);
wake_up_process(cur->task);
}
ww_mutex_set_context_slowpath(ww, ww_ctx);
}
spin_unlock_mutex(&lock->wait_lock, flags);
......
#include <linux/percpu.h>
#include <linux/sched.h>
#include "mcs_spinlock.h"
#ifdef CONFIG_SMP
#include <linux/osq_lock.h>
/*
* An MCS like lock especially tailored for optimistic spinning for sleeping
......@@ -111,7 +109,7 @@ bool osq_lock(struct optimistic_spin_queue *lock)
* cmpxchg in an attempt to undo our queueing.
*/
while (!smp_load_acquire(&node->locked)) {
while (!ACCESS_ONCE(node->locked)) {
/*
* If we need to reschedule bail... so we can block.
*/
......@@ -203,6 +201,3 @@ void osq_unlock(struct optimistic_spin_queue *lock)
if (next)
ACCESS_ONCE(next->locked) = 1;
}
#endif
......@@ -1130,6 +1130,7 @@ __rt_mutex_slowlock(struct rt_mutex *lock, int state,
set_current_state(state);
}
__set_current_state(TASK_RUNNING);
return ret;
}
......@@ -1188,10 +1189,9 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state,
ret = task_blocks_on_rt_mutex(lock, &waiter, current, chwalk);
if (likely(!ret))
/* sleep on the mutex */
ret = __rt_mutex_slowlock(lock, state, timeout, &waiter);
set_current_state(TASK_RUNNING);
if (unlikely(ret)) {
remove_waiter(lock, &waiter);
rt_mutex_handle_deadlock(ret, chwalk, &waiter);
......@@ -1626,10 +1626,9 @@ int rt_mutex_finish_proxy_lock(struct rt_mutex *lock,
set_current_state(TASK_INTERRUPTIBLE);
/* sleep on the mutex */
ret = __rt_mutex_slowlock(lock, TASK_INTERRUPTIBLE, to, waiter);
set_current_state(TASK_RUNNING);
if (unlikely(ret))
remove_waiter(lock, waiter);
......
......@@ -154,7 +154,7 @@ void __sched __down_read(struct rw_semaphore *sem)
set_task_state(tsk, TASK_UNINTERRUPTIBLE);
}
tsk->state = TASK_RUNNING;
__set_task_state(tsk, TASK_RUNNING);
out:
;
}
......
......@@ -242,8 +242,7 @@ struct rw_semaphore __sched *rwsem_down_read_failed(struct rw_semaphore *sem)
schedule();
}
tsk->state = TASK_RUNNING;
__set_task_state(tsk, TASK_RUNNING);
return sem;
}
EXPORT_SYMBOL(rwsem_down_read_failed);
......
......@@ -268,6 +268,15 @@ bool try_wait_for_completion(struct completion *x)
unsigned long flags;
int ret = 1;
/*
* Since x->done will need to be locked only
* in the non-blocking case, we check x->done
* first without taking the lock so we can
* return early in the blocking case.
*/
if (!ACCESS_ONCE(x->done))
return 0;
spin_lock_irqsave(&x->wait.lock, flags);
if (!x->done)
ret = 0;
......@@ -288,13 +297,6 @@ EXPORT_SYMBOL(try_wait_for_completion);
*/
bool completion_done(struct completion *x)
{
unsigned long flags;
int ret = 1;
spin_lock_irqsave(&x->wait.lock, flags);
if (!x->done)
ret = 0;
spin_unlock_irqrestore(&x->wait.lock, flags);
return ret;
return !!ACCESS_ONCE(x->done);
}
EXPORT_SYMBOL(completion_done);
......@@ -114,8 +114,12 @@ void __local_bh_disable_ip(unsigned long ip, unsigned int cnt)
trace_softirqs_off(ip);
raw_local_irq_restore(flags);
if (preempt_count() == cnt)
if (preempt_count() == cnt) {
#ifdef CONFIG_DEBUG_PREEMPT
current->preempt_disable_ip = get_parent_ip(CALLER_ADDR1);
#endif
trace_preempt_off(CALLER_ADDR0, get_parent_ip(CALLER_ADDR1));
}
}
EXPORT_SYMBOL(__local_bh_disable_ip);
#endif /* CONFIG_TRACE_IRQFLAGS */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment