Commit c64396cc authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'locking-urgent-2021-01-28' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull locking fixes from Thomas Gleixner:
 "A set of PI futex fixes:

   - Address a longstanding issue where the user space part of the PI
     futex is not writeable. The kernel returns with inconsistent state
     which can in the worst case result in a UAF of a tasks kernel
     stack.

     The solution is to establish consistent kernel state which makes
     future operations on the futex fail because user space and kernel
     space state are inconsistent. Not a problem as PI futexes
     fundamentaly require a functional RW mapping and if user space
     pulls the rug under it, then it can keep the pieces it asked for.

   - Address an issue where the return value is incorrect in case that
     the futex was acquired after a timeout/signal made the waiter drop
     out of the rtmutex wait.

     In one of the corner cases the kernel returned an error code
     despite having successfully acquired the futex"

* tag 'locking-urgent-2021-01-28' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  futex: Handle faults correctly for PI futexes
  futex: Simplify fixup_pi_state_owner()
  futex: Use pi_state_update_owner() in put_pi_state()
  rtmutex: Remove unused argument from rt_mutex_proxy_unlock()
  futex: Provide and use pi_state_update_owner()
  futex: Replace pointless printk in fixup_owner()
  futex: Ensure the correct return value from futex_lock_pi()
parents e5ff2cb9 34b1a1ce
...@@ -763,6 +763,29 @@ static struct futex_pi_state *alloc_pi_state(void) ...@@ -763,6 +763,29 @@ static struct futex_pi_state *alloc_pi_state(void)
return pi_state; return pi_state;
} }
static void pi_state_update_owner(struct futex_pi_state *pi_state,
struct task_struct *new_owner)
{
struct task_struct *old_owner = pi_state->owner;
lockdep_assert_held(&pi_state->pi_mutex.wait_lock);
if (old_owner) {
raw_spin_lock(&old_owner->pi_lock);
WARN_ON(list_empty(&pi_state->list));
list_del_init(&pi_state->list);
raw_spin_unlock(&old_owner->pi_lock);
}
if (new_owner) {
raw_spin_lock(&new_owner->pi_lock);
WARN_ON(!list_empty(&pi_state->list));
list_add(&pi_state->list, &new_owner->pi_state_list);
pi_state->owner = new_owner;
raw_spin_unlock(&new_owner->pi_lock);
}
}
static void get_pi_state(struct futex_pi_state *pi_state) static void get_pi_state(struct futex_pi_state *pi_state)
{ {
WARN_ON_ONCE(!refcount_inc_not_zero(&pi_state->refcount)); WARN_ON_ONCE(!refcount_inc_not_zero(&pi_state->refcount));
...@@ -785,17 +808,11 @@ static void put_pi_state(struct futex_pi_state *pi_state) ...@@ -785,17 +808,11 @@ static void put_pi_state(struct futex_pi_state *pi_state)
* and has cleaned up the pi_state already * and has cleaned up the pi_state already
*/ */
if (pi_state->owner) { if (pi_state->owner) {
struct task_struct *owner;
unsigned long flags; unsigned long flags;
raw_spin_lock_irqsave(&pi_state->pi_mutex.wait_lock, flags); raw_spin_lock_irqsave(&pi_state->pi_mutex.wait_lock, flags);
owner = pi_state->owner; pi_state_update_owner(pi_state, NULL);
if (owner) { rt_mutex_proxy_unlock(&pi_state->pi_mutex);
raw_spin_lock(&owner->pi_lock);
list_del_init(&pi_state->list);
raw_spin_unlock(&owner->pi_lock);
}
rt_mutex_proxy_unlock(&pi_state->pi_mutex, owner);
raw_spin_unlock_irqrestore(&pi_state->pi_mutex.wait_lock, flags); raw_spin_unlock_irqrestore(&pi_state->pi_mutex.wait_lock, flags);
} }
...@@ -941,7 +958,8 @@ static inline void exit_pi_state_list(struct task_struct *curr) { } ...@@ -941,7 +958,8 @@ static inline void exit_pi_state_list(struct task_struct *curr) { }
* FUTEX_OWNER_DIED bit. See [4] * FUTEX_OWNER_DIED bit. See [4]
* *
* [10] There is no transient state which leaves owner and user space * [10] There is no transient state which leaves owner and user space
* TID out of sync. * TID out of sync. Except one error case where the kernel is denied
* write access to the user address, see fixup_pi_state_owner().
* *
* *
* Serialization and lifetime rules: * Serialization and lifetime rules:
...@@ -1521,26 +1539,15 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_pi_state *pi_ ...@@ -1521,26 +1539,15 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_pi_state *pi_
ret = -EINVAL; ret = -EINVAL;
} }
if (ret) if (!ret) {
goto out_unlock; /*
* This is a point of no return; once we modified the uval
/* * there is no going back and subsequent operations must
* This is a point of no return; once we modify the uval there is no * not fail.
* going back and subsequent operations must not fail. */
*/ pi_state_update_owner(pi_state, new_owner);
postunlock = __rt_mutex_futex_unlock(&pi_state->pi_mutex, &wake_q);
raw_spin_lock(&pi_state->owner->pi_lock); }
WARN_ON(list_empty(&pi_state->list));
list_del_init(&pi_state->list);
raw_spin_unlock(&pi_state->owner->pi_lock);
raw_spin_lock(&new_owner->pi_lock);
WARN_ON(!list_empty(&pi_state->list));
list_add(&pi_state->list, &new_owner->pi_state_list);
pi_state->owner = new_owner;
raw_spin_unlock(&new_owner->pi_lock);
postunlock = __rt_mutex_futex_unlock(&pi_state->pi_mutex, &wake_q);
out_unlock: out_unlock:
raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock); raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock);
...@@ -2323,18 +2330,13 @@ static void unqueue_me_pi(struct futex_q *q) ...@@ -2323,18 +2330,13 @@ static void unqueue_me_pi(struct futex_q *q)
spin_unlock(q->lock_ptr); spin_unlock(q->lock_ptr);
} }
static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q, static int __fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q,
struct task_struct *argowner) struct task_struct *argowner)
{ {
struct futex_pi_state *pi_state = q->pi_state; struct futex_pi_state *pi_state = q->pi_state;
u32 uval, curval, newval;
struct task_struct *oldowner, *newowner; struct task_struct *oldowner, *newowner;
u32 newtid; u32 uval, curval, newval, newtid;
int ret, err = 0; int err = 0;
lockdep_assert_held(q->lock_ptr);
raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock);
oldowner = pi_state->owner; oldowner = pi_state->owner;
...@@ -2368,14 +2370,12 @@ static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q, ...@@ -2368,14 +2370,12 @@ static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q,
* We raced against a concurrent self; things are * We raced against a concurrent self; things are
* already fixed up. Nothing to do. * already fixed up. Nothing to do.
*/ */
ret = 0; return 0;
goto out_unlock;
} }
if (__rt_mutex_futex_trylock(&pi_state->pi_mutex)) { if (__rt_mutex_futex_trylock(&pi_state->pi_mutex)) {
/* We got the lock after all, nothing to fix. */ /* We got the lock. pi_state is correct. Tell caller. */
ret = 0; return 1;
goto out_unlock;
} }
/* /*
...@@ -2402,8 +2402,7 @@ static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q, ...@@ -2402,8 +2402,7 @@ static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q,
* We raced against a concurrent self; things are * We raced against a concurrent self; things are
* already fixed up. Nothing to do. * already fixed up. Nothing to do.
*/ */
ret = 0; return 1;
goto out_unlock;
} }
newowner = argowner; newowner = argowner;
} }
...@@ -2433,22 +2432,9 @@ static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q, ...@@ -2433,22 +2432,9 @@ static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q,
* We fixed up user space. Now we need to fix the pi_state * We fixed up user space. Now we need to fix the pi_state
* itself. * itself.
*/ */
if (pi_state->owner != NULL) { pi_state_update_owner(pi_state, newowner);
raw_spin_lock(&pi_state->owner->pi_lock);
WARN_ON(list_empty(&pi_state->list));
list_del_init(&pi_state->list);
raw_spin_unlock(&pi_state->owner->pi_lock);
}
pi_state->owner = newowner; return argowner == current;
raw_spin_lock(&newowner->pi_lock);
WARN_ON(!list_empty(&pi_state->list));
list_add(&pi_state->list, &newowner->pi_state_list);
raw_spin_unlock(&newowner->pi_lock);
raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock);
return 0;
/* /*
* In order to reschedule or handle a page fault, we need to drop the * In order to reschedule or handle a page fault, we need to drop the
...@@ -2469,17 +2455,16 @@ static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q, ...@@ -2469,17 +2455,16 @@ static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q,
switch (err) { switch (err) {
case -EFAULT: case -EFAULT:
ret = fault_in_user_writeable(uaddr); err = fault_in_user_writeable(uaddr);
break; break;
case -EAGAIN: case -EAGAIN:
cond_resched(); cond_resched();
ret = 0; err = 0;
break; break;
default: default:
WARN_ON_ONCE(1); WARN_ON_ONCE(1);
ret = err;
break; break;
} }
...@@ -2489,17 +2474,44 @@ static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q, ...@@ -2489,17 +2474,44 @@ static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q,
/* /*
* Check if someone else fixed it for us: * Check if someone else fixed it for us:
*/ */
if (pi_state->owner != oldowner) { if (pi_state->owner != oldowner)
ret = 0; return argowner == current;
goto out_unlock;
}
if (ret) /* Retry if err was -EAGAIN or the fault in succeeded */
goto out_unlock; if (!err)
goto retry;
goto retry; /*
* fault_in_user_writeable() failed so user state is immutable. At
* best we can make the kernel state consistent but user state will
* be most likely hosed and any subsequent unlock operation will be
* rejected due to PI futex rule [10].
*
* Ensure that the rtmutex owner is also the pi_state owner despite
* the user space value claiming something different. There is no
* point in unlocking the rtmutex if current is the owner as it
* would need to wait until the next waiter has taken the rtmutex
* to guarantee consistent state. Keep it simple. Userspace asked
* for this wreckaged state.
*
* The rtmutex has an owner - either current or some other
* task. See the EAGAIN loop above.
*/
pi_state_update_owner(pi_state, rt_mutex_owner(&pi_state->pi_mutex));
out_unlock: return err;
}
static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q,
struct task_struct *argowner)
{
struct futex_pi_state *pi_state = q->pi_state;
int ret;
lockdep_assert_held(q->lock_ptr);
raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock);
ret = __fixup_pi_state_owner(uaddr, q, argowner);
raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock); raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock);
return ret; return ret;
} }
...@@ -2523,8 +2535,6 @@ static long futex_wait_restart(struct restart_block *restart); ...@@ -2523,8 +2535,6 @@ static long futex_wait_restart(struct restart_block *restart);
*/ */
static int fixup_owner(u32 __user *uaddr, struct futex_q *q, int locked) static int fixup_owner(u32 __user *uaddr, struct futex_q *q, int locked)
{ {
int ret = 0;
if (locked) { if (locked) {
/* /*
* Got the lock. We might not be the anticipated owner if we * Got the lock. We might not be the anticipated owner if we
...@@ -2535,8 +2545,8 @@ static int fixup_owner(u32 __user *uaddr, struct futex_q *q, int locked) ...@@ -2535,8 +2545,8 @@ static int fixup_owner(u32 __user *uaddr, struct futex_q *q, int locked)
* stable state, anything else needs more attention. * stable state, anything else needs more attention.
*/ */
if (q->pi_state->owner != current) if (q->pi_state->owner != current)
ret = fixup_pi_state_owner(uaddr, q, current); return fixup_pi_state_owner(uaddr, q, current);
return ret ? ret : locked; return 1;
} }
/* /*
...@@ -2547,23 +2557,17 @@ static int fixup_owner(u32 __user *uaddr, struct futex_q *q, int locked) ...@@ -2547,23 +2557,17 @@ static int fixup_owner(u32 __user *uaddr, struct futex_q *q, int locked)
* Another speculative read; pi_state->owner == current is unstable * Another speculative read; pi_state->owner == current is unstable
* but needs our attention. * but needs our attention.
*/ */
if (q->pi_state->owner == current) { if (q->pi_state->owner == current)
ret = fixup_pi_state_owner(uaddr, q, NULL); return fixup_pi_state_owner(uaddr, q, NULL);
return ret;
}
/* /*
* Paranoia check. If we did not take the lock, then we should not be * Paranoia check. If we did not take the lock, then we should not be
* the owner of the rt_mutex. * the owner of the rt_mutex. Warn and establish consistent state.
*/ */
if (rt_mutex_owner(&q->pi_state->pi_mutex) == current) { if (WARN_ON_ONCE(rt_mutex_owner(&q->pi_state->pi_mutex) == current))
printk(KERN_ERR "fixup_owner: ret = %d pi-mutex: %p " return fixup_pi_state_owner(uaddr, q, current);
"pi-state %p\n", ret,
q->pi_state->pi_mutex.owner,
q->pi_state->owner);
}
return ret; return 0;
} }
/** /**
...@@ -2771,7 +2775,6 @@ static int futex_lock_pi(u32 __user *uaddr, unsigned int flags, ...@@ -2771,7 +2775,6 @@ static int futex_lock_pi(u32 __user *uaddr, unsigned int flags,
ktime_t *time, int trylock) ktime_t *time, int trylock)
{ {
struct hrtimer_sleeper timeout, *to; struct hrtimer_sleeper timeout, *to;
struct futex_pi_state *pi_state = NULL;
struct task_struct *exiting = NULL; struct task_struct *exiting = NULL;
struct rt_mutex_waiter rt_waiter; struct rt_mutex_waiter rt_waiter;
struct futex_hash_bucket *hb; struct futex_hash_bucket *hb;
...@@ -2907,23 +2910,8 @@ static int futex_lock_pi(u32 __user *uaddr, unsigned int flags, ...@@ -2907,23 +2910,8 @@ static int futex_lock_pi(u32 __user *uaddr, unsigned int flags,
if (res) if (res)
ret = (res < 0) ? res : 0; ret = (res < 0) ? res : 0;
/*
* If fixup_owner() faulted and was unable to handle the fault, unlock
* it and return the fault to userspace.
*/
if (ret && (rt_mutex_owner(&q.pi_state->pi_mutex) == current)) {
pi_state = q.pi_state;
get_pi_state(pi_state);
}
/* Unqueue and drop the lock */ /* Unqueue and drop the lock */
unqueue_me_pi(&q); unqueue_me_pi(&q);
if (pi_state) {
rt_mutex_futex_unlock(&pi_state->pi_mutex);
put_pi_state(pi_state);
}
goto out; goto out;
out_unlock_put_key: out_unlock_put_key:
...@@ -3183,7 +3171,6 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags, ...@@ -3183,7 +3171,6 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
u32 __user *uaddr2) u32 __user *uaddr2)
{ {
struct hrtimer_sleeper timeout, *to; struct hrtimer_sleeper timeout, *to;
struct futex_pi_state *pi_state = NULL;
struct rt_mutex_waiter rt_waiter; struct rt_mutex_waiter rt_waiter;
struct futex_hash_bucket *hb; struct futex_hash_bucket *hb;
union futex_key key2 = FUTEX_KEY_INIT; union futex_key key2 = FUTEX_KEY_INIT;
...@@ -3261,16 +3248,17 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags, ...@@ -3261,16 +3248,17 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
if (q.pi_state && (q.pi_state->owner != current)) { if (q.pi_state && (q.pi_state->owner != current)) {
spin_lock(q.lock_ptr); spin_lock(q.lock_ptr);
ret = fixup_pi_state_owner(uaddr2, &q, current); ret = fixup_pi_state_owner(uaddr2, &q, current);
if (ret && rt_mutex_owner(&q.pi_state->pi_mutex) == current) {
pi_state = q.pi_state;
get_pi_state(pi_state);
}
/* /*
* Drop the reference to the pi state which * Drop the reference to the pi state which
* the requeue_pi() code acquired for us. * the requeue_pi() code acquired for us.
*/ */
put_pi_state(q.pi_state); put_pi_state(q.pi_state);
spin_unlock(q.lock_ptr); spin_unlock(q.lock_ptr);
/*
* Adjust the return value. It's either -EFAULT or
* success (1) but the caller expects 0 for success.
*/
ret = ret < 0 ? ret : 0;
} }
} else { } else {
struct rt_mutex *pi_mutex; struct rt_mutex *pi_mutex;
...@@ -3301,25 +3289,10 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags, ...@@ -3301,25 +3289,10 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
if (res) if (res)
ret = (res < 0) ? res : 0; ret = (res < 0) ? res : 0;
/*
* If fixup_pi_state_owner() faulted and was unable to handle
* the fault, unlock the rt_mutex and return the fault to
* userspace.
*/
if (ret && rt_mutex_owner(&q.pi_state->pi_mutex) == current) {
pi_state = q.pi_state;
get_pi_state(pi_state);
}
/* Unqueue and drop the lock. */ /* Unqueue and drop the lock. */
unqueue_me_pi(&q); unqueue_me_pi(&q);
} }
if (pi_state) {
rt_mutex_futex_unlock(&pi_state->pi_mutex);
put_pi_state(pi_state);
}
if (ret == -EINTR) { if (ret == -EINTR) {
/* /*
* We've already been requeued, but cannot restart by calling * We've already been requeued, but cannot restart by calling
......
...@@ -1716,8 +1716,7 @@ void rt_mutex_init_proxy_locked(struct rt_mutex *lock, ...@@ -1716,8 +1716,7 @@ void rt_mutex_init_proxy_locked(struct rt_mutex *lock,
* possible because it belongs to the pi_state which is about to be freed * possible because it belongs to the pi_state which is about to be freed
* and it is not longer visible to other tasks. * and it is not longer visible to other tasks.
*/ */
void rt_mutex_proxy_unlock(struct rt_mutex *lock, void rt_mutex_proxy_unlock(struct rt_mutex *lock)
struct task_struct *proxy_owner)
{ {
debug_rt_mutex_proxy_unlock(lock); debug_rt_mutex_proxy_unlock(lock);
rt_mutex_set_owner(lock, NULL); rt_mutex_set_owner(lock, NULL);
......
...@@ -133,8 +133,7 @@ enum rtmutex_chainwalk { ...@@ -133,8 +133,7 @@ enum rtmutex_chainwalk {
extern struct task_struct *rt_mutex_next_owner(struct rt_mutex *lock); extern struct task_struct *rt_mutex_next_owner(struct rt_mutex *lock);
extern void rt_mutex_init_proxy_locked(struct rt_mutex *lock, extern void rt_mutex_init_proxy_locked(struct rt_mutex *lock,
struct task_struct *proxy_owner); struct task_struct *proxy_owner);
extern void rt_mutex_proxy_unlock(struct rt_mutex *lock, extern void rt_mutex_proxy_unlock(struct rt_mutex *lock);
struct task_struct *proxy_owner);
extern void rt_mutex_init_waiter(struct rt_mutex_waiter *waiter); extern void rt_mutex_init_waiter(struct rt_mutex_waiter *waiter);
extern int __rt_mutex_start_proxy_lock(struct rt_mutex *lock, extern int __rt_mutex_start_proxy_lock(struct rt_mutex *lock,
struct rt_mutex_waiter *waiter, struct rt_mutex_waiter *waiter,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment