Commit 11b9a7a7 authored by Thomas Gleixner's avatar Thomas Gleixner Committed by Greg Kroah-Hartman

futex: Make lookup_pi_state more robust

commit 54a21788 upstream.

The current implementation of lookup_pi_state has ambigous handling of
the TID value 0 in the user space futex.  We can get into the kernel
even if the TID value is 0, because either there is a stale waiters bit
or the owner died bit is set or we are called from the requeue_pi path
or from user space just for fun.

The current code avoids an explicit sanity check for pid = 0 in case
that kernel internal state (waiters) are found for the user space
address.  This can lead to state leakage and worse under some
circumstances.

Handle the cases explicit:

       Waiter | pi_state | pi->owner | uTID      | uODIED | ?

  [1]  NULL   | ---      | ---       | 0         | 0/1    | Valid
  [2]  NULL   | ---      | ---       | >0        | 0/1    | Valid

  [3]  Found  | NULL     | --        | Any       | 0/1    | Invalid

  [4]  Found  | Found    | NULL      | 0         | 1      | Valid
  [5]  Found  | Found    | NULL      | >0        | 1      | Invalid

  [6]  Found  | Found    | task      | 0         | 1      | Valid

  [7]  Found  | Found    | NULL      | Any       | 0      | Invalid

  [8]  Found  | Found    | task      | ==taskTID | 0/1    | Valid
  [9]  Found  | Found    | task      | 0         | 0      | Invalid
  [10] Found  | Found    | task      | !=taskTID | 0/1    | Invalid

 [1] Indicates that the kernel can acquire the futex atomically. We
     came came here due to a stale FUTEX_WAITERS/FUTEX_OWNER_DIED bit.

 [2] Valid, if TID does not belong to a kernel thread. If no matching
     thread is found then it indicates that the owner TID has died.

 [3] Invalid. The waiter is queued on a non PI futex

 [4] Valid state after exit_robust_list(), which sets the user space
     value to FUTEX_WAITERS | FUTEX_OWNER_DIED.

 [5] The user space value got manipulated between exit_robust_list()
     and exit_pi_state_list()

 [6] Valid state after exit_pi_state_list() which sets the new owner in
     the pi_state but cannot access the user space value.

 [7] pi_state->owner can only be NULL when the OWNER_DIED bit is set.

 [8] Owner and user space value match

 [9] There is no transient state which sets the user space TID to 0
     except exit_robust_list(), but this is indicated by the
     FUTEX_OWNER_DIED bit. See [4]

[10] There is no transient state which leaves owner and user space
     TID out of sync.
Signed-off-by: default avatarThomas Gleixner <tglx@linutronix.de>
Cc: Kees Cook <keescook@chromium.org>
Cc: Will Drewry <wad@chromium.org>
Cc: Darren Hart <dvhart@linux.intel.com>
Signed-off-by: default avatarLinus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: default avatarGreg Kroah-Hartman <gregkh@linuxfoundation.org>
parent a8f96abb
...@@ -588,10 +588,58 @@ void exit_pi_state_list(struct task_struct *curr) ...@@ -588,10 +588,58 @@ void exit_pi_state_list(struct task_struct *curr)
raw_spin_unlock_irq(&curr->pi_lock); raw_spin_unlock_irq(&curr->pi_lock);
} }
/*
* We need to check the following states:
*
* Waiter | pi_state | pi->owner | uTID | uODIED | ?
*
* [1] NULL | --- | --- | 0 | 0/1 | Valid
* [2] NULL | --- | --- | >0 | 0/1 | Valid
*
* [3] Found | NULL | -- | Any | 0/1 | Invalid
*
* [4] Found | Found | NULL | 0 | 1 | Valid
* [5] Found | Found | NULL | >0 | 1 | Invalid
*
* [6] Found | Found | task | 0 | 1 | Valid
*
* [7] Found | Found | NULL | Any | 0 | Invalid
*
* [8] Found | Found | task | ==taskTID | 0/1 | Valid
* [9] Found | Found | task | 0 | 0 | Invalid
* [10] Found | Found | task | !=taskTID | 0/1 | Invalid
*
* [1] Indicates that the kernel can acquire the futex atomically. We
* came came here due to a stale FUTEX_WAITERS/FUTEX_OWNER_DIED bit.
*
* [2] Valid, if TID does not belong to a kernel thread. If no matching
* thread is found then it indicates that the owner TID has died.
*
* [3] Invalid. The waiter is queued on a non PI futex
*
* [4] Valid state after exit_robust_list(), which sets the user space
* value to FUTEX_WAITERS | FUTEX_OWNER_DIED.
*
* [5] The user space value got manipulated between exit_robust_list()
* and exit_pi_state_list()
*
* [6] Valid state after exit_pi_state_list() which sets the new owner in
* the pi_state but cannot access the user space value.
*
* [7] pi_state->owner can only be NULL when the OWNER_DIED bit is set.
*
* [8] Owner and user space value match
*
* [9] There is no transient state which sets the user space TID to 0
* except exit_robust_list(), but this is indicated by the
* FUTEX_OWNER_DIED bit. See [4]
*
* [10] There is no transient state which leaves owner and user space
* TID out of sync.
*/
static int static int
lookup_pi_state(u32 uval, struct futex_hash_bucket *hb, lookup_pi_state(u32 uval, struct futex_hash_bucket *hb,
union futex_key *key, struct futex_pi_state **ps, union futex_key *key, struct futex_pi_state **ps)
struct task_struct *task)
{ {
struct futex_pi_state *pi_state = NULL; struct futex_pi_state *pi_state = NULL;
struct futex_q *this, *next; struct futex_q *this, *next;
...@@ -604,12 +652,13 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb, ...@@ -604,12 +652,13 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb,
plist_for_each_entry_safe(this, next, head, list) { plist_for_each_entry_safe(this, next, head, list) {
if (match_futex(&this->key, key)) { if (match_futex(&this->key, key)) {
/* /*
* Another waiter already exists - bump up * Sanity check the waiter before increasing
* the refcount and return its pi_state: * the refcount and attaching to it.
*/ */
pi_state = this->pi_state; pi_state = this->pi_state;
/* /*
* Userspace might have messed up non-PI and PI futexes * Userspace might have messed up non-PI and
* PI futexes [3]
*/ */
if (unlikely(!pi_state)) if (unlikely(!pi_state))
return -EINVAL; return -EINVAL;
...@@ -617,44 +666,70 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb, ...@@ -617,44 +666,70 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb,
WARN_ON(!atomic_read(&pi_state->refcount)); WARN_ON(!atomic_read(&pi_state->refcount));
/* /*
* When pi_state->owner is NULL then the owner died * Handle the owner died case:
* and another waiter is on the fly. pi_state->owner
* is fixed up by the task which acquires
* pi_state->rt_mutex.
*
* We do not check for pid == 0 which can happen when
* the owner died and robust_list_exit() cleared the
* TID.
*/ */
if (pid && pi_state->owner) { if (uval & FUTEX_OWNER_DIED) {
/* /*
* Bail out if user space manipulated the * exit_pi_state_list sets owner to NULL and
* futex value. * wakes the topmost waiter. The task which
* acquires the pi_state->rt_mutex will fixup
* owner.
*/ */
if (pid != task_pid_vnr(pi_state->owner)) if (!pi_state->owner) {
/*
* No pi state owner, but the user
* space TID is not 0. Inconsistent
* state. [5]
*/
if (pid)
return -EINVAL;
/*
* Take a ref on the state and
* return. [4]
*/
goto out_state;
}
/*
* If TID is 0, then either the dying owner
* has not yet executed exit_pi_state_list()
* or some waiter acquired the rtmutex in the
* pi state, but did not yet fixup the TID in
* user space.
*
* Take a ref on the state and return. [6]
*/
if (!pid)
goto out_state;
} else {
/*
* If the owner died bit is not set,
* then the pi_state must have an
* owner. [7]
*/
if (!pi_state->owner)
return -EINVAL; return -EINVAL;
} }
/* /*
* Protect against a corrupted uval. If uval * Bail out if user space manipulated the
* is 0x80000000 then pid is 0 and the waiter * futex value. If pi state exists then the
* bit is set. So the deadlock check in the * owner TID must be the same as the user
* calling code has failed and we did not fall * space TID. [9/10]
* into the check above due to !pid.
*/ */
if (task && pi_state->owner == task) if (pid != task_pid_vnr(pi_state->owner))
return -EDEADLK; return -EINVAL;
out_state:
atomic_inc(&pi_state->refcount); atomic_inc(&pi_state->refcount);
*ps = pi_state; *ps = pi_state;
return 0; return 0;
} }
} }
/* /*
* We are the first waiter - try to look up the real owner and attach * We are the first waiter - try to look up the real owner and attach
* the new pi_state to it, but bail out when TID = 0 * the new pi_state to it, but bail out when TID = 0 [1]
*/ */
if (!pid) if (!pid)
return -ESRCH; return -ESRCH;
...@@ -687,6 +762,9 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb, ...@@ -687,6 +762,9 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb,
return ret; return ret;
} }
/*
* No existing pi state. First waiter. [2]
*/
pi_state = alloc_pi_state(); pi_state = alloc_pi_state();
/* /*
...@@ -807,7 +885,7 @@ static int futex_lock_pi_atomic(u32 __user *uaddr, struct futex_hash_bucket *hb, ...@@ -807,7 +885,7 @@ static int futex_lock_pi_atomic(u32 __user *uaddr, struct futex_hash_bucket *hb,
* We dont have the lock. Look up the PI state (or create it if * We dont have the lock. Look up the PI state (or create it if
* we are the first waiter): * we are the first waiter):
*/ */
ret = lookup_pi_state(uval, hb, key, ps, task); ret = lookup_pi_state(uval, hb, key, ps);
if (unlikely(ret)) { if (unlikely(ret)) {
switch (ret) { switch (ret) {
...@@ -1410,7 +1488,7 @@ static int futex_requeue(u32 __user *uaddr1, unsigned int flags, ...@@ -1410,7 +1488,7 @@ static int futex_requeue(u32 __user *uaddr1, unsigned int flags,
* rereading and handing potential crap to * rereading and handing potential crap to
* lookup_pi_state. * lookup_pi_state.
*/ */
ret = lookup_pi_state(ret, hb2, &key2, &pi_state, NULL); ret = lookup_pi_state(ret, hb2, &key2, &pi_state);
} }
switch (ret) { switch (ret) {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment