Commit ec92d082 authored by Pierre Peiffer, committed by Linus Torvalds

futex priority based wakeup

Today, all threads waiting for a given futex are woken in FIFO order (first
waiter woken first) instead of priority order.

This patch makes use of plist (priority ordered lists) instead of simple list
in futex_hash_bucket.

All non-RT threads are stored with priority MAX_RT_PRIO, causing them to be
woken last, in FIFO order (RT-threads are woken first, in priority order).
Signed-off-by: Sebastien Dugue <sebastien.dugue@bull.net>
Signed-off-by: Pierre Peiffer <pierre.peiffer@bull.net>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Ulrich Drepper <drepper@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent f34c506b
...@@ -81,12 +81,12 @@ struct futex_pi_state { ...@@ -81,12 +81,12 @@ struct futex_pi_state {
* we can wake only the relevant ones (hashed queues may be shared). * we can wake only the relevant ones (hashed queues may be shared).
* *
* A futex_q has a woken state, just like tasks have TASK_RUNNING. * A futex_q has a woken state, just like tasks have TASK_RUNNING.
* It is considered woken when list_empty(&q->list) || q->lock_ptr == 0. * It is considered woken when plist_node_empty(&q->list) || q->lock_ptr == 0.
* The order of wakup is always to make the first condition true, then * The order of wakup is always to make the first condition true, then
* wake up q->waiters, then make the second condition true. * wake up q->waiters, then make the second condition true.
*/ */
struct futex_q { struct futex_q {
struct list_head list; struct plist_node list;
wait_queue_head_t waiters; wait_queue_head_t waiters;
/* Which hash list lock to use: */ /* Which hash list lock to use: */
...@@ -108,8 +108,8 @@ struct futex_q { ...@@ -108,8 +108,8 @@ struct futex_q {
* Split the global futex_lock into every hash list lock. * Split the global futex_lock into every hash list lock.
*/ */
struct futex_hash_bucket { struct futex_hash_bucket {
spinlock_t lock; spinlock_t lock;
struct list_head chain; struct plist_head chain;
}; };
static struct futex_hash_bucket futex_queues[1<<FUTEX_HASHBITS]; static struct futex_hash_bucket futex_queues[1<<FUTEX_HASHBITS];
...@@ -443,13 +443,13 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb, struct futex_q *me) ...@@ -443,13 +443,13 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb, struct futex_q *me)
{ {
struct futex_pi_state *pi_state = NULL; struct futex_pi_state *pi_state = NULL;
struct futex_q *this, *next; struct futex_q *this, *next;
struct list_head *head; struct plist_head *head;
struct task_struct *p; struct task_struct *p;
pid_t pid; pid_t pid;
head = &hb->chain; head = &hb->chain;
list_for_each_entry_safe(this, next, head, list) { plist_for_each_entry_safe(this, next, head, list) {
if (match_futex(&this->key, &me->key)) { if (match_futex(&this->key, &me->key)) {
/* /*
* Another waiter already exists - bump up * Another waiter already exists - bump up
...@@ -513,12 +513,12 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb, struct futex_q *me) ...@@ -513,12 +513,12 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb, struct futex_q *me)
*/ */
static void wake_futex(struct futex_q *q) static void wake_futex(struct futex_q *q)
{ {
list_del_init(&q->list); plist_del(&q->list, &q->list.plist);
if (q->filp) if (q->filp)
send_sigio(&q->filp->f_owner, q->fd, POLL_IN); send_sigio(&q->filp->f_owner, q->fd, POLL_IN);
/* /*
* The lock in wake_up_all() is a crucial memory barrier after the * The lock in wake_up_all() is a crucial memory barrier after the
* list_del_init() and also before assigning to q->lock_ptr. * plist_del() and also before assigning to q->lock_ptr.
*/ */
wake_up_all(&q->waiters); wake_up_all(&q->waiters);
/* /*
...@@ -633,7 +633,7 @@ static int futex_wake(u32 __user *uaddr, int nr_wake) ...@@ -633,7 +633,7 @@ static int futex_wake(u32 __user *uaddr, int nr_wake)
{ {
struct futex_hash_bucket *hb; struct futex_hash_bucket *hb;
struct futex_q *this, *next; struct futex_q *this, *next;
struct list_head *head; struct plist_head *head;
union futex_key key; union futex_key key;
int ret; int ret;
...@@ -647,7 +647,7 @@ static int futex_wake(u32 __user *uaddr, int nr_wake) ...@@ -647,7 +647,7 @@ static int futex_wake(u32 __user *uaddr, int nr_wake)
spin_lock(&hb->lock); spin_lock(&hb->lock);
head = &hb->chain; head = &hb->chain;
list_for_each_entry_safe(this, next, head, list) { plist_for_each_entry_safe(this, next, head, list) {
if (match_futex (&this->key, &key)) { if (match_futex (&this->key, &key)) {
if (this->pi_state) { if (this->pi_state) {
ret = -EINVAL; ret = -EINVAL;
...@@ -675,7 +675,7 @@ futex_wake_op(u32 __user *uaddr1, u32 __user *uaddr2, ...@@ -675,7 +675,7 @@ futex_wake_op(u32 __user *uaddr1, u32 __user *uaddr2,
{ {
union futex_key key1, key2; union futex_key key1, key2;
struct futex_hash_bucket *hb1, *hb2; struct futex_hash_bucket *hb1, *hb2;
struct list_head *head; struct plist_head *head;
struct futex_q *this, *next; struct futex_q *this, *next;
int ret, op_ret, attempt = 0; int ret, op_ret, attempt = 0;
...@@ -748,7 +748,7 @@ futex_wake_op(u32 __user *uaddr1, u32 __user *uaddr2, ...@@ -748,7 +748,7 @@ futex_wake_op(u32 __user *uaddr1, u32 __user *uaddr2,
head = &hb1->chain; head = &hb1->chain;
list_for_each_entry_safe(this, next, head, list) { plist_for_each_entry_safe(this, next, head, list) {
if (match_futex (&this->key, &key1)) { if (match_futex (&this->key, &key1)) {
wake_futex(this); wake_futex(this);
if (++ret >= nr_wake) if (++ret >= nr_wake)
...@@ -760,7 +760,7 @@ futex_wake_op(u32 __user *uaddr1, u32 __user *uaddr2, ...@@ -760,7 +760,7 @@ futex_wake_op(u32 __user *uaddr1, u32 __user *uaddr2,
head = &hb2->chain; head = &hb2->chain;
op_ret = 0; op_ret = 0;
list_for_each_entry_safe(this, next, head, list) { plist_for_each_entry_safe(this, next, head, list) {
if (match_futex (&this->key, &key2)) { if (match_futex (&this->key, &key2)) {
wake_futex(this); wake_futex(this);
if (++op_ret >= nr_wake2) if (++op_ret >= nr_wake2)
...@@ -787,7 +787,7 @@ static int futex_requeue(u32 __user *uaddr1, u32 __user *uaddr2, ...@@ -787,7 +787,7 @@ static int futex_requeue(u32 __user *uaddr1, u32 __user *uaddr2,
{ {
union futex_key key1, key2; union futex_key key1, key2;
struct futex_hash_bucket *hb1, *hb2; struct futex_hash_bucket *hb1, *hb2;
struct list_head *head1; struct plist_head *head1;
struct futex_q *this, *next; struct futex_q *this, *next;
int ret, drop_count = 0; int ret, drop_count = 0;
...@@ -836,7 +836,7 @@ static int futex_requeue(u32 __user *uaddr1, u32 __user *uaddr2, ...@@ -836,7 +836,7 @@ static int futex_requeue(u32 __user *uaddr1, u32 __user *uaddr2,
} }
head1 = &hb1->chain; head1 = &hb1->chain;
list_for_each_entry_safe(this, next, head1, list) { plist_for_each_entry_safe(this, next, head1, list) {
if (!match_futex (&this->key, &key1)) if (!match_futex (&this->key, &key1))
continue; continue;
if (++ret <= nr_wake) { if (++ret <= nr_wake) {
...@@ -847,9 +847,13 @@ static int futex_requeue(u32 __user *uaddr1, u32 __user *uaddr2, ...@@ -847,9 +847,13 @@ static int futex_requeue(u32 __user *uaddr1, u32 __user *uaddr2,
* requeue. * requeue.
*/ */
if (likely(head1 != &hb2->chain)) { if (likely(head1 != &hb2->chain)) {
list_move_tail(&this->list, &hb2->chain); plist_del(&this->list, &hb1->chain);
plist_add(&this->list, &hb2->chain);
this->lock_ptr = &hb2->lock; this->lock_ptr = &hb2->lock;
} #ifdef CONFIG_DEBUG_PI_LIST
this->list.plist.lock = &hb2->lock;
#endif
}
this->key = key2; this->key = key2;
get_futex_key_refs(&key2); get_futex_key_refs(&key2);
drop_count++; drop_count++;
...@@ -894,7 +898,23 @@ queue_lock(struct futex_q *q, int fd, struct file *filp) ...@@ -894,7 +898,23 @@ queue_lock(struct futex_q *q, int fd, struct file *filp)
static inline void __queue_me(struct futex_q *q, struct futex_hash_bucket *hb) static inline void __queue_me(struct futex_q *q, struct futex_hash_bucket *hb)
{ {
list_add_tail(&q->list, &hb->chain); int prio;
/*
* The priority used to register this element is
* - either the real thread-priority for the real-time threads
* (i.e. threads with a priority lower than MAX_RT_PRIO)
* - or MAX_RT_PRIO for non-RT threads.
* Thus, all RT-threads are woken first in priority order, and
* the others are woken last, in FIFO order.
*/
prio = min(current->normal_prio, MAX_RT_PRIO);
plist_node_init(&q->list, prio);
#ifdef CONFIG_DEBUG_PI_LIST
q->list.plist.lock = &hb->lock;
#endif
plist_add(&q->list, &hb->chain);
q->task = current; q->task = current;
spin_unlock(&hb->lock); spin_unlock(&hb->lock);
} }
...@@ -949,8 +969,8 @@ static int unqueue_me(struct futex_q *q) ...@@ -949,8 +969,8 @@ static int unqueue_me(struct futex_q *q)
spin_unlock(lock_ptr); spin_unlock(lock_ptr);
goto retry; goto retry;
} }
WARN_ON(list_empty(&q->list)); WARN_ON(plist_node_empty(&q->list));
list_del(&q->list); plist_del(&q->list, &q->list.plist);
BUG_ON(q->pi_state); BUG_ON(q->pi_state);
...@@ -968,8 +988,8 @@ static int unqueue_me(struct futex_q *q) ...@@ -968,8 +988,8 @@ static int unqueue_me(struct futex_q *q)
*/ */
static void unqueue_me_pi(struct futex_q *q, struct futex_hash_bucket *hb) static void unqueue_me_pi(struct futex_q *q, struct futex_hash_bucket *hb)
{ {
WARN_ON(list_empty(&q->list)); WARN_ON(plist_node_empty(&q->list));
list_del(&q->list); plist_del(&q->list, &q->list.plist);
BUG_ON(!q->pi_state); BUG_ON(!q->pi_state);
free_pi_state(q->pi_state); free_pi_state(q->pi_state);
...@@ -1065,11 +1085,11 @@ static int futex_wait_abstime(u32 __user *uaddr, u32 val, ...@@ -1065,11 +1085,11 @@ static int futex_wait_abstime(u32 __user *uaddr, u32 val,
__set_current_state(TASK_INTERRUPTIBLE); __set_current_state(TASK_INTERRUPTIBLE);
add_wait_queue(&q.waiters, &wait); add_wait_queue(&q.waiters, &wait);
/* /*
* !list_empty() is safe here without any lock. * !plist_node_empty() is safe here without any lock.
* q.lock_ptr != 0 is not safe, because of ordering against wakeup. * q.lock_ptr != 0 is not safe, because of ordering against wakeup.
*/ */
time_left = 0; time_left = 0;
if (likely(!list_empty(&q.list))) { if (likely(!plist_node_empty(&q.list))) {
unsigned long rel_time; unsigned long rel_time;
if (timed) { if (timed) {
...@@ -1384,7 +1404,7 @@ static int futex_unlock_pi(u32 __user *uaddr) ...@@ -1384,7 +1404,7 @@ static int futex_unlock_pi(u32 __user *uaddr)
struct futex_hash_bucket *hb; struct futex_hash_bucket *hb;
struct futex_q *this, *next; struct futex_q *this, *next;
u32 uval; u32 uval;
struct list_head *head; struct plist_head *head;
union futex_key key; union futex_key key;
int ret, attempt = 0; int ret, attempt = 0;
...@@ -1435,7 +1455,7 @@ static int futex_unlock_pi(u32 __user *uaddr) ...@@ -1435,7 +1455,7 @@ static int futex_unlock_pi(u32 __user *uaddr)
*/ */
head = &hb->chain; head = &hb->chain;
list_for_each_entry_safe(this, next, head, list) { plist_for_each_entry_safe(this, next, head, list) {
if (!match_futex (&this->key, &key)) if (!match_futex (&this->key, &key))
continue; continue;
ret = wake_futex_pi(uaddr, uval, this); ret = wake_futex_pi(uaddr, uval, this);
...@@ -1509,10 +1529,10 @@ static unsigned int futex_poll(struct file *filp, ...@@ -1509,10 +1529,10 @@ static unsigned int futex_poll(struct file *filp,
poll_wait(filp, &q->waiters, wait); poll_wait(filp, &q->waiters, wait);
/* /*
* list_empty() is safe here without any lock. * plist_node_empty() is safe here without any lock.
* q->lock_ptr != 0 is not safe, because of ordering against wakeup. * q->lock_ptr != 0 is not safe, because of ordering against wakeup.
*/ */
if (list_empty(&q->list)) if (plist_node_empty(&q->list))
ret = POLLIN | POLLRDNORM; ret = POLLIN | POLLRDNORM;
return ret; return ret;
...@@ -1895,7 +1915,7 @@ static int __init init(void) ...@@ -1895,7 +1915,7 @@ static int __init init(void)
} }
for (i = 0; i < ARRAY_SIZE(futex_queues); i++) { for (i = 0; i < ARRAY_SIZE(futex_queues); i++) {
INIT_LIST_HEAD(&futex_queues[i].chain); plist_head_init(&futex_queues[i].chain, &futex_queues[i].lock);
spin_lock_init(&futex_queues[i].lock); spin_lock_init(&futex_queues[i].lock);
} }
return 0; return 0;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment