Commit b17170b2 authored by Matthew Wilcox, committed by Matthew Wilcox

Simplify semaphore implementation

By removing the negative values of 'count' and relying on the wait_list to
indicate whether we have any waiters, we can simplify the implementation
by removing the protection against an unlikely race condition.  Thanks to
David Howells for his suggestions.
Signed-off-by: Matthew Wilcox <willy@linux.intel.com>
parent f1241c87
...@@ -15,15 +15,12 @@ ...@@ -15,15 +15,12 @@
/* /*
* The spinlock controls access to the other members of the semaphore. * The spinlock controls access to the other members of the semaphore.
* 'count' is decremented by every task which calls down*() and incremented * 'count' represents how many more tasks can acquire this semaphore.
* by every call to up(). Thus, if it is positive, it indicates how many * Tasks waiting for the lock are kept on the wait_list.
* more tasks may acquire the lock. If it is negative, it indicates how
* many tasks are waiting for the lock. Tasks waiting for the lock are
* kept on the wait_list.
*/ */
struct semaphore { struct semaphore {
spinlock_t lock; spinlock_t lock;
int count; unsigned int count;
struct list_head wait_list; struct list_head wait_list;
}; };
......
...@@ -18,18 +18,8 @@ ...@@ -18,18 +18,8 @@
* down_trylock() and up() can be called from interrupt context. * down_trylock() and up() can be called from interrupt context.
* So we have to disable interrupts when taking the lock. * So we have to disable interrupts when taking the lock.
* *
* The ->count variable, if positive, defines how many more tasks can * The ->count variable defines how many more tasks can acquire the
* acquire the semaphore. If negative, it represents how many tasks are * semaphore. If it's zero, there may be tasks waiting on the list.
* waiting on the semaphore (*). If zero, no tasks are waiting, and no more
* tasks can acquire the semaphore.
*
* (*) Except for the window between one task calling up() and the task
* sleeping in a __down_common() waking up. In order to avoid a third task
* coming in and stealing the second task's wakeup, we leave the ->count
* negative. If we have a more complex situation, the ->count may become
* zero or negative (eg a semaphore with count = 2, three tasks attempt to
* acquire it, one sleeps, two finish and call up(), the second task to call
* up() notices that the list is empty and just increments count).
*/ */
static noinline void __down(struct semaphore *sem); static noinline void __down(struct semaphore *sem);
...@@ -43,7 +33,9 @@ void down(struct semaphore *sem) ...@@ -43,7 +33,9 @@ void down(struct semaphore *sem)
unsigned long flags; unsigned long flags;
spin_lock_irqsave(&sem->lock, flags); spin_lock_irqsave(&sem->lock, flags);
if (unlikely(sem->count-- <= 0)) if (likely(sem->count > 0))
sem->count--;
else
__down(sem); __down(sem);
spin_unlock_irqrestore(&sem->lock, flags); spin_unlock_irqrestore(&sem->lock, flags);
} }
...@@ -55,7 +47,9 @@ int down_interruptible(struct semaphore *sem) ...@@ -55,7 +47,9 @@ int down_interruptible(struct semaphore *sem)
int result = 0; int result = 0;
spin_lock_irqsave(&sem->lock, flags); spin_lock_irqsave(&sem->lock, flags);
if (unlikely(sem->count-- <= 0)) if (likely(sem->count > 0))
sem->count--;
else
result = __down_interruptible(sem); result = __down_interruptible(sem);
spin_unlock_irqrestore(&sem->lock, flags); spin_unlock_irqrestore(&sem->lock, flags);
...@@ -69,7 +63,9 @@ int down_killable(struct semaphore *sem) ...@@ -69,7 +63,9 @@ int down_killable(struct semaphore *sem)
int result = 0; int result = 0;
spin_lock_irqsave(&sem->lock, flags); spin_lock_irqsave(&sem->lock, flags);
if (unlikely(sem->count-- <= 0)) if (likely(sem->count > 0))
sem->count--;
else
result = __down_killable(sem); result = __down_killable(sem);
spin_unlock_irqrestore(&sem->lock, flags); spin_unlock_irqrestore(&sem->lock, flags);
...@@ -111,7 +107,9 @@ int down_timeout(struct semaphore *sem, long jiffies) ...@@ -111,7 +107,9 @@ int down_timeout(struct semaphore *sem, long jiffies)
int result = 0; int result = 0;
spin_lock_irqsave(&sem->lock, flags); spin_lock_irqsave(&sem->lock, flags);
if (unlikely(sem->count-- <= 0)) if (likely(sem->count > 0))
sem->count--;
else
result = __down_timeout(sem, jiffies); result = __down_timeout(sem, jiffies);
spin_unlock_irqrestore(&sem->lock, flags); spin_unlock_irqrestore(&sem->lock, flags);
...@@ -124,7 +122,7 @@ void up(struct semaphore *sem) ...@@ -124,7 +122,7 @@ void up(struct semaphore *sem)
unsigned long flags; unsigned long flags;
spin_lock_irqsave(&sem->lock, flags); spin_lock_irqsave(&sem->lock, flags);
if (likely(sem->count >= 0)) if (likely(list_empty(&sem->wait_list)))
sem->count++; sem->count++;
else else
__up(sem); __up(sem);
...@@ -140,22 +138,6 @@ struct semaphore_waiter { ...@@ -140,22 +138,6 @@ struct semaphore_waiter {
int up; int up;
}; };
/*
* Wake up a process waiting on a semaphore. We need to call this from both
* __up and __down_common as it's possible to race a task into the semaphore
* if it comes in at just the right time between two tasks calling up() and
* a third task waking up. This function assumes the wait_list is already
* checked for being non-empty.
*/
static noinline void __sched __up_down_common(struct semaphore *sem)
{
struct semaphore_waiter *waiter = list_first_entry(&sem->wait_list,
struct semaphore_waiter, list);
list_del(&waiter->list);
waiter->up = 1;
wake_up_process(waiter->task);
}
/* /*
* Because this function is inlined, the 'state' parameter will be * Because this function is inlined, the 'state' parameter will be
* constant, and thus optimised away by the compiler. Likewise the * constant, and thus optimised away by the compiler. Likewise the
...@@ -164,7 +146,6 @@ static noinline void __sched __up_down_common(struct semaphore *sem) ...@@ -164,7 +146,6 @@ static noinline void __sched __up_down_common(struct semaphore *sem)
static inline int __sched __down_common(struct semaphore *sem, long state, static inline int __sched __down_common(struct semaphore *sem, long state,
long timeout) long timeout)
{ {
int result = 0;
struct task_struct *task = current; struct task_struct *task = current;
struct semaphore_waiter waiter; struct semaphore_waiter waiter;
...@@ -184,28 +165,16 @@ static inline int __sched __down_common(struct semaphore *sem, long state, ...@@ -184,28 +165,16 @@ static inline int __sched __down_common(struct semaphore *sem, long state,
timeout = schedule_timeout(timeout); timeout = schedule_timeout(timeout);
spin_lock_irq(&sem->lock); spin_lock_irq(&sem->lock);
if (waiter.up) if (waiter.up)
goto woken; return 0;
} }
timed_out: timed_out:
list_del(&waiter.list); list_del(&waiter.list);
result = -ETIME; return -ETIME;
goto woken;
interrupted: interrupted:
list_del(&waiter.list); list_del(&waiter.list);
result = -EINTR; return -EINTR;
woken:
/*
* Account for the process which woke us up. For the case where
* we're interrupted, we need to increment the count on our own
* behalf. I don't believe we can hit the case where the
* sem->count hits zero, *and* there's a second task sleeping,
* but it doesn't hurt, that's not a commonly exercised path and
* it's not a performance path either.
*/
if (unlikely((++sem->count >= 0) && !list_empty(&sem->wait_list)))
__up_down_common(sem);
return result;
} }
static noinline void __sched __down(struct semaphore *sem) static noinline void __sched __down(struct semaphore *sem)
...@@ -230,8 +199,9 @@ static noinline int __sched __down_timeout(struct semaphore *sem, long jiffies) ...@@ -230,8 +199,9 @@ static noinline int __sched __down_timeout(struct semaphore *sem, long jiffies)
static noinline void __sched __up(struct semaphore *sem) static noinline void __sched __up(struct semaphore *sem)
{ {
if (unlikely(list_empty(&sem->wait_list))) struct semaphore_waiter *waiter = list_first_entry(&sem->wait_list,
sem->count++; struct semaphore_waiter, list);
else list_del(&waiter->list);
__up_down_common(sem); waiter->up = 1;
wake_up_process(waiter->task);
} }
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment