Commit e9028b0f authored by Anton Blanchard, committed by Linus Torvalds

[PATCH] fix scheduler deadlock

We have noticed lockups during boot when stress testing kexec on ppc64.
Two cpus would deadlock in scheduler code trying to grab already taken
spinlocks.

The double_rq_lock code uses the address of the runqueue to order the
taking of multiple locks.  The runqueue is a per cpu variable, so this
address depends on the percpu implementation:

	if (rq1 < rq2) {
		spin_lock(&rq1->lock);
		spin_lock(&rq2->lock);
	} else {
		spin_lock(&rq2->lock);
		spin_lock(&rq1->lock);
	}

The code in wake_sleeping_dependent, on the other hand, takes the locks
in cpu id order:

	for_each_cpu_mask(i, sibling_map)
		spin_lock(&cpu_rq(i)->lock);

This means we rely on the address of per cpu data increasing as cpu ids
increase.  While this holds for the generic percpu implementation, it may
not hold for arch specific implementations.  If the two orderings ever
disagree, two cpus can each take one lock and then wait forever for the
other: an ABBA deadlock.
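
As a toy model of the bug (user-space C with made-up addresses; nothing
here is from the patch), the two orderings pick opposite locks when the
percpu layout does not follow cpu id order:

	#include <stdio.h>

	/* Pretend cpu 0's runqueue landed at a HIGHER address than
	 * cpu 1's, which an arch specific percpu layout may do. */
	struct toy_rq {
		unsigned long addr;	/* stand-in for the runqueue's address */
		int cpu;
	};

	int main(void)
	{
		struct toy_rq a = { .addr = 0x9000, .cpu = 0 };
		struct toy_rq b = { .addr = 0x1000, .cpu = 1 };

		/* double_rq_lock style: lower address first */
		printf("address order locks cpu %d first\n",
		       a.addr < b.addr ? a.cpu : b.cpu);	/* cpu 1 */

		/* wake_sleeping_dependent style: lower cpu id first */
		printf("cpu id order locks cpu %d first\n",
		       a.cpu < b.cpu ? a.cpu : b.cpu);		/* cpu 0 */

		/* Run concurrently, each path grabs one lock and waits
		 * forever for the other: the ABBA deadlock we hit. */
		return 0;
	}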

One way to solve this is to always take runqueues in cpu id order.  To do
this, we add a cpu field to the runqueue and compare it in the double
runqueue locking functions.
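
For illustration, here is a minimal user-space sketch of the same
discipline (hypothetical toy_rq and pthread code, not the kernel patch
below):

	#include <pthread.h>

	/* Order the two locks by a stable id assigned once at init
	 * time, never by address.  Each lock must be set up with
	 * pthread_mutex_init() before first use. */
	struct toy_rq {
		pthread_mutex_t lock;
		int cpu;		/* stable ordering key */
	};

	static void toy_double_rq_lock(struct toy_rq *rq1, struct toy_rq *rq2)
	{
		if (rq1 == rq2) {
			pthread_mutex_lock(&rq1->lock);
		} else if (rq1->cpu < rq2->cpu) {
			pthread_mutex_lock(&rq1->lock);
			pthread_mutex_lock(&rq2->lock);
		} else {
			pthread_mutex_lock(&rq2->lock);
			pthread_mutex_lock(&rq1->lock);
		}
	}

	static void toy_double_rq_unlock(struct toy_rq *rq1, struct toy_rq *rq2)
	{
		pthread_mutex_unlock(&rq1->lock);
		if (rq1 != rq2)
			pthread_mutex_unlock(&rq2->lock);
	}

As long as every path that takes two runqueue locks compares the same
stable key, the two lock orders can never cross.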
Signed-off-by: Anton Blanchard <anton@samba.org>
Acked-by: Ingo Molnar <mingo@elte.hu>
Cc: <stable@kernel.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
parent 5be0e951
@@ -237,6 +237,7 @@ struct runqueue {
 	task_t *migration_thread;
 	struct list_head migration_queue;
+	int cpu;
 #endif
 #ifdef CONFIG_SCHEDSTATS
@@ -1654,6 +1655,9 @@ unsigned long nr_iowait(void)
 /*
  * double_rq_lock - safely lock two runqueues
  *
+ * We must take them in cpu order to match code in
+ * dependent_sleeper and wake_dependent_sleeper.
+ *
  * Note this does not disable interrupts like task_rq_lock,
  * you need to do so manually before calling.
  */
@@ -1665,7 +1669,7 @@ static void double_rq_lock(runqueue_t *rq1, runqueue_t *rq2)
 		spin_lock(&rq1->lock);
 		__acquire(rq2->lock);	/* Fake it out ;) */
 	} else {
-		if (rq1 < rq2) {
+		if (rq1->cpu < rq2->cpu) {
 			spin_lock(&rq1->lock);
 			spin_lock(&rq2->lock);
 		} else {
@@ -1701,7 +1705,7 @@ static void double_lock_balance(runqueue_t *this_rq, runqueue_t *busiest)
 	__acquires(this_rq->lock)
 {
 	if (unlikely(!spin_trylock(&busiest->lock))) {
-		if (busiest < this_rq) {
+		if (busiest->cpu < this_rq->cpu) {
			spin_unlock(&this_rq->lock);
			spin_lock(&busiest->lock);
			spin_lock(&this_rq->lock);
@@ -6029,6 +6033,7 @@ void __init sched_init(void)
 		rq->push_cpu = 0;
 		rq->migration_thread = NULL;
 		INIT_LIST_HEAD(&rq->migration_queue);
+		rq->cpu = i;
 #endif
 		atomic_set(&rq->nr_iowait, 0);