Commit 3d310eb7, authored by NeilBrown and committed by Linus Torvalds

[PATCH] md: fix deadlock due to md thread processing delayed requests.

Before completing a 'write' the md superblock might need to be updated.
This is best done by the md_thread.

The current code schedules the superblock update and queues the write request
for later handling by the md_thread.

However, some personalities (raid5/raid6) will deadlock if the md_thread
tries to submit requests to its own array.

So this patch changes things so that the process submitting the request waits
for the superblock to be written and then submits the request itself.

This fixes a recently-created deadlock in raid5/raid6.
Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
parent 41158c7e
...@@ -224,8 +224,8 @@ static mddev_t * mddev_find(dev_t unit) ...@@ -224,8 +224,8 @@ static mddev_t * mddev_find(dev_t unit)
INIT_LIST_HEAD(&new->all_mddevs); INIT_LIST_HEAD(&new->all_mddevs);
init_timer(&new->safemode_timer); init_timer(&new->safemode_timer);
atomic_set(&new->active, 1); atomic_set(&new->active, 1);
bio_list_init(&new->write_list);
spin_lock_init(&new->write_lock); spin_lock_init(&new->write_lock);
init_waitqueue_head(&new->sb_wait);
new->queue = blk_alloc_queue(GFP_KERNEL); new->queue = blk_alloc_queue(GFP_KERNEL);
if (!new->queue) { if (!new->queue) {
...@@ -1307,6 +1307,7 @@ static void md_update_sb(mddev_t * mddev) ...@@ -1307,6 +1307,7 @@ static void md_update_sb(mddev_t * mddev)
if (!mddev->persistent) { if (!mddev->persistent) {
mddev->sb_dirty = 0; mddev->sb_dirty = 0;
spin_unlock(&mddev->write_lock); spin_unlock(&mddev->write_lock);
wake_up(&mddev->sb_wait);
return; return;
} }
spin_unlock(&mddev->write_lock); spin_unlock(&mddev->write_lock);
...@@ -1348,6 +1349,7 @@ static void md_update_sb(mddev_t * mddev) ...@@ -1348,6 +1349,7 @@ static void md_update_sb(mddev_t * mddev)
} }
mddev->sb_dirty = 0; mddev->sb_dirty = 0;
spin_unlock(&mddev->write_lock); spin_unlock(&mddev->write_lock);
wake_up(&mddev->sb_wait);
} }
...@@ -3368,29 +3370,26 @@ void md_done_sync(mddev_t *mddev, int blocks, int ok) ...@@ -3368,29 +3370,26 @@ void md_done_sync(mddev_t *mddev, int blocks, int ok)
/* md_write_start(mddev, bi) /* md_write_start(mddev, bi)
* If we need to update some array metadata (e.g. 'active' flag * If we need to update some array metadata (e.g. 'active' flag
* in superblock) before writing, queue bi for later writing * in superblock) before writing, schedule a superblock update
* and return 0, else return 1 and it will be written now * and wait for it to complete.
*/ */
int md_write_start(mddev_t *mddev, struct bio *bi) void md_write_start(mddev_t *mddev, struct bio *bi)
{ {
DEFINE_WAIT(w);
if (bio_data_dir(bi) != WRITE) if (bio_data_dir(bi) != WRITE)
return 1; return;
atomic_inc(&mddev->writes_pending); atomic_inc(&mddev->writes_pending);
if (mddev->in_sync) {
spin_lock(&mddev->write_lock); spin_lock(&mddev->write_lock);
if (mddev->in_sync == 0 && mddev->sb_dirty == 0) {
spin_unlock(&mddev->write_lock);
return 1;
}
bio_list_add(&mddev->write_list, bi);
if (mddev->in_sync) { if (mddev->in_sync) {
mddev->in_sync = 0; mddev->in_sync = 0;
mddev->sb_dirty = 1; mddev->sb_dirty = 1;
md_wakeup_thread(mddev->thread);
} }
spin_unlock(&mddev->write_lock); spin_unlock(&mddev->write_lock);
md_wakeup_thread(mddev->thread); }
return 0; wait_event(mddev->sb_wait, mddev->sb_dirty==0);
} }
void md_write_end(mddev_t *mddev) void md_write_end(mddev_t *mddev)
...@@ -3685,7 +3684,6 @@ void md_check_recovery(mddev_t *mddev) ...@@ -3685,7 +3684,6 @@ void md_check_recovery(mddev_t *mddev)
mddev->sb_dirty || mddev->sb_dirty ||
test_bit(MD_RECOVERY_NEEDED, &mddev->recovery) || test_bit(MD_RECOVERY_NEEDED, &mddev->recovery) ||
test_bit(MD_RECOVERY_DONE, &mddev->recovery) || test_bit(MD_RECOVERY_DONE, &mddev->recovery) ||
mddev->write_list.head ||
(mddev->safemode == 1) || (mddev->safemode == 1) ||
(mddev->safemode == 2 && ! atomic_read(&mddev->writes_pending) (mddev->safemode == 2 && ! atomic_read(&mddev->writes_pending)
&& !mddev->in_sync && mddev->recovery_cp == MaxSector) && !mddev->in_sync && mddev->recovery_cp == MaxSector)
...@@ -3694,7 +3692,6 @@ void md_check_recovery(mddev_t *mddev) ...@@ -3694,7 +3692,6 @@ void md_check_recovery(mddev_t *mddev)
if (mddev_trylock(mddev)==0) { if (mddev_trylock(mddev)==0) {
int spares =0; int spares =0;
struct bio *blist;
spin_lock(&mddev->write_lock); spin_lock(&mddev->write_lock);
if (mddev->safemode && !atomic_read(&mddev->writes_pending) && if (mddev->safemode && !atomic_read(&mddev->writes_pending) &&
...@@ -3704,21 +3701,11 @@ void md_check_recovery(mddev_t *mddev) ...@@ -3704,21 +3701,11 @@ void md_check_recovery(mddev_t *mddev)
} }
if (mddev->safemode == 1) if (mddev->safemode == 1)
mddev->safemode = 0; mddev->safemode = 0;
blist = bio_list_get(&mddev->write_list);
spin_unlock(&mddev->write_lock); spin_unlock(&mddev->write_lock);
if (mddev->sb_dirty) if (mddev->sb_dirty)
md_update_sb(mddev); md_update_sb(mddev);
while (blist) {
struct bio *b = blist;
blist = blist->bi_next;
b->bi_next = NULL;
generic_make_request(b);
/* we already counted this, so need to un-count */
md_write_end(mddev);
}
if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) && if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) &&
!test_bit(MD_RECOVERY_DONE, &mddev->recovery)) { !test_bit(MD_RECOVERY_DONE, &mddev->recovery)) {
......
...@@ -561,8 +561,8 @@ static int make_request(request_queue_t *q, struct bio * bio) ...@@ -561,8 +561,8 @@ static int make_request(request_queue_t *q, struct bio * bio)
* thread has put up a bar for new requests. * thread has put up a bar for new requests.
* Continue immediately if no resync is active currently. * Continue immediately if no resync is active currently.
*/ */
if (md_write_start(mddev, bio)==0) md_write_start(mddev, bio); /* wait on superblock update early */
return 0;
spin_lock_irq(&conf->resync_lock); spin_lock_irq(&conf->resync_lock);
wait_event_lock_irq(conf->wait_resume, !conf->barrier, conf->resync_lock, ); wait_event_lock_irq(conf->wait_resume, !conf->barrier, conf->resync_lock, );
conf->nr_pending++; conf->nr_pending++;
......
...@@ -700,8 +700,7 @@ static int make_request(request_queue_t *q, struct bio * bio) ...@@ -700,8 +700,7 @@ static int make_request(request_queue_t *q, struct bio * bio)
return 0; return 0;
} }
if (md_write_start(mddev, bio) == 0) md_write_start(mddev, bio);
return 0;
/* /*
* Register the new request and wait if the reconstruction * Register the new request and wait if the reconstruction
......
...@@ -1411,8 +1411,7 @@ static int make_request (request_queue_t *q, struct bio * bi) ...@@ -1411,8 +1411,7 @@ static int make_request (request_queue_t *q, struct bio * bi)
sector_t logical_sector, last_sector; sector_t logical_sector, last_sector;
struct stripe_head *sh; struct stripe_head *sh;
if (md_write_start(mddev, bi)==0) md_write_start(mddev, bi);
return 0;
if (bio_data_dir(bi)==WRITE) { if (bio_data_dir(bi)==WRITE) {
disk_stat_inc(mddev->gendisk, writes); disk_stat_inc(mddev->gendisk, writes);
......
...@@ -1570,8 +1570,7 @@ static int make_request (request_queue_t *q, struct bio * bi) ...@@ -1570,8 +1570,7 @@ static int make_request (request_queue_t *q, struct bio * bi)
sector_t logical_sector, last_sector; sector_t logical_sector, last_sector;
struct stripe_head *sh; struct stripe_head *sh;
if (md_write_start(mddev, bi)==0) md_write_start(mddev, bi);
return 0;
if (bio_data_dir(bi)==WRITE) { if (bio_data_dir(bi)==WRITE) {
disk_stat_inc(mddev->gendisk, writes); disk_stat_inc(mddev->gendisk, writes);
......
...@@ -69,7 +69,7 @@ extern mdk_thread_t * md_register_thread (void (*run) (mddev_t *mddev), ...@@ -69,7 +69,7 @@ extern mdk_thread_t * md_register_thread (void (*run) (mddev_t *mddev),
extern void md_unregister_thread (mdk_thread_t *thread); extern void md_unregister_thread (mdk_thread_t *thread);
extern void md_wakeup_thread(mdk_thread_t *thread); extern void md_wakeup_thread(mdk_thread_t *thread);
extern void md_check_recovery(mddev_t *mddev); extern void md_check_recovery(mddev_t *mddev);
extern int md_write_start(mddev_t *mddev, struct bio *bi); extern void md_write_start(mddev_t *mddev, struct bio *bi);
extern void md_write_end(mddev_t *mddev); extern void md_write_end(mddev_t *mddev);
extern void md_handle_safemode(mddev_t *mddev); extern void md_handle_safemode(mddev_t *mddev);
extern void md_done_sync(mddev_t *mddev, int blocks, int ok); extern void md_done_sync(mddev_t *mddev, int blocks, int ok);
......
...@@ -261,7 +261,7 @@ struct mddev_s ...@@ -261,7 +261,7 @@ struct mddev_s
sector_t recovery_cp; sector_t recovery_cp;
spinlock_t write_lock; spinlock_t write_lock;
struct bio_list write_list; wait_queue_head_t sb_wait; /* for waiting on superblock updates */
unsigned int safemode; /* if set, update "clean" superblock unsigned int safemode; /* if set, update "clean" superblock
* when no writes pending. * when no writes pending.
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment