Commit ac05f256 authored by NeilBrown

md: don't start resync thread directly from md thread.

The main 'md' thread is needed for processing writes, so if it blocks,
write requests could be delayed.

Starting a new thread requires some GFP_KERNEL allocations and so can
wait for writes to complete.  This can deadlock.

So instead, ask a workqueue to start the sync thread.
There is no particular rush for this to happen, so any work queue
will do.

MD_RECOVERY_RUNNING is used to ensure only one thread is started.
Reported-by: BillStuff <billstuff2001@sbcglobal.net>
Signed-off-by: NeilBrown <neilb@suse.de>
parent 8b1afc3d
...@@ -7767,6 +7767,33 @@ static int remove_and_add_spares(struct mddev *mddev, ...@@ -7767,6 +7767,33 @@ static int remove_and_add_spares(struct mddev *mddev,
return spares; return spares;
} }
/*
 * Work-queue callback that creates the "resync" thread for an array.
 *
 * @ws: the del_work item embedded in the struct mddev to operate on.
 *
 * The thread is registered with md_do_sync as its entry point and stored
 * in mddev->sync_thread.  On success it is woken.  If registration fails,
 * an error is logged and the recovery state bits are cleared again.  In
 * both cases sysfs_action is notified and md_new_event() is called.
 */
static void md_start_sync(struct work_struct *ws)
{
	struct mddev *mddev = container_of(ws, struct mddev, del_work);

	mddev->sync_thread = md_register_thread(md_do_sync, mddev, "resync");

	if (mddev->sync_thread) {
		md_wakeup_thread(mddev->sync_thread);
	} else {
		printk(KERN_ERR "%s: could not start resync thread...\n",
		       mdname(mddev));
		/* The spares stay where they are; that should be harmless. */
		clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
		clear_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
		clear_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
		clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
		clear_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
		/* Only notify if a recovery had been flagged and the node exists. */
		if (test_and_clear_bit(MD_RECOVERY_RECOVER, &mddev->recovery) &&
		    mddev->sysfs_action)
			sysfs_notify_dirent_safe(mddev->sysfs_action);
	}

	/* Done on both the success and the failure path. */
	sysfs_notify_dirent_safe(mddev->sysfs_action);
	md_new_event(mddev);
}
/* /*
* This routine is regularly called by all per-raid-array threads to * This routine is regularly called by all per-raid-array threads to
* deal with generic issues like resync and super-block update. * deal with generic issues like resync and super-block update.
...@@ -7883,7 +7910,7 @@ void md_check_recovery(struct mddev *mddev) ...@@ -7883,7 +7910,7 @@ void md_check_recovery(struct mddev *mddev)
if (!test_and_clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery) || if (!test_and_clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery) ||
test_bit(MD_RECOVERY_FROZEN, &mddev->recovery)) test_bit(MD_RECOVERY_FROZEN, &mddev->recovery))
goto unlock; goto not_running;
/* no recovery is running. /* no recovery is running.
* remove any failed drives, then * remove any failed drives, then
* add spares if possible. * add spares if possible.
...@@ -7895,7 +7922,7 @@ void md_check_recovery(struct mddev *mddev) ...@@ -7895,7 +7922,7 @@ void md_check_recovery(struct mddev *mddev)
if (mddev->pers->check_reshape == NULL || if (mddev->pers->check_reshape == NULL ||
mddev->pers->check_reshape(mddev) != 0) mddev->pers->check_reshape(mddev) != 0)
/* Cannot proceed */ /* Cannot proceed */
goto unlock; goto not_running;
set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery); set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
clear_bit(MD_RECOVERY_RECOVER, &mddev->recovery); clear_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
} else if ((spares = remove_and_add_spares(mddev, NULL))) { } else if ((spares = remove_and_add_spares(mddev, NULL))) {
...@@ -7908,7 +7935,7 @@ void md_check_recovery(struct mddev *mddev) ...@@ -7908,7 +7935,7 @@ void md_check_recovery(struct mddev *mddev)
clear_bit(MD_RECOVERY_RECOVER, &mddev->recovery); clear_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
} else if (!test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) } else if (!test_bit(MD_RECOVERY_SYNC, &mddev->recovery))
/* nothing to be done ... */ /* nothing to be done ... */
goto unlock; goto not_running;
if (mddev->pers->sync_request) { if (mddev->pers->sync_request) {
if (spares) { if (spares) {
...@@ -7918,27 +7945,11 @@ void md_check_recovery(struct mddev *mddev) ...@@ -7918,27 +7945,11 @@ void md_check_recovery(struct mddev *mddev)
*/ */
bitmap_write_all(mddev->bitmap); bitmap_write_all(mddev->bitmap);
} }
mddev->sync_thread = md_register_thread(md_do_sync, INIT_WORK(&mddev->del_work, md_start_sync);
mddev, queue_work(md_misc_wq, &mddev->del_work);
"resync"); goto unlock;
if (!mddev->sync_thread) {
printk(KERN_ERR "%s: could not start resync"
" thread...\n",
mdname(mddev));
/* leave the spares where they are, it shouldn't hurt */
clear_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
clear_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
clear_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
} else
md_wakeup_thread(mddev->sync_thread);
sysfs_notify_dirent_safe(mddev->sysfs_action);
md_new_event(mddev);
} }
unlock: not_running:
wake_up(&mddev->sb_wait);
if (!mddev->sync_thread) { if (!mddev->sync_thread) {
clear_bit(MD_RECOVERY_RUNNING, &mddev->recovery); clear_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
if (test_and_clear_bit(MD_RECOVERY_RECOVER, if (test_and_clear_bit(MD_RECOVERY_RECOVER,
...@@ -7946,6 +7957,8 @@ void md_check_recovery(struct mddev *mddev) ...@@ -7946,6 +7957,8 @@ void md_check_recovery(struct mddev *mddev)
if (mddev->sysfs_action) if (mddev->sysfs_action)
sysfs_notify_dirent_safe(mddev->sysfs_action); sysfs_notify_dirent_safe(mddev->sysfs_action);
} }
unlock:
wake_up(&mddev->sb_wait);
mddev_unlock(mddev); mddev_unlock(mddev);
} }
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment