Commit cb129263 authored by Neil Brown, committed by Linus Torvalds

[PATCH] md: Tidy up recovery_running flags in md

Md uses ->recovery_running and ->recovery_error to keep track of the
status of recovery.  This is rather ad hoc and race-prone.

This patch changes it to ->recovery, which has bit flags for the various
states.
parent 07b24141
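
For orientation, here is a small standalone C sketch (not kernel code) of the bit-flag scheme this patch introduces: a single unsigned long ->recovery word whose bits record the state of a resync/recovery, replacing the old recovery_running/recovery_error integers. The flag values mirror the patch; the set_flag()/test_flag() helpers below are stand-ins for the kernel's atomic set_bit()/test_bit(), and the printed messages are only illustrative.

/* Standalone illustration of the ->recovery bit flags (not kernel code). */
#include <stdio.h>

#define MD_RECOVERY_RUNNING 0
#define MD_RECOVERY_SYNC    1
#define MD_RECOVERY_ERR     2
#define MD_RECOVERY_INTR    3
#define MD_RECOVERY_DONE    4

/* stand-ins for the kernel's set_bit()/test_bit() */
static void set_flag(int nr, unsigned long *word)  { *word |= (1UL << nr); }
static int  test_flag(int nr, unsigned long *word) { return (*word >> nr) & 1UL; }

int main(void)
{
	unsigned long recovery = 0;	/* plays the role of mddev->recovery */
	int spares = 0;			/* no spares -> this will be a resync */

	/* monitor thread decides work is needed and starts the sync thread */
	set_flag(MD_RECOVERY_RUNNING, &recovery);
	if (!spares)
		set_flag(MD_RECOVERY_SYNC, &recovery);

	/* ... sync thread runs; a write error would set ERR,
	 * a requested clean shutdown sets INTR ... */
	set_flag(MD_RECOVERY_INTR, &recovery);

	/* sync thread finishes (cleanly or not) and announces it */
	set_flag(MD_RECOVERY_DONE, &recovery);

	/* monitor thread reaps the result */
	if (test_flag(MD_RECOVERY_DONE, &recovery)) {
		if (!test_flag(MD_RECOVERY_ERR, &recovery))
			printf("finished without IO errors: activate any spares\n");
		else
			printf("aborted on an IO error: leave spares alone\n");
		recovery = 0;	/* like mddev->recovery = 0 after reaping */
	}
	return 0;
}
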
@@ -1593,8 +1593,7 @@ static int do_md_stop(mddev_t * mddev, int ro)
 	if (mddev->pers) {
 		if (mddev->sync_thread) {
-			if (mddev->recovery_running > 0)
-				mddev->recovery_running = -1;
+			set_bit(MD_RECOVERY_INTR, &mddev->recovery);
 			md_unregister_thread(mddev->sync_thread);
 			mddev->sync_thread = NULL;
 		}
@@ -2663,7 +2662,8 @@ static void status_resync(struct seq_file *seq, mddev_t * mddev)
 		seq_printf(seq, "] ");
 	}
 	seq_printf(seq, " %s =%3lu.%lu%% (%lu/%lu)",
-		   (mddev->spares ? "recovery" : "resync"),
+		   (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) ?
+		    "resync" : "recovery"),
 		   res/10, res % 10, resync, max_blocks);

 	/*
@@ -2896,8 +2896,7 @@ void md_done_sync(mddev_t *mddev, int blocks, int ok)
 	atomic_sub(blocks, &mddev->recovery_active);
 	wake_up(&mddev->recovery_wait);
 	if (!ok) {
-		mddev->recovery_error = -EIO;
-		mddev->recovery_running = -1;
+		set_bit(MD_RECOVERY_ERR, &mddev->recovery);
 		md_recover_arrays();
 		// stop recovery, signal do_sync ....
 	}
@@ -2927,7 +2926,8 @@ static inline void md_enter_safemode(mddev_t *mddev)
 {
 	mddev_lock_uninterruptible(mddev);
-	if (mddev->safemode && !atomic_read(&mddev->writes_pending) && !mddev->in_sync && !mddev->recovery_running) {
+	if (mddev->safemode && !atomic_read(&mddev->writes_pending) &&
+	    !mddev->in_sync && mddev->recovery_cp == MaxSector) {
 		mddev->in_sync = 1;
 		md_update_sb(mddev);
 	}
@@ -2963,7 +2963,7 @@ static void md_do_sync(void *data)
 	unsigned long last_check;

 	/* just incase thread restarts... */
-	if (mddev->recovery_running <= 0)
+	if (test_bit(MD_RECOVERY_DONE, &mddev->recovery))
 		return;

 	/* we overload curr_resync somewhat here.
@@ -3031,8 +3031,6 @@ static void md_do_sync(void *data)
 	init_waitqueue_head(&mddev->recovery_wait);
 	last_check = 0;

-	mddev->recovery_error = 0;
-
 	if (mddev->recovery_cp)
 		printk(KERN_INFO "md: resuming recovery of md%d from checkpoint.\n", mdidx(mddev));
@@ -3053,6 +3051,10 @@ static void md_do_sync(void *data)
 			last_check = j;

+			if (test_bit(MD_RECOVERY_INTR, &mddev->recovery) ||
+			    test_bit(MD_RECOVERY_ERR, &mddev->recovery))
+				break;
+
 			blk_run_queues();
 	repeat:
@@ -3107,26 +3109,26 @@ static void md_do_sync(void *data)
 out:
 	wait_event(mddev->recovery_wait, !atomic_read(&mddev->recovery_active));

-	if (mddev->recovery_running < 0 &&
-	    !mddev->recovery_error && mddev->curr_resync > 2)
-	{
-		/* interrupted but no write errors */
-		printk(KERN_INFO "md: checkpointing recovery of md%d.\n", mdidx(mddev));
-		mddev->recovery_cp = mddev->curr_resync;
-	}
 	/* tell personality that we are finished */
 	mddev->pers->sync_request(mddev, max_sectors, 1);
-skip:
-	mddev->curr_resync = 0;
+
 	if (err)
-		mddev->recovery_running = -1;
-	if (mddev->recovery_running > 0)
-		mddev->recovery_running = 0;
-	if (mddev->recovery_running == 0)
-		mddev->recovery_cp = MaxSector;
+		set_bit(MD_RECOVERY_ERR, &mddev->recovery);
+	if (!test_bit(MD_RECOVERY_ERR, &mddev->recovery) &&
+	    mddev->curr_resync > 2 &&
+	    mddev->curr_resync > mddev->recovery_cp) {
+		if (test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
+			printk(KERN_INFO "md: checkpointing recovery of md%d.\n", mdidx(mddev));
+			mddev->recovery_cp = mddev->curr_resync;
+		} else
+			mddev->recovery_cp = MaxSector;
+	}
 	if (mddev->safemode)
 		md_enter_safemode(mddev);
+skip:
+	mddev->curr_resync = 0;
+	set_bit(MD_RECOVERY_DONE, &mddev->recovery);
 	md_recover_arrays();
 }
@@ -3136,10 +3138,10 @@ static void md_do_sync(void *data)
  * action that might be needed.
  * It does not do any resync itself, but rather "forks" off other threads
  * to do that as needed.
- * When it is determined that resync is needed, we set "->recovery_running" and
- * create a thread at ->sync_thread.
- * When the thread finishes it clears recovery_running (or sets an error)
- * and wakeup up this thread which will reap the thread and finish up.
+ * When it is determined that resync is needed, we set MD_RECOVERY_RUNNING in
+ * "->recovery" and create a thread at ->sync_thread.
+ * When the thread finishes it sets MD_RECOVERY_DONE (and might set MD_RECOVERY_ERR)
+ * and wakeups up this thread which will reap the thread and finish up.
  * This thread also removes any faulty devices (with nr_pending == 0).
  *
  * The overall approach is:
@@ -3160,31 +3162,32 @@ void md_do_recovery(void *data)
 	dprintk(KERN_INFO "md: recovery thread got woken up ...\n");

 	ITERATE_MDDEV(mddev,tmp) if (mddev_lock(mddev)==0) {
+		int spares =0;
 		if (!mddev->raid_disks || !mddev->pers || mddev->ro)
 			goto unlock;
 		if (mddev->sb_dirty)
 			md_update_sb(mddev);
-		if (mddev->recovery_running > 0)
+		if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) &&
+		    !test_bit(MD_RECOVERY_DONE, &mddev->recovery))
 			/* resync/recovery still happening */
 			goto unlock;
 		if (mddev->sync_thread) {
 			/* resync has finished, collect result */
 			md_unregister_thread(mddev->sync_thread);
 			mddev->sync_thread = NULL;
-			if (mddev->recovery_running == 0) {
+			if (!test_bit(MD_RECOVERY_ERR, &mddev->recovery)) {
 				/* success...*/
 				/* activate any spares */
 				mddev->pers->spare_active(mddev);
-				mddev->spares = 0;
 			}
 			md_update_sb(mddev);
-			mddev->recovery_running = 0;
+			mddev->recovery = 0;
 			wake_up(&resync_wait);
 			goto unlock;
 		}
-		if (mddev->recovery_running) {
+		if (mddev->recovery) {
 			/* that's odd.. */
-			mddev->recovery_running = 0;
+			mddev->recovery = 0;
 			wake_up(&resync_wait);
 		}

@@ -3192,7 +3195,6 @@ void md_do_recovery(void *data)
 		 * remove any failed drives, then
 		 * add spares if possible
 		 */
-		mddev->spares = 0;
 		ITERATE_RDEV(mddev,rdev,rtmp) {
 			if (rdev->raid_disk >= 0 &&
 			    rdev->faulty &&
@@ -3201,35 +3203,35 @@ void md_do_recovery(void *data)
 				rdev->raid_disk = -1;
 			}
 			if (!rdev->faulty && rdev->raid_disk >= 0 && !rdev->in_sync)
-				mddev->spares++;
+				spares++;
 		}
 		if (mddev->degraded) {
 			ITERATE_RDEV(mddev,rdev,rtmp)
 				if (rdev->raid_disk < 0
 				    && !rdev->faulty) {
-					if (mddev->pers->hot_add_disk(mddev,rdev)) {
-						mddev->spares++;
-						mddev->recovery_cp = 0;
-					}
+					if (mddev->pers->hot_add_disk(mddev,rdev))
+						spares++;
 					else
 						break;
 				}
 		}

-		if (!mddev->spares && (mddev->recovery_cp == MaxSector )) {
+		if (!spares && (mddev->recovery_cp == MaxSector )) {
 			/* nothing we can do ... */
 			goto unlock;
 		}
 		if (mddev->pers->sync_request) {
+			set_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
+			if (!spares)
+				set_bit(MD_RECOVERY_SYNC, &mddev->recovery);
 			mddev->sync_thread = md_register_thread(md_do_sync,
 								mddev,
 								"md_resync");
 			if (!mddev->sync_thread) {
 				printk(KERN_ERR "md%d: could not start resync thread...\n", mdidx(mddev));
 				/* leave the spares where they are, it shouldn't hurt */
-				mddev->recovery_running = 0;
+				mddev->recovery = 0;
 			} else {
-				mddev->recovery_running = 1;
 				md_wakeup_thread(mddev->sync_thread);
 			}
 		}
...
@@ -623,10 +623,9 @@ static void error(mddev_t *mddev, mdk_rdev_t *rdev)
 		mddev->degraded++;
 		conf->working_disks--;
 		/*
-		 * if recovery was running, stop it now.
+		 * if recovery is running, make sure it aborts.
 		 */
-		if (mddev->recovery_running)
-			mddev->recovery_running = -EIO;
+		set_bit(MD_RECOVERY_ERR, &mddev->recovery);
 	}
 	rdev->in_sync = 0;
 	rdev->faulty = 1;
...
@@ -463,10 +463,9 @@ static void error(mddev_t *mddev, mdk_rdev_t *rdev)
 		conf->failed_disks++;
 		rdev->in_sync = 0;
 		/*
-		 * if recovery was running, stop it now.
+		 * if recovery was running, make sure it aborts.
 		 */
-		if (mddev->recovery_running)
-			mddev->recovery_running = -EIO;
+		set_bit(MD_RECOVERY_ERR, &mddev->recovery);
 	}
 	rdev->faulty = 1;
 	printk (KERN_ALERT
...
@@ -210,18 +210,24 @@ struct mddev_s
 	unsigned long			curr_resync;	/* blocks scheduled */
 	unsigned long			resync_mark;	/* a recent timestamp */
 	unsigned long			resync_mark_cnt;/* blocks written at resync_mark */
-	/* recovery_running is 0 for no recovery/resync,
-	 * 1 for active recovery
-	 * 2 for active resync
-	 * -error for an error (e.g. -EINTR)
-	 * it can only be set > 0 under reconfig_sem
+
+	/* recovery/resync flags
+	 * RUNNING: a thread is running, or about to be started
+	 * SYNC: actually doing a resync, not a recovery
+	 * ERR: and IO error was detected - abort the resync/recovery
+	 * INTR: someone requested a (clean) early abort.
+	 * DONE: thread is done and is waiting to be reaped
 	 */
-	int				recovery_running;
-	int				recovery_error;	/* error from recovery write */
+#define MD_RECOVERY_RUNNING	0
+#define MD_RECOVERY_SYNC	1
+#define MD_RECOVERY_ERR		2
+#define MD_RECOVERY_INTR	3
+#define MD_RECOVERY_DONE	4
+	unsigned long			recovery;

 	int				in_sync;	/* know to not need resync */
 	struct semaphore		reconfig_sem;
 	atomic_t			active;
-	int				spares;

 	int				degraded;	/* whether md should consider
 							 * adding a spare
...
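
As a reading aid for the md_do_sync hunk above, the checkpointing decision the sync thread now makes when it winds down can be re-expressed as a pure function. This is a sketch under assumed names (finish_sync() is invented for illustration, MaxSector here is just a stand-in constant), not kernel code; the branch structure mirrors the new code in the patch.

/* Sketch of the end-of-sync decision: what should recovery_cp become? */
#include <stdio.h>

#define MD_RECOVERY_ERR  2
#define MD_RECOVERY_INTR 3

#define MaxSector (~0UL)	/* stand-in for the kernel constant */

static int test_flag(int nr, unsigned long word) { return (word >> nr) & 1UL; }

/* Returns the new value for mddev->recovery_cp, given the flags in
 * ->recovery, how far the sync got (curr_resync) and the old checkpoint. */
static unsigned long finish_sync(unsigned long recovery,
				 unsigned long curr_resync,
				 unsigned long recovery_cp)
{
	if (!test_flag(MD_RECOVERY_ERR, recovery) &&
	    curr_resync > 2 && curr_resync > recovery_cp) {
		if (test_flag(MD_RECOVERY_INTR, recovery))
			return curr_resync;	/* clean interruption: checkpoint here */
		return MaxSector;		/* ran to completion: nothing left to redo */
	}
	return recovery_cp;			/* IO error (or nothing done): keep old checkpoint */
}

int main(void)
{
	/* interrupted cleanly at block 1000, old checkpoint 0 -> 1000 */
	printf("%lu\n", finish_sync(1UL << MD_RECOVERY_INTR, 1000, 0));
	/* completed without error -> MaxSector */
	printf("%lu\n", finish_sync(0, 1000, 0));
	/* IO error -> old checkpoint (500) is kept for a later retry */
	printf("%lu\n", finish_sync(1UL << MD_RECOVERY_ERR, 1000, 500));
	return 0;
}
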