Commit 635f6416 authored by NeilBrown's avatar NeilBrown

md/raid10: locking changes for 'enough()'.

As 'enough' accesses conf->prev and conf->geo, which can change
spontanously, it should guard against changes.
This can be done with device_lock as start_reshape holds device_lock
while updating 'geo' and end_reshape holds it while updating 'prev'.

So 'error' needs to hold 'device_lock'.

On the other hand, raid10_end_read_request knows which of the two it
really wants to access, and as it is an active request on that one,
the value cannot change underneath it.

So change _enough to take flag rather than a pointer, pass the
appropriate flag from raid10_end_read_request(), and remove the locking.

All other calls to 'enough' are made with reconfig_mutex held, so
neither 'prev' nor 'geo' can change.
Signed-off-by: default avatarNeilBrown <neilb@suse.de>
parent b29bebd6
...@@ -97,7 +97,7 @@ static int max_queued_requests = 1024; ...@@ -97,7 +97,7 @@ static int max_queued_requests = 1024;
static void allow_barrier(struct r10conf *conf); static void allow_barrier(struct r10conf *conf);
static void lower_barrier(struct r10conf *conf); static void lower_barrier(struct r10conf *conf);
static int enough(struct r10conf *conf, int ignore); static int _enough(struct r10conf *conf, int previous, int ignore);
static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr, static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr,
int *skipped); int *skipped);
static void reshape_request_write(struct mddev *mddev, struct r10bio *r10_bio); static void reshape_request_write(struct mddev *mddev, struct r10bio *r10_bio);
...@@ -392,11 +392,9 @@ static void raid10_end_read_request(struct bio *bio, int error) ...@@ -392,11 +392,9 @@ static void raid10_end_read_request(struct bio *bio, int error)
* than fail the last device. Here we redefine * than fail the last device. Here we redefine
* "uptodate" to mean "Don't want to retry" * "uptodate" to mean "Don't want to retry"
*/ */
unsigned long flags; if (!_enough(conf, test_bit(R10BIO_Previous, &r10_bio->state),
spin_lock_irqsave(&conf->device_lock, flags); rdev->raid_disk))
if (!enough(conf, rdev->raid_disk))
uptodate = 1; uptodate = 1;
spin_unlock_irqrestore(&conf->device_lock, flags);
} }
if (uptodate) { if (uptodate) {
raid_end_bio_io(r10_bio); raid_end_bio_io(r10_bio);
...@@ -1632,9 +1630,17 @@ static void status(struct seq_file *seq, struct mddev *mddev) ...@@ -1632,9 +1630,17 @@ static void status(struct seq_file *seq, struct mddev *mddev)
* Don't consider the device numbered 'ignore' * Don't consider the device numbered 'ignore'
* as we might be about to remove it. * as we might be about to remove it.
*/ */
static int _enough(struct r10conf *conf, struct geom *geo, int ignore) static int _enough(struct r10conf *conf, int previous, int ignore)
{ {
int first = 0; int first = 0;
int disks, ncopies;
if (previous) {
disks = conf->prev.raid_disks;
ncopies = conf->prev.near_copies;
} else {
disks = conf->geo.raid_disks;
ncopies = conf->geo.near_copies;
}
do { do {
int n = conf->copies; int n = conf->copies;
...@@ -1644,25 +1650,31 @@ static int _enough(struct r10conf *conf, struct geom *geo, int ignore) ...@@ -1644,25 +1650,31 @@ static int _enough(struct r10conf *conf, struct geom *geo, int ignore)
if (conf->mirrors[this].rdev && if (conf->mirrors[this].rdev &&
this != ignore) this != ignore)
cnt++; cnt++;
this = (this+1) % geo->raid_disks; this = (this+1) % disks;
} }
if (cnt == 0) if (cnt == 0)
return 0; return 0;
first = (first + geo->near_copies) % geo->raid_disks; first = (first + ncopies) % disks;
} while (first != 0); } while (first != 0);
return 1; return 1;
} }
static int enough(struct r10conf *conf, int ignore) static int enough(struct r10conf *conf, int ignore)
{ {
return _enough(conf, &conf->geo, ignore) && /* when calling 'enough', both 'prev' and 'geo' must
_enough(conf, &conf->prev, ignore); * be stable.
* This is ensured if ->reconfig_mutex or ->device_lock
* is held.
*/
return _enough(conf, 0, ignore) &&
_enough(conf, 1, ignore);
} }
static void error(struct mddev *mddev, struct md_rdev *rdev) static void error(struct mddev *mddev, struct md_rdev *rdev)
{ {
char b[BDEVNAME_SIZE]; char b[BDEVNAME_SIZE];
struct r10conf *conf = mddev->private; struct r10conf *conf = mddev->private;
unsigned long flags;
/* /*
* If it is not operational, then we have already marked it as dead * If it is not operational, then we have already marked it as dead
...@@ -1670,18 +1682,18 @@ static void error(struct mddev *mddev, struct md_rdev *rdev) ...@@ -1670,18 +1682,18 @@ static void error(struct mddev *mddev, struct md_rdev *rdev)
* next level up know. * next level up know.
* else mark the drive as failed * else mark the drive as failed
*/ */
spin_lock_irqsave(&conf->device_lock, flags);
if (test_bit(In_sync, &rdev->flags) if (test_bit(In_sync, &rdev->flags)
&& !enough(conf, rdev->raid_disk)) && !enough(conf, rdev->raid_disk)) {
/* /*
* Don't fail the drive, just return an IO error. * Don't fail the drive, just return an IO error.
*/ */
spin_unlock_irqrestore(&conf->device_lock, flags);
return; return;
}
if (test_and_clear_bit(In_sync, &rdev->flags)) { if (test_and_clear_bit(In_sync, &rdev->flags)) {
unsigned long flags;
spin_lock_irqsave(&conf->device_lock, flags);
mddev->degraded++; mddev->degraded++;
spin_unlock_irqrestore(&conf->device_lock, flags); /*
/*
* if recovery is running, make sure it aborts. * if recovery is running, make sure it aborts.
*/ */
set_bit(MD_RECOVERY_INTR, &mddev->recovery); set_bit(MD_RECOVERY_INTR, &mddev->recovery);
...@@ -1689,6 +1701,7 @@ static void error(struct mddev *mddev, struct md_rdev *rdev) ...@@ -1689,6 +1701,7 @@ static void error(struct mddev *mddev, struct md_rdev *rdev)
set_bit(Blocked, &rdev->flags); set_bit(Blocked, &rdev->flags);
set_bit(Faulty, &rdev->flags); set_bit(Faulty, &rdev->flags);
set_bit(MD_CHANGE_DEVS, &mddev->flags); set_bit(MD_CHANGE_DEVS, &mddev->flags);
spin_unlock_irqrestore(&conf->device_lock, flags);
printk(KERN_ALERT printk(KERN_ALERT
"md/raid10:%s: Disk failure on %s, disabling device.\n" "md/raid10:%s: Disk failure on %s, disabling device.\n"
"md/raid10:%s: Operation continuing on %d devices.\n", "md/raid10:%s: Operation continuing on %d devices.\n",
...@@ -1791,7 +1804,7 @@ static int raid10_add_disk(struct mddev *mddev, struct md_rdev *rdev) ...@@ -1791,7 +1804,7 @@ static int raid10_add_disk(struct mddev *mddev, struct md_rdev *rdev)
* very different from resync * very different from resync
*/ */
return -EBUSY; return -EBUSY;
if (rdev->saved_raid_disk < 0 && !_enough(conf, &conf->prev, -1)) if (rdev->saved_raid_disk < 0 && !_enough(conf, 1, -1))
return -EINVAL; return -EINVAL;
if (rdev->raid_disk >= 0) if (rdev->raid_disk >= 0)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment