Commit 7c4e06ff authored by NeilBrown

md/raid10: some tidying up in fix_read_error

Currently the rdev on which a read error happened could be removed
before we perform the fix_read_error handling.  This requires extra tests
for NULL.

So delay the rdev_dec_pending call until after the call to
fix_read_error so that we can be sure that the rdev still exists.

This allows an 'if' clause to be removed so the body gets re-indented
back one level.
Signed-off-by: NeilBrown <neilb@suse.de>
parent af6d7b76
...@@ -271,9 +271,10 @@ static void raid10_end_read_request(struct bio *bio, int error) ...@@ -271,9 +271,10 @@ static void raid10_end_read_request(struct bio *bio, int error)
*/ */
set_bit(R10BIO_Uptodate, &r10_bio->state); set_bit(R10BIO_Uptodate, &r10_bio->state);
raid_end_bio_io(r10_bio); raid_end_bio_io(r10_bio);
rdev_dec_pending(conf->mirrors[dev].rdev, conf->mddev);
} else { } else {
/* /*
* oops, read error: * oops, read error - keep the refcount on the rdev
*/ */
char b[BDEVNAME_SIZE]; char b[BDEVNAME_SIZE];
if (printk_ratelimit()) if (printk_ratelimit())
...@@ -282,8 +283,6 @@ static void raid10_end_read_request(struct bio *bio, int error) ...@@ -282,8 +283,6 @@ static void raid10_end_read_request(struct bio *bio, int error)
bdevname(conf->mirrors[dev].rdev->bdev,b), (unsigned long long)r10_bio->sector); bdevname(conf->mirrors[dev].rdev->bdev,b), (unsigned long long)r10_bio->sector);
reschedule_retry(r10_bio); reschedule_retry(r10_bio);
} }
rdev_dec_pending(conf->mirrors[dev].rdev, conf->mddev);
} }
static void raid10_end_write_request(struct bio *bio, int error) static void raid10_end_write_request(struct bio *bio, int error)
...@@ -1438,40 +1437,33 @@ static void fix_read_error(conf_t *conf, mddev_t *mddev, r10bio_t *r10_bio) ...@@ -1438,40 +1437,33 @@ static void fix_read_error(conf_t *conf, mddev_t *mddev, r10bio_t *r10_bio)
int max_read_errors = atomic_read(&mddev->max_corr_read_errors); int max_read_errors = atomic_read(&mddev->max_corr_read_errors);
int d = r10_bio->devs[r10_bio->read_slot].devnum; int d = r10_bio->devs[r10_bio->read_slot].devnum;
rcu_read_lock(); /* still own a reference to this rdev, so it cannot
rdev = rcu_dereference(conf->mirrors[d].rdev); * have been cleared recently.
if (rdev) { /* If rdev is not NULL */ */
char b[BDEVNAME_SIZE]; rdev = conf->mirrors[d].rdev;
int cur_read_error_count = 0;
bdevname(rdev->bdev, b); if (test_bit(Faulty, &rdev->flags))
/* drive has already been failed, just ignore any
more fix_read_error() attempts */
return;
if (test_bit(Faulty, &rdev->flags)) { check_decay_read_errors(mddev, rdev);
rcu_read_unlock(); atomic_inc(&rdev->read_errors);
/* drive has already been failed, just ignore any if (atomic_read(&rdev->read_errors) > max_read_errors) {
more fix_read_error() attempts */ char b[BDEVNAME_SIZE];
return; bdevname(rdev->bdev, b);
}
check_decay_read_errors(mddev, rdev); printk(KERN_NOTICE
atomic_inc(&rdev->read_errors); "md/raid10:%s: %s: Raid device exceeded "
cur_read_error_count = atomic_read(&rdev->read_errors); "read_error threshold [cur %d:max %d]\n",
if (cur_read_error_count > max_read_errors) { mdname(mddev), b,
rcu_read_unlock(); atomic_read(&rdev->read_errors), max_read_errors);
printk(KERN_NOTICE printk(KERN_NOTICE
"md/raid10:%s: %s: Raid device exceeded " "md/raid10:%s: %s: Failing raid device\n",
"read_error threshold " mdname(mddev), b);
"[cur %d:max %d]\n", md_error(mddev, conf->mirrors[d].rdev);
mdname(mddev), return;
b, cur_read_error_count, max_read_errors);
printk(KERN_NOTICE
"md/raid10:%s: %s: Failing raid "
"device\n", mdname(mddev), b);
md_error(mddev, conf->mirrors[d].rdev);
return;
}
} }
rcu_read_unlock();
while(sectors) { while(sectors) {
int s = sectors; int s = sectors;
...@@ -1540,8 +1532,8 @@ static void fix_read_error(conf_t *conf, mddev_t *mddev, r10bio_t *r10_bio) ...@@ -1540,8 +1532,8 @@ static void fix_read_error(conf_t *conf, mddev_t *mddev, r10bio_t *r10_bio)
"write failed" "write failed"
" (%d sectors at %llu on %s)\n", " (%d sectors at %llu on %s)\n",
mdname(mddev), s, mdname(mddev), s,
(unsigned long long)(sect+ (unsigned long long)(
rdev->data_offset), sect + rdev->data_offset),
bdevname(rdev->bdev, b)); bdevname(rdev->bdev, b));
printk(KERN_NOTICE "md/raid10:%s: %s: failing " printk(KERN_NOTICE "md/raid10:%s: %s: failing "
"drive\n", "drive\n",
...@@ -1577,8 +1569,8 @@ static void fix_read_error(conf_t *conf, mddev_t *mddev, r10bio_t *r10_bio) ...@@ -1577,8 +1569,8 @@ static void fix_read_error(conf_t *conf, mddev_t *mddev, r10bio_t *r10_bio)
"corrected sectors" "corrected sectors"
" (%d sectors at %llu on %s)\n", " (%d sectors at %llu on %s)\n",
mdname(mddev), s, mdname(mddev), s,
(unsigned long long)(sect+ (unsigned long long)(
rdev->data_offset), sect + rdev->data_offset),
bdevname(rdev->bdev, b)); bdevname(rdev->bdev, b));
printk(KERN_NOTICE "md/raid10:%s: %s: failing drive\n", printk(KERN_NOTICE "md/raid10:%s: %s: failing drive\n",
mdname(mddev), mdname(mddev),
...@@ -1590,8 +1582,8 @@ static void fix_read_error(conf_t *conf, mddev_t *mddev, r10bio_t *r10_bio) ...@@ -1590,8 +1582,8 @@ static void fix_read_error(conf_t *conf, mddev_t *mddev, r10bio_t *r10_bio)
"md/raid10:%s: read error corrected" "md/raid10:%s: read error corrected"
" (%d sectors at %llu on %s)\n", " (%d sectors at %llu on %s)\n",
mdname(mddev), s, mdname(mddev), s,
(unsigned long long)(sect+ (unsigned long long)(
rdev->data_offset), sect + rdev->data_offset),
bdevname(rdev->bdev, b)); bdevname(rdev->bdev, b));
} }
...@@ -1641,7 +1633,8 @@ static void raid10d(mddev_t *mddev) ...@@ -1641,7 +1633,8 @@ static void raid10d(mddev_t *mddev)
else if (test_bit(R10BIO_IsRecover, &r10_bio->state)) else if (test_bit(R10BIO_IsRecover, &r10_bio->state))
recovery_request_write(mddev, r10_bio); recovery_request_write(mddev, r10_bio);
else { else {
int mirror; int slot = r10_bio->read_slot;
int mirror = r10_bio->devs[slot].devnum;
/* we got a read error. Maybe the drive is bad. Maybe just /* we got a read error. Maybe the drive is bad. Maybe just
* the block and we can fix it. * the block and we can fix it.
* We freeze all other IO, and try reading the block from * We freeze all other IO, and try reading the block from
...@@ -1655,6 +1648,7 @@ static void raid10d(mddev_t *mddev) ...@@ -1655,6 +1648,7 @@ static void raid10d(mddev_t *mddev)
fix_read_error(conf, mddev, r10_bio); fix_read_error(conf, mddev, r10_bio);
unfreeze_array(conf); unfreeze_array(conf);
} }
rdev_dec_pending(conf->mirrors[mirror].rdev, mddev);
bio = r10_bio->devs[r10_bio->read_slot].bio; bio = r10_bio->devs[r10_bio->read_slot].bio;
r10_bio->devs[r10_bio->read_slot].bio = r10_bio->devs[r10_bio->read_slot].bio =
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment