Commit d8f05d29 authored by NeilBrown

md/raid1: record badblocks found during resync etc.

If we find a bad block while writing as part of resync/recovery we
need to report that back to raid1d which must record the bad block,
or fail the device.

Similarly when fixing a read error, a further error should just
record a bad block if possible rather than failing the device.
Signed-off-by: NeilBrown <neilb@suse.de>
Reviewed-by: Namhyung Kim <namhyung@gmail.com>
parent cd5ff9a1
...@@ -1386,7 +1386,9 @@ static void end_sync_write(struct bio *bio, int error) ...@@ -1386,7 +1386,9 @@ static void end_sync_write(struct bio *bio, int error)
s += sync_blocks; s += sync_blocks;
sectors_to_go -= sync_blocks; sectors_to_go -= sync_blocks;
} while (sectors_to_go > 0); } while (sectors_to_go > 0);
md_error(mddev, conf->mirrors[mirror].rdev); set_bit(WriteErrorSeen,
&conf->mirrors[mirror].rdev->flags);
set_bit(R1BIO_WriteError, &r1_bio->state);
} else if (is_badblock(conf->mirrors[mirror].rdev, } else if (is_badblock(conf->mirrors[mirror].rdev,
r1_bio->sector, r1_bio->sector,
r1_bio->sectors, r1_bio->sectors,
...@@ -1397,7 +1399,8 @@ static void end_sync_write(struct bio *bio, int error) ...@@ -1397,7 +1399,8 @@ static void end_sync_write(struct bio *bio, int error)
if (atomic_dec_and_test(&r1_bio->remaining)) { if (atomic_dec_and_test(&r1_bio->remaining)) {
int s = r1_bio->sectors; int s = r1_bio->sectors;
if (test_bit(R1BIO_MadeGood, &r1_bio->state)) if (test_bit(R1BIO_MadeGood, &r1_bio->state) ||
test_bit(R1BIO_WriteError, &r1_bio->state))
reschedule_retry(r1_bio); reschedule_retry(r1_bio);
else { else {
put_buf(r1_bio); put_buf(r1_bio);
...@@ -1406,6 +1409,20 @@ static void end_sync_write(struct bio *bio, int error) ...@@ -1406,6 +1409,20 @@ static void end_sync_write(struct bio *bio, int error)
} }
} }
/*
 * r1_sync_page_io - synchronous page I/O that records any failure.
 * @rdev:    device the I/O is issued against
 * @sector:  starting sector, handed to sync_page_io() unchanged
 * @sectors: length in sectors; sync_page_io() receives sectors << 9
 * @page:    page passed through to sync_page_io()
 * @rw:      READ or WRITE
 *
 * Returns 1 when sync_page_io() reports success, 0 otherwise.
 *
 * A failure is never left unrecorded: a failed WRITE first sets
 * WriteErrorSeen in rdev->flags, then the range is offered to
 * rdev_set_badblocks(); if that call returns 0 the device itself is
 * handed to md_error().
 */
static int r1_sync_page_io(mdk_rdev_t *rdev, sector_t sector,
			   int sectors, struct page *page, int rw)
{
	if (!sync_page_io(rdev, sector, sectors << 9, page, rw, false)) {
		if (rw == WRITE)
			set_bit(WriteErrorSeen, &rdev->flags);

		/* Record the error against the block range, else the device. */
		if (rdev_set_badblocks(rdev, sector, sectors, 0) == 0)
			md_error(rdev->mddev, rdev);

		return 0;
	}

	/* success */
	return 1;
}
static int fix_sync_read_error(r1bio_t *r1_bio) static int fix_sync_read_error(r1bio_t *r1_bio)
{ {
/* Try some synchronous reads of other devices to get /* Try some synchronous reads of other devices to get
...@@ -1477,12 +1494,11 @@ static int fix_sync_read_error(r1bio_t *r1_bio) ...@@ -1477,12 +1494,11 @@ static int fix_sync_read_error(r1bio_t *r1_bio)
if (r1_bio->bios[d]->bi_end_io != end_sync_read) if (r1_bio->bios[d]->bi_end_io != end_sync_read)
continue; continue;
rdev = conf->mirrors[d].rdev; rdev = conf->mirrors[d].rdev;
if (sync_page_io(rdev, sect, s<<9, if (r1_sync_page_io(rdev, sect, s,
bio->bi_io_vec[idx].bv_page, bio->bi_io_vec[idx].bv_page,
WRITE, false) == 0) { WRITE) == 0) {
r1_bio->bios[d]->bi_end_io = NULL; r1_bio->bios[d]->bi_end_io = NULL;
rdev_dec_pending(rdev, mddev); rdev_dec_pending(rdev, mddev);
md_error(mddev, rdev);
} }
} }
d = start; d = start;
...@@ -1493,11 +1509,9 @@ static int fix_sync_read_error(r1bio_t *r1_bio) ...@@ -1493,11 +1509,9 @@ static int fix_sync_read_error(r1bio_t *r1_bio)
if (r1_bio->bios[d]->bi_end_io != end_sync_read) if (r1_bio->bios[d]->bi_end_io != end_sync_read)
continue; continue;
rdev = conf->mirrors[d].rdev; rdev = conf->mirrors[d].rdev;
if (sync_page_io(rdev, sect, s<<9, if (r1_sync_page_io(rdev, sect, s,
bio->bi_io_vec[idx].bv_page, bio->bi_io_vec[idx].bv_page,
READ, false) == 0) READ) != 0)
md_error(mddev, rdev);
else
atomic_add(s, &rdev->corrected_errors); atomic_add(s, &rdev->corrected_errors);
} }
sectors -= s; sectors -= s;
...@@ -1682,8 +1696,10 @@ static void fix_read_error(conf_t *conf, int read_disk, ...@@ -1682,8 +1696,10 @@ static void fix_read_error(conf_t *conf, int read_disk,
} while (!success && d != read_disk); } while (!success && d != read_disk);
if (!success) { if (!success) {
/* Cannot read from anywhere -- bye bye array */ /* Cannot read from anywhere - mark it bad */
md_error(mddev, conf->mirrors[read_disk].rdev); mdk_rdev_t *rdev = conf->mirrors[read_disk].rdev;
if (!rdev_set_badblocks(rdev, sect, s, 0))
md_error(mddev, rdev);
break; break;
} }
/* write it back and re-read */ /* write it back and re-read */
...@@ -1694,13 +1710,9 @@ static void fix_read_error(conf_t *conf, int read_disk, ...@@ -1694,13 +1710,9 @@ static void fix_read_error(conf_t *conf, int read_disk,
d--; d--;
rdev = conf->mirrors[d].rdev; rdev = conf->mirrors[d].rdev;
if (rdev && if (rdev &&
test_bit(In_sync, &rdev->flags)) { test_bit(In_sync, &rdev->flags))
if (sync_page_io(rdev, sect, s<<9, r1_sync_page_io(rdev, sect, s,
conf->tmppage, WRITE, false) conf->tmppage, WRITE);
== 0)
/* Well, this device is dead */
md_error(mddev, rdev);
}
} }
d = start; d = start;
while (d != read_disk) { while (d != read_disk) {
...@@ -1711,12 +1723,8 @@ static void fix_read_error(conf_t *conf, int read_disk, ...@@ -1711,12 +1723,8 @@ static void fix_read_error(conf_t *conf, int read_disk,
rdev = conf->mirrors[d].rdev; rdev = conf->mirrors[d].rdev;
if (rdev && if (rdev &&
test_bit(In_sync, &rdev->flags)) { test_bit(In_sync, &rdev->flags)) {
if (sync_page_io(rdev, sect, s<<9, if (r1_sync_page_io(rdev, sect, s,
conf->tmppage, READ, false) conf->tmppage, READ)) {
== 0)
/* Well, this device is dead */
md_error(mddev, rdev);
else {
atomic_add(s, &rdev->corrected_errors); atomic_add(s, &rdev->corrected_errors);
printk(KERN_INFO printk(KERN_INFO
"md/raid1:%s: read error corrected " "md/raid1:%s: read error corrected "
...@@ -1860,20 +1868,33 @@ static void raid1d(mddev_t *mddev) ...@@ -1860,20 +1868,33 @@ static void raid1d(mddev_t *mddev)
mddev = r1_bio->mddev; mddev = r1_bio->mddev;
conf = mddev->private; conf = mddev->private;
if (test_bit(R1BIO_IsSync, &r1_bio->state)) { if (test_bit(R1BIO_IsSync, &r1_bio->state)) {
if (test_bit(R1BIO_MadeGood, &r1_bio->state)) { if (test_bit(R1BIO_MadeGood, &r1_bio->state) ||
test_bit(R1BIO_WriteError, &r1_bio->state)) {
int m; int m;
int s = r1_bio->sectors; int s = r1_bio->sectors;
for (m = 0; m < conf->raid_disks ; m++) { for (m = 0; m < conf->raid_disks ; m++) {
mdk_rdev_t *rdev
= conf->mirrors[m].rdev;
struct bio *bio = r1_bio->bios[m]; struct bio *bio = r1_bio->bios[m];
if (bio->bi_end_io != NULL && if (bio->bi_end_io == NULL)
test_bit(BIO_UPTODATE, continue;
if (test_bit(BIO_UPTODATE,
&bio->bi_flags)) { &bio->bi_flags)) {
rdev = conf->mirrors[m].rdev;
rdev_clear_badblocks( rdev_clear_badblocks(
rdev, rdev,
r1_bio->sector, r1_bio->sector,
r1_bio->sectors); r1_bio->sectors);
} }
if (!test_bit(BIO_UPTODATE,
&bio->bi_flags) &&
test_bit(R1BIO_WriteError,
&r1_bio->state)) {
if (!rdev_set_badblocks(
rdev,
r1_bio->sector,
r1_bio->sectors, 0))
md_error(mddev, rdev);
}
} }
put_buf(r1_bio); put_buf(r1_bio);
md_done_sync(mddev, s, 1); md_done_sync(mddev, s, 1);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment