Commit c7333972 authored by Omar Sandoval, committed by David Sterba

btrfs: look at full bi_io_vec for repair decision

Read repair does two things: it finds a good copy of data to return to
the reader, and it corrects the bad copy on disk. If a read of multiple
sectors has an I/O error, repair does an extra "validation" step that
issues a separate read for each sector. This allows us to find the exact
failing sectors and only rewrite those.

This heuristic is implemented in
bio_readpage_error()/btrfs_check_repairable() as:

	failed_bio_pages = failed_bio->bi_iter.bi_size >> PAGE_SHIFT;
	if (failed_bio_pages > 1)
		do validation

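However, at this point, bi_iter may have already been advanced, so
bi_iter.bi_size no longer reflects the original size of the read and the
computed page count can be too small. This means that we'll skip the
validation step and rewrite the entire failed read.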

Fix it by getting the actual size from the biovec (which we can do
because this is only called for non-cloned bios, although that will
change in a later commit).

Fixes: 8a2ee44a ("btrfs: look at bi_size for repair decisions")
Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Signed-off-by: Omar Sandoval <osandov@fb.com>
Signed-off-by: David Sterba <dsterba@suse.com>
parent c36cac28
......@@ -2537,8 +2537,9 @@ int btrfs_get_io_failure_record(struct inode *inode, u64 start, u64 end,
return 0;
}
bool btrfs_check_repairable(struct inode *inode, unsigned failed_bio_pages,
struct io_failure_record *failrec, int failed_mirror)
bool btrfs_check_repairable(struct inode *inode, bool needs_validation,
struct io_failure_record *failrec,
int failed_mirror)
{
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
int num_copies;
......@@ -2561,7 +2562,7 @@ bool btrfs_check_repairable(struct inode *inode, unsigned failed_bio_pages,
* a) deliver good data to the caller
* b) correct the bad sectors on disk
*/
if (failed_bio_pages > 1) {
if (needs_validation) {
/*
* to fulfill b), we need to know the exact failing sectors, as
* we don't want to rewrite any more than the failed ones. thus,
......@@ -2633,6 +2634,24 @@ struct bio *btrfs_create_repair_bio(struct inode *inode, struct bio *failed_bio,
return bio;
}
static bool btrfs_io_needs_validation(struct inode *inode, struct bio *bio)
{
struct bio_vec *bvec;
u64 len = 0;
int i;
/*
* We need to validate each sector individually if the failed I/O was
* for multiple sectors.
*/
bio_for_each_bvec_all(bvec, bio, i) {
len += bvec->bv_len;
if (len > inode->i_sb->s_blocksize)
return true;
}
return false;
}
/*
* This is a generic handler for readpage errors. If other copies exist, read
* those and write back good data to the failed position. Does not investigate
......@@ -2647,11 +2666,11 @@ static int bio_readpage_error(struct bio *failed_bio, u64 phy_offset,
struct inode *inode = page->mapping->host;
struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree;
struct extent_io_tree *failure_tree = &BTRFS_I(inode)->io_failure_tree;
bool need_validation;
struct bio *bio;
int read_mode = 0;
blk_status_t status;
int ret;
unsigned failed_bio_pages = failed_bio->bi_iter.bi_size >> PAGE_SHIFT;
BUG_ON(bio_op(failed_bio) == REQ_OP_WRITE);
......@@ -2659,13 +2678,15 @@ static int bio_readpage_error(struct bio *failed_bio, u64 phy_offset,
if (ret)
return ret;
if (!btrfs_check_repairable(inode, failed_bio_pages, failrec,
need_validation = btrfs_io_needs_validation(inode, failed_bio);
if (!btrfs_check_repairable(inode, need_validation, failrec,
failed_mirror)) {
free_io_failure(failure_tree, tree, failrec);
return -EIO;
}
if (failed_bio_pages > 1)
if (need_validation)
read_mode |= REQ_FAILFAST_DEV;
phy_offset >>= inode->i_sb->s_blocksize_bits;
......
......@@ -312,8 +312,9 @@ struct io_failure_record {
};
bool btrfs_check_repairable(struct inode *inode, unsigned failed_bio_pages,
struct io_failure_record *failrec, int fail_mirror);
bool btrfs_check_repairable(struct inode *inode, bool needs_validation,
struct io_failure_record *failrec,
int failed_mirror);
struct bio *btrfs_create_repair_bio(struct inode *inode, struct bio *failed_bio,
struct io_failure_record *failrec,
struct page *page, int pg_offset, int icsum,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment