Commit c3a62baf authored by Christoph Hellwig's avatar Christoph Hellwig Committed by David Sterba

btrfs: use chained bios when cloning

The stripes_pending in the btrfs_io_context counts number of inflight
low-level bios for an upper btrfs_bio.  For reads this is generally
one as reads are never cloned, while for writes we can trivially use
the bio remaining mechanisms that is used for chained bios.

To be able to make use of that mechanism, split out a separate trivial
end_io handler for the cloned bios that does a minimal amount of error
tracking and which then calls bio_endio on the original bio to transfer
control to that, with the remaining counter making sure it is completed
last.  This then allows to merge btrfs_end_bioc into the original bio
bi_end_io handler.

To make this all work all error handling needs to happen through the
bi_end_io handler, which requires a small amount of reshuffling in
submit_stripe_bio so that the bio is cloned already by the time the
suitability of the device is checked.

This reduces the size of the btrfs_io_context and prepares splitting
the btrfs_bio at the stripe boundary.
Reviewed-by: default avatarNikolay Borisov <nborisov@suse.com>
Reviewed-by: default avatarJohannes Thumshirn <johannes.thumshirn@wdc.com>
Reviewed-by: default avatarAnand Jain <anand.jain@oracle.com>
Signed-off-by: default avatarChristoph Hellwig <hch@lst.de>
Signed-off-by: default avatarDavid Sterba <dsterba@suse.com>
parent 2bbc72f1
......@@ -5898,7 +5898,6 @@ static struct btrfs_io_context *alloc_btrfs_io_context(struct btrfs_fs_info *fs_
sizeof(u64) * (total_stripes),
GFP_NOFS|__GFP_NOFAIL);
atomic_set(&bioc->error, 0);
refcount_set(&bioc->refs, 1);
bioc->fs_info = fs_info;
......@@ -6653,7 +6652,21 @@ struct bio *btrfs_bio_clone_partial(struct bio *orig, u64 offset, u64 size)
bio_trim(bio, offset >> 9, size >> 9);
bbio->iter = bio->bi_iter;
return bio;
}
static void btrfs_log_dev_io_error(struct bio *bio, struct btrfs_device *dev)
{
if (!dev || !dev->bdev)
return;
if (bio->bi_status != BLK_STS_IOERR && bio->bi_status != BLK_STS_TARGET)
return;
if (btrfs_op(bio) == BTRFS_MAP_WRITE)
btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_WRITE_ERRS);
if (!(bio->bi_opf & REQ_RAHEAD))
btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_READ_ERRS);
if (bio->bi_opf & REQ_PREFLUSH)
btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_FLUSH_ERRS);
}
static struct workqueue_struct *btrfs_end_io_wq(struct btrfs_io_context *bioc)
......@@ -6671,62 +6684,54 @@ static void btrfs_end_bio_work(struct work_struct *work)
bio_endio(&bbio->bio);
}
static void btrfs_end_bioc(struct btrfs_io_context *bioc, bool async)
static void btrfs_end_bio(struct bio *bio)
{
struct bio *orig_bio = bioc->orig_bio;
struct btrfs_bio *bbio = btrfs_bio(orig_bio);
struct btrfs_io_stripe *stripe = bio->bi_private;
struct btrfs_io_context *bioc = stripe->bioc;
struct btrfs_bio *bbio = btrfs_bio(bio);
btrfs_bio_counter_dec(bioc->fs_info);
if (bio->bi_status) {
atomic_inc(&bioc->error);
btrfs_log_dev_io_error(bio, stripe->dev);
}
bbio->mirror_num = bioc->mirror_num;
orig_bio->bi_private = bioc->private;
orig_bio->bi_end_io = bioc->end_io;
bio->bi_end_io = bioc->end_io;
bio->bi_private = bioc->private;
/*
* Only send an error to the higher layers if it is beyond the tolerance
* threshold.
*/
if (atomic_read(&bioc->error) > bioc->max_errors)
orig_bio->bi_status = BLK_STS_IOERR;
bio->bi_status = BLK_STS_IOERR;
else
orig_bio->bi_status = BLK_STS_OK;
bio->bi_status = BLK_STS_OK;
if (btrfs_op(orig_bio) == BTRFS_MAP_READ && async) {
if (btrfs_op(bio) == BTRFS_MAP_READ) {
INIT_WORK(&bbio->end_io_work, btrfs_end_bio_work);
queue_work(btrfs_end_io_wq(bioc), &bbio->end_io_work);
} else {
bio_endio(orig_bio);
bio_endio(bio);
}
btrfs_put_bioc(bioc);
}
static void btrfs_end_bio(struct bio *bio)
static void btrfs_clone_write_end_io(struct bio *bio)
{
struct btrfs_io_stripe *stripe = bio->bi_private;
struct btrfs_io_context *bioc = stripe->bioc;
if (bio->bi_status) {
atomic_inc(&bioc->error);
if (bio->bi_status == BLK_STS_IOERR ||
bio->bi_status == BLK_STS_TARGET) {
if (btrfs_op(bio) == BTRFS_MAP_WRITE)
btrfs_dev_stat_inc_and_print(stripe->dev,
BTRFS_DEV_STAT_WRITE_ERRS);
else if (!(bio->bi_opf & REQ_RAHEAD))
btrfs_dev_stat_inc_and_print(stripe->dev,
BTRFS_DEV_STAT_READ_ERRS);
if (bio->bi_opf & REQ_PREFLUSH)
btrfs_dev_stat_inc_and_print(stripe->dev,
BTRFS_DEV_STAT_FLUSH_ERRS);
}
atomic_inc(&stripe->bioc->error);
btrfs_log_dev_io_error(bio, stripe->dev);
}
if (bio != bioc->orig_bio)
bio_put(bio);
if (atomic_dec_and_test(&bioc->stripes_pending))
btrfs_end_bioc(bioc, true);
/* Pass on control to the original bio this one was cloned from */
bio_endio(stripe->bioc->orig_bio);
bio_put(bio);
}
static void submit_stripe_bio(struct btrfs_io_context *bioc,
......@@ -6737,28 +6742,30 @@ static void submit_stripe_bio(struct btrfs_io_context *bioc,
u64 physical = bioc->stripes[dev_nr].physical;
struct bio *bio;
if (!dev || !dev->bdev ||
test_bit(BTRFS_DEV_STATE_MISSING, &dev->dev_state) ||
(btrfs_op(orig_bio) == BTRFS_MAP_WRITE &&
!test_bit(BTRFS_DEV_STATE_WRITEABLE, &dev->dev_state))) {
atomic_inc(&bioc->error);
if (atomic_dec_and_test(&bioc->stripes_pending))
btrfs_end_bioc(bioc, false);
return;
}
if (clone) {
bio = bio_alloc_clone(dev->bdev, orig_bio, GFP_NOFS, &fs_bio_set);
bio = bio_alloc_clone(NULL, orig_bio, GFP_NOFS, &fs_bio_set);
bio_inc_remaining(orig_bio);
bio->bi_end_io = btrfs_clone_write_end_io;
} else {
bio = orig_bio;
bio_set_dev(bio, dev->bdev);
btrfs_bio(bio)->device = dev;
bio->bi_end_io = btrfs_end_bio;
}
bioc->stripes[dev_nr].bioc = bioc;
bio->bi_private = &bioc->stripes[dev_nr];
bio->bi_end_io = btrfs_end_bio;
bio->bi_iter.bi_sector = physical >> 9;
if (!dev || !dev->bdev ||
test_bit(BTRFS_DEV_STATE_MISSING, &dev->dev_state) ||
(btrfs_op(bio) == BTRFS_MAP_WRITE &&
!test_bit(BTRFS_DEV_STATE_WRITEABLE, &dev->dev_state))) {
bio_io_error(bio);
return;
}
bio_set_dev(bio, dev->bdev);
/*
* For zone append writing, bi_sector must point the beginning of the
* zone
......@@ -6807,7 +6814,6 @@ void btrfs_submit_bio(struct btrfs_fs_info *fs_info, struct bio *bio, int mirror
bioc->orig_bio = bio;
bioc->private = bio->bi_private;
bioc->end_io = bio->bi_end_io;
atomic_set(&bioc->stripes_pending, total_devs);
if ((bioc->map_type & BTRFS_BLOCK_GROUP_RAID56_MASK) &&
((btrfs_op(bio) == BTRFS_MAP_WRITE) || (mirror_num > 1))) {
......
......@@ -457,7 +457,6 @@ struct btrfs_discard_stripe {
*/
struct btrfs_io_context {
refcount_t refs;
atomic_t stripes_pending;
struct btrfs_fs_info *fs_info;
u64 map_type; /* get from map_lookup->type */
bio_end_io_t *end_io;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment