Commit 7dd76d1f authored by Ming Lei, committed by Mike Snitzer

dm: improve bio splitting and associated IO accounting

The current DM code (ab)uses late assignment of dm_io->orig_bio (after
__map_bio() returns and any bio splitting is complete) to indicate the
FS bio has been processed and can be accounted. This results in
awkward waiting until ->orig_bio is set in dm_submit_bio_remap().

Also the bio splitting was implemented using bio_split()+bio_chain()
-- a well-worn pattern but it requires bio cloning purely for the
benefit of more natural IO accounting.  The bio_split() result was
stored in ->orig_bio to represent the mapped part of the original FS
bio.

DM has switched to the bdev based IO accounting interface.  DM's IO
accounting can be implemented in terms of the original FS bio (now
stored early in ->orig_bio) via access to its sectors/bio_op. And
if/when splitting is needed, set a new DM_IO_WAS_SPLIT flag and use
new dm_io fields of .sector_offset & .sectors to allow IO accounting
for split bios _without_ needing to clone a new bio to store in
->orig_bio.

Signed-off-by: Ming Lei <ming.lei@redhat.com>
Co-developed-by: Mike Snitzer <snitzer@kernel.org>
Signed-off-by: Mike Snitzer <snitzer@kernel.org>
parent d3de6d12
...@@ -267,7 +267,12 @@ struct dm_io { ...@@ -267,7 +267,12 @@ struct dm_io {
blk_status_t status; blk_status_t status;
atomic_t io_count; atomic_t io_count;
struct mapped_device *md; struct mapped_device *md;
/* The three fields represent mapped part of original bio */
struct bio *orig_bio; struct bio *orig_bio;
unsigned int sector_offset; /* offset to end of orig_bio */
unsigned int sectors;
/* last member of dm_target_io is 'struct bio' */ /* last member of dm_target_io is 'struct bio' */
struct dm_target_io tio; struct dm_target_io tio;
}; };
...@@ -277,7 +282,8 @@ struct dm_io { ...@@ -277,7 +282,8 @@ struct dm_io {
*/ */
enum { enum {
DM_IO_START_ACCT, DM_IO_START_ACCT,
DM_IO_ACCOUNTED DM_IO_ACCOUNTED,
DM_IO_WAS_SPLIT
}; };
static inline bool dm_io_flagged(struct dm_io *io, unsigned int bit) static inline bool dm_io_flagged(struct dm_io *io, unsigned int bit)
......
...@@ -516,8 +516,10 @@ static void dm_io_acct(struct dm_io *io, bool end) ...@@ -516,8 +516,10 @@ static void dm_io_acct(struct dm_io *io, bool end)
*/ */
if (bio_is_flush_with_data(bio)) if (bio_is_flush_with_data(bio))
sectors = 0; sectors = 0;
else else if (likely(!(dm_io_flagged(io, DM_IO_WAS_SPLIT))))
sectors = bio_sectors(bio); sectors = bio_sectors(bio);
else
sectors = io->sectors;
if (!end) if (!end)
bdev_start_io_acct(bio->bi_bdev, sectors, bio_op(bio), bdev_start_io_acct(bio->bi_bdev, sectors, bio_op(bio),
...@@ -526,10 +528,18 @@ static void dm_io_acct(struct dm_io *io, bool end) ...@@ -526,10 +528,18 @@ static void dm_io_acct(struct dm_io *io, bool end)
bdev_end_io_acct(bio->bi_bdev, bio_op(bio), start_time); bdev_end_io_acct(bio->bi_bdev, bio_op(bio), start_time);
if (static_branch_unlikely(&stats_enabled) && if (static_branch_unlikely(&stats_enabled) &&
unlikely(dm_stats_used(&md->stats))) unlikely(dm_stats_used(&md->stats))) {
sector_t sector;
if (likely(!dm_io_flagged(io, DM_IO_WAS_SPLIT)))
sector = bio->bi_iter.bi_sector;
else
sector = bio_end_sector(bio) - io->sector_offset;
dm_stats_account_io(&md->stats, bio_data_dir(bio), dm_stats_account_io(&md->stats, bio_data_dir(bio),
bio->bi_iter.bi_sector, sectors, sector, sectors,
end, start_time, stats_aux); end, start_time, stats_aux);
}
} }
static void __dm_start_io_acct(struct dm_io *io) static void __dm_start_io_acct(struct dm_io *io)
...@@ -582,7 +592,7 @@ static struct dm_io *alloc_io(struct mapped_device *md, struct bio *bio) ...@@ -582,7 +592,7 @@ static struct dm_io *alloc_io(struct mapped_device *md, struct bio *bio)
io->status = BLK_STS_OK; io->status = BLK_STS_OK;
atomic_set(&io->io_count, 1); atomic_set(&io->io_count, 1);
this_cpu_inc(*md->pending_io); this_cpu_inc(*md->pending_io);
io->orig_bio = NULL; io->orig_bio = bio;
io->md = md; io->md = md;
io->map_task = current; io->map_task = current;
spin_lock_init(&io->lock); spin_lock_init(&io->lock);
...@@ -1219,6 +1229,13 @@ void dm_accept_partial_bio(struct bio *bio, unsigned n_sectors) ...@@ -1219,6 +1229,13 @@ void dm_accept_partial_bio(struct bio *bio, unsigned n_sectors)
*tio->len_ptr -= bio_sectors - n_sectors; *tio->len_ptr -= bio_sectors - n_sectors;
bio->bi_iter.bi_size = n_sectors << SECTOR_SHIFT; bio->bi_iter.bi_size = n_sectors << SECTOR_SHIFT;
/*
* __split_and_process_bio() may have already saved mapped part
* for accounting but it is being reduced so update accordingly.
*/
dm_io_set_flag(tio->io, DM_IO_WAS_SPLIT);
tio->io->sectors = n_sectors;
} }
EXPORT_SYMBOL_GPL(dm_accept_partial_bio); EXPORT_SYMBOL_GPL(dm_accept_partial_bio);
...@@ -1257,13 +1274,6 @@ void dm_submit_bio_remap(struct bio *clone, struct bio *tgt_clone) ...@@ -1257,13 +1274,6 @@ void dm_submit_bio_remap(struct bio *clone, struct bio *tgt_clone)
/* Still in target's map function */ /* Still in target's map function */
dm_io_set_flag(io, DM_IO_START_ACCT); dm_io_set_flag(io, DM_IO_START_ACCT);
} else { } else {
/*
* Called by another thread, managed by DM target,
* wait for dm_split_and_process_bio() to store
* io->orig_bio
*/
while (unlikely(!smp_load_acquire(&io->orig_bio)))
msleep(1);
dm_start_io_acct(io, clone); dm_start_io_acct(io, clone);
} }
...@@ -1357,6 +1367,31 @@ static void __map_bio(struct bio *clone) ...@@ -1357,6 +1367,31 @@ static void __map_bio(struct bio *clone)
} }
} }
/*
 * Record on the dm_io which part of the original bio is about to be
 * mapped, so accounting can be done against the original bio without
 * cloning it.
 *
 * @ci:  clone_info describing the bio being processed
 * @len: number of sectors that will be mapped in this pass
 */
static void setup_split_accounting(struct clone_info *ci, unsigned len)
{
	struct dm_io *dio = ci->io;

	/*
	 * Fewer sectors are being mapped than remain, i.e. a split is
	 * needed: flag the dm_io and remember the mapped length.
	 * dm_accept_partial_bio() overwrites ->sectors if the target
	 * accepts less than this.
	 */
	if (len < ci->sector_count) {
		dm_io_set_flag(dio, DM_IO_WAS_SPLIT);
		dio->sectors = len;
	}

	/* The start-sector bookkeeping below only serves DM-stats. */
	if (!static_branch_unlikely(&stats_enabled))
		return;
	if (likely(!dm_stats_used(&dio->md->stats)))
		return;

	/*
	 * Store the mapped part's start sector as a distance back from
	 * the end of the original bio. This is done whether or not a
	 * split was needed, so dm_accept_partial_bio() does not have to
	 * save it itself.
	 *
	 * NOTE(review): this is written only when DM-stats is in use at
	 * this point, while dm_io_acct() re-checks DM-stats usage when it
	 * runs -- confirm ->sector_offset cannot be read unset if stats
	 * are enabled in between.
	 */
	dio->sector_offset = bio_end_sector(ci->bio) - ci->sector;
}
static void alloc_multiple_bios(struct bio_list *blist, struct clone_info *ci, static void alloc_multiple_bios(struct bio_list *blist, struct clone_info *ci,
struct dm_target *ti, unsigned num_bios) struct dm_target *ti, unsigned num_bios)
{ {
...@@ -1396,6 +1431,8 @@ static void __send_duplicate_bios(struct clone_info *ci, struct dm_target *ti, ...@@ -1396,6 +1431,8 @@ static void __send_duplicate_bios(struct clone_info *ci, struct dm_target *ti,
case 0: case 0:
break; break;
case 1: case 1:
if (len)
setup_split_accounting(ci, *len);
clone = alloc_tio(ci, ti, 0, len, GFP_NOIO); clone = alloc_tio(ci, ti, 0, len, GFP_NOIO);
__map_bio(clone); __map_bio(clone);
break; break;
...@@ -1559,6 +1596,7 @@ static blk_status_t __split_and_process_bio(struct clone_info *ci) ...@@ -1559,6 +1596,7 @@ static blk_status_t __split_and_process_bio(struct clone_info *ci)
ci->submit_as_polled = ci->bio->bi_opf & REQ_POLLED; ci->submit_as_polled = ci->bio->bi_opf & REQ_POLLED;
len = min_t(sector_t, max_io_len(ti, ci->sector), ci->sector_count); len = min_t(sector_t, max_io_len(ti, ci->sector), ci->sector_count);
setup_split_accounting(ci, len);
clone = alloc_tio(ci, ti, 0, &len, GFP_NOIO); clone = alloc_tio(ci, ti, 0, &len, GFP_NOIO);
__map_bio(clone); __map_bio(clone);
...@@ -1592,7 +1630,6 @@ static void dm_split_and_process_bio(struct mapped_device *md, ...@@ -1592,7 +1630,6 @@ static void dm_split_and_process_bio(struct mapped_device *md,
{ {
struct clone_info ci; struct clone_info ci;
struct dm_io *io; struct dm_io *io;
struct bio *orig_bio = NULL;
blk_status_t error = BLK_STS_OK; blk_status_t error = BLK_STS_OK;
init_clone_info(&ci, md, map, bio); init_clone_info(&ci, md, map, bio);
...@@ -1608,23 +1645,15 @@ static void dm_split_and_process_bio(struct mapped_device *md, ...@@ -1608,23 +1645,15 @@ static void dm_split_and_process_bio(struct mapped_device *md,
io->map_task = NULL; io->map_task = NULL;
if (error || !ci.sector_count) if (error || !ci.sector_count)
goto out; goto out;
/* /*
* Remainder must be passed to submit_bio_noacct() so it gets handled * Remainder must be passed to submit_bio_noacct() so it gets handled
* *after* bios already submitted have been completely processed. * *after* bios already submitted have been completely processed.
* We take a clone of the original to store in io->orig_bio to be
* used by dm_end_io_acct() and for dm_io_complete() to use for
* completion handling.
*/ */
orig_bio = bio_split(bio, bio_sectors(bio) - ci.sector_count, bio_trim(bio, io->sectors, ci.sector_count);
GFP_NOIO, &md->queue->bio_split); trace_block_split(bio, bio->bi_iter.bi_sector);
bio_chain(orig_bio, bio); bio_inc_remaining(bio);
trace_block_split(orig_bio, bio->bi_iter.bi_sector);
submit_bio_noacct(bio); submit_bio_noacct(bio);
out: out:
if (!orig_bio)
orig_bio = bio;
smp_store_release(&io->orig_bio, orig_bio);
if (dm_io_flagged(io, DM_IO_START_ACCT)) if (dm_io_flagged(io, DM_IO_START_ACCT))
dm_start_io_acct(io, NULL); dm_start_io_acct(io, NULL);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment