Commit dbe3ece1 authored by Jens Axboe's avatar Jens Axboe

dm: don't reuse bio for flushes

DM currently has a statically allocated bio that it uses to issue empty
flushes. It doesn't submit this bio, it just uses it for maintaining
state while setting up clones. Multiple users can access this bio at the
same time. This wasn't previously an issue, even if it was a bit iffy,
but with the blkg associations it can become one.

We setup the blkg association, then clone bio's and submit, then remove
the blkg assocation again. But since we can have multiple tasks doing
this at the same time, against multiple blkg's, then we can either lose
references to a blkg, or put it twice. The latter causes complaints on
the percpu ref being <= 0 when released, and can cause use-after-free as
well. Ming reports that xfstest generic/475 triggers this:

------------[ cut here ]------------
percpu ref (blkg_release) <= 0 (0) after switching to atomic
WARNING: CPU: 13 PID: 0 at lib/percpu-refcount.c:155 percpu_ref_switch_to_atomic_rcu+0x2c9/0x4a0

Switch to just using an on-stack bio for this, and get rid of the
embedded bio.

Fixes: 5cdf2e3f ("blkcg: associate blkg when associating a device")
Reported-by: default avatarMing Lei <ming.lei@redhat.com>
Tested-by: default avatarMing Lei <ming.lei@redhat.com>
Reviewed-by: default avatarMike Snitzer <snitzer@redhat.com>
Signed-off-by: default avatarJens Axboe <axboe@kernel.dk>
parent 499aeb45
...@@ -106,9 +106,6 @@ struct mapped_device { ...@@ -106,9 +106,6 @@ struct mapped_device {
struct block_device *bdev; struct block_device *bdev;
/* zero-length flush that will be cloned and submitted to targets */
struct bio flush_bio;
struct dm_stats stats; struct dm_stats stats;
/* for blk-mq request-based DM support */ /* for blk-mq request-based DM support */
......
...@@ -1420,11 +1420,11 @@ static int __send_empty_flush(struct clone_info *ci) ...@@ -1420,11 +1420,11 @@ static int __send_empty_flush(struct clone_info *ci)
struct dm_target *ti; struct dm_target *ti;
/* /*
* Empty flush uses a statically initialized bio, &md->flush_bio, as * Empty flush uses a statically initialized bio, as the base for
* the base for cloning. However, blkg association requires that a * cloning. However, blkg association requires that a bdev is
* bdev is associated with a gendisk, which doesn't happen until the * associated with a gendisk, which doesn't happen until the bdev is
* bdev is opened. So, blkg association is done at issue time of the * opened. So, blkg association is done at issue time of the flush
* flush rather than when the device is created in alloc_dev(). * rather than when the device is created in alloc_dev().
*/ */
bio_set_dev(ci->bio, ci->io->md->bdev); bio_set_dev(ci->bio, ci->io->md->bdev);
...@@ -1609,7 +1609,16 @@ static blk_qc_t __split_and_process_bio(struct mapped_device *md, ...@@ -1609,7 +1609,16 @@ static blk_qc_t __split_and_process_bio(struct mapped_device *md,
init_clone_info(&ci, md, map, bio); init_clone_info(&ci, md, map, bio);
if (bio->bi_opf & REQ_PREFLUSH) { if (bio->bi_opf & REQ_PREFLUSH) {
ci.bio = &ci.io->md->flush_bio; struct bio flush_bio;
/*
* Use an on-stack bio for this, it's safe since we don't
* need to reference it after submit. It's just used as
* the basis for the clone(s).
*/
bio_init(&flush_bio, NULL, 0);
flush_bio.bi_opf = REQ_OP_WRITE | REQ_PREFLUSH | REQ_SYNC;
ci.bio = &flush_bio;
ci.sector_count = 0; ci.sector_count = 0;
error = __send_empty_flush(&ci); error = __send_empty_flush(&ci);
/* dec_pending submits any data associated with flush */ /* dec_pending submits any data associated with flush */
...@@ -1665,7 +1674,16 @@ static blk_qc_t __process_bio(struct mapped_device *md, ...@@ -1665,7 +1674,16 @@ static blk_qc_t __process_bio(struct mapped_device *md,
init_clone_info(&ci, md, map, bio); init_clone_info(&ci, md, map, bio);
if (bio->bi_opf & REQ_PREFLUSH) { if (bio->bi_opf & REQ_PREFLUSH) {
ci.bio = &ci.io->md->flush_bio; struct bio flush_bio;
/*
* Use an on-stack bio for this, it's safe since we don't
* need to reference it after submit. It's just used as
* the basis for the clone(s).
*/
bio_init(&flush_bio, NULL, 0);
flush_bio.bi_opf = REQ_OP_WRITE | REQ_PREFLUSH | REQ_SYNC;
ci.bio = &flush_bio;
ci.sector_count = 0; ci.sector_count = 0;
error = __send_empty_flush(&ci); error = __send_empty_flush(&ci);
/* dec_pending submits any data associated with flush */ /* dec_pending submits any data associated with flush */
...@@ -1949,9 +1967,6 @@ static struct mapped_device *alloc_dev(int minor) ...@@ -1949,9 +1967,6 @@ static struct mapped_device *alloc_dev(int minor)
if (!md->bdev) if (!md->bdev)
goto bad; goto bad;
bio_init(&md->flush_bio, NULL, 0);
md->flush_bio.bi_opf = REQ_OP_WRITE | REQ_PREFLUSH | REQ_SYNC;
dm_stats_init(&md->stats); dm_stats_init(&md->stats);
/* Populate the mapping, nobody knows we exist yet */ /* Populate the mapping, nobody knows we exist yet */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment