Commit 67f2a930 authored by Linus Torvalds


Merge tag 'for-4.20/dm-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm

Pull device mapper fixes from Mike Snitzer:

 - Fix DM cache metadata to verify that a cache has blocks before trying
   to continue with operations that require them.

 - Fix bio-based DM core's dm_make_request() to properly impose device
   limits on individual bios by making use of blk_queue_split().

 - Fix long-standing race in how DM thinp notified userspace of
   thin-pool mode state changes: the event was sent before the change
   was actually made.

 - Fix the zoned target's bio completion handling; this is a fairly
   invasive fix at this stage but it is localized to the zoned target.
   Any zoned target users will benefit from this fix.

* tag 'for-4.20/dm-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm:
  dm thin: bump target version
  dm thin: send event about thin-pool state change _after_ making it
  dm zoned: Fix target BIO completion handling
  dm: call blk_queue_split() to impose device limits on bios
  dm cache metadata: verify cache has blocks in blocks_are_clean_separate_dirty()
parents 14a996c3 2af6c070
drivers/md/dm-cache-metadata.c:
@@ -930,6 +930,10 @@ static int blocks_are_clean_separate_dirty(struct dm_cache_metadata *cmd,
 	bool dirty_flag;
 	*result = true;
 
+	if (from_cblock(cmd->cache_blocks) == 0)
+		/* Nothing to do */
+		return 0;
+
 	r = dm_bitset_cursor_begin(&cmd->dirty_info, cmd->dirty_root,
 				   from_cblock(cmd->cache_blocks), &cmd->dirty_cursor);
 	if (r) {
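The fix above is a guard for the degenerate case: it avoids handing a zero-length bitset to dm_bitset_cursor_begin(), so a cache that has no blocks yet is reported as trivially clean before the dirty-bitset cursor is even set up. A minimal user-space sketch of the same pattern (not kernel code; cursor_begin() and the array layout are made up for illustration):

#include <stdbool.h>
#include <stddef.h>

struct cursor { const unsigned char *bits; size_t nbits; size_t pos; };

/* Like dm_bitset_cursor_begin(), refuses to start a walk over zero entries. */
static int cursor_begin(struct cursor *c, const unsigned char *bits, size_t nbits)
{
	if (nbits == 0)
		return -1;
	c->bits = bits;
	c->nbits = nbits;
	c->pos = 0;
	return 0;
}

static int blocks_are_clean(const unsigned char *dirty, size_t nblocks, bool *result)
{
	struct cursor c;

	*result = true;
	if (nblocks == 0)
		return 0;	/* nothing to do: an empty cache is trivially clean */

	if (cursor_begin(&c, dirty, nblocks))
		return -1;	/* without the guard above, this error path is hit */
	for (; c.pos < c.nbits; c.pos++) {
		if (c.bits[c.pos]) {
			*result = false;
			break;
		}
	}
	return 0;
}

int main(void)
{
	bool clean;

	return blocks_are_clean(NULL, 0, &clean) != 0;	/* succeeds for the empty cache */
}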
drivers/md/dm-thin.c:
@@ -195,7 +195,7 @@ static void throttle_unlock(struct throttle *t)
 struct dm_thin_new_mapping;
 
 /*
- * The pool runs in 4 modes.  Ordered in degraded order for comparisons.
+ * The pool runs in various modes.  Ordered in degraded order for comparisons.
  */
 enum pool_mode {
 	PM_WRITE,		/* metadata may be changed */
@@ -282,9 +282,38 @@ struct pool {
 	mempool_t mapping_pool;
 };
 
-static enum pool_mode get_pool_mode(struct pool *pool);
 static void metadata_operation_failed(struct pool *pool, const char *op, int r);
 
+static enum pool_mode get_pool_mode(struct pool *pool)
+{
+	return pool->pf.mode;
+}
+
+static void notify_of_pool_mode_change(struct pool *pool)
+{
+	const char *descs[] = {
+		"write",
+		"out-of-data-space",
+		"read-only",
+		"read-only",
+		"fail"
+	};
+	const char *extra_desc = NULL;
+	enum pool_mode mode = get_pool_mode(pool);
+
+	if (mode == PM_OUT_OF_DATA_SPACE) {
+		if (!pool->pf.error_if_no_space)
+			extra_desc = " (queue IO)";
+		else
+			extra_desc = " (error IO)";
+	}
+
+	dm_table_event(pool->ti->table);
+	DMINFO("%s: switching pool to %s%s mode",
+	       dm_device_name(pool->pool_md),
+	       descs[(int)mode], extra_desc ? : "");
+}
+
 /*
  * Target context for a pool.
  */
@@ -2351,8 +2380,6 @@ static void do_waker(struct work_struct *ws)
 	queue_delayed_work(pool->wq, &pool->waker, COMMIT_PERIOD);
 }
 
-static void notify_of_pool_mode_change_to_oods(struct pool *pool);
-
 /*
  * We're holding onto IO to allow userland time to react.  After the
  * timeout either the pool will have been resized (and thus back in
@@ -2365,7 +2392,7 @@ static void do_no_space_timeout(struct work_struct *ws)
 
 	if (get_pool_mode(pool) == PM_OUT_OF_DATA_SPACE && !pool->pf.error_if_no_space) {
 		pool->pf.error_if_no_space = true;
-		notify_of_pool_mode_change_to_oods(pool);
+		notify_of_pool_mode_change(pool);
 		error_retry_list_with_code(pool, BLK_STS_NOSPC);
 	}
 }
@@ -2433,26 +2460,6 @@ static void noflush_work(struct thin_c *tc, void (*fn)(struct work_struct *))
 
 /*----------------------------------------------------------------*/
 
-static enum pool_mode get_pool_mode(struct pool *pool)
-{
-	return pool->pf.mode;
-}
-
-static void notify_of_pool_mode_change(struct pool *pool, const char *new_mode)
-{
-	dm_table_event(pool->ti->table);
-	DMINFO("%s: switching pool to %s mode",
-	       dm_device_name(pool->pool_md), new_mode);
-}
-
-static void notify_of_pool_mode_change_to_oods(struct pool *pool)
-{
-	if (!pool->pf.error_if_no_space)
-		notify_of_pool_mode_change(pool, "out-of-data-space (queue IO)");
-	else
-		notify_of_pool_mode_change(pool, "out-of-data-space (error IO)");
-}
-
 static bool passdown_enabled(struct pool_c *pt)
 {
 	return pt->adjusted_pf.discard_passdown;
@@ -2501,8 +2508,6 @@ static void set_pool_mode(struct pool *pool, enum pool_mode new_mode)
 
 	switch (new_mode) {
 	case PM_FAIL:
-		if (old_mode != new_mode)
-			notify_of_pool_mode_change(pool, "failure");
 		dm_pool_metadata_read_only(pool->pmd);
 		pool->process_bio = process_bio_fail;
 		pool->process_discard = process_bio_fail;
@@ -2516,8 +2521,6 @@ static void set_pool_mode(struct pool *pool, enum pool_mode new_mode)
 
 	case PM_OUT_OF_METADATA_SPACE:
 	case PM_READ_ONLY:
-		if (!is_read_only_pool_mode(old_mode))
-			notify_of_pool_mode_change(pool, "read-only");
 		dm_pool_metadata_read_only(pool->pmd);
 		pool->process_bio = process_bio_read_only;
 		pool->process_discard = process_bio_success;
@@ -2538,8 +2541,6 @@ static void set_pool_mode(struct pool *pool, enum pool_mode new_mode)
 		 * alarming rate.  Adjust your low water mark if you're
 		 * frequently seeing this mode.
 		 */
-		if (old_mode != new_mode)
-			notify_of_pool_mode_change_to_oods(pool);
 		pool->out_of_data_space = true;
 		pool->process_bio = process_bio_read_only;
 		pool->process_discard = process_discard_bio;
@@ -2552,8 +2553,6 @@ static void set_pool_mode(struct pool *pool, enum pool_mode new_mode)
 		break;
 
 	case PM_WRITE:
-		if (old_mode != new_mode)
-			notify_of_pool_mode_change(pool, "write");
 		if (old_mode == PM_OUT_OF_DATA_SPACE)
 			cancel_delayed_work_sync(&pool->no_space_timeout);
 		pool->out_of_data_space = false;
@@ -2573,6 +2572,9 @@ static void set_pool_mode(struct pool *pool, enum pool_mode new_mode)
 	 * doesn't cause an unexpected mode transition on resume.
 	 */
 	pt->adjusted_pf.mode = new_mode;
+
+	if (old_mode != new_mode)
+		notify_of_pool_mode_change(pool);
 }
 
 static void abort_transaction(struct pool *pool)
@@ -4023,7 +4025,7 @@ static struct target_type pool_target = {
 	.name = "thin-pool",
 	.features = DM_TARGET_SINGLETON | DM_TARGET_ALWAYS_WRITEABLE |
 		    DM_TARGET_IMMUTABLE,
-	.version = {1, 20, 0},
+	.version = {1, 21, 0},
 	.module = THIS_MODULE,
 	.ctr = pool_ctr,
 	.dtr = pool_dtr,
@@ -4397,7 +4399,7 @@ static void thin_io_hints(struct dm_target *ti, struct queue_limits *limits)
 
 static struct target_type thin_target = {
 	.name = "thin",
-	.version = {1, 20, 0},
+	.version = {1, 21, 0},
 	.module = THIS_MODULE,
 	.ctr = thin_ctr,
 	.dtr = thin_dtr,
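The thin-pool change is mostly about ordering: notify_of_pool_mode_change() becomes a single helper called at the end of set_pool_mode(), after the new mode has been committed, instead of from inside each case before the transition completes. A listener that reacts to the event by querying the pool's status therefore sees the new mode, not the old one. A minimal user-space sketch of that ordering (not kernel code; send_event() is a made-up stand-in for dm_table_event() and the uevent path, and the mode list is simplified):

#include <stdio.h>

enum pool_mode { PM_WRITE, PM_OUT_OF_DATA_SPACE, PM_READ_ONLY, PM_FAIL };

static enum pool_mode current_mode = PM_WRITE;

static const char *descs[] = { "write", "out-of-data-space", "read-only", "fail" };

static void send_event(void)
{
	/* A real listener would now run "dmsetup status" and parse the mode. */
	printf("event: pool is now in %s mode\n", descs[current_mode]);
}

static void set_pool_mode(enum pool_mode new_mode)
{
	enum pool_mode old_mode = current_mode;

	current_mode = new_mode;	/* make the change effective first ... */

	if (old_mode != new_mode)
		send_event();		/* ... and only then tell userspace */
}

int main(void)
{
	set_pool_mode(PM_OUT_OF_DATA_SPACE);
	return 0;
}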
drivers/md/dm-zoned-target.c:
@@ -20,7 +20,6 @@ struct dmz_bioctx {
 	struct dm_zone		*zone;
 	struct bio		*bio;
 	refcount_t		ref;
-	blk_status_t		status;
 };
 
 /*
@@ -78,65 +77,66 @@ static inline void dmz_bio_endio(struct bio *bio, blk_status_t status)
 {
 	struct dmz_bioctx *bioctx = dm_per_bio_data(bio, sizeof(struct dmz_bioctx));
 
-	if (bioctx->status == BLK_STS_OK && status != BLK_STS_OK)
-		bioctx->status = status;
-	bio_endio(bio);
+	if (status != BLK_STS_OK && bio->bi_status == BLK_STS_OK)
+		bio->bi_status = status;
+
+	if (refcount_dec_and_test(&bioctx->ref)) {
+		struct dm_zone *zone = bioctx->zone;
+
+		if (zone) {
+			if (bio->bi_status != BLK_STS_OK &&
+			    bio_op(bio) == REQ_OP_WRITE &&
+			    dmz_is_seq(zone))
+				set_bit(DMZ_SEQ_WRITE_ERR, &zone->flags);
+			dmz_deactivate_zone(zone);
+		}
+		bio_endio(bio);
+	}
 }
 
 /*
- * Partial clone read BIO completion callback. This terminates the
+ * Completion callback for an internally cloned target BIO. This terminates the
  * target BIO when there are no more references to its context.
  */
-static void dmz_read_bio_end_io(struct bio *bio)
+static void dmz_clone_endio(struct bio *clone)
 {
-	struct dmz_bioctx *bioctx = bio->bi_private;
-	blk_status_t status = bio->bi_status;
+	struct dmz_bioctx *bioctx = clone->bi_private;
+	blk_status_t status = clone->bi_status;
 
-	bio_put(bio);
+	bio_put(clone);
 	dmz_bio_endio(bioctx->bio, status);
 }
 
 /*
- * Issue a BIO to a zone. The BIO may only partially process the
+ * Issue a clone of a target BIO. The clone may only partially process the
  * original target BIO.
  */
-static int dmz_submit_read_bio(struct dmz_target *dmz, struct dm_zone *zone,
-			       struct bio *bio, sector_t chunk_block,
-			       unsigned int nr_blocks)
+static int dmz_submit_bio(struct dmz_target *dmz, struct dm_zone *zone,
+			  struct bio *bio, sector_t chunk_block,
+			  unsigned int nr_blocks)
 {
 	struct dmz_bioctx *bioctx = dm_per_bio_data(bio, sizeof(struct dmz_bioctx));
-	sector_t sector;
 	struct bio *clone;
 
-	/* BIO remap sector */
-	sector = dmz_start_sect(dmz->metadata, zone) + dmz_blk2sect(chunk_block);
-
-	/* If the read is not partial, there is no need to clone the BIO */
-	if (nr_blocks == dmz_bio_blocks(bio)) {
-		/* Setup and submit the BIO */
-		bio->bi_iter.bi_sector = sector;
-		refcount_inc(&bioctx->ref);
-		generic_make_request(bio);
-		return 0;
-	}
-
-	/* Partial BIO: we need to clone the BIO */
 	clone = bio_clone_fast(bio, GFP_NOIO, &dmz->bio_set);
 	if (!clone)
 		return -ENOMEM;
 
-	/* Setup the clone */
-	clone->bi_iter.bi_sector = sector;
+	bio_set_dev(clone, dmz->dev->bdev);
+	clone->bi_iter.bi_sector =
+		dmz_start_sect(dmz->metadata, zone) + dmz_blk2sect(chunk_block);
 	clone->bi_iter.bi_size = dmz_blk2sect(nr_blocks) << SECTOR_SHIFT;
-	clone->bi_end_io = dmz_read_bio_end_io;
+	clone->bi_end_io = dmz_clone_endio;
 	clone->bi_private = bioctx;
 
 	bio_advance(bio, clone->bi_iter.bi_size);
 
-	/* Submit the clone */
 	refcount_inc(&bioctx->ref);
 	generic_make_request(clone);
 
+	if (bio_op(bio) == REQ_OP_WRITE && dmz_is_seq(zone))
+		zone->wp_block += nr_blocks;
+
 	return 0;
 }
 
@@ -214,7 +214,7 @@ static int dmz_handle_read(struct dmz_target *dmz, struct dm_zone *zone,
 		if (nr_blocks) {
 			/* Valid blocks found: read them */
 			nr_blocks = min_t(unsigned int, nr_blocks, end_block - chunk_block);
-			ret = dmz_submit_read_bio(dmz, rzone, bio, chunk_block, nr_blocks);
+			ret = dmz_submit_bio(dmz, rzone, bio, chunk_block, nr_blocks);
 			if (ret)
 				return ret;
 			chunk_block += nr_blocks;
@@ -228,25 +228,6 @@ static int dmz_handle_read(struct dmz_target *dmz, struct dm_zone *zone,
 	return 0;
 }
 
-/*
- * Issue a write BIO to a zone.
- */
-static void dmz_submit_write_bio(struct dmz_target *dmz, struct dm_zone *zone,
-				 struct bio *bio, sector_t chunk_block,
-				 unsigned int nr_blocks)
-{
-	struct dmz_bioctx *bioctx = dm_per_bio_data(bio, sizeof(struct dmz_bioctx));
-
-	/* Setup and submit the BIO */
-	bio_set_dev(bio, dmz->dev->bdev);
-	bio->bi_iter.bi_sector = dmz_start_sect(dmz->metadata, zone) + dmz_blk2sect(chunk_block);
-	refcount_inc(&bioctx->ref);
-	generic_make_request(bio);
-
-	if (dmz_is_seq(zone))
-		zone->wp_block += nr_blocks;
-}
-
 /*
  * Write blocks directly in a data zone, at the write pointer.
  * If a buffer zone is assigned, invalidate the blocks written
@@ -265,7 +246,9 @@ static int dmz_handle_direct_write(struct dmz_target *dmz,
 		return -EROFS;
 
 	/* Submit write */
-	dmz_submit_write_bio(dmz, zone, bio, chunk_block, nr_blocks);
+	ret = dmz_submit_bio(dmz, zone, bio, chunk_block, nr_blocks);
+	if (ret)
+		return ret;
 
 	/*
 	 * Validate the blocks in the data zone and invalidate
@@ -301,7 +284,9 @@ static int dmz_handle_buffered_write(struct dmz_target *dmz,
 		return -EROFS;
 
 	/* Submit write */
-	dmz_submit_write_bio(dmz, bzone, bio, chunk_block, nr_blocks);
+	ret = dmz_submit_bio(dmz, bzone, bio, chunk_block, nr_blocks);
+	if (ret)
+		return ret;
 
 	/*
 	 * Validate the blocks in the buffer zone
@@ -600,7 +585,6 @@ static int dmz_map(struct dm_target *ti, struct bio *bio)
 	bioctx->zone = NULL;
 	bioctx->bio = bio;
 	refcount_set(&bioctx->ref, 1);
-	bioctx->status = BLK_STS_OK;
 
 	/* Set the BIO pending in the flush list */
 	if (!nr_sectors && bio_op(bio) == REQ_OP_WRITE) {
@@ -623,35 +607,6 @@ static int dmz_map(struct dm_target *ti, struct bio *bio)
 	return DM_MAPIO_SUBMITTED;
 }
 
-/*
- * Completed target BIO processing.
- */
-static int dmz_end_io(struct dm_target *ti, struct bio *bio, blk_status_t *error)
-{
-	struct dmz_bioctx *bioctx = dm_per_bio_data(bio, sizeof(struct dmz_bioctx));
-
-	if (bioctx->status == BLK_STS_OK && *error)
-		bioctx->status = *error;
-
-	if (!refcount_dec_and_test(&bioctx->ref))
-		return DM_ENDIO_INCOMPLETE;
-
-	/* Done */
-	bio->bi_status = bioctx->status;
-
-	if (bioctx->zone) {
-		struct dm_zone *zone = bioctx->zone;
-
-		if (*error && bio_op(bio) == REQ_OP_WRITE) {
-			if (dmz_is_seq(zone))
-				set_bit(DMZ_SEQ_WRITE_ERR, &zone->flags);
-		}
-		dmz_deactivate_zone(zone);
-	}
-
-	return DM_ENDIO_DONE;
-}
-
 /*
  * Get zoned device information.
  */
@@ -946,7 +901,6 @@ static struct target_type dmz_type = {
 	.ctr		 = dmz_ctr,
 	.dtr		 = dmz_dtr,
 	.map		 = dmz_map,
-	.end_io		 = dmz_end_io,
 	.io_hints	 = dmz_io_hints,
 	.prepare_ioctl	 = dmz_prepare_ioctl,
 	.postsuspend	 = dmz_suspend,
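The zoned rework drops the per-bio-data status field and the ->end_io hook: the first error is recorded directly in the target BIO's bi_status, every clone issued by dmz_submit_bio() holds a reference on the per-bio context, and dmz_bio_endio() completes the target BIO only when the last reference is dropped (deactivating the zone and flagging sequential-write errors at that point). A user-space sketch of that reference-counted completion scheme (not kernel code; parent_io and the helpers are made up for illustration):

#include <stdatomic.h>
#include <stdio.h>

struct parent_io {
	atomic_int ref;		/* one ref for the submitter plus one per clone */
	int status;		/* first error wins, as with bio->bi_status */
};

static void parent_endio(struct parent_io *p, int status)
{
	if (status && !p->status)
		p->status = status;		/* keep only the first error */

	if (atomic_fetch_sub(&p->ref, 1) == 1)	/* last reference dropped */
		printf("parent completes with status %d\n", p->status);
}

static void submit_clone(struct parent_io *p, int simulated_status)
{
	atomic_fetch_add(&p->ref, 1);	/* each clone holds a reference */
	/* ... the clone would be issued here; its endio path then runs: */
	parent_endio(p, simulated_status);
}

int main(void)
{
	struct parent_io p = { .ref = 1, .status = 0 };

	submit_clone(&p, 0);
	submit_clone(&p, -5);	/* a failing clone marks the parent */
	parent_endio(&p, 0);	/* submitter drops its own reference last */
	return 0;
}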
drivers/md/dm.c:
@@ -1593,6 +1593,8 @@ static blk_qc_t __split_and_process_bio(struct mapped_device *md,
 		return ret;
 	}
 
+	blk_queue_split(md->queue, &bio);
+
 	init_clone_info(&ci, md, map, bio);
 
 	if (bio->bi_opf & REQ_PREFLUSH) {
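In dm.c the fix is a single call: __split_and_process_bio() now runs blk_queue_split() on the incoming bio before cloning it, so bios that exceed the queue's limits are chopped up by the block core rather than being handed whole to the targets. A user-space sketch of the effect (not kernel code; max_sectors and process() are illustrative stand-ins for the queue limit and the per-piece handling):

#include <stdio.h>

static void process(unsigned long long sector, unsigned int nr_sectors)
{
	printf("processing %u sectors at %llu\n", nr_sectors, sector);
}

static void split_and_process(unsigned long long sector, unsigned int nr_sectors,
			      unsigned int max_sectors)
{
	while (nr_sectors) {
		unsigned int chunk = nr_sectors < max_sectors ? nr_sectors : max_sectors;

		process(sector, chunk);	/* each piece honours the device limit */
		sector += chunk;
		nr_sectors -= chunk;
	}
}

int main(void)
{
	split_and_process(0, 1024, 256);	/* 1024 sectors split into four pieces */
	return 0;
}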