Commit bb37d772 authored by Damien Le Moal's avatar Damien Le Moal Committed by Mike Snitzer

dm: introduce zone append emulation

For zoned targets that cannot support zone append operations, implement
an emulation using regular write operations. If the original BIO
submitted by the user is a zone append operation, change its clone into
a regular write operation directed at the target zone write pointer
position.

To do so, an array of write pointer offsets (write pointer position
relative to the start of a zone) is added to struct mapped_device. All
operations that modify a sequential zone write pointer (writes, zone
reset, zone finish and zone append) are intercepted in __map_bio() and
processed using the new function dm_zone_map_bio().

Detection of the target's ability to natively support zone append
operations is done from dm_table_set_restrictions() by calling the
function dm_set_zones_restrictions(). A target that does not support
zone append operations, either by explicitly declaring it using the new
struct dm_target field emulate_zone_append, or because the device
table contains a non-zoned device, has its mapped device marked with the
new flag DMF_EMULATE_ZONE_APPEND. The helper function
dm_emulate_zone_append() is introduced to test a mapped device for this
new flag.

Atomicity of zone write pointer tracking and updates is ensured using
a zone write locking mechanism based on a bitmap. This is similar to
the block layer method but based on BIOs rather than struct request.
A zone write lock is taken in dm_zone_map_bio() for any clone BIO with
an operation type that changes the BIO target zone write pointer
position. The zone write lock is released if the clone BIO is failed
before submission, or in dm_zone_endio() when the clone BIO
completes.

The zone write lock bitmap of the mapped device, together with a bitmap
indicating zone types (conv_zones_bitmap) and the write pointer offset
array (zwp_offset) are allocated and initialized with a full device zone
report in dm_set_zones_restrictions() using the function
dm_revalidate_zones().

For failed operations that may have modified a zone write pointer, the
zone write pointer offset is marked as invalid in dm_zone_endio().
Zones with an invalid write pointer offset are checked and the write
pointer updated using an internal report zone operation when the
faulty zone is accessed again by the user.

All functions added for this emulation have a minimal overhead for
zoned targets natively supporting zone append operations. Regular
device targets are also not affected. The added code also does not
impact builds with CONFIG_BLK_DEV_ZONED disabled by stubbing out all
dm zone related functions.
Signed-off-by: Damien Le Moal <damien.lemoal@wdc.com>
Reviewed-by: Himanshu Madhani <himanshu.madhani@oracle.com>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
parent e2118b3c
...@@ -114,6 +114,11 @@ struct mapped_device { ...@@ -114,6 +114,11 @@ struct mapped_device {
bool init_tio_pdu:1; bool init_tio_pdu:1;
struct srcu_struct io_barrier; struct srcu_struct io_barrier;
#ifdef CONFIG_BLK_DEV_ZONED
unsigned int nr_zones;
unsigned int *zwp_offset;
#endif
}; };
/* /*
...@@ -128,6 +133,7 @@ struct mapped_device { ...@@ -128,6 +133,7 @@ struct mapped_device {
#define DMF_DEFERRED_REMOVE 6 #define DMF_DEFERRED_REMOVE 6
#define DMF_SUSPENDED_INTERNALLY 7 #define DMF_SUSPENDED_INTERNALLY 7
#define DMF_POST_SUSPENDING 8 #define DMF_POST_SUSPENDING 8
#define DMF_EMULATE_ZONE_APPEND 9
void disable_discard(struct mapped_device *md); void disable_discard(struct mapped_device *md);
void disable_write_same(struct mapped_device *md); void disable_write_same(struct mapped_device *md);
...@@ -143,6 +149,13 @@ static inline struct dm_stats *dm_get_stats(struct mapped_device *md) ...@@ -143,6 +149,13 @@ static inline struct dm_stats *dm_get_stats(struct mapped_device *md)
return &md->stats; return &md->stats;
} }
/*
 * Tell whether zone append operations are emulated for the mapped device @md.
 *
 * Returns true only when the queue of @md is zoned and the
 * DMF_EMULATE_ZONE_APPEND bit is set in md->flags; returns false otherwise.
 */
static inline bool dm_emulate_zone_append(struct mapped_device *md)
{
	/* The flag is only consulted for zoned queues. */
	if (!blk_queue_is_zoned(md->queue))
		return false;

	return test_bit(DMF_EMULATE_ZONE_APPEND, &md->flags);
}
#define DM_TABLE_MAX_DEPTH 16 #define DM_TABLE_MAX_DEPTH 16
struct dm_table { struct dm_table {
......
...@@ -1981,11 +1981,12 @@ static int device_requires_stable_pages(struct dm_target *ti, ...@@ -1981,11 +1981,12 @@ static int device_requires_stable_pages(struct dm_target *ti,
return blk_queue_stable_writes(q); return blk_queue_stable_writes(q);
} }
void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q, int dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
struct queue_limits *limits) struct queue_limits *limits)
{ {
bool wc = false, fua = false; bool wc = false, fua = false;
int page_size = PAGE_SIZE; int page_size = PAGE_SIZE;
int r;
/* /*
* Copy table's limits to the DM device's request_queue * Copy table's limits to the DM device's request_queue
...@@ -2064,12 +2065,20 @@ void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q, ...@@ -2064,12 +2065,20 @@ void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
dm_table_any_dev_attr(t, device_is_not_random, NULL)) dm_table_any_dev_attr(t, device_is_not_random, NULL))
blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, q); blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, q);
/* For a zoned target, setup the zones related queue attributes */ /*
if (blk_queue_is_zoned(q)) * For a zoned target, setup the zones related queue attributes
dm_set_zones_restrictions(t, q); * and resources necessary for zone append emulation if necessary.
*/
if (blk_queue_is_zoned(q)) {
r = dm_set_zones_restrictions(t, q);
if (r)
return r;
}
dm_update_keyslot_manager(q, t); dm_update_keyslot_manager(q, t);
blk_queue_update_readahead(q); blk_queue_update_readahead(q);
return 0;
} }
unsigned int dm_table_get_num_targets(struct dm_table *t) unsigned int dm_table_get_num_targets(struct dm_table *t)
......
This diff is collapsed.
...@@ -876,7 +876,6 @@ static void clone_endio(struct bio *bio) ...@@ -876,7 +876,6 @@ static void clone_endio(struct bio *bio)
struct dm_io *io = tio->io; struct dm_io *io = tio->io;
struct mapped_device *md = tio->io->md; struct mapped_device *md = tio->io->md;
dm_endio_fn endio = tio->ti->type->end_io; dm_endio_fn endio = tio->ti->type->end_io;
struct bio *orig_bio = io->orig_bio;
struct request_queue *q = bio->bi_bdev->bd_disk->queue; struct request_queue *q = bio->bi_bdev->bd_disk->queue;
if (unlikely(error == BLK_STS_TARGET)) { if (unlikely(error == BLK_STS_TARGET)) {
...@@ -891,17 +890,8 @@ static void clone_endio(struct bio *bio) ...@@ -891,17 +890,8 @@ static void clone_endio(struct bio *bio)
disable_write_zeroes(md); disable_write_zeroes(md);
} }
/* if (blk_queue_is_zoned(q))
* For zone-append bios get offset in zone of the written dm_zone_endio(io, bio);
* sector and add that to the original bio sector pos.
*/
if (bio_op(orig_bio) == REQ_OP_ZONE_APPEND) {
sector_t written_sector = bio->bi_iter.bi_sector;
struct request_queue *q = orig_bio->bi_bdev->bd_disk->queue;
u64 mask = (u64)blk_queue_zone_sectors(q) - 1;
orig_bio->bi_iter.bi_sector += written_sector & mask;
}
if (endio) { if (endio) {
int r = endio(tio->ti, bio, &error); int r = endio(tio->ti, bio, &error);
...@@ -1213,7 +1203,16 @@ static blk_qc_t __map_bio(struct dm_target_io *tio) ...@@ -1213,7 +1203,16 @@ static blk_qc_t __map_bio(struct dm_target_io *tio)
down(&md->swap_bios_semaphore); down(&md->swap_bios_semaphore);
} }
/*
* Check if the IO needs a special mapping due to zone append emulation
* on zoned target. In this case, dm_zone_map_bio() calls the target
* map operation.
*/
if (dm_emulate_zone_append(io->md))
r = dm_zone_map_bio(tio);
else
r = ti->type->map(ti, clone); r = ti->type->map(ti, clone);
switch (r) { switch (r) {
case DM_MAPIO_SUBMITTED: case DM_MAPIO_SUBMITTED:
break; break;
...@@ -1711,6 +1710,7 @@ static void cleanup_mapped_device(struct mapped_device *md) ...@@ -1711,6 +1710,7 @@ static void cleanup_mapped_device(struct mapped_device *md)
mutex_destroy(&md->swap_bios_lock); mutex_destroy(&md->swap_bios_lock);
dm_mq_cleanup_mapped_device(md); dm_mq_cleanup_mapped_device(md);
dm_cleanup_zoned_dev(md);
} }
/* /*
...@@ -1956,11 +1956,16 @@ static struct dm_table *__bind(struct mapped_device *md, struct dm_table *t, ...@@ -1956,11 +1956,16 @@ static struct dm_table *__bind(struct mapped_device *md, struct dm_table *t,
goto out; goto out;
} }
ret = dm_table_set_restrictions(t, q, limits);
if (ret) {
old_map = ERR_PTR(ret);
goto out;
}
old_map = rcu_dereference_protected(md->map, lockdep_is_held(&md->suspend_lock)); old_map = rcu_dereference_protected(md->map, lockdep_is_held(&md->suspend_lock));
rcu_assign_pointer(md->map, (void *)t); rcu_assign_pointer(md->map, (void *)t);
md->immutable_target_type = dm_table_get_immutable_target_type(t); md->immutable_target_type = dm_table_get_immutable_target_type(t);
dm_table_set_restrictions(t, q, limits);
if (old_map) if (old_map)
dm_sync_table(md); dm_sync_table(md);
...@@ -2079,7 +2084,10 @@ int dm_setup_md_queue(struct mapped_device *md, struct dm_table *t) ...@@ -2079,7 +2084,10 @@ int dm_setup_md_queue(struct mapped_device *md, struct dm_table *t)
DMERR("Cannot calculate initial queue limits"); DMERR("Cannot calculate initial queue limits");
return r; return r;
} }
dm_table_set_restrictions(t, md->queue, &limits); r = dm_table_set_restrictions(t, md->queue, &limits);
if (r)
return r;
blk_register_queue(md->disk); blk_register_queue(md->disk);
return 0; return 0;
......
...@@ -45,6 +45,8 @@ struct dm_dev_internal { ...@@ -45,6 +45,8 @@ struct dm_dev_internal {
struct dm_table; struct dm_table;
struct dm_md_mempools; struct dm_md_mempools;
struct dm_target_io;
struct dm_io;
/*----------------------------------------------------------------- /*-----------------------------------------------------------------
* Internal table functions. * Internal table functions.
...@@ -56,7 +58,7 @@ struct dm_target *dm_table_find_target(struct dm_table *t, sector_t sector); ...@@ -56,7 +58,7 @@ struct dm_target *dm_table_find_target(struct dm_table *t, sector_t sector);
bool dm_table_has_no_data_devices(struct dm_table *table); bool dm_table_has_no_data_devices(struct dm_table *table);
int dm_calculate_queue_limits(struct dm_table *table, int dm_calculate_queue_limits(struct dm_table *table,
struct queue_limits *limits); struct queue_limits *limits);
void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q, int dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
struct queue_limits *limits); struct queue_limits *limits);
struct list_head *dm_table_get_devices(struct dm_table *t); struct list_head *dm_table_get_devices(struct dm_table *t);
void dm_table_presuspend_targets(struct dm_table *t); void dm_table_presuspend_targets(struct dm_table *t);
...@@ -103,17 +105,25 @@ int dm_setup_md_queue(struct mapped_device *md, struct dm_table *t); ...@@ -103,17 +105,25 @@ int dm_setup_md_queue(struct mapped_device *md, struct dm_table *t);
/* /*
* Zoned targets related functions. * Zoned targets related functions.
*/ */
void dm_set_zones_restrictions(struct dm_table *t, struct request_queue *q); int dm_set_zones_restrictions(struct dm_table *t, struct request_queue *q);
void dm_zone_endio(struct dm_io *io, struct bio *clone);
#ifdef CONFIG_BLK_DEV_ZONED #ifdef CONFIG_BLK_DEV_ZONED
void dm_cleanup_zoned_dev(struct mapped_device *md);
int dm_blk_report_zones(struct gendisk *disk, sector_t sector, int dm_blk_report_zones(struct gendisk *disk, sector_t sector,
unsigned int nr_zones, report_zones_cb cb, void *data); unsigned int nr_zones, report_zones_cb cb, void *data);
bool dm_is_zone_write(struct mapped_device *md, struct bio *bio); bool dm_is_zone_write(struct mapped_device *md, struct bio *bio);
int dm_zone_map_bio(struct dm_target_io *io);
#else #else
static inline void dm_cleanup_zoned_dev(struct mapped_device *md) {}
#define dm_blk_report_zones NULL #define dm_blk_report_zones NULL
static inline bool dm_is_zone_write(struct mapped_device *md, struct bio *bio) static inline bool dm_is_zone_write(struct mapped_device *md, struct bio *bio)
{ {
return false; return false;
} }
/*
 * Stub used when CONFIG_BLK_DEV_ZONED is not set: always returns
 * DM_MAPIO_KILL without looking at @tio.
 */
static inline int dm_zone_map_bio(struct dm_target_io *tio)
{
return DM_MAPIO_KILL;
}
#endif #endif
/*----------------------------------------------------------------- /*-----------------------------------------------------------------
......
...@@ -361,6 +361,12 @@ struct dm_target { ...@@ -361,6 +361,12 @@ struct dm_target {
* Set if we need to limit the number of in-flight bios when swapping. * Set if we need to limit the number of in-flight bios when swapping.
*/ */
bool limit_swap_bios:1; bool limit_swap_bios:1;
/*
* Set if this target implements a zoned device and needs emulation of
* zone append operations using regular writes.
*/
bool emulate_zone_append:1;
}; };
void *dm_per_bio_data(struct bio *bio, size_t data_size); void *dm_per_bio_data(struct bio *bio, size_t data_size);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment