Commit ba368991 authored by Linus Torvalds

Merge tag 'dm-3.17-changes' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm

Pull device mapper changes from Mike Snitzer:

 - Allow the thin target to be paired with an external origin of any
   size; also allow thin snapshots to be larger than the external origin.

 - Add support for quickly loading a repetitive pattern into the
   dm-switch target.

 - Use per-bio data in the dm-crypt target instead of always using a
   mempool for each allocation.  Required switching to kmalloc alignment
   for the bio slab.

 - Fix DM core to properly stack the QUEUE_FLAG_NO_SG_MERGE flag.

 - Fix the dm-cache and dm-thin targets' export of the minimum_io_size
   to match the data block size -- this fixes an issue where mkfs.xfs
   would improperly infer raid striping was in place on the underlying
   storage.

 - Small cleanups in dm-io, dm-mpath and dm-cache

* tag 'dm-3.17-changes' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm:
  dm table: propagate QUEUE_FLAG_NO_SG_MERGE
  dm switch: efficiently support repetitive patterns
  dm switch: factor out switch_region_table_read
  dm cache: set minimum_io_size to cache's data block size
  dm thin: set minimum_io_size to pool's data block size
  dm crypt: use per-bio data
  block: use kmalloc alignment for bio slab
  dm table: make dm_table_supports_discards static
  dm cache metadata: use dm-space-map-metadata.h defined size limits
  dm cache: fail migrations in the do_worker error path
  dm cache: simplify deferred set reference count increments
  dm thin: relax external origin size constraints
  dm thin: switch to an atomic_t for tracking pending new block preparations
  dm mpath: eliminate pg_ready() wrapper
  dm io: simplify dec_count and sync_io
parents a8e4def6 200612ec
...@@ -106,6 +106,11 @@ which paths. ...@@ -106,6 +106,11 @@ which paths.
The path number in the range 0 ... (<num_paths> - 1). The path number in the range 0 ... (<num_paths> - 1).
Expressed in hexadecimal (WITHOUT any prefix like 0x). Expressed in hexadecimal (WITHOUT any prefix like 0x).
R<n>,<m>
This parameter allows repetitive patterns to be loaded quickly. <n> and <m>
are hexadecimal numbers. The last <n> mappings are repeated in the next <m>
slots.
Status Status
====== ======
...@@ -124,3 +129,10 @@ Create a switch device with 64kB region size: ...@@ -124,3 +129,10 @@ Create a switch device with 64kB region size:
Set mappings for the first 7 entries to point to devices switch0, switch1, Set mappings for the first 7 entries to point to devices switch0, switch1,
switch2, switch0, switch1, switch2, switch1: switch2, switch0, switch1, switch2, switch1:
dmsetup message switch 0 set_region_mappings 0:0 :1 :2 :0 :1 :2 :1 dmsetup message switch 0 set_region_mappings 0:0 :1 :2 :0 :1 :2 :1
Set repetitive mapping (<n> and <m> are hexadecimal, so R2,10 repeats the last 2 mappings into the next 16 slots). This command:
dmsetup message switch 0 set_region_mappings 1000:1 :2 R2,10
is equivalent to:
dmsetup message switch 0 set_region_mappings 1000:1 :2 :1 :2 :1 :2 :1 :2 \
:1 :2 :1 :2 :1 :2 :1 :2 :1 :2
...@@ -112,7 +112,8 @@ static struct kmem_cache *bio_find_or_create_slab(unsigned int extra_size) ...@@ -112,7 +112,8 @@ static struct kmem_cache *bio_find_or_create_slab(unsigned int extra_size)
bslab = &bio_slabs[entry]; bslab = &bio_slabs[entry];
snprintf(bslab->name, sizeof(bslab->name), "bio-%d", entry); snprintf(bslab->name, sizeof(bslab->name), "bio-%d", entry);
slab = kmem_cache_create(bslab->name, sz, 0, SLAB_HWCACHE_ALIGN, NULL); slab = kmem_cache_create(bslab->name, sz, ARCH_KMALLOC_MINALIGN,
SLAB_HWCACHE_ALIGN, NULL);
if (!slab) if (!slab)
goto out_unlock; goto out_unlock;
......
...@@ -330,7 +330,7 @@ static int __write_initial_superblock(struct dm_cache_metadata *cmd) ...@@ -330,7 +330,7 @@ static int __write_initial_superblock(struct dm_cache_metadata *cmd)
disk_super->discard_root = cpu_to_le64(cmd->discard_root); disk_super->discard_root = cpu_to_le64(cmd->discard_root);
disk_super->discard_block_size = cpu_to_le64(cmd->discard_block_size); disk_super->discard_block_size = cpu_to_le64(cmd->discard_block_size);
disk_super->discard_nr_blocks = cpu_to_le64(from_oblock(cmd->discard_nr_blocks)); disk_super->discard_nr_blocks = cpu_to_le64(from_oblock(cmd->discard_nr_blocks));
disk_super->metadata_block_size = cpu_to_le32(DM_CACHE_METADATA_BLOCK_SIZE >> SECTOR_SHIFT); disk_super->metadata_block_size = cpu_to_le32(DM_CACHE_METADATA_BLOCK_SIZE);
disk_super->data_block_size = cpu_to_le32(cmd->data_block_size); disk_super->data_block_size = cpu_to_le32(cmd->data_block_size);
disk_super->cache_blocks = cpu_to_le32(0); disk_super->cache_blocks = cpu_to_le32(0);
...@@ -478,7 +478,7 @@ static int __create_persistent_data_objects(struct dm_cache_metadata *cmd, ...@@ -478,7 +478,7 @@ static int __create_persistent_data_objects(struct dm_cache_metadata *cmd,
bool may_format_device) bool may_format_device)
{ {
int r; int r;
cmd->bm = dm_block_manager_create(cmd->bdev, DM_CACHE_METADATA_BLOCK_SIZE, cmd->bm = dm_block_manager_create(cmd->bdev, DM_CACHE_METADATA_BLOCK_SIZE << SECTOR_SHIFT,
CACHE_METADATA_CACHE_SIZE, CACHE_METADATA_CACHE_SIZE,
CACHE_MAX_CONCURRENT_LOCKS); CACHE_MAX_CONCURRENT_LOCKS);
if (IS_ERR(cmd->bm)) { if (IS_ERR(cmd->bm)) {
......
...@@ -9,19 +9,17 @@ ...@@ -9,19 +9,17 @@
#include "dm-cache-block-types.h" #include "dm-cache-block-types.h"
#include "dm-cache-policy-internal.h" #include "dm-cache-policy-internal.h"
#include "persistent-data/dm-space-map-metadata.h"
/*----------------------------------------------------------------*/ /*----------------------------------------------------------------*/
#define DM_CACHE_METADATA_BLOCK_SIZE 4096 #define DM_CACHE_METADATA_BLOCK_SIZE DM_SM_METADATA_BLOCK_SIZE
/* FIXME: remove this restriction */ /* FIXME: remove this restriction */
/* /*
* The metadata device is currently limited in size. * The metadata device is currently limited in size.
*
* We have one block of index, which can hold 255 index entries. Each
* index entry contains allocation info about 16k metadata blocks.
*/ */
#define DM_CACHE_METADATA_MAX_SECTORS (255 * (1 << 14) * (DM_CACHE_METADATA_BLOCK_SIZE / (1 << SECTOR_SHIFT))) #define DM_CACHE_METADATA_MAX_SECTORS DM_SM_METADATA_MAX_SECTORS
/* /*
* A metadata device larger than 16GB triggers a warning. * A metadata device larger than 16GB triggers a warning.
......
This diff is collapsed.
...@@ -59,7 +59,7 @@ struct dm_crypt_io { ...@@ -59,7 +59,7 @@ struct dm_crypt_io {
int error; int error;
sector_t sector; sector_t sector;
struct dm_crypt_io *base_io; struct dm_crypt_io *base_io;
}; } CRYPTO_MINALIGN_ATTR;
struct dm_crypt_request { struct dm_crypt_request {
struct convert_context *ctx; struct convert_context *ctx;
...@@ -162,6 +162,8 @@ struct crypt_config { ...@@ -162,6 +162,8 @@ struct crypt_config {
*/ */
unsigned int dmreq_start; unsigned int dmreq_start;
unsigned int per_bio_data_size;
unsigned long flags; unsigned long flags;
unsigned int key_size; unsigned int key_size;
unsigned int key_parts; /* independent parts in key buffer */ unsigned int key_parts; /* independent parts in key buffer */
...@@ -895,6 +897,15 @@ static void crypt_alloc_req(struct crypt_config *cc, ...@@ -895,6 +897,15 @@ static void crypt_alloc_req(struct crypt_config *cc,
kcryptd_async_done, dmreq_of_req(cc, ctx->req)); kcryptd_async_done, dmreq_of_req(cc, ctx->req));
} }
/*
 * Release a cipher request that is no longer needed.
 *
 * The request is handed back to cc->req_pool unless it is the one that
 * sits directly behind the dm_crypt_io in base_bio's per-bio data, in
 * which case nothing is freed here.
 */
static void crypt_free_req(struct crypt_config *cc,
			   struct ablkcipher_request *req, struct bio *base_bio)
{
	struct dm_crypt_io *embedded_io;
	struct ablkcipher_request *embedded_req;

	embedded_io = dm_per_bio_data(base_bio, cc->per_bio_data_size);
	embedded_req = (struct ablkcipher_request *)(embedded_io + 1);

	/* The request embedded in the per-bio data is not pool memory. */
	if (req == embedded_req)
		return;

	mempool_free(req, cc->req_pool);
}
/* /*
* Encrypt / decrypt data from one bio to another one (can be the same one) * Encrypt / decrypt data from one bio to another one (can be the same one)
*/ */
...@@ -1008,12 +1019,9 @@ static void crypt_free_buffer_pages(struct crypt_config *cc, struct bio *clone) ...@@ -1008,12 +1019,9 @@ static void crypt_free_buffer_pages(struct crypt_config *cc, struct bio *clone)
} }
} }
static struct dm_crypt_io *crypt_io_alloc(struct crypt_config *cc, static void crypt_io_init(struct dm_crypt_io *io, struct crypt_config *cc,
struct bio *bio, sector_t sector) struct bio *bio, sector_t sector)
{ {
struct dm_crypt_io *io;
io = mempool_alloc(cc->io_pool, GFP_NOIO);
io->cc = cc; io->cc = cc;
io->base_bio = bio; io->base_bio = bio;
io->sector = sector; io->sector = sector;
...@@ -1021,8 +1029,6 @@ static struct dm_crypt_io *crypt_io_alloc(struct crypt_config *cc, ...@@ -1021,8 +1029,6 @@ static struct dm_crypt_io *crypt_io_alloc(struct crypt_config *cc,
io->base_io = NULL; io->base_io = NULL;
io->ctx.req = NULL; io->ctx.req = NULL;
atomic_set(&io->io_pending, 0); atomic_set(&io->io_pending, 0);
return io;
} }
static void crypt_inc_pending(struct dm_crypt_io *io) static void crypt_inc_pending(struct dm_crypt_io *io)
...@@ -1046,8 +1052,9 @@ static void crypt_dec_pending(struct dm_crypt_io *io) ...@@ -1046,8 +1052,9 @@ static void crypt_dec_pending(struct dm_crypt_io *io)
return; return;
if (io->ctx.req) if (io->ctx.req)
mempool_free(io->ctx.req, cc->req_pool); crypt_free_req(cc, io->ctx.req, base_bio);
mempool_free(io, cc->io_pool); if (io != dm_per_bio_data(base_bio, cc->per_bio_data_size))
mempool_free(io, cc->io_pool);
if (likely(!base_io)) if (likely(!base_io))
bio_endio(base_bio, error); bio_endio(base_bio, error);
...@@ -1255,8 +1262,8 @@ static void kcryptd_crypt_write_convert(struct dm_crypt_io *io) ...@@ -1255,8 +1262,8 @@ static void kcryptd_crypt_write_convert(struct dm_crypt_io *io)
* between fragments, so switch to a new dm_crypt_io structure. * between fragments, so switch to a new dm_crypt_io structure.
*/ */
if (unlikely(!crypt_finished && remaining)) { if (unlikely(!crypt_finished && remaining)) {
new_io = crypt_io_alloc(io->cc, io->base_bio, new_io = mempool_alloc(cc->io_pool, GFP_NOIO);
sector); crypt_io_init(new_io, io->cc, io->base_bio, sector);
crypt_inc_pending(new_io); crypt_inc_pending(new_io);
crypt_convert_init(cc, &new_io->ctx, NULL, crypt_convert_init(cc, &new_io->ctx, NULL,
io->base_bio, sector); io->base_bio, sector);
...@@ -1325,7 +1332,7 @@ static void kcryptd_async_done(struct crypto_async_request *async_req, ...@@ -1325,7 +1332,7 @@ static void kcryptd_async_done(struct crypto_async_request *async_req,
if (error < 0) if (error < 0)
io->error = -EIO; io->error = -EIO;
mempool_free(req_of_dmreq(cc, dmreq), cc->req_pool); crypt_free_req(cc, req_of_dmreq(cc, dmreq), io->base_bio);
if (!atomic_dec_and_test(&ctx->cc_pending)) if (!atomic_dec_and_test(&ctx->cc_pending))
return; return;
...@@ -1728,6 +1735,10 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv) ...@@ -1728,6 +1735,10 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
goto bad; goto bad;
} }
cc->per_bio_data_size = ti->per_bio_data_size =
sizeof(struct dm_crypt_io) + cc->dmreq_start +
sizeof(struct dm_crypt_request) + cc->iv_size;
cc->page_pool = mempool_create_page_pool(MIN_POOL_PAGES, 0); cc->page_pool = mempool_create_page_pool(MIN_POOL_PAGES, 0);
if (!cc->page_pool) { if (!cc->page_pool) {
ti->error = "Cannot allocate page mempool"; ti->error = "Cannot allocate page mempool";
...@@ -1824,7 +1835,9 @@ static int crypt_map(struct dm_target *ti, struct bio *bio) ...@@ -1824,7 +1835,9 @@ static int crypt_map(struct dm_target *ti, struct bio *bio)
return DM_MAPIO_REMAPPED; return DM_MAPIO_REMAPPED;
} }
io = crypt_io_alloc(cc, bio, dm_target_offset(ti, bio->bi_iter.bi_sector)); io = dm_per_bio_data(bio, cc->per_bio_data_size);
crypt_io_init(io, cc, bio, dm_target_offset(ti, bio->bi_iter.bi_sector));
io->ctx.req = (struct ablkcipher_request *)(io + 1);
if (bio_data_dir(io->base_bio) == READ) { if (bio_data_dir(io->base_bio) == READ) {
if (kcryptd_io_read(io, GFP_NOWAIT)) if (kcryptd_io_read(io, GFP_NOWAIT))
......
...@@ -33,7 +33,6 @@ struct dm_io_client { ...@@ -33,7 +33,6 @@ struct dm_io_client {
struct io { struct io {
unsigned long error_bits; unsigned long error_bits;
atomic_t count; atomic_t count;
struct completion *wait;
struct dm_io_client *client; struct dm_io_client *client;
io_notify_fn callback; io_notify_fn callback;
void *context; void *context;
...@@ -112,28 +111,27 @@ static void retrieve_io_and_region_from_bio(struct bio *bio, struct io **io, ...@@ -112,28 +111,27 @@ static void retrieve_io_and_region_from_bio(struct bio *bio, struct io **io,
* We need an io object to keep track of the number of bios that * We need an io object to keep track of the number of bios that
* have been dispatched for a particular io. * have been dispatched for a particular io.
*---------------------------------------------------------------*/ *---------------------------------------------------------------*/
static void dec_count(struct io *io, unsigned int region, int error) static void complete_io(struct io *io)
{ {
if (error) unsigned long error_bits = io->error_bits;
set_bit(region, &io->error_bits); io_notify_fn fn = io->callback;
void *context = io->context;
if (atomic_dec_and_test(&io->count)) { if (io->vma_invalidate_size)
if (io->vma_invalidate_size) invalidate_kernel_vmap_range(io->vma_invalidate_address,
invalidate_kernel_vmap_range(io->vma_invalidate_address, io->vma_invalidate_size);
io->vma_invalidate_size);
if (io->wait) mempool_free(io, io->client->pool);
complete(io->wait); fn(error_bits, context);
}
else { static void dec_count(struct io *io, unsigned int region, int error)
unsigned long r = io->error_bits; {
io_notify_fn fn = io->callback; if (error)
void *context = io->context; set_bit(region, &io->error_bits);
mempool_free(io, io->client->pool); if (atomic_dec_and_test(&io->count))
fn(r, context); complete_io(io);
}
}
} }
static void endio(struct bio *bio, int error) static void endio(struct bio *bio, int error)
...@@ -376,41 +374,51 @@ static void dispatch_io(int rw, unsigned int num_regions, ...@@ -376,41 +374,51 @@ static void dispatch_io(int rw, unsigned int num_regions,
dec_count(io, 0, 0); dec_count(io, 0, 0);
} }
/*
 * Context passed to sync_io_complete(): it receives the error bits
 * reported to that callback and holds the completion the callback signals.
 */
struct sync_io {
/* Written by sync_io_complete() from its 'error' argument. */
unsigned long error_bits;
/* Completed by sync_io_complete(). */
struct completion wait;
};
static void sync_io_complete(unsigned long error, void *context)
{
struct sync_io *sio = context;
sio->error_bits = error;
complete(&sio->wait);
}
static int sync_io(struct dm_io_client *client, unsigned int num_regions, static int sync_io(struct dm_io_client *client, unsigned int num_regions,
struct dm_io_region *where, int rw, struct dpages *dp, struct dm_io_region *where, int rw, struct dpages *dp,
unsigned long *error_bits) unsigned long *error_bits)
{ {
/* struct io *io;
* gcc <= 4.3 can't do the alignment for stack variables, so we must struct sync_io sio;
* align it on our own.
* volatile prevents the optimizer from removing or reusing
* "io_" field from the stack frame (allowed in ANSI C).
*/
volatile char io_[sizeof(struct io) + __alignof__(struct io) - 1];
struct io *io = (struct io *)PTR_ALIGN(&io_, __alignof__(struct io));
DECLARE_COMPLETION_ONSTACK(wait);
if (num_regions > 1 && (rw & RW_MASK) != WRITE) { if (num_regions > 1 && (rw & RW_MASK) != WRITE) {
WARN_ON(1); WARN_ON(1);
return -EIO; return -EIO;
} }
init_completion(&sio.wait);
io = mempool_alloc(client->pool, GFP_NOIO);
io->error_bits = 0; io->error_bits = 0;
atomic_set(&io->count, 1); /* see dispatch_io() */ atomic_set(&io->count, 1); /* see dispatch_io() */
io->wait = &wait;
io->client = client; io->client = client;
io->callback = sync_io_complete;
io->context = &sio;
io->vma_invalidate_address = dp->vma_invalidate_address; io->vma_invalidate_address = dp->vma_invalidate_address;
io->vma_invalidate_size = dp->vma_invalidate_size; io->vma_invalidate_size = dp->vma_invalidate_size;
dispatch_io(rw, num_regions, where, dp, io, 1); dispatch_io(rw, num_regions, where, dp, io, 1);
wait_for_completion_io(&wait); wait_for_completion_io(&sio.wait);
if (error_bits) if (error_bits)
*error_bits = io->error_bits; *error_bits = sio.error_bits;
return io->error_bits ? -EIO : 0; return sio.error_bits ? -EIO : 0;
} }
static int async_io(struct dm_io_client *client, unsigned int num_regions, static int async_io(struct dm_io_client *client, unsigned int num_regions,
...@@ -428,7 +436,6 @@ static int async_io(struct dm_io_client *client, unsigned int num_regions, ...@@ -428,7 +436,6 @@ static int async_io(struct dm_io_client *client, unsigned int num_regions,
io = mempool_alloc(client->pool, GFP_NOIO); io = mempool_alloc(client->pool, GFP_NOIO);
io->error_bits = 0; io->error_bits = 0;
atomic_set(&io->count, 1); /* see dispatch_io() */ atomic_set(&io->count, 1); /* see dispatch_io() */
io->wait = NULL;
io->client = client; io->client = client;
io->callback = fn; io->callback = fn;
io->context = context; io->context = context;
...@@ -481,9 +488,9 @@ static int dp_init(struct dm_io_request *io_req, struct dpages *dp, ...@@ -481,9 +488,9 @@ static int dp_init(struct dm_io_request *io_req, struct dpages *dp,
* New collapsed (a)synchronous interface. * New collapsed (a)synchronous interface.
* *
* If the IO is asynchronous (i.e. it has notify.fn), you must either unplug * If the IO is asynchronous (i.e. it has notify.fn), you must either unplug
* the queue with blk_unplug() some time later or set REQ_SYNC in * the queue with blk_unplug() some time later or set REQ_SYNC in io_req->bi_rw.
io_req->bi_rw. If you fail to do one of these, the IO will be submitted to * If you fail to do one of these, the IO will be submitted to the disk after
* the disk after q->unplug_delay, which defaults to 3ms in blk-settings.c. * q->unplug_delay, which defaults to 3ms in blk-settings.c.
*/ */
int dm_io(struct dm_io_request *io_req, unsigned num_regions, int dm_io(struct dm_io_request *io_req, unsigned num_regions,
struct dm_io_region *where, unsigned long *sync_error_bits) struct dm_io_region *where, unsigned long *sync_error_bits)
......
...@@ -373,8 +373,6 @@ static int __must_push_back(struct multipath *m) ...@@ -373,8 +373,6 @@ static int __must_push_back(struct multipath *m)
dm_noflush_suspending(m->ti))); dm_noflush_suspending(m->ti)));
} }
#define pg_ready(m) (!(m)->queue_io && !(m)->pg_init_required)
/* /*
* Map cloned requests * Map cloned requests
*/ */
...@@ -402,11 +400,11 @@ static int multipath_map(struct dm_target *ti, struct request *clone, ...@@ -402,11 +400,11 @@ static int multipath_map(struct dm_target *ti, struct request *clone,
if (!__must_push_back(m)) if (!__must_push_back(m))
r = -EIO; /* Failed */ r = -EIO; /* Failed */
goto out_unlock; goto out_unlock;
} } else if (m->queue_io || m->pg_init_required) {
if (!pg_ready(m)) {
__pg_init_all_paths(m); __pg_init_all_paths(m);
goto out_unlock; goto out_unlock;
} }
if (set_mapinfo(m, map_context) < 0) if (set_mapinfo(m, map_context) < 0)
/* ENOMEM, requeue */ /* ENOMEM, requeue */
goto out_unlock; goto out_unlock;
......
...@@ -137,13 +137,23 @@ static void switch_get_position(struct switch_ctx *sctx, unsigned long region_nr ...@@ -137,13 +137,23 @@ static void switch_get_position(struct switch_ctx *sctx, unsigned long region_nr
*bit *= sctx->region_table_entry_bits; *bit *= sctx->region_table_entry_bits;
} }
/*
 * Read the entry stored for region_nr in the packed region table.
 *
 * switch_get_position() yields the table slot and the bit offset of the
 * entry inside that slot; the entry is region_table_entry_bits wide.
 */
static unsigned switch_region_table_read(struct switch_ctx *sctx, unsigned long region_nr)
{
	unsigned long slot;
	unsigned shift;
	int entry_mask;

	switch_get_position(sctx, region_nr, &slot, &shift);

	entry_mask = (1 << sctx->region_table_entry_bits) - 1;

	/* Single ACCESS_ONCE load of the slot, then extract the entry. */
	return (ACCESS_ONCE(sctx->region_table[slot]) >> shift) & entry_mask;
}
/* /*
* Find which path to use at given offset. * Find which path to use at given offset.
*/ */
static unsigned switch_get_path_nr(struct switch_ctx *sctx, sector_t offset) static unsigned switch_get_path_nr(struct switch_ctx *sctx, sector_t offset)
{ {
unsigned long region_index; unsigned path_nr;
unsigned bit, path_nr;
sector_t p; sector_t p;
p = offset; p = offset;
...@@ -152,9 +162,7 @@ static unsigned switch_get_path_nr(struct switch_ctx *sctx, sector_t offset) ...@@ -152,9 +162,7 @@ static unsigned switch_get_path_nr(struct switch_ctx *sctx, sector_t offset)
else else
sector_div(p, sctx->region_size); sector_div(p, sctx->region_size);
switch_get_position(sctx, p, &region_index, &bit); path_nr = switch_region_table_read(sctx, p);
path_nr = (ACCESS_ONCE(sctx->region_table[region_index]) >> bit) &
((1 << sctx->region_table_entry_bits) - 1);
/* This can only happen if the processor uses non-atomic stores. */ /* This can only happen if the processor uses non-atomic stores. */
if (unlikely(path_nr >= sctx->nr_paths)) if (unlikely(path_nr >= sctx->nr_paths))
...@@ -363,7 +371,7 @@ static __always_inline unsigned long parse_hex(const char **string) ...@@ -363,7 +371,7 @@ static __always_inline unsigned long parse_hex(const char **string)
} }
static int process_set_region_mappings(struct switch_ctx *sctx, static int process_set_region_mappings(struct switch_ctx *sctx,
unsigned argc, char **argv) unsigned argc, char **argv)
{ {
unsigned i; unsigned i;
unsigned long region_index = 0; unsigned long region_index = 0;
...@@ -372,6 +380,51 @@ static int process_set_region_mappings(struct switch_ctx *sctx, ...@@ -372,6 +380,51 @@ static int process_set_region_mappings(struct switch_ctx *sctx,
unsigned long path_nr; unsigned long path_nr;
const char *string = argv[i]; const char *string = argv[i];
if ((*string & 0xdf) == 'R') {
unsigned long cycle_length, num_write;
string++;
if (unlikely(*string == ',')) {
DMWARN("invalid set_region_mappings argument: '%s'", argv[i]);
return -EINVAL;
}
cycle_length = parse_hex(&string);
if (unlikely(*string != ',')) {
DMWARN("invalid set_region_mappings argument: '%s'", argv[i]);
return -EINVAL;
}
string++;
if (unlikely(!*string)) {
DMWARN("invalid set_region_mappings argument: '%s'", argv[i]);
return -EINVAL;
}
num_write = parse_hex(&string);
if (unlikely(*string)) {
DMWARN("invalid set_region_mappings argument: '%s'", argv[i]);
return -EINVAL;
}
if (unlikely(!cycle_length) || unlikely(cycle_length - 1 > region_index)) {
DMWARN("invalid set_region_mappings cycle length: %lu > %lu",
cycle_length - 1, region_index);
return -EINVAL;
}
if (unlikely(region_index + num_write < region_index) ||
unlikely(region_index + num_write >= sctx->nr_regions)) {
DMWARN("invalid set_region_mappings region number: %lu + %lu >= %lu",
region_index, num_write, sctx->nr_regions);
return -EINVAL;
}
while (num_write--) {
region_index++;
path_nr = switch_region_table_read(sctx, region_index - cycle_length);
switch_region_table_write(sctx, region_index, path_nr);
}
continue;
}
if (*string == ':') if (*string == ':')
region_index++; region_index++;
else { else {
...@@ -500,7 +553,7 @@ static int switch_iterate_devices(struct dm_target *ti, ...@@ -500,7 +553,7 @@ static int switch_iterate_devices(struct dm_target *ti,
static struct target_type switch_target = { static struct target_type switch_target = {
.name = "switch", .name = "switch",
.version = {1, 0, 0}, .version = {1, 1, 0},
.module = THIS_MODULE, .module = THIS_MODULE,
.ctr = switch_ctr, .ctr = switch_ctr,
.dtr = switch_dtr, .dtr = switch_dtr,
......
...@@ -1386,6 +1386,14 @@ static int device_is_not_random(struct dm_target *ti, struct dm_dev *dev, ...@@ -1386,6 +1386,14 @@ static int device_is_not_random(struct dm_target *ti, struct dm_dev *dev,
return q && !blk_queue_add_random(q); return q && !blk_queue_add_random(q);
} }
/*
 * Per-device callout: returns 1 when the device has a request queue and
 * QUEUE_FLAG_NO_SG_MERGE is not set in its queue_flags, otherwise 0.
 */
static int queue_supports_sg_merge(struct dm_target *ti, struct dm_dev *dev,
				   sector_t start, sector_t len, void *data)
{
	struct request_queue *queue = bdev_get_queue(dev->bdev);

	if (!queue)
		return 0;

	return !test_bit(QUEUE_FLAG_NO_SG_MERGE, &queue->queue_flags);
}
static bool dm_table_all_devices_attribute(struct dm_table *t, static bool dm_table_all_devices_attribute(struct dm_table *t,
iterate_devices_callout_fn func) iterate_devices_callout_fn func)
{ {
...@@ -1430,6 +1438,43 @@ static bool dm_table_supports_write_same(struct dm_table *t) ...@@ -1430,6 +1438,43 @@ static bool dm_table_supports_write_same(struct dm_table *t)
return true; return true;
} }
/*
 * Per-device callout: returns 1 when the device has a request queue for
 * which blk_queue_discard() is true, otherwise 0.
 */
static int device_discard_capable(struct dm_target *ti, struct dm_dev *dev,
				  sector_t start, sector_t len, void *data)
{
	struct request_queue *queue = bdev_get_queue(dev->bdev);

	if (!queue)
		return 0;

	return blk_queue_discard(queue) ? 1 : 0;
}
/*
 * Decide whether the table as a whole supports discards.
 *
 * Targets with num_discard_bios == 0 are ignored.  Among the rest, a
 * target that set discards_supported is sufficient on its own; otherwise
 * at least one underlying device must support discards.  t->devices
 * includes internal dm devices such as mirror logs, so the check goes
 * through iterate_devices, which targets supporting discard selectively
 * must provide.
 */
static bool dm_table_supports_discards(struct dm_table *t)
{
	unsigned idx;

	for (idx = 0; idx < dm_table_get_num_targets(t); idx++) {
		struct dm_target *tgt = dm_table_get_target(t, idx);

		if (!tgt->num_discard_bios)
			continue;

		if (tgt->discards_supported)
			return 1;

		if (tgt->type->iterate_devices &&
		    tgt->type->iterate_devices(tgt, device_discard_capable, NULL))
			return 1;
	}

	return 0;
}
void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q, void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
struct queue_limits *limits) struct queue_limits *limits)
{ {
...@@ -1464,6 +1509,11 @@ void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q, ...@@ -1464,6 +1509,11 @@ void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
if (!dm_table_supports_write_same(t)) if (!dm_table_supports_write_same(t))
q->limits.max_write_same_sectors = 0; q->limits.max_write_same_sectors = 0;
if (dm_table_all_devices_attribute(t, queue_supports_sg_merge))
queue_flag_clear_unlocked(QUEUE_FLAG_NO_SG_MERGE, q);
else
queue_flag_set_unlocked(QUEUE_FLAG_NO_SG_MERGE, q);
dm_table_set_integrity(t); dm_table_set_integrity(t);
/* /*
...@@ -1636,39 +1686,3 @@ void dm_table_run_md_queue_async(struct dm_table *t) ...@@ -1636,39 +1686,3 @@ void dm_table_run_md_queue_async(struct dm_table *t)
} }
EXPORT_SYMBOL(dm_table_run_md_queue_async); EXPORT_SYMBOL(dm_table_run_md_queue_async);
/*
 * Per-device callout: non-zero only when the device has a request queue
 * and blk_queue_discard() reports true for it.
 */
static int device_discard_capable(struct dm_target *ti, struct dm_dev *dev,
				  sector_t start, sector_t len, void *data)
{
	struct request_queue *queue = bdev_get_queue(dev->bdev);

	return (queue != NULL) && (blk_queue_discard(queue) != 0);
}
/*
 * Report whether any target in the table supports discards.
 *
 * Unless a target used by the table set discards_supported, at least one
 * underlying device has to support discards.  t->devices includes
 * internal dm devices such as mirror logs, so iterate_devices is used
 * here; targets supporting discard selectively must provide it.
 * Targets with num_discard_bios == 0 never count.
 */
bool dm_table_supports_discards(struct dm_table *t)
{
	unsigned next = 0;

	while (next < dm_table_get_num_targets(t)) {
		struct dm_target *ti = dm_table_get_target(t, next++);

		if (ti->num_discard_bios &&
		    (ti->discards_supported ||
		     (ti->type->iterate_devices &&
		      ti->type->iterate_devices(ti, device_discard_capable, NULL))))
			return 1;
	}

	return 0;
}
...@@ -227,6 +227,7 @@ struct thin_c { ...@@ -227,6 +227,7 @@ struct thin_c {
struct list_head list; struct list_head list;
struct dm_dev *pool_dev; struct dm_dev *pool_dev;
struct dm_dev *origin_dev; struct dm_dev *origin_dev;
sector_t origin_size;
dm_thin_id dev_id; dm_thin_id dev_id;
struct pool *pool; struct pool *pool;
...@@ -554,11 +555,16 @@ static void remap_and_issue(struct thin_c *tc, struct bio *bio, ...@@ -554,11 +555,16 @@ static void remap_and_issue(struct thin_c *tc, struct bio *bio,
struct dm_thin_new_mapping { struct dm_thin_new_mapping {
struct list_head list; struct list_head list;
bool quiesced:1;
bool prepared:1;
bool pass_discard:1; bool pass_discard:1;
bool definitely_not_shared:1; bool definitely_not_shared:1;
/*
* Track quiescing, copying and zeroing preparation actions. When this
* counter hits zero the block is prepared and can be inserted into the
* btree.
*/
atomic_t prepare_actions;
int err; int err;
struct thin_c *tc; struct thin_c *tc;
dm_block_t virt_block; dm_block_t virt_block;
...@@ -575,43 +581,41 @@ struct dm_thin_new_mapping { ...@@ -575,43 +581,41 @@ struct dm_thin_new_mapping {
bio_end_io_t *saved_bi_end_io; bio_end_io_t *saved_bi_end_io;
}; };
static void __maybe_add_mapping(struct dm_thin_new_mapping *m) static void __complete_mapping_preparation(struct dm_thin_new_mapping *m)
{ {
struct pool *pool = m->tc->pool; struct pool *pool = m->tc->pool;
if (m->quiesced && m->prepared) { if (atomic_dec_and_test(&m->prepare_actions)) {
list_add_tail(&m->list, &pool->prepared_mappings); list_add_tail(&m->list, &pool->prepared_mappings);
wake_worker(pool); wake_worker(pool);
} }
} }
static void copy_complete(int read_err, unsigned long write_err, void *context) static void complete_mapping_preparation(struct dm_thin_new_mapping *m)
{ {
unsigned long flags; unsigned long flags;
struct dm_thin_new_mapping *m = context;
struct pool *pool = m->tc->pool; struct pool *pool = m->tc->pool;
m->err = read_err || write_err ? -EIO : 0;
spin_lock_irqsave(&pool->lock, flags); spin_lock_irqsave(&pool->lock, flags);
m->prepared = true; __complete_mapping_preparation(m);
__maybe_add_mapping(m);
spin_unlock_irqrestore(&pool->lock, flags); spin_unlock_irqrestore(&pool->lock, flags);
} }
static void copy_complete(int read_err, unsigned long write_err, void *context)
{
struct dm_thin_new_mapping *m = context;
m->err = read_err || write_err ? -EIO : 0;
complete_mapping_preparation(m);
}
static void overwrite_endio(struct bio *bio, int err) static void overwrite_endio(struct bio *bio, int err)
{ {
unsigned long flags;
struct dm_thin_endio_hook *h = dm_per_bio_data(bio, sizeof(struct dm_thin_endio_hook)); struct dm_thin_endio_hook *h = dm_per_bio_data(bio, sizeof(struct dm_thin_endio_hook));
struct dm_thin_new_mapping *m = h->overwrite_mapping; struct dm_thin_new_mapping *m = h->overwrite_mapping;
struct pool *pool = m->tc->pool;
m->err = err; m->err = err;
complete_mapping_preparation(m);
spin_lock_irqsave(&pool->lock, flags);
m->prepared = true;
__maybe_add_mapping(m);
spin_unlock_irqrestore(&pool->lock, flags);
} }
/*----------------------------------------------------------------*/ /*----------------------------------------------------------------*/
...@@ -821,10 +825,31 @@ static struct dm_thin_new_mapping *get_next_mapping(struct pool *pool) ...@@ -821,10 +825,31 @@ static struct dm_thin_new_mapping *get_next_mapping(struct pool *pool)
return m; return m;
} }
/*
 * Ask kcopyd to zero the sector range starting at 'begin' and ending
 * just before 'end' on the pool device.
 *
 * copy_complete() is registered as the callback with m as its context.
 * If the request cannot be issued, the failure is logged and
 * copy_complete() is invoked directly with both error arguments set.
 */
static void ll_zero(struct thin_c *tc, struct dm_thin_new_mapping *m,
		    sector_t begin, sector_t end)
{
	int rc;
	struct dm_io_region dest = {
		.bdev = tc->pool_dev->bdev,
		.sector = begin,
		.count = end - begin,
	};

	rc = dm_kcopyd_zero(tc->pool->copier, 1, &dest, 0, copy_complete, m);
	if (rc >= 0)
		return;

	DMERR_LIMIT("dm_kcopyd_zero() failed");
	copy_complete(1, 1, m);
}
/*
* A partial copy also needs to zero the uncopied region.
*/
static void schedule_copy(struct thin_c *tc, dm_block_t virt_block, static void schedule_copy(struct thin_c *tc, dm_block_t virt_block,
struct dm_dev *origin, dm_block_t data_origin, struct dm_dev *origin, dm_block_t data_origin,
dm_block_t data_dest, dm_block_t data_dest,
struct dm_bio_prison_cell *cell, struct bio *bio) struct dm_bio_prison_cell *cell, struct bio *bio,
sector_t len)
{ {
int r; int r;
struct pool *pool = tc->pool; struct pool *pool = tc->pool;
...@@ -835,8 +860,15 @@ static void schedule_copy(struct thin_c *tc, dm_block_t virt_block, ...@@ -835,8 +860,15 @@ static void schedule_copy(struct thin_c *tc, dm_block_t virt_block,
m->data_block = data_dest; m->data_block = data_dest;
m->cell = cell; m->cell = cell;
/*
* quiesce action + copy action + an extra reference held for the
* duration of this function (we may need to inc later for a
* partial zero).
*/
atomic_set(&m->prepare_actions, 3);
if (!dm_deferred_set_add_work(pool->shared_read_ds, &m->list)) if (!dm_deferred_set_add_work(pool->shared_read_ds, &m->list))
m->quiesced = true; complete_mapping_preparation(m); /* already quiesced */
/* /*
* IO to pool_dev remaps to the pool target's data_dev. * IO to pool_dev remaps to the pool target's data_dev.
...@@ -857,20 +889,38 @@ static void schedule_copy(struct thin_c *tc, dm_block_t virt_block, ...@@ -857,20 +889,38 @@ static void schedule_copy(struct thin_c *tc, dm_block_t virt_block,
from.bdev = origin->bdev; from.bdev = origin->bdev;
from.sector = data_origin * pool->sectors_per_block; from.sector = data_origin * pool->sectors_per_block;
from.count = pool->sectors_per_block; from.count = len;
to.bdev = tc->pool_dev->bdev; to.bdev = tc->pool_dev->bdev;
to.sector = data_dest * pool->sectors_per_block; to.sector = data_dest * pool->sectors_per_block;
to.count = pool->sectors_per_block; to.count = len;
r = dm_kcopyd_copy(pool->copier, &from, 1, &to, r = dm_kcopyd_copy(pool->copier, &from, 1, &to,
0, copy_complete, m); 0, copy_complete, m);
if (r < 0) { if (r < 0) {
mempool_free(m, pool->mapping_pool);
DMERR_LIMIT("dm_kcopyd_copy() failed"); DMERR_LIMIT("dm_kcopyd_copy() failed");
cell_error(pool, cell); copy_complete(1, 1, m);
/*
* We allow the zero to be issued, to simplify the
* error path. Otherwise we'd need to start
* worrying about decrementing the prepare_actions
* counter.
*/
}
/*
* Do we need to zero a tail region?
*/
if (len < pool->sectors_per_block && pool->pf.zero_new_blocks) {
atomic_inc(&m->prepare_actions);
ll_zero(tc, m,
data_dest * pool->sectors_per_block + len,
(data_dest + 1) * pool->sectors_per_block);
} }
} }
complete_mapping_preparation(m); /* drop our ref */
} }
static void schedule_internal_copy(struct thin_c *tc, dm_block_t virt_block, static void schedule_internal_copy(struct thin_c *tc, dm_block_t virt_block,
...@@ -878,15 +928,8 @@ static void schedule_internal_copy(struct thin_c *tc, dm_block_t virt_block, ...@@ -878,15 +928,8 @@ static void schedule_internal_copy(struct thin_c *tc, dm_block_t virt_block,
struct dm_bio_prison_cell *cell, struct bio *bio) struct dm_bio_prison_cell *cell, struct bio *bio)
{ {
schedule_copy(tc, virt_block, tc->pool_dev, schedule_copy(tc, virt_block, tc->pool_dev,
data_origin, data_dest, cell, bio); data_origin, data_dest, cell, bio,
} tc->pool->sectors_per_block);
static void schedule_external_copy(struct thin_c *tc, dm_block_t virt_block,
dm_block_t data_dest,
struct dm_bio_prison_cell *cell, struct bio *bio)
{
schedule_copy(tc, virt_block, tc->origin_dev,
virt_block, data_dest, cell, bio);
} }
static void schedule_zero(struct thin_c *tc, dm_block_t virt_block, static void schedule_zero(struct thin_c *tc, dm_block_t virt_block,
...@@ -896,8 +939,7 @@ static void schedule_zero(struct thin_c *tc, dm_block_t virt_block, ...@@ -896,8 +939,7 @@ static void schedule_zero(struct thin_c *tc, dm_block_t virt_block,
struct pool *pool = tc->pool; struct pool *pool = tc->pool;
struct dm_thin_new_mapping *m = get_next_mapping(pool); struct dm_thin_new_mapping *m = get_next_mapping(pool);
m->quiesced = true; atomic_set(&m->prepare_actions, 1); /* no need to quiesce */
m->prepared = false;
m->tc = tc; m->tc = tc;
m->virt_block = virt_block; m->virt_block = virt_block;
m->data_block = data_block; m->data_block = data_block;
...@@ -919,21 +961,33 @@ static void schedule_zero(struct thin_c *tc, dm_block_t virt_block, ...@@ -919,21 +961,33 @@ static void schedule_zero(struct thin_c *tc, dm_block_t virt_block,
save_and_set_endio(bio, &m->saved_bi_end_io, overwrite_endio); save_and_set_endio(bio, &m->saved_bi_end_io, overwrite_endio);
inc_all_io_entry(pool, bio); inc_all_io_entry(pool, bio);
remap_and_issue(tc, bio, data_block); remap_and_issue(tc, bio, data_block);
} else {
int r;
struct dm_io_region to;
to.bdev = tc->pool_dev->bdev; } else
to.sector = data_block * pool->sectors_per_block; ll_zero(tc, m,
to.count = pool->sectors_per_block; data_block * pool->sectors_per_block,
(data_block + 1) * pool->sectors_per_block);
}
r = dm_kcopyd_zero(pool->copier, 1, &to, 0, copy_complete, m); static void schedule_external_copy(struct thin_c *tc, dm_block_t virt_block,
if (r < 0) { dm_block_t data_dest,
mempool_free(m, pool->mapping_pool); struct dm_bio_prison_cell *cell, struct bio *bio)
DMERR_LIMIT("dm_kcopyd_zero() failed"); {
cell_error(pool, cell); struct pool *pool = tc->pool;
} sector_t virt_block_begin = virt_block * pool->sectors_per_block;
} sector_t virt_block_end = (virt_block + 1) * pool->sectors_per_block;
if (virt_block_end <= tc->origin_size)
schedule_copy(tc, virt_block, tc->origin_dev,
virt_block, data_dest, cell, bio,
pool->sectors_per_block);
else if (virt_block_begin < tc->origin_size)
schedule_copy(tc, virt_block, tc->origin_dev,
virt_block, data_dest, cell, bio,
tc->origin_size - virt_block_begin);
else
schedule_zero(tc, virt_block, data_dest, cell, bio);
} }
/* /*
...@@ -1315,7 +1369,18 @@ static void process_bio(struct thin_c *tc, struct bio *bio) ...@@ -1315,7 +1369,18 @@ static void process_bio(struct thin_c *tc, struct bio *bio)
inc_all_io_entry(pool, bio); inc_all_io_entry(pool, bio);
cell_defer_no_holder(tc, cell); cell_defer_no_holder(tc, cell);
remap_to_origin_and_issue(tc, bio); if (bio_end_sector(bio) <= tc->origin_size)
remap_to_origin_and_issue(tc, bio);
else if (bio->bi_iter.bi_sector < tc->origin_size) {
zero_fill_bio(bio);
bio->bi_iter.bi_size = (tc->origin_size - bio->bi_iter.bi_sector) << SECTOR_SHIFT;
remap_to_origin_and_issue(tc, bio);
} else {
zero_fill_bio(bio);
bio_endio(bio, 0);
}
} else } else
provision_block(tc, bio, block, cell); provision_block(tc, bio, block, cell);
break; break;
...@@ -3112,7 +3177,7 @@ static void pool_io_hints(struct dm_target *ti, struct queue_limits *limits) ...@@ -3112,7 +3177,7 @@ static void pool_io_hints(struct dm_target *ti, struct queue_limits *limits)
*/ */
if (io_opt_sectors < pool->sectors_per_block || if (io_opt_sectors < pool->sectors_per_block ||
do_div(io_opt_sectors, pool->sectors_per_block)) { do_div(io_opt_sectors, pool->sectors_per_block)) {
blk_limits_io_min(limits, 0); blk_limits_io_min(limits, pool->sectors_per_block << SECTOR_SHIFT);
blk_limits_io_opt(limits, pool->sectors_per_block << SECTOR_SHIFT); blk_limits_io_opt(limits, pool->sectors_per_block << SECTOR_SHIFT);
} }
...@@ -3141,7 +3206,7 @@ static struct target_type pool_target = { ...@@ -3141,7 +3206,7 @@ static struct target_type pool_target = {
.name = "thin-pool", .name = "thin-pool",
.features = DM_TARGET_SINGLETON | DM_TARGET_ALWAYS_WRITEABLE | .features = DM_TARGET_SINGLETON | DM_TARGET_ALWAYS_WRITEABLE |
DM_TARGET_IMMUTABLE, DM_TARGET_IMMUTABLE,
.version = {1, 12, 0}, .version = {1, 13, 0},
.module = THIS_MODULE, .module = THIS_MODULE,
.ctr = pool_ctr, .ctr = pool_ctr,
.dtr = pool_dtr, .dtr = pool_dtr,
...@@ -3361,8 +3426,7 @@ static int thin_endio(struct dm_target *ti, struct bio *bio, int err) ...@@ -3361,8 +3426,7 @@ static int thin_endio(struct dm_target *ti, struct bio *bio, int err)
spin_lock_irqsave(&pool->lock, flags); spin_lock_irqsave(&pool->lock, flags);
list_for_each_entry_safe(m, tmp, &work, list) { list_for_each_entry_safe(m, tmp, &work, list) {
list_del(&m->list); list_del(&m->list);
m->quiesced = true; __complete_mapping_preparation(m);
__maybe_add_mapping(m);
} }
spin_unlock_irqrestore(&pool->lock, flags); spin_unlock_irqrestore(&pool->lock, flags);
} }
...@@ -3401,6 +3465,16 @@ static void thin_postsuspend(struct dm_target *ti) ...@@ -3401,6 +3465,16 @@ static void thin_postsuspend(struct dm_target *ti)
noflush_work(tc, do_noflush_stop); noflush_work(tc, do_noflush_stop);
} }
/*
 * preresume hook for the thin target.
 *
 * When the thin device has an origin device, re-read that device's size
 * via get_dev_size() and cache it in origin_size. Always returns 0.
 */
static int thin_preresume(struct dm_target *ti)
{
	struct thin_c *thin = ti->private;

	/* No origin device configured: nothing to refresh. */
	if (!thin->origin_dev)
		return 0;

	thin->origin_size = get_dev_size(thin->origin_dev->bdev);

	return 0;
}
/* /*
* <nr mapped sectors> <highest mapped sector> * <nr mapped sectors> <highest mapped sector>
*/ */
...@@ -3483,12 +3557,13 @@ static int thin_iterate_devices(struct dm_target *ti, ...@@ -3483,12 +3557,13 @@ static int thin_iterate_devices(struct dm_target *ti,
static struct target_type thin_target = { static struct target_type thin_target = {
.name = "thin", .name = "thin",
.version = {1, 12, 0}, .version = {1, 13, 0},
.module = THIS_MODULE, .module = THIS_MODULE,
.ctr = thin_ctr, .ctr = thin_ctr,
.dtr = thin_dtr, .dtr = thin_dtr,
.map = thin_map, .map = thin_map,
.end_io = thin_endio, .end_io = thin_endio,
.preresume = thin_preresume,
.presuspend = thin_presuspend, .presuspend = thin_presuspend,
.postsuspend = thin_postsuspend, .postsuspend = thin_postsuspend,
.status = thin_status, .status = thin_status,
......
...@@ -72,7 +72,6 @@ int dm_table_any_busy_target(struct dm_table *t); ...@@ -72,7 +72,6 @@ int dm_table_any_busy_target(struct dm_table *t);
unsigned dm_table_get_type(struct dm_table *t); unsigned dm_table_get_type(struct dm_table *t);
struct target_type *dm_table_get_immutable_target_type(struct dm_table *t); struct target_type *dm_table_get_immutable_target_type(struct dm_table *t);
bool dm_table_request_based(struct dm_table *t); bool dm_table_request_based(struct dm_table *t);
bool dm_table_supports_discards(struct dm_table *t);
void dm_table_free_md_mempools(struct dm_table *t); void dm_table_free_md_mempools(struct dm_table *t);
struct dm_md_mempools *dm_table_get_md_mempools(struct dm_table *t); struct dm_md_mempools *dm_table_get_md_mempools(struct dm_table *t);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment