Commit a911dcdb authored by Linus Torvalds


Merge tag 'dm-3.20-changes-2' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm

Pull more device mapper changes from Mike Snitzer:

- Significant dm-crypt CPU scalability performance improvements, thanks
  to changes that enable effective use of an unbound workqueue across
  all available CPUs (a minimal sketch of this workqueue setup follows
  the commit details below).  A large battery of tests was performed to
  validate these changes; a summary of the results is available here:
  https://www.redhat.com/archives/dm-devel/2015-February/msg00106.html

- A few additional stable fixes (to DM core, dm-snapshot and dm-mirror)
  and a small fix to the dm-space-map-disk.

* tag 'dm-3.20-changes-2' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm:
  dm snapshot: fix a possible invalid memory access on unload
  dm: fix a race condition in dm_get_md
  dm crypt: sort writes
  dm crypt: add 'submit_from_crypt_cpus' option
  dm crypt: offload writes to thread
  dm crypt: remove unused io_pool and _crypt_io_pool
  dm crypt: avoid deadlock in mempools
  dm crypt: don't allocate pages for a partial request
  dm crypt: use unbound workqueue for request processing
  dm io: reject unsupported DISCARD requests with EOPNOTSUPP
  dm mirror: do not degrade the mirror on discard error
  dm space map disk: fix sm_disk_count_is_more_than_one()
parents e20d3ef5 22aa66a3
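As context for the first bullet above, here is a minimal sketch (not part of this commit) of the two workqueue configurations that dm-crypt's constructor now chooses between. The helper name sketch_alloc_crypt_wq and its same_cpu parameter are illustrative stand-ins for the DM_CRYPT_SAME_CPU flag tested in crypt_ctr() further down in the diff.

/*
 * Sketch only: a bound, single-threaded workqueue (the same_cpu_crypt case)
 * versus an unbound workqueue sized to the online CPUs (the new default).
 */
#include <linux/workqueue.h>
#include <linux/cpumask.h>
#include <linux/types.h>

static struct workqueue_struct *sketch_alloc_crypt_wq(bool same_cpu)
{
	if (same_cpu)
		/* bound queue: a work item runs on the CPU that queued it */
		return alloc_workqueue("kcryptd",
				       WQ_CPU_INTENSIVE | WQ_MEM_RECLAIM, 1);

	/* unbound queue: workers are not pinned, so encryption work can be
	 * balanced across all online CPUs */
	return alloc_workqueue("kcryptd",
			       WQ_CPU_INTENSIVE | WQ_MEM_RECLAIM | WQ_UNBOUND,
			       num_online_cpus());
}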
@@ -51,7 +51,7 @@ Parameters: <cipher> <key> <iv_offset> <device path> \
 Otherwise #opt_params is the number of following arguments.
 
 Example of optional parameters section:
-    1 allow_discards
+    3 allow_discards same_cpu_crypt submit_from_crypt_cpus
 
 allow_discards
     Block discard requests (a.k.a. TRIM) are passed through the crypt device.
@@ -63,6 +63,19 @@ allow_discards
     used space etc.) if the discarded blocks can be located easily on the
     device later.
 
+same_cpu_crypt
+    Perform encryption using the same cpu that IO was submitted on.
+    The default is to use an unbound workqueue so that encryption work
+    is automatically balanced between available CPUs.
+
+submit_from_crypt_cpus
+    Disable offloading writes to a separate thread after encryption.
+    There are some situations where offloading write bios from the
+    encryption threads to a single thread degrades performance
+    significantly.  The default is to offload write bios to the same
+    thread because it benefits CFQ to have writes submitted using the
+    same context.
+
 Example scripts
 ===============
 LUKS (Linux Unified Key Setup) is now the preferred way to set up disk
@@ -18,9 +18,11 @@
 #include <linux/slab.h>
 #include <linux/crypto.h>
 #include <linux/workqueue.h>
+#include <linux/kthread.h>
 #include <linux/backing-dev.h>
 #include <linux/atomic.h>
 #include <linux/scatterlist.h>
+#include <linux/rbtree.h>
 #include <asm/page.h>
 #include <asm/unaligned.h>
 #include <crypto/hash.h>
@@ -58,7 +60,8 @@ struct dm_crypt_io {
 	atomic_t io_pending;
 	int error;
 	sector_t sector;
-	struct dm_crypt_io *base_io;
+
+	struct rb_node rb_node;
 } CRYPTO_MINALIGN_ATTR;
 
 struct dm_crypt_request {
@@ -108,7 +111,8 @@ struct iv_tcw_private {
  * Crypt: maps a linear range of a block device
  * and encrypts / decrypts at the same time.
  */
-enum flags { DM_CRYPT_SUSPENDED, DM_CRYPT_KEY_VALID };
+enum flags { DM_CRYPT_SUSPENDED, DM_CRYPT_KEY_VALID,
+	     DM_CRYPT_SAME_CPU, DM_CRYPT_NO_OFFLOAD };
 
 /*
  * The fields in here must be read only after initialization.
@@ -121,14 +125,18 @@ struct crypt_config {
 	/*
	 * pool for per bio private data, crypto requests and
	 * encryption requeusts/buffer pages
	 */
-	mempool_t *io_pool;
 	mempool_t *req_pool;
 	mempool_t *page_pool;
 	struct bio_set *bs;
+	struct mutex bio_alloc_lock;
 
 	struct workqueue_struct *io_queue;
 	struct workqueue_struct *crypt_queue;
 
+	struct task_struct *write_thread;
+	wait_queue_head_t write_thread_wait;
+	struct rb_root write_tree;
+
 	char *cipher;
 	char *cipher_string;
@@ -172,9 +180,6 @@ struct crypt_config {
 };
 
 #define MIN_IOS 16
-#define MIN_POOL_PAGES 32
-
-static struct kmem_cache *_crypt_io_pool;
 
 static void clone_init(struct dm_crypt_io *, struct bio *);
 static void kcryptd_queue_crypt(struct dm_crypt_io *io);
@@ -946,57 +951,70 @@ static int crypt_convert(struct crypt_config *cc,
 	return 0;
 }
 
+static void crypt_free_buffer_pages(struct crypt_config *cc, struct bio *clone);
+
 /*
  * Generate a new unfragmented bio with the given size
  * This should never violate the device limitations
- * May return a smaller bio when running out of pages, indicated by
- * *out_of_pages set to 1.
+ *
+ * This function may be called concurrently. If we allocate from the mempool
+ * concurrently, there is a possibility of deadlock. For example, if we have
+ * mempool of 256 pages, two processes, each wanting 256, pages allocate from
+ * the mempool concurrently, it may deadlock in a situation where both processes
+ * have allocated 128 pages and the mempool is exhausted.
+ *
+ * In order to avoid this scenario we allocate the pages under a mutex.
+ *
+ * In order to not degrade performance with excessive locking, we try
+ * non-blocking allocations without a mutex first but on failure we fallback
+ * to blocking allocations with a mutex.
  */
-static struct bio *crypt_alloc_buffer(struct dm_crypt_io *io, unsigned size,
-				      unsigned *out_of_pages)
+static struct bio *crypt_alloc_buffer(struct dm_crypt_io *io, unsigned size)
 {
 	struct crypt_config *cc = io->cc;
 	struct bio *clone;
 	unsigned int nr_iovecs = (size + PAGE_SIZE - 1) >> PAGE_SHIFT;
-	gfp_t gfp_mask = GFP_NOIO | __GFP_HIGHMEM;
-	unsigned i, len;
+	gfp_t gfp_mask = GFP_NOWAIT | __GFP_HIGHMEM;
+	unsigned i, len, remaining_size;
 	struct page *page;
+	struct bio_vec *bvec;
+
+retry:
+	if (unlikely(gfp_mask & __GFP_WAIT))
+		mutex_lock(&cc->bio_alloc_lock);
 
 	clone = bio_alloc_bioset(GFP_NOIO, nr_iovecs, cc->bs);
 	if (!clone)
-		return NULL;
+		goto return_clone;
 
 	clone_init(io, clone);
-	*out_of_pages = 0;
+
+	remaining_size = size;
 
 	for (i = 0; i < nr_iovecs; i++) {
 		page = mempool_alloc(cc->page_pool, gfp_mask);
 		if (!page) {
-			*out_of_pages = 1;
-			break;
+			crypt_free_buffer_pages(cc, clone);
+			bio_put(clone);
+			gfp_mask |= __GFP_WAIT;
+			goto retry;
 		}
 
-		/*
-		 * If additional pages cannot be allocated without waiting,
-		 * return a partially-allocated bio.  The caller will then try
-		 * to allocate more bios while submitting this partial bio.
-		 */
-		gfp_mask = (gfp_mask | __GFP_NOWARN) & ~__GFP_WAIT;
-
-		len = (size > PAGE_SIZE) ? PAGE_SIZE : size;
+		len = (remaining_size > PAGE_SIZE) ? PAGE_SIZE : remaining_size;
 
-		if (!bio_add_page(clone, page, len, 0)) {
-			mempool_free(page, cc->page_pool);
-			break;
-		}
+		bvec = &clone->bi_io_vec[clone->bi_vcnt++];
+		bvec->bv_page = page;
+		bvec->bv_len = len;
+		bvec->bv_offset = 0;
 
-		size -= len;
+		clone->bi_iter.bi_size += len;
+
+		remaining_size -= len;
 	}
 
-	if (!clone->bi_iter.bi_size) {
-		bio_put(clone);
-		return NULL;
-	}
+return_clone:
+	if (unlikely(gfp_mask & __GFP_WAIT))
+		mutex_unlock(&cc->bio_alloc_lock);
 
 	return clone;
 }
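To separate the allocation strategy described in the comment above from the diff markers, here is a minimal self-contained sketch of the same pattern with illustrative names (the real logic lives in crypt_alloc_buffer()): the opportunistic GFP_NOWAIT pass takes no lock, and only the blocking fallback, which sets __GFP_WAIT, runs under a mutex, so at most one caller at a time can sleep on the mempool and the two-sleepers deadlock described above cannot happen.

/* Sketch only: allocate a batch of pages from a mempool without risking
 * the concurrent-allocation deadlock; names are illustrative. */
#include <linux/mempool.h>
#include <linux/mutex.h>
#include <linux/gfp.h>

static void sketch_alloc_batch(mempool_t *pool, struct mutex *lock,
			       struct page **pages, unsigned nr_pages)
{
	gfp_t gfp_mask = GFP_NOWAIT | __GFP_HIGHMEM;
	unsigned i;

retry:
	if (gfp_mask & __GFP_WAIT)
		mutex_lock(lock);	/* serialize only the blocking path */

	for (i = 0; i < nr_pages; i++) {
		pages[i] = mempool_alloc(pool, gfp_mask);
		if (!pages[i]) {
			/* give back the partial batch so we never sleep while
			 * holding part of the pool, then retry blocking */
			while (i--)
				mempool_free(pages[i], pool);
			gfp_mask |= __GFP_WAIT;
			goto retry;
		}
	}

	/* with __GFP_WAIT set, mempool_alloc() waits rather than failing,
	 * so the blocking pass runs at most once */
	if (gfp_mask & __GFP_WAIT)
		mutex_unlock(lock);
}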
@@ -1020,7 +1038,6 @@ static void crypt_io_init(struct dm_crypt_io *io, struct crypt_config *cc,
 	io->base_bio = bio;
 	io->sector = sector;
 	io->error = 0;
-	io->base_io = NULL;
 	io->ctx.req = NULL;
 	atomic_set(&io->io_pending, 0);
 }
@@ -1033,13 +1050,11 @@ static void crypt_inc_pending(struct dm_crypt_io *io)
 /*
  * One of the bios was finished. Check for completion of
  * the whole request and correctly clean up the buffer.
- * If base_io is set, wait for the last fragment to complete.
  */
 static void crypt_dec_pending(struct dm_crypt_io *io)
 {
 	struct crypt_config *cc = io->cc;
 	struct bio *base_bio = io->base_bio;
-	struct dm_crypt_io *base_io = io->base_io;
 	int error = io->error;
 
 	if (!atomic_dec_and_test(&io->io_pending))
@@ -1047,16 +1062,8 @@ static void crypt_dec_pending(struct dm_crypt_io *io)
 	if (io->ctx.req)
 		crypt_free_req(cc, io->ctx.req, base_bio);
 
-	if (io != dm_per_bio_data(base_bio, cc->per_bio_data_size))
-		mempool_free(io, cc->io_pool);
-
-	if (likely(!base_io))
-		bio_endio(base_bio, error);
-	else {
-		if (error && !base_io->error)
-			base_io->error = error;
-		crypt_dec_pending(base_io);
-	}
+	bio_endio(base_bio, error);
 }
 
 /*
@@ -1138,37 +1145,97 @@ static int kcryptd_io_read(struct dm_crypt_io *io, gfp_t gfp)
 	return 0;
 }
 
+static void kcryptd_io_read_work(struct work_struct *work)
+{
+	struct dm_crypt_io *io = container_of(work, struct dm_crypt_io, work);
+
+	crypt_inc_pending(io);
+	if (kcryptd_io_read(io, GFP_NOIO))
+		io->error = -ENOMEM;
+	crypt_dec_pending(io);
+}
+
+static void kcryptd_queue_read(struct dm_crypt_io *io)
+{
+	struct crypt_config *cc = io->cc;
+
+	INIT_WORK(&io->work, kcryptd_io_read_work);
+	queue_work(cc->io_queue, &io->work);
+}
+
 static void kcryptd_io_write(struct dm_crypt_io *io)
 {
 	struct bio *clone = io->ctx.bio_out;
 
 	generic_make_request(clone);
 }
 
-static void kcryptd_io(struct work_struct *work)
-{
-	struct dm_crypt_io *io = container_of(work, struct dm_crypt_io, work);
-
-	if (bio_data_dir(io->base_bio) == READ) {
-		crypt_inc_pending(io);
-		if (kcryptd_io_read(io, GFP_NOIO))
-			io->error = -ENOMEM;
-		crypt_dec_pending(io);
-	} else
-		kcryptd_io_write(io);
-}
-
-static void kcryptd_queue_io(struct dm_crypt_io *io)
-{
-	struct crypt_config *cc = io->cc;
-
-	INIT_WORK(&io->work, kcryptd_io);
-	queue_work(cc->io_queue, &io->work);
-}
+#define crypt_io_from_node(node) rb_entry((node), struct dm_crypt_io, rb_node)
+
+static int dmcrypt_write(void *data)
+{
+	struct crypt_config *cc = data;
+	struct dm_crypt_io *io;
+
+	while (1) {
+		struct rb_root write_tree;
+		struct blk_plug plug;
+
+		DECLARE_WAITQUEUE(wait, current);
+
+		spin_lock_irq(&cc->write_thread_wait.lock);
+continue_locked:
+
+		if (!RB_EMPTY_ROOT(&cc->write_tree))
+			goto pop_from_list;
+
+		__set_current_state(TASK_INTERRUPTIBLE);
+		__add_wait_queue(&cc->write_thread_wait, &wait);
+
+		spin_unlock_irq(&cc->write_thread_wait.lock);
+
+		if (unlikely(kthread_should_stop())) {
+			set_task_state(current, TASK_RUNNING);
+			remove_wait_queue(&cc->write_thread_wait, &wait);
+			break;
+		}
+
+		schedule();
+
+		set_task_state(current, TASK_RUNNING);
+		spin_lock_irq(&cc->write_thread_wait.lock);
+		__remove_wait_queue(&cc->write_thread_wait, &wait);
+		goto continue_locked;
+
+pop_from_list:
+		write_tree = cc->write_tree;
+		cc->write_tree = RB_ROOT;
+
+		spin_unlock_irq(&cc->write_thread_wait.lock);
+
+		BUG_ON(rb_parent(write_tree.rb_node));
+
+		/*
+		 * Note: we cannot walk the tree here with rb_next because
+		 * the structures may be freed when kcryptd_io_write is called.
+		 */
+		blk_start_plug(&plug);
+		do {
+			io = crypt_io_from_node(rb_first(&write_tree));
+			rb_erase(&io->rb_node, &write_tree);
+			kcryptd_io_write(io);
+		} while (!RB_EMPTY_ROOT(&write_tree));
+		blk_finish_plug(&plug);
+	}
+	return 0;
+}
 
 static void kcryptd_crypt_write_io_submit(struct dm_crypt_io *io, int async)
 {
 	struct bio *clone = io->ctx.bio_out;
 	struct crypt_config *cc = io->cc;
+	unsigned long flags;
+	sector_t sector;
+	struct rb_node **rbp, *parent;
 
 	if (unlikely(io->error < 0)) {
 		crypt_free_buffer_pages(cc, clone);
@@ -1182,20 +1249,34 @@ static void kcryptd_crypt_write_io_submit(struct dm_crypt_io *io, int async)
 	clone->bi_iter.bi_sector = cc->start + io->sector;
 
-	if (async)
-		kcryptd_queue_io(io);
-	else
+	if (likely(!async) && test_bit(DM_CRYPT_NO_OFFLOAD, &cc->flags)) {
 		generic_make_request(clone);
+		return;
+	}
+
+	spin_lock_irqsave(&cc->write_thread_wait.lock, flags);
+	rbp = &cc->write_tree.rb_node;
+	parent = NULL;
+	sector = io->sector;
+	while (*rbp) {
+		parent = *rbp;
+		if (sector < crypt_io_from_node(parent)->sector)
+			rbp = &(*rbp)->rb_left;
+		else
+			rbp = &(*rbp)->rb_right;
+	}
+	rb_link_node(&io->rb_node, parent, rbp);
+	rb_insert_color(&io->rb_node, &cc->write_tree);
+	wake_up_locked(&cc->write_thread_wait);
+	spin_unlock_irqrestore(&cc->write_thread_wait.lock, flags);
 }
 
 static void kcryptd_crypt_write_convert(struct dm_crypt_io *io)
 {
 	struct crypt_config *cc = io->cc;
 	struct bio *clone;
-	struct dm_crypt_io *new_io;
 	int crypt_finished;
-	unsigned out_of_pages = 0;
-	unsigned remaining = io->base_bio->bi_iter.bi_size;
 	sector_t sector = io->sector;
 	int r;
@@ -1205,80 +1286,30 @@ static void kcryptd_crypt_write_convert(struct dm_crypt_io *io)
 	crypt_inc_pending(io);
 	crypt_convert_init(cc, &io->ctx, NULL, io->base_bio, sector);
 
-	/*
-	 * The allocated buffers can be smaller than the whole bio,
-	 * so repeat the whole process until all the data can be handled.
-	 */
-	while (remaining) {
-		clone = crypt_alloc_buffer(io, remaining, &out_of_pages);
-		if (unlikely(!clone)) {
-			io->error = -ENOMEM;
-			break;
-		}
-
-		io->ctx.bio_out = clone;
-		io->ctx.iter_out = clone->bi_iter;
-
-		remaining -= clone->bi_iter.bi_size;
-		sector += bio_sectors(clone);
-
-		crypt_inc_pending(io);
-
-		r = crypt_convert(cc, &io->ctx);
-		if (r < 0)
-			io->error = -EIO;
-
-		crypt_finished = atomic_dec_and_test(&io->ctx.cc_pending);
-
-		/* Encryption was already finished, submit io now */
-		if (crypt_finished) {
-			kcryptd_crypt_write_io_submit(io, 0);
-
-			/*
-			 * If there was an error, do not try next fragments.
-			 * For async, error is processed in async handler.
-			 */
-			if (unlikely(r < 0))
-				break;
-
-			io->sector = sector;
-		}
-
-		/*
-		 * Out of memory -> run queues
-		 * But don't wait if split was due to the io size restriction
-		 */
-		if (unlikely(out_of_pages))
-			congestion_wait(BLK_RW_ASYNC, HZ/100);
-
-		/*
-		 * With async crypto it is unsafe to share the crypto context
-		 * between fragments, so switch to a new dm_crypt_io structure.
-		 */
-		if (unlikely(!crypt_finished && remaining)) {
-			new_io = mempool_alloc(cc->io_pool, GFP_NOIO);
-			crypt_io_init(new_io, io->cc, io->base_bio, sector);
-			crypt_inc_pending(new_io);
-			crypt_convert_init(cc, &new_io->ctx, NULL,
-					   io->base_bio, sector);
-			new_io->ctx.iter_in = io->ctx.iter_in;
-
-			/*
-			 * Fragments after the first use the base_io
-			 * pending count.
-			 */
-			if (!io->base_io)
-				new_io->base_io = io;
-			else {
-				new_io->base_io = io->base_io;
-				crypt_inc_pending(io->base_io);
-				crypt_dec_pending(io);
-			}
-
-			io = new_io;
-		}
-	}
-
+	clone = crypt_alloc_buffer(io, io->base_bio->bi_iter.bi_size);
+	if (unlikely(!clone)) {
+		io->error = -EIO;
+		goto dec;
+	}
+
+	io->ctx.bio_out = clone;
+	io->ctx.iter_out = clone->bi_iter;
+
+	sector += bio_sectors(clone);
+
+	crypt_inc_pending(io);
+	r = crypt_convert(cc, &io->ctx);
+	if (r)
+		io->error = -EIO;
+	crypt_finished = atomic_dec_and_test(&io->ctx.cc_pending);
+
+	/* Encryption was already finished, submit io now */
+	if (crypt_finished) {
+		kcryptd_crypt_write_io_submit(io, 0);
+		io->sector = sector;
+	}
+
+dec:
 	crypt_dec_pending(io);
 }
@@ -1481,6 +1512,9 @@ static void crypt_dtr(struct dm_target *ti)
 	if (!cc)
 		return;
 
+	if (cc->write_thread)
+		kthread_stop(cc->write_thread);
+
 	if (cc->io_queue)
 		destroy_workqueue(cc->io_queue);
 	if (cc->crypt_queue)
@@ -1495,8 +1529,6 @@ static void crypt_dtr(struct dm_target *ti)
 		mempool_destroy(cc->page_pool);
 	if (cc->req_pool)
 		mempool_destroy(cc->req_pool);
-	if (cc->io_pool)
-		mempool_destroy(cc->io_pool);
 
 	if (cc->iv_gen_ops && cc->iv_gen_ops->dtr)
 		cc->iv_gen_ops->dtr(cc);
@@ -1688,7 +1720,7 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 	char dummy;
 
 	static struct dm_arg _args[] = {
-		{0, 1, "Invalid number of feature args"},
+		{0, 3, "Invalid number of feature args"},
 	};
 
 	if (argc < 5) {
@@ -1710,13 +1742,6 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 	if (ret < 0)
 		goto bad;
 
-	ret = -ENOMEM;
-	cc->io_pool = mempool_create_slab_pool(MIN_IOS, _crypt_io_pool);
-	if (!cc->io_pool) {
-		ti->error = "Cannot allocate crypt io mempool";
-		goto bad;
-	}
-
 	cc->dmreq_start = sizeof(struct ablkcipher_request);
 	cc->dmreq_start += crypto_ablkcipher_reqsize(any_tfm(cc));
 	cc->dmreq_start = ALIGN(cc->dmreq_start, __alignof__(struct dm_crypt_request));
@@ -1734,6 +1759,7 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 		iv_size_padding = crypto_ablkcipher_alignmask(any_tfm(cc));
 	}
 
+	ret = -ENOMEM;
 	cc->req_pool = mempool_create_kmalloc_pool(MIN_IOS, cc->dmreq_start +
 			sizeof(struct dm_crypt_request) + iv_size_padding + cc->iv_size);
 	if (!cc->req_pool) {
@@ -1746,7 +1772,7 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 			sizeof(struct dm_crypt_request) + iv_size_padding + cc->iv_size,
 			ARCH_KMALLOC_MINALIGN);
 
-	cc->page_pool = mempool_create_page_pool(MIN_POOL_PAGES, 0);
+	cc->page_pool = mempool_create_page_pool(BIO_MAX_PAGES, 0);
 	if (!cc->page_pool) {
 		ti->error = "Cannot allocate page mempool";
 		goto bad;
@@ -1758,6 +1784,8 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 		goto bad;
 	}
 
+	mutex_init(&cc->bio_alloc_lock);
+
 	ret = -EINVAL;
 	if (sscanf(argv[2], "%llu%c", &tmpll, &dummy) != 1) {
 		ti->error = "Invalid iv_offset sector";
@@ -1788,15 +1816,26 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 		if (ret)
 			goto bad;
 
-		opt_string = dm_shift_arg(&as);
+		while (opt_params--) {
+			opt_string = dm_shift_arg(&as);
+			if (!opt_string) {
+				ti->error = "Not enough feature arguments";
+				goto bad;
+			}
 
-		if (opt_params == 1 && opt_string &&
-		    !strcasecmp(opt_string, "allow_discards"))
-			ti->num_discard_bios = 1;
-		else if (opt_params) {
-			ret = -EINVAL;
-			ti->error = "Invalid feature arguments";
-			goto bad;
+			if (!strcasecmp(opt_string, "allow_discards"))
+				ti->num_discard_bios = 1;
+
+			else if (!strcasecmp(opt_string, "same_cpu_crypt"))
+				set_bit(DM_CRYPT_SAME_CPU, &cc->flags);
+
+			else if (!strcasecmp(opt_string, "submit_from_crypt_cpus"))
+				set_bit(DM_CRYPT_NO_OFFLOAD, &cc->flags);
+
+			else {
+				ti->error = "Invalid feature arguments";
+				goto bad;
+			}
 		}
 	}
@@ -1807,13 +1846,28 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 		goto bad;
 	}
 
-	cc->crypt_queue = alloc_workqueue("kcryptd",
-					  WQ_CPU_INTENSIVE | WQ_MEM_RECLAIM, 1);
+	if (test_bit(DM_CRYPT_SAME_CPU, &cc->flags))
+		cc->crypt_queue = alloc_workqueue("kcryptd", WQ_CPU_INTENSIVE | WQ_MEM_RECLAIM, 1);
+	else
+		cc->crypt_queue = alloc_workqueue("kcryptd", WQ_CPU_INTENSIVE | WQ_MEM_RECLAIM | WQ_UNBOUND,
+						  num_online_cpus());
 	if (!cc->crypt_queue) {
 		ti->error = "Couldn't create kcryptd queue";
 		goto bad;
 	}
 
+	init_waitqueue_head(&cc->write_thread_wait);
+	cc->write_tree = RB_ROOT;
+
+	cc->write_thread = kthread_create(dmcrypt_write, cc, "dmcrypt_write");
+	if (IS_ERR(cc->write_thread)) {
+		ret = PTR_ERR(cc->write_thread);
+		cc->write_thread = NULL;
+		ti->error = "Couldn't spawn write thread";
+		goto bad;
+	}
+	wake_up_process(cc->write_thread);
+
 	ti->num_flush_bios = 1;
 	ti->discard_zeroes_data_unsupported = true;
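For reference, a minimal sketch (hypothetical names) of the create/wake/stop pairing used for the dmcrypt_write thread above: kthread_create() leaves the new thread asleep, wake_up_process() starts it once the rest of the target state is ready, and the destructor's kthread_stop() later wakes it and waits for it to exit, which is why the loop must check kthread_should_stop().

/* Sketch only; dm-crypt's real loop sleeps on a waitqueue (see dmcrypt_write()). */
#include <linux/kthread.h>
#include <linux/sched.h>
#include <linux/err.h>

static int sketch_writer_fn(void *data)
{
	while (!kthread_should_stop()) {
		/* a real thread sleeps here until work arrives, then drains it */
		schedule_timeout_interruptible(HZ);
	}
	return 0;
}

static struct task_struct *sketch_start_writer(void *ctx)
{
	struct task_struct *t = kthread_create(sketch_writer_fn, ctx, "sketch_write");

	if (IS_ERR(t))
		return t;		/* caller handles PTR_ERR(t) */
	wake_up_process(t);		/* kthread_create() leaves the thread asleep */
	return t;
}

static void sketch_stop_writer(struct task_struct *t)
{
	if (t)
		kthread_stop(t);	/* wakes the thread and waits for it to return */
}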
@@ -1848,7 +1902,7 @@ static int crypt_map(struct dm_target *ti, struct bio *bio)
 	if (bio_data_dir(io->base_bio) == READ) {
 		if (kcryptd_io_read(io, GFP_NOWAIT))
-			kcryptd_queue_io(io);
+			kcryptd_queue_read(io);
 	} else
 		kcryptd_queue_crypt(io);
 
@@ -1860,6 +1914,7 @@ static void crypt_status(struct dm_target *ti, status_type_t type,
 {
 	struct crypt_config *cc = ti->private;
 	unsigned i, sz = 0;
+	int num_feature_args = 0;
 
 	switch (type) {
 	case STATUSTYPE_INFO:
@@ -1878,8 +1933,18 @@ static void crypt_status(struct dm_target *ti, status_type_t type,
 		DMEMIT(" %llu %s %llu", (unsigned long long)cc->iv_offset,
 				cc->dev->name, (unsigned long long)cc->start);
 
-		if (ti->num_discard_bios)
-			DMEMIT(" 1 allow_discards");
+		num_feature_args += !!ti->num_discard_bios;
+		num_feature_args += test_bit(DM_CRYPT_SAME_CPU, &cc->flags);
+		num_feature_args += test_bit(DM_CRYPT_NO_OFFLOAD, &cc->flags);
+		if (num_feature_args) {
+			DMEMIT(" %d", num_feature_args);
+			if (ti->num_discard_bios)
+				DMEMIT(" allow_discards");
+			if (test_bit(DM_CRYPT_SAME_CPU, &cc->flags))
+				DMEMIT(" same_cpu_crypt");
+			if (test_bit(DM_CRYPT_NO_OFFLOAD, &cc->flags))
+				DMEMIT(" submit_from_crypt_cpus");
+		}
 
 		break;
 	}
@@ -1976,7 +2041,7 @@ static int crypt_iterate_devices(struct dm_target *ti,
 
 static struct target_type crypt_target = {
 	.name   = "crypt",
-	.version = {1, 13, 0},
+	.version = {1, 14, 0},
 	.module = THIS_MODULE,
 	.ctr    = crypt_ctr,
 	.dtr    = crypt_dtr,
@@ -1994,15 +2059,9 @@ static int __init dm_crypt_init(void)
 {
 	int r;
 
-	_crypt_io_pool = KMEM_CACHE(dm_crypt_io, 0);
-	if (!_crypt_io_pool)
-		return -ENOMEM;
-
 	r = dm_register_target(&crypt_target);
-	if (r < 0) {
+	if (r < 0)
 		DMERR("register failed %d", r);
-		kmem_cache_destroy(_crypt_io_pool);
-	}
 
 	return r;
 }
@@ -2010,7 +2069,6 @@ static int __init dm_crypt_init(void)
 static void __exit dm_crypt_exit(void)
 {
 	dm_unregister_target(&crypt_target);
-	kmem_cache_destroy(_crypt_io_pool);
 }
 
 module_init(dm_crypt_init);
@@ -290,6 +290,12 @@ static void do_region(int rw, unsigned region, struct dm_io_region *where,
 	unsigned short logical_block_size = queue_logical_block_size(q);
 	sector_t num_sectors;
 
+	/* Reject unsupported discard requests */
+	if ((rw & REQ_DISCARD) && !blk_queue_discard(q)) {
+		dec_count(io, region, -EOPNOTSUPP);
+		return;
+	}
+
 	/*
 	 * where->count may be zero if rw holds a flush and we need to
 	 * send a zero-sized flush.
@@ -604,6 +604,15 @@ static void write_callback(unsigned long error, void *context)
 		return;
 	}
 
+	/*
+	 * If the bio is discard, return an error, but do not
+	 * degrade the array.
+	 */
+	if (bio->bi_rw & REQ_DISCARD) {
+		bio_endio(bio, -EOPNOTSUPP);
+		return;
+	}
+
 	for (i = 0; i < ms->nr_mirrors; i++)
 		if (test_bit(i, &error))
 			fail_mirror(ms->mirror + i, DM_RAID1_WRITE_ERROR);
@@ -1432,8 +1432,6 @@ static void pending_complete(struct dm_snap_pending_exception *pe, int success)
 		full_bio->bi_private = pe->full_bio_private;
 		atomic_inc(&full_bio->bi_remaining);
 	}
-	free_pending_exception(pe);
-
 	increment_pending_exceptions_done_count();
 
 	up_write(&s->lock);
@@ -1450,6 +1448,8 @@ static void pending_complete(struct dm_snap_pending_exception *pe, int success)
 	}
 
 	retry_origin_bios(s, origin_bios);
+
+	free_pending_exception(pe);
 }
 
 static void commit_callback(void *context, int success)
@@ -2571,7 +2571,7 @@ int dm_setup_md_queue(struct mapped_device *md)
 	return 0;
 }
 
-static struct mapped_device *dm_find_md(dev_t dev)
+struct mapped_device *dm_get_md(dev_t dev)
 {
 	struct mapped_device *md;
 	unsigned minor = MINOR(dev);
@@ -2582,12 +2582,15 @@ static struct mapped_device *dm_find_md(dev_t dev)
 	spin_lock(&_minor_lock);
 
 	md = idr_find(&_minor_idr, minor);
-	if (md && (md == MINOR_ALLOCED ||
-		   (MINOR(disk_devt(dm_disk(md))) != minor) ||
-		   dm_deleting_md(md) ||
-		   test_bit(DMF_FREEING, &md->flags))) {
-		md = NULL;
-		goto out;
+	if (md) {
+		if ((md == MINOR_ALLOCED ||
+		     (MINOR(disk_devt(dm_disk(md))) != minor) ||
+		     dm_deleting_md(md) ||
+		     test_bit(DMF_FREEING, &md->flags))) {
+			md = NULL;
+			goto out;
+		}
+		dm_get(md);
 	}
 
 out:
@@ -2595,16 +2598,6 @@ static struct mapped_device *dm_find_md(dev_t dev)
 	return md;
 }
 
-struct mapped_device *dm_get_md(dev_t dev)
-{
-	struct mapped_device *md = dm_find_md(dev);
-
-	if (md)
-		dm_get(md);
-
-	return md;
-}
-
 EXPORT_SYMBOL_GPL(dm_get_md);
 
 void *dm_get_mdptr(struct mapped_device *md)
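The race being fixed above is the classic lookup-then-get window: the old dm_get_md() found the device under _minor_lock but took its reference only after the lock was dropped, so the device could be freed in between. A generic sketch of the broken and fixed shapes, with hypothetical object and table names (it assumes, as dm.c does, that the teardown path unpublishes the object under the same lock before dropping the last reference):

/* Sketch only: why the reference must be taken inside the lookup lock. */
#include <linux/spinlock.h>
#include <linux/idr.h>
#include <linux/kref.h>

struct obj {
	struct kref ref;
};

static DEFINE_SPINLOCK(table_lock);
static DEFINE_IDR(table);

/* Racy shape: obj may be freed between spin_unlock() and kref_get(). */
static struct obj *lookup_then_get_racy(int id)
{
	struct obj *o;

	spin_lock(&table_lock);
	o = idr_find(&table, id);
	spin_unlock(&table_lock);
	if (o)
		kref_get(&o->ref);	/* too late: o may already be gone */
	return o;
}

/* Fixed shape: grab the reference while the lock still pins the object. */
static struct obj *lookup_and_get(int id)
{
	struct obj *o;

	spin_lock(&table_lock);
	o = idr_find(&table, id);
	if (o)
		kref_get(&o->ref);
	spin_unlock(&table_lock);
	return o;
}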
@@ -78,7 +78,9 @@ static int sm_disk_count_is_more_than_one(struct dm_space_map *sm, dm_block_t b,
 	if (r)
 		return r;
 
-	return count > 1;
+	*result = count > 1;
+
+	return 0;
 }
 
 static int sm_disk_set_count(struct dm_space_map *sm, dm_block_t b,
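The one-line change above is easy to misread, so here is the shape of the bug in isolation (simplified, hypothetical helpers): in this interface the return value is an error code and the boolean answer must go through the out-parameter, so returning count > 1 made a positive answer look like a failure while *result was never written.

/* Simplified illustration of the sm_disk_count_is_more_than_one() fix;
 * in dm, a return value of 0 means success and the answer is written
 * through the out-parameter. */

/* Buggy shape: the boolean answer leaks out through the error channel,
 * and *result is never set. */
static int is_more_than_one_buggy(unsigned count, int *result)
{
	return count > 1;
}

/* Fixed shape: 0 for success, the answer goes through *result. */
static int is_more_than_one_fixed(unsigned count, int *result)
{
	*result = count > 1;
	return 0;
}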