Commit 1d94330a authored by Linus Torvalds

Merge tag 'for-5.11/dm-fixes-1' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm

Pull device mapper fixes from Mike Snitzer:

 - Fix DM-raid's raid1 discard limits so discards work.

 - Select missing Kconfig dependencies for DM integrity and zoned
   targets.

 - Four fixes for DM crypt target's support to optionally bypass kcryptd
   workqueues.

 - Fix DM snapshot merge to flush data before committing metadata.

 - Fix DM integrity data device flushing when external metadata is used.

 - Fix DM integrity's maximum number of supported constructor arguments
   that a user can request when creating an integrity device.

 - Eliminate DM core ioctl logging noise when an ioctl is issued without
   required CAP_SYS_RAWIO permission.

* tag 'for-5.11/dm-fixes-1' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm:
  dm crypt: defer decryption to a tasklet if interrupts disabled
  dm integrity: fix the maximum number of arguments
  dm crypt: do not call bio_endio() from the dm-crypt tasklet
  dm integrity: fix flush with external metadata device
  dm: eliminate potential source of excessive kernel log noise
  dm snapshot: flush merged data before committing metadata
  dm crypt: use GFP_ATOMIC when allocating crypto requests from softirq
  dm crypt: do not wait for backlogged crypto request completion in softirq
  dm zoned: select CONFIG_CRC32
  dm integrity: select CRYPTO_SKCIPHER
  dm raid: fix discard limits for raid1
parents b45e2da6 c87a95dc
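
The dm-crypt hunks below all serve one pattern: when crypt_convert() runs in softirq context (the DM_CRYPT_NO_READ_WORKQUEUE / DM_CRYPT_NO_WRITE_WORKQUEUE paths), nothing may sleep, so crypto request allocation switches to GFP_ATOMIC and any step that would otherwise block hands the rest of the bio off to a workqueue by returning BLK_STS_DEV_RESOURCE. A condensed sketch of that allocation step, using the types from the diff (the helper itself is illustrative, not the exact kernel code):

/*
 * Illustrative sketch only: softirq-safe request allocation in the style of
 * crypt_alloc_req_*() below. GFP_ATOMIC never sleeps but may fail;
 * GFP_NOIO may sleep, so it is only used outside interrupt context.
 */
static blk_status_t crypt_alloc_req_step(struct crypt_config *cc,
					 struct convert_context *ctx)
{
	if (!ctx->r.req) {
		ctx->r.req = mempool_alloc(&cc->req_pool,
					   in_interrupt() ? GFP_ATOMIC : GFP_NOIO);
		if (!ctx->r.req)
			/* caller defers the remaining sectors to a workqueue */
			return BLK_STS_DEV_RESOURCE;
	}
	return BLK_STS_OK;
}
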
drivers/md/Kconfig
@@ -605,6 +605,7 @@ config DM_INTEGRITY
select BLK_DEV_INTEGRITY
select DM_BUFIO
select CRYPTO
select CRYPTO_SKCIPHER
select ASYNC_XOR
help
This device-mapper target emulates a block device that has
@@ -622,6 +623,7 @@ config DM_ZONED
tristate "Drive-managed zoned block device target support"
depends on BLK_DEV_DM
depends on BLK_DEV_ZONED
select CRC32
help
This device-mapper target takes a host-managed or host-aware zoned
block device and exposes most of its capacity as a regular block
drivers/md/dm-bufio.c
@@ -1534,6 +1534,12 @@ sector_t dm_bufio_get_device_size(struct dm_bufio_client *c)
}
EXPORT_SYMBOL_GPL(dm_bufio_get_device_size);
struct dm_io_client *dm_bufio_get_dm_io_client(struct dm_bufio_client *c)
{
return c->dm_io;
}
EXPORT_SYMBOL_GPL(dm_bufio_get_dm_io_client);
sector_t dm_bufio_get_block_number(struct dm_buffer *b)
{
return b->block;
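
The new dm_bufio_get_dm_io_client() accessor exists so that a target which already owns a dm-bufio client (dm-integrity, in the hunks further down) can issue a flush to its data device through bufio's existing dm-io client instead of allocating a second one. A minimal sketch of such a caller, written synchronously for brevity (the dm-integrity change below instead passes a notify callback so the flush overlaps dm_bufio_write_dirty_buffers()); the helper name and parameters are illustrative:

/*
 * Illustrative sketch: issue an empty REQ_PREFLUSH through the dm-io
 * client that dm-bufio already holds. A NULL notify.fn makes dm_io()
 * wait for completion.
 */
static int flush_data_device(struct dm_bufio_client *bufio,
			     struct block_device *bdev)
{
	struct dm_io_request io_req = {
		.bi_op = REQ_OP_WRITE,
		.bi_op_flags = REQ_PREFLUSH | REQ_SYNC,
		.mem.type = DM_IO_KMEM,
		.mem.ptr.addr = NULL,
		.notify.fn = NULL,
		.client = dm_bufio_get_dm_io_client(bufio),
	};
	struct dm_io_region io_reg = {
		.bdev = bdev,
		.sector = 0,
		.count = 0,	/* zero sectors: flush only, no data */
	};

	return dm_io(&io_req, 1, &io_reg, NULL);
}
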
drivers/md/dm-crypt.c
@@ -1454,13 +1454,16 @@ static int crypt_convert_block_skcipher(struct crypt_config *cc,
static void kcryptd_async_done(struct crypto_async_request *async_req,
int error);
static void crypt_alloc_req_skcipher(struct crypt_config *cc,
static int crypt_alloc_req_skcipher(struct crypt_config *cc,
struct convert_context *ctx)
{
unsigned key_index = ctx->cc_sector & (cc->tfms_count - 1);
if (!ctx->r.req)
ctx->r.req = mempool_alloc(&cc->req_pool, GFP_NOIO);
if (!ctx->r.req) {
ctx->r.req = mempool_alloc(&cc->req_pool, in_interrupt() ? GFP_ATOMIC : GFP_NOIO);
if (!ctx->r.req)
return -ENOMEM;
}
skcipher_request_set_tfm(ctx->r.req, cc->cipher_tfm.tfms[key_index]);
@@ -1471,13 +1474,18 @@ static void crypt_alloc_req_skcipher(struct crypt_config *cc,
skcipher_request_set_callback(ctx->r.req,
CRYPTO_TFM_REQ_MAY_BACKLOG,
kcryptd_async_done, dmreq_of_req(cc, ctx->r.req));
return 0;
}
static void crypt_alloc_req_aead(struct crypt_config *cc,
static int crypt_alloc_req_aead(struct crypt_config *cc,
struct convert_context *ctx)
{
if (!ctx->r.req_aead)
ctx->r.req_aead = mempool_alloc(&cc->req_pool, GFP_NOIO);
if (!ctx->r.req) {
ctx->r.req = mempool_alloc(&cc->req_pool, in_interrupt() ? GFP_ATOMIC : GFP_NOIO);
if (!ctx->r.req)
return -ENOMEM;
}
aead_request_set_tfm(ctx->r.req_aead, cc->cipher_tfm.tfms_aead[0]);
@@ -1488,15 +1496,17 @@ static void crypt_alloc_req_aead(struct crypt_config *cc,
aead_request_set_callback(ctx->r.req_aead,
CRYPTO_TFM_REQ_MAY_BACKLOG,
kcryptd_async_done, dmreq_of_req(cc, ctx->r.req_aead));
return 0;
}
static void crypt_alloc_req(struct crypt_config *cc,
static int crypt_alloc_req(struct crypt_config *cc,
struct convert_context *ctx)
{
if (crypt_integrity_aead(cc))
crypt_alloc_req_aead(cc, ctx);
return crypt_alloc_req_aead(cc, ctx);
else
crypt_alloc_req_skcipher(cc, ctx);
return crypt_alloc_req_skcipher(cc, ctx);
}
static void crypt_free_req_skcipher(struct crypt_config *cc,
@@ -1529,17 +1539,28 @@ static void crypt_free_req(struct crypt_config *cc, void *req, struct bio *base_
* Encrypt / decrypt data from one bio to another one (can be the same one)
*/
static blk_status_t crypt_convert(struct crypt_config *cc,
struct convert_context *ctx, bool atomic)
struct convert_context *ctx, bool atomic, bool reset_pending)
{
unsigned int tag_offset = 0;
unsigned int sector_step = cc->sector_size >> SECTOR_SHIFT;
int r;
atomic_set(&ctx->cc_pending, 1);
/*
* if reset_pending is set we are dealing with the bio for the first time,
* else we're continuing to work on the previous bio, so don't mess with
* the cc_pending counter
*/
if (reset_pending)
atomic_set(&ctx->cc_pending, 1);
while (ctx->iter_in.bi_size && ctx->iter_out.bi_size) {
crypt_alloc_req(cc, ctx);
r = crypt_alloc_req(cc, ctx);
if (r) {
complete(&ctx->restart);
return BLK_STS_DEV_RESOURCE;
}
atomic_inc(&ctx->cc_pending);
if (crypt_integrity_aead(cc))
@@ -1553,7 +1574,25 @@ static blk_status_t crypt_convert(struct crypt_config *cc,
* but the driver request queue is full, let's wait.
*/
case -EBUSY:
wait_for_completion(&ctx->restart);
if (in_interrupt()) {
if (try_wait_for_completion(&ctx->restart)) {
/*
* we don't have to block to wait for completion,
* so proceed
*/
} else {
/*
* we can't wait for completion without blocking
* exit and continue processing in a workqueue
*/
ctx->r.req = NULL;
ctx->cc_sector += sector_step;
tag_offset++;
return BLK_STS_DEV_RESOURCE;
}
} else {
wait_for_completion(&ctx->restart);
}
reinit_completion(&ctx->restart);
fallthrough;
/*
@@ -1691,6 +1730,12 @@ static void crypt_inc_pending(struct dm_crypt_io *io)
atomic_inc(&io->io_pending);
}
static void kcryptd_io_bio_endio(struct work_struct *work)
{
struct dm_crypt_io *io = container_of(work, struct dm_crypt_io, work);
bio_endio(io->base_bio);
}
/*
* One of the bios was finished. Check for completion of
* the whole request and correctly clean up the buffer.
@@ -1713,7 +1758,23 @@ static void crypt_dec_pending(struct dm_crypt_io *io)
kfree(io->integrity_metadata);
base_bio->bi_status = error;
bio_endio(base_bio);
/*
* If we are running this function from our tasklet,
* we can't call bio_endio() here, because it will call
* clone_endio() from dm.c, which in turn will
* free the current struct dm_crypt_io structure with
* our tasklet. In this case we need to delay bio_endio()
* execution to after the tasklet is done and dequeued.
*/
if (tasklet_trylock(&io->tasklet)) {
tasklet_unlock(&io->tasklet);
bio_endio(base_bio);
return;
}
INIT_WORK(&io->work, kcryptd_io_bio_endio);
queue_work(cc->io_queue, &io->work);
}
/*
@@ -1945,6 +2006,37 @@ static bool kcryptd_crypt_write_inline(struct crypt_config *cc,
}
}
static void kcryptd_crypt_write_continue(struct work_struct *work)
{
struct dm_crypt_io *io = container_of(work, struct dm_crypt_io, work);
struct crypt_config *cc = io->cc;
struct convert_context *ctx = &io->ctx;
int crypt_finished;
sector_t sector = io->sector;
blk_status_t r;
wait_for_completion(&ctx->restart);
reinit_completion(&ctx->restart);
r = crypt_convert(cc, &io->ctx, true, false);
if (r)
io->error = r;
crypt_finished = atomic_dec_and_test(&ctx->cc_pending);
if (!crypt_finished && kcryptd_crypt_write_inline(cc, ctx)) {
/* Wait for completion signaled by kcryptd_async_done() */
wait_for_completion(&ctx->restart);
crypt_finished = 1;
}
/* Encryption was already finished, submit io now */
if (crypt_finished) {
kcryptd_crypt_write_io_submit(io, 0);
io->sector = sector;
}
crypt_dec_pending(io);
}
static void kcryptd_crypt_write_convert(struct dm_crypt_io *io)
{
struct crypt_config *cc = io->cc;
@@ -1973,7 +2065,17 @@ static void kcryptd_crypt_write_convert(struct dm_crypt_io *io)
crypt_inc_pending(io);
r = crypt_convert(cc, ctx,
test_bit(DM_CRYPT_NO_WRITE_WORKQUEUE, &cc->flags));
test_bit(DM_CRYPT_NO_WRITE_WORKQUEUE, &cc->flags), true);
/*
* Crypto API backlogged the request, because its queue was full
* and we're in softirq context, so continue from a workqueue
* (TODO: is it actually possible to be in softirq in the write path?)
*/
if (r == BLK_STS_DEV_RESOURCE) {
INIT_WORK(&io->work, kcryptd_crypt_write_continue);
queue_work(cc->crypt_queue, &io->work);
return;
}
if (r)
io->error = r;
crypt_finished = atomic_dec_and_test(&ctx->cc_pending);
@@ -1998,6 +2100,25 @@ static void kcryptd_crypt_read_done(struct dm_crypt_io *io)
crypt_dec_pending(io);
}
static void kcryptd_crypt_read_continue(struct work_struct *work)
{
struct dm_crypt_io *io = container_of(work, struct dm_crypt_io, work);
struct crypt_config *cc = io->cc;
blk_status_t r;
wait_for_completion(&io->ctx.restart);
reinit_completion(&io->ctx.restart);
r = crypt_convert(cc, &io->ctx, true, false);
if (r)
io->error = r;
if (atomic_dec_and_test(&io->ctx.cc_pending))
kcryptd_crypt_read_done(io);
crypt_dec_pending(io);
}
static void kcryptd_crypt_read_convert(struct dm_crypt_io *io)
{
struct crypt_config *cc = io->cc;
@@ -2009,7 +2130,16 @@ static void kcryptd_crypt_read_convert(struct dm_crypt_io *io)
io->sector);
r = crypt_convert(cc, &io->ctx,
test_bit(DM_CRYPT_NO_READ_WORKQUEUE, &cc->flags));
test_bit(DM_CRYPT_NO_READ_WORKQUEUE, &cc->flags), true);
/*
* Crypto API backlogged the request, because its queue was full
* and we're in softirq context, so continue from a workqueue
*/
if (r == BLK_STS_DEV_RESOURCE) {
INIT_WORK(&io->work, kcryptd_crypt_read_continue);
queue_work(cc->crypt_queue, &io->work);
return;
}
if (r)
io->error = r;
@@ -2091,8 +2221,12 @@ static void kcryptd_queue_crypt(struct dm_crypt_io *io)
if ((bio_data_dir(io->base_bio) == READ && test_bit(DM_CRYPT_NO_READ_WORKQUEUE, &cc->flags)) ||
(bio_data_dir(io->base_bio) == WRITE && test_bit(DM_CRYPT_NO_WRITE_WORKQUEUE, &cc->flags))) {
if (in_irq()) {
/* Crypto API's "skcipher_walk_first() refuses to work in hard IRQ context */
/*
* in_irq(): Crypto API's skcipher_walk_first() refuses to work in hard IRQ context.
* irqs_disabled(): the kernel may run some IO completion from the idle thread, but
* it is being executed with irqs disabled.
*/
if (in_irq() || irqs_disabled()) {
tasklet_init(&io->tasklet, kcryptd_crypt_tasklet, (unsigned long)&io->work);
tasklet_schedule(&io->tasklet);
return;
drivers/md/dm-integrity.c
@@ -1379,12 +1379,52 @@ static int dm_integrity_rw_tag(struct dm_integrity_c *ic, unsigned char *tag, se
#undef MAY_BE_HASH
}
static void dm_integrity_flush_buffers(struct dm_integrity_c *ic)
struct flush_request {
struct dm_io_request io_req;
struct dm_io_region io_reg;
struct dm_integrity_c *ic;
struct completion comp;
};
static void flush_notify(unsigned long error, void *fr_)
{
struct flush_request *fr = fr_;
if (unlikely(error != 0))
dm_integrity_io_error(fr->ic, "flushing disk cache", -EIO);
complete(&fr->comp);
}
static void dm_integrity_flush_buffers(struct dm_integrity_c *ic, bool flush_data)
{
int r;
struct flush_request fr;
if (!ic->meta_dev)
flush_data = false;
if (flush_data) {
fr.io_req.bi_op = REQ_OP_WRITE,
fr.io_req.bi_op_flags = REQ_PREFLUSH | REQ_SYNC,
fr.io_req.mem.type = DM_IO_KMEM,
fr.io_req.mem.ptr.addr = NULL,
fr.io_req.notify.fn = flush_notify,
fr.io_req.notify.context = &fr;
fr.io_req.client = dm_bufio_get_dm_io_client(ic->bufio),
fr.io_reg.bdev = ic->dev->bdev,
fr.io_reg.sector = 0,
fr.io_reg.count = 0,
fr.ic = ic;
init_completion(&fr.comp);
r = dm_io(&fr.io_req, 1, &fr.io_reg, NULL);
BUG_ON(r);
}
r = dm_bufio_write_dirty_buffers(ic->bufio);
if (unlikely(r))
dm_integrity_io_error(ic, "writing tags", r);
if (flush_data)
wait_for_completion(&fr.comp);
}
static void sleep_on_endio_wait(struct dm_integrity_c *ic)
@@ -2110,7 +2150,7 @@ static void dm_integrity_map_continue(struct dm_integrity_io *dio, bool from_map
if (unlikely(dio->op == REQ_OP_DISCARD) && likely(ic->mode != 'D')) {
integrity_metadata(&dio->work);
dm_integrity_flush_buffers(ic);
dm_integrity_flush_buffers(ic, false);
dio->in_flight = (atomic_t)ATOMIC_INIT(1);
dio->completion = NULL;
@@ -2195,7 +2235,7 @@ static void integrity_commit(struct work_struct *w)
flushes = bio_list_get(&ic->flush_bio_list);
if (unlikely(ic->mode != 'J')) {
spin_unlock_irq(&ic->endio_wait.lock);
dm_integrity_flush_buffers(ic);
dm_integrity_flush_buffers(ic, true);
goto release_flush_bios;
}
@@ -2409,7 +2449,7 @@ static void do_journal_write(struct dm_integrity_c *ic, unsigned write_start,
complete_journal_op(&comp);
wait_for_completion_io(&comp.comp);
dm_integrity_flush_buffers(ic);
dm_integrity_flush_buffers(ic, true);
}
static void integrity_writer(struct work_struct *w)
@@ -2451,7 +2491,7 @@ static void recalc_write_super(struct dm_integrity_c *ic)
{
int r;
dm_integrity_flush_buffers(ic);
dm_integrity_flush_buffers(ic, false);
if (dm_integrity_failed(ic))
return;
@@ -2654,7 +2694,7 @@ static void bitmap_flush_work(struct work_struct *work)
unsigned long limit;
struct bio *bio;
dm_integrity_flush_buffers(ic);
dm_integrity_flush_buffers(ic, false);
range.logical_sector = 0;
range.n_sectors = ic->provided_data_sectors;
@@ -2663,9 +2703,7 @@ static void bitmap_flush_work(struct work_struct *work)
add_new_range_and_wait(ic, &range);
spin_unlock_irq(&ic->endio_wait.lock);
dm_integrity_flush_buffers(ic);
if (ic->meta_dev)
blkdev_issue_flush(ic->dev->bdev, GFP_NOIO);
dm_integrity_flush_buffers(ic, true);
limit = ic->provided_data_sectors;
if (ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING)) {
@@ -2934,11 +2972,11 @@ static void dm_integrity_postsuspend(struct dm_target *ti)
if (ic->meta_dev)
queue_work(ic->writer_wq, &ic->writer_work);
drain_workqueue(ic->writer_wq);
dm_integrity_flush_buffers(ic);
dm_integrity_flush_buffers(ic, true);
}
if (ic->mode == 'B') {
dm_integrity_flush_buffers(ic);
dm_integrity_flush_buffers(ic, true);
#if 1
/* set to 0 to test bitmap replay code */
init_journal(ic, 0, ic->journal_sections, 0);
@@ -3754,7 +3792,7 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned argc, char **argv)
unsigned extra_args;
struct dm_arg_set as;
static const struct dm_arg _args[] = {
{0, 9, "Invalid number of feature args"},
{0, 15, "Invalid number of feature args"},
};
unsigned journal_sectors, interleave_sectors, buffer_sectors, journal_watermark, sync_msec;
bool should_write_sb;
drivers/md/dm-raid.c
@@ -3729,10 +3729,10 @@ static void raid_io_hints(struct dm_target *ti, struct queue_limits *limits)
blk_limits_io_opt(limits, chunk_size_bytes * mddev_data_stripes(rs));
/*
* RAID1 and RAID10 personalities require bio splitting,
* RAID0/4/5/6 don't and process large discard bios properly.
* RAID0 and RAID10 personalities require bio splitting,
* RAID1/4/5/6 don't and process large discard bios properly.
*/
if (rs_is_raid1(rs) || rs_is_raid10(rs)) {
if (rs_is_raid0(rs) || rs_is_raid10(rs)) {
limits->discard_granularity = chunk_size_bytes;
limits->max_discard_sectors = rs->md.chunk_sectors;
}
drivers/md/dm-snap.c
@@ -141,6 +141,11 @@ struct dm_snapshot {
* for them to be committed.
*/
struct bio_list bios_queued_during_merge;
/*
* Flush data after merge.
*/
struct bio flush_bio;
};
/*
@@ -1121,6 +1126,17 @@ static void snapshot_merge_next_chunks(struct dm_snapshot *s)
static void error_bios(struct bio *bio);
static int flush_data(struct dm_snapshot *s)
{
struct bio *flush_bio = &s->flush_bio;
bio_reset(flush_bio);
bio_set_dev(flush_bio, s->origin->bdev);
flush_bio->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH;
return submit_bio_wait(flush_bio);
}
static void merge_callback(int read_err, unsigned long write_err, void *context)
{
struct dm_snapshot *s = context;
@@ -1134,6 +1150,11 @@ static void merge_callback(int read_err, unsigned long write_err, void *context)
goto shut;
}
if (flush_data(s) < 0) {
DMERR("Flush after merge failed: shutting down merge");
goto shut;
}
if (s->store->type->commit_merge(s->store,
s->num_merging_chunks) < 0) {
DMERR("Write error in exception store: shutting down merge");
@@ -1318,6 +1339,7 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv)
s->first_merging_chunk = 0;
s->num_merging_chunks = 0;
bio_list_init(&s->bios_queued_during_merge);
bio_init(&s->flush_bio, NULL, 0);
/* Allocate hash table for COW data */
if (init_hash_tables(s)) {
@@ -1504,6 +1526,8 @@ static void snapshot_dtr(struct dm_target *ti)
dm_exception_store_destroy(s->store);
bio_uninit(&s->flush_bio);
dm_put_device(ti, s->cow);
dm_put_device(ti, s->origin);
drivers/md/dm.c
@@ -562,7 +562,7 @@ static int dm_blk_ioctl(struct block_device *bdev, fmode_t mode,
* subset of the parent bdev; require extra privileges.
*/
if (!capable(CAP_SYS_RAWIO)) {
DMWARN_LIMIT(
DMDEBUG_LIMIT(
"%s: sending ioctl %x to DM device without required privilege.",
current->comm, cmd);
r = -ENOIOCTLCMD;
include/linux/dm-bufio.h
@@ -150,6 +150,7 @@ void dm_bufio_set_minimum_buffers(struct dm_bufio_client *c, unsigned n);
unsigned dm_bufio_get_block_size(struct dm_bufio_client *c);
sector_t dm_bufio_get_device_size(struct dm_bufio_client *c);
struct dm_io_client *dm_bufio_get_dm_io_client(struct dm_bufio_client *c);
sector_t dm_bufio_get_block_number(struct dm_buffer *b);
void *dm_bufio_get_block_data(struct dm_buffer *b);
void *dm_bufio_get_aux_data(struct dm_buffer *b);