Commit 83c7c18b authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'for-4.17/dm-changes' of...

Merge tag 'for-4.17/dm-changes' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm

Pull device mapper updates from Mike Snitzer:

 - DM core passthrough ioctl fix to retain reference to DM table, and
   that table's block devices, while issuing the ioctl to one of those
   block devices.

 - DM core passthrough ioctl fix to _not_ override the fmode_t used to
   issue the ioctl. Overriding by using the fmode_t that the block
   device was originally open with during DM table load is a liability.

 - Add DM core support for secure erase forwarding and update the DM
   linear and DM striped targets to support them.

 - A DM core 4.16 stable fix to allow abnormal IO (e.g. discard, write
   same, write zeroes) for targets that make use of the non-splitting IO
   variant (as is done for multipath or thinp when layered directly on
   NVMe).

 - Allow DM targets to return a payload in response to a DM message that
   they are sent. This is useful for DM targets that would like to
   provide statistics data in response to DM messages.

 - Update DM bufio to support non-power-of-2 block sizes. Numerous other
   related changes prepare the DM bufio code for this support.

 - Fix DM crypt to use a bounded amount of memory across the entire
   system. This is to avoid OOM that can otherwise occur in response to
   certain pathological IO workloads (e.g. discarding a large DM crypt
   device).

 - Add a 'check_at_most_once' feature to the DM verity target to allow
   verity to be used on mobile devices that have very limited resources.

 - Fix the DM integrity target to fail early if a keyed algorithm (e.g.
   HMAC) is to be used but the key isn't set.

 - Add non-power-of-2 support to the DM unstripe target.

 - Eliminate the use of a Variable Length Array in the DM stripe target.

 - Update the DM log-writes target to record metadata (REQ_META flag).

 - DM raid fixes for its nosync status and some variable range issues.

* tag 'for-4.17/dm-changes' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm: (28 commits)
  dm: remove fmode_t argument from .prepare_ioctl hook
  dm: hold DM table for duration of ioctl rather than use blkdev_get
  dm raid: fix parse_raid_params() variable range issue
  dm verity: make verity_for_io_block static
  dm verity: add 'check_at_most_once' option to only validate hashes once
  dm bufio: don't embed a bio in the dm_buffer structure
  dm bufio: support non-power-of-two block sizes
  dm bufio: use slab cache for dm_buffer structure allocations
  dm bufio: reorder fields in dm_buffer structure
  dm bufio: relax alignment constraint on slab cache
  dm bufio: remove code that merges slab caches
  dm bufio: get rid of slab cache name allocations
  dm bufio: move dm-bufio.h to include/linux/
  dm bufio: delete outdated comment
  dm: add support for secure erase forwarding
  dm: backfill abnormal IO support to non-splitting IO submission
  dm raid: fix nosync status
  dm mpath: use DM_MAPIO_SUBMITTED instead of magic number 0 in process_queued_bios()
  dm stripe: get rid of a Variable Length Array (VLA)
  dm log writes: record metadata flag for better flags record
  ...
parents 9022ca6b 5bd5e8d8
......@@ -109,6 +109,17 @@ fec_start <offset>
This is the offset, in <data_block_size> blocks, from the start of the
FEC device to the beginning of the encoding data.
check_at_most_once
Verify data blocks only the first time they are read from the data device,
rather than every time. This reduces the overhead of dm-verity so that it
can be used on systems that are memory and/or CPU constrained. However, it
provides a reduced level of security because only offline tampering of the
data device's content will be detected, not online tampering.
Hash blocks are still verified each time they are read from the hash device,
since verification of hash blocks is less performance critical than data
blocks, and a hash block will not be verified any more after all the data
blocks it covers have been verified anyway.
Theory of operation
===================
......
......@@ -6,7 +6,7 @@
* This file is released under the GPL.
*/
#include "dm-bufio.h"
#include <linux/dm-bufio.h>
#include <linux/device-mapper.h>
#include <linux/dm-io.h>
......@@ -50,19 +50,6 @@
*/
#define DM_BUFIO_DEFAULT_RETAIN_BYTES (256 * 1024)
/*
* The number of bvec entries that are embedded directly in the buffer.
* If the chunk size is larger, dm-io is used to do the io.
*/
#define DM_BUFIO_INLINE_VECS 16
/*
* Don't try to use kmem_cache_alloc for blocks larger than this.
* For explanation, see alloc_buffer_data below.
*/
#define DM_BUFIO_BLOCK_SIZE_SLAB_LIMIT (PAGE_SIZE >> 1)
#define DM_BUFIO_BLOCK_SIZE_GFP_LIMIT (PAGE_SIZE << (MAX_ORDER - 1))
/*
* Align buffer writes to this boundary.
* Tests show that SSDs have the highest IOPS when using 4k writes.
......@@ -99,13 +86,12 @@ struct dm_bufio_client {
struct block_device *bdev;
unsigned block_size;
unsigned char sectors_per_block_bits;
unsigned char pages_per_block_bits;
unsigned char blocks_per_page_bits;
unsigned aux_size;
s8 sectors_per_block_bits;
void (*alloc_callback)(struct dm_buffer *);
void (*write_callback)(struct dm_buffer *);
struct kmem_cache *slab_buffer;
struct kmem_cache *slab_cache;
struct dm_io_client *dm_io;
struct list_head reserved_buffers;
......@@ -148,11 +134,11 @@ struct dm_buffer {
struct list_head lru_list;
sector_t block;
void *data;
enum data_mode data_mode;
unsigned char data_mode; /* DATA_MODE_* */
unsigned char list_mode; /* LIST_* */
unsigned hold_count;
blk_status_t read_error;
blk_status_t write_error;
unsigned hold_count;
unsigned long state;
unsigned long last_accessed;
unsigned dirty_start;
......@@ -161,8 +147,7 @@ struct dm_buffer {
unsigned write_end;
struct dm_bufio_client *c;
struct list_head write_list;
struct bio bio;
struct bio_vec bio_vec[DM_BUFIO_INLINE_VECS];
void (*end_io)(struct dm_buffer *, blk_status_t);
#ifdef CONFIG_DM_DEBUG_BLOCK_STACK_TRACING
#define MAX_STACK 10
struct stack_trace stack_trace;
......@@ -172,21 +157,6 @@ struct dm_buffer {
/*----------------------------------------------------------------*/
static struct kmem_cache *dm_bufio_caches[PAGE_SHIFT - SECTOR_SHIFT];
static char *dm_bufio_cache_names[PAGE_SHIFT - SECTOR_SHIFT];
static inline int dm_bufio_cache_index(struct dm_bufio_client *c)
{
unsigned ret = c->blocks_per_page_bits - 1;
BUG_ON(ret >= ARRAY_SIZE(dm_bufio_caches));
return ret;
}
#define DM_BUFIO_CACHE(c) (dm_bufio_caches[dm_bufio_cache_index(c)])
#define DM_BUFIO_CACHE_NAME(c) (dm_bufio_cache_names[dm_bufio_cache_index(c)])
#define dm_bufio_in_request() (!!current->bio_list)
static void dm_bufio_lock(struct dm_bufio_client *c)
......@@ -319,7 +289,7 @@ static void __remove(struct dm_bufio_client *c, struct dm_buffer *b)
/*----------------------------------------------------------------*/
static void adjust_total_allocated(enum data_mode data_mode, long diff)
static void adjust_total_allocated(unsigned char data_mode, long diff)
{
static unsigned long * const class_ptr[DATA_MODE_LIMIT] = {
&dm_bufio_allocated_kmem_cache,
......@@ -384,18 +354,18 @@ static void __cache_size_refresh(void)
* space.
*/
static void *alloc_buffer_data(struct dm_bufio_client *c, gfp_t gfp_mask,
enum data_mode *data_mode)
unsigned char *data_mode)
{
if (c->block_size <= DM_BUFIO_BLOCK_SIZE_SLAB_LIMIT) {
if (unlikely(c->slab_cache != NULL)) {
*data_mode = DATA_MODE_SLAB;
return kmem_cache_alloc(DM_BUFIO_CACHE(c), gfp_mask);
return kmem_cache_alloc(c->slab_cache, gfp_mask);
}
if (c->block_size <= DM_BUFIO_BLOCK_SIZE_GFP_LIMIT &&
if (c->block_size <= KMALLOC_MAX_SIZE &&
gfp_mask & __GFP_NORETRY) {
*data_mode = DATA_MODE_GET_FREE_PAGES;
return (void *)__get_free_pages(gfp_mask,
c->pages_per_block_bits);
c->sectors_per_block_bits - (PAGE_SHIFT - SECTOR_SHIFT));
}
*data_mode = DATA_MODE_VMALLOC;
......@@ -424,15 +394,16 @@ static void *alloc_buffer_data(struct dm_bufio_client *c, gfp_t gfp_mask,
* Free buffer's data.
*/
static void free_buffer_data(struct dm_bufio_client *c,
void *data, enum data_mode data_mode)
void *data, unsigned char data_mode)
{
switch (data_mode) {
case DATA_MODE_SLAB:
kmem_cache_free(DM_BUFIO_CACHE(c), data);
kmem_cache_free(c->slab_cache, data);
break;
case DATA_MODE_GET_FREE_PAGES:
free_pages((unsigned long)data, c->pages_per_block_bits);
free_pages((unsigned long)data,
c->sectors_per_block_bits - (PAGE_SHIFT - SECTOR_SHIFT));
break;
case DATA_MODE_VMALLOC:
......@@ -451,8 +422,7 @@ static void free_buffer_data(struct dm_bufio_client *c,
*/
static struct dm_buffer *alloc_buffer(struct dm_bufio_client *c, gfp_t gfp_mask)
{
struct dm_buffer *b = kmalloc(sizeof(struct dm_buffer) + c->aux_size,
gfp_mask);
struct dm_buffer *b = kmem_cache_alloc(c->slab_buffer, gfp_mask);
if (!b)
return NULL;
......@@ -461,7 +431,7 @@ static struct dm_buffer *alloc_buffer(struct dm_bufio_client *c, gfp_t gfp_mask)
b->data = alloc_buffer_data(c, gfp_mask, &b->data_mode);
if (!b->data) {
kfree(b);
kmem_cache_free(c->slab_buffer, b);
return NULL;
}
......@@ -483,7 +453,7 @@ static void free_buffer(struct dm_buffer *b)
adjust_total_allocated(b->data_mode, -(long)c->block_size);
free_buffer_data(c, b->data, b->data_mode);
kfree(b);
kmem_cache_free(c->slab_buffer, b);
}
/*
......@@ -540,10 +510,6 @@ static void __relink_lru(struct dm_buffer *b, int dirty)
*
* the memory must be direct-mapped, not vmalloced;
*
* the I/O driver can reject requests spuriously if it thinks that
* the requests are too big for the device or if they cross a
* controller-defined memory boundary.
*
* If the buffer is small enough (up to DM_BUFIO_INLINE_VECS pages) and
* it is not vmalloced, try using the bio interface.
*
......@@ -561,12 +527,11 @@ static void dmio_complete(unsigned long error, void *context)
{
struct dm_buffer *b = context;
b->bio.bi_status = error ? BLK_STS_IOERR : 0;
b->bio.bi_end_io(&b->bio);
b->end_io(b, unlikely(error != 0) ? BLK_STS_IOERR : 0);
}
static void use_dmio(struct dm_buffer *b, int rw, sector_t sector,
unsigned n_sectors, unsigned offset, bio_end_io_t *end_io)
unsigned n_sectors, unsigned offset)
{
int r;
struct dm_io_request io_req = {
......@@ -590,76 +555,77 @@ static void use_dmio(struct dm_buffer *b, int rw, sector_t sector,
io_req.mem.ptr.vma = (char *)b->data + offset;
}
b->bio.bi_end_io = end_io;
r = dm_io(&io_req, 1, &region, NULL);
if (r) {
b->bio.bi_status = errno_to_blk_status(r);
end_io(&b->bio);
}
if (unlikely(r))
b->end_io(b, errno_to_blk_status(r));
}
static void inline_endio(struct bio *bio)
static void bio_complete(struct bio *bio)
{
bio_end_io_t *end_fn = bio->bi_private;
struct dm_buffer *b = bio->bi_private;
blk_status_t status = bio->bi_status;
/*
* Reset the bio to free any attached resources
* (e.g. bio integrity profiles).
*/
bio_reset(bio);
bio->bi_status = status;
end_fn(bio);
bio_put(bio);
b->end_io(b, status);
}
static void use_inline_bio(struct dm_buffer *b, int rw, sector_t sector,
unsigned n_sectors, unsigned offset, bio_end_io_t *end_io)
static void use_bio(struct dm_buffer *b, int rw, sector_t sector,
unsigned n_sectors, unsigned offset)
{
struct bio *bio;
char *ptr;
unsigned len;
unsigned vec_size, len;
bio_init(&b->bio, b->bio_vec, DM_BUFIO_INLINE_VECS);
b->bio.bi_iter.bi_sector = sector;
bio_set_dev(&b->bio, b->c->bdev);
b->bio.bi_end_io = inline_endio;
/*
* Use of .bi_private isn't a problem here because
* the dm_buffer's inline bio is local to bufio.
*/
b->bio.bi_private = end_io;
bio_set_op_attrs(&b->bio, rw, 0);
vec_size = b->c->block_size >> PAGE_SHIFT;
if (unlikely(b->c->sectors_per_block_bits < PAGE_SHIFT - SECTOR_SHIFT))
vec_size += 2;
bio = bio_kmalloc(GFP_NOWAIT | __GFP_NORETRY | __GFP_NOWARN, vec_size);
if (!bio) {
dmio:
use_dmio(b, rw, sector, n_sectors, offset);
return;
}
bio->bi_iter.bi_sector = sector;
bio_set_dev(bio, b->c->bdev);
bio_set_op_attrs(bio, rw, 0);
bio->bi_end_io = bio_complete;
bio->bi_private = b;
ptr = (char *)b->data + offset;
len = n_sectors << SECTOR_SHIFT;
do {
unsigned this_step = min((unsigned)(PAGE_SIZE - offset_in_page(ptr)), len);
if (!bio_add_page(&b->bio, virt_to_page(ptr), this_step,
if (!bio_add_page(bio, virt_to_page(ptr), this_step,
offset_in_page(ptr))) {
BUG_ON(b->c->block_size <= PAGE_SIZE);
use_dmio(b, rw, sector, n_sectors, offset, end_io);
return;
bio_put(bio);
goto dmio;
}
len -= this_step;
ptr += this_step;
} while (len > 0);
submit_bio(&b->bio);
submit_bio(bio);
}
static void submit_io(struct dm_buffer *b, int rw, bio_end_io_t *end_io)
static void submit_io(struct dm_buffer *b, int rw, void (*end_io)(struct dm_buffer *, blk_status_t))
{
unsigned n_sectors;
sector_t sector;
unsigned offset, end;
sector = (b->block << b->c->sectors_per_block_bits) + b->c->start;
b->end_io = end_io;
if (likely(b->c->sectors_per_block_bits >= 0))
sector = b->block << b->c->sectors_per_block_bits;
else
sector = b->block * (b->c->block_size >> SECTOR_SHIFT);
sector += b->c->start;
if (rw != REQ_OP_WRITE) {
n_sectors = 1 << b->c->sectors_per_block_bits;
n_sectors = b->c->block_size >> SECTOR_SHIFT;
offset = 0;
} else {
if (b->c->write_callback)
......@@ -676,11 +642,10 @@ static void submit_io(struct dm_buffer *b, int rw, bio_end_io_t *end_io)
n_sectors = (end - offset) >> SECTOR_SHIFT;
}
if (n_sectors <= ((DM_BUFIO_INLINE_VECS * PAGE_SIZE) >> SECTOR_SHIFT) &&
b->data_mode != DATA_MODE_VMALLOC)
use_inline_bio(b, rw, sector, n_sectors, offset, end_io);
if (b->data_mode != DATA_MODE_VMALLOC)
use_bio(b, rw, sector, n_sectors, offset);
else
use_dmio(b, rw, sector, n_sectors, offset, end_io);
use_dmio(b, rw, sector, n_sectors, offset);
}
/*----------------------------------------------------------------
......@@ -693,16 +658,14 @@ static void submit_io(struct dm_buffer *b, int rw, bio_end_io_t *end_io)
* Set the error, clear B_WRITING bit and wake anyone who was waiting on
* it.
*/
static void write_endio(struct bio *bio)
static void write_endio(struct dm_buffer *b, blk_status_t status)
{
struct dm_buffer *b = container_of(bio, struct dm_buffer, bio);
b->write_error = bio->bi_status;
if (unlikely(bio->bi_status)) {
b->write_error = status;
if (unlikely(status)) {
struct dm_bufio_client *c = b->c;
(void)cmpxchg(&c->async_write_error, 0,
blk_status_to_errno(bio->bi_status));
blk_status_to_errno(status));
}
BUG_ON(!test_bit(B_WRITING, &b->state));
......@@ -963,8 +926,11 @@ static void __get_memory_limit(struct dm_bufio_client *c,
}
}
buffers = dm_bufio_cache_size_per_client >>
(c->sectors_per_block_bits + SECTOR_SHIFT);
buffers = dm_bufio_cache_size_per_client;
if (likely(c->sectors_per_block_bits >= 0))
buffers >>= c->sectors_per_block_bits + SECTOR_SHIFT;
else
buffers /= c->block_size;
if (buffers < c->minimum_buffers)
buffers = c->minimum_buffers;
......@@ -1076,11 +1042,9 @@ static struct dm_buffer *__bufio_new(struct dm_bufio_client *c, sector_t block,
* The endio routine for reading: set the error, clear the bit and wake up
* anyone waiting on the buffer.
*/
static void read_endio(struct bio *bio)
static void read_endio(struct dm_buffer *b, blk_status_t status)
{
struct dm_buffer *b = container_of(bio, struct dm_buffer, bio);
b->read_error = bio->bi_status;
b->read_error = status;
BUG_ON(!test_bit(B_READING, &b->state));
......@@ -1482,13 +1446,13 @@ void dm_bufio_forget(struct dm_bufio_client *c, sector_t block)
dm_bufio_unlock(c);
}
EXPORT_SYMBOL(dm_bufio_forget);
EXPORT_SYMBOL_GPL(dm_bufio_forget);
void dm_bufio_set_minimum_buffers(struct dm_bufio_client *c, unsigned n)
{
c->minimum_buffers = n;
}
EXPORT_SYMBOL(dm_bufio_set_minimum_buffers);
EXPORT_SYMBOL_GPL(dm_bufio_set_minimum_buffers);
unsigned dm_bufio_get_block_size(struct dm_bufio_client *c)
{
......@@ -1498,8 +1462,12 @@ EXPORT_SYMBOL_GPL(dm_bufio_get_block_size);
sector_t dm_bufio_get_device_size(struct dm_bufio_client *c)
{
return i_size_read(c->bdev->bd_inode) >>
(SECTOR_SHIFT + c->sectors_per_block_bits);
sector_t s = i_size_read(c->bdev->bd_inode) >> SECTOR_SHIFT;
if (likely(c->sectors_per_block_bits >= 0))
s >>= c->sectors_per_block_bits;
else
sector_div(s, c->block_size >> SECTOR_SHIFT);
return s;
}
EXPORT_SYMBOL_GPL(dm_bufio_get_device_size);
......@@ -1597,8 +1565,12 @@ static bool __try_evict_buffer(struct dm_buffer *b, gfp_t gfp)
static unsigned long get_retain_buffers(struct dm_bufio_client *c)
{
unsigned long retain_bytes = READ_ONCE(dm_bufio_retain_bytes);
return retain_bytes >> (c->sectors_per_block_bits + SECTOR_SHIFT);
unsigned long retain_bytes = READ_ONCE(dm_bufio_retain_bytes);
if (likely(c->sectors_per_block_bits >= 0))
retain_bytes >>= c->sectors_per_block_bits + SECTOR_SHIFT;
else
retain_bytes /= c->block_size;
return retain_bytes;
}
static unsigned long __scan(struct dm_bufio_client *c, unsigned long nr_to_scan,
......@@ -1662,9 +1634,13 @@ struct dm_bufio_client *dm_bufio_client_create(struct block_device *bdev, unsign
int r;
struct dm_bufio_client *c;
unsigned i;
char slab_name[27];
BUG_ON(block_size < 1 << SECTOR_SHIFT ||
(block_size & (block_size - 1)));
if (!block_size || block_size & ((1 << SECTOR_SHIFT) - 1)) {
DMERR("%s: block size not specified or is not multiple of 512b", __func__);
r = -EINVAL;
goto bad_client;
}
c = kzalloc(sizeof(*c), GFP_KERNEL);
if (!c) {
......@@ -1675,13 +1651,11 @@ struct dm_bufio_client *dm_bufio_client_create(struct block_device *bdev, unsign
c->bdev = bdev;
c->block_size = block_size;
c->sectors_per_block_bits = __ffs(block_size) - SECTOR_SHIFT;
c->pages_per_block_bits = (__ffs(block_size) >= PAGE_SHIFT) ?
__ffs(block_size) - PAGE_SHIFT : 0;
c->blocks_per_page_bits = (__ffs(block_size) < PAGE_SHIFT ?
PAGE_SHIFT - __ffs(block_size) : 0);
if (is_power_of_2(block_size))
c->sectors_per_block_bits = __ffs(block_size) - SECTOR_SHIFT;
else
c->sectors_per_block_bits = -1;
c->aux_size = aux_size;
c->alloc_callback = alloc_callback;
c->write_callback = write_callback;
......@@ -1694,7 +1668,7 @@ struct dm_bufio_client *dm_bufio_client_create(struct block_device *bdev, unsign
INIT_LIST_HEAD(&c->reserved_buffers);
c->need_reserved_buffers = reserved_buffers;
c->minimum_buffers = DM_BUFIO_MIN_BUFFERS;
dm_bufio_set_minimum_buffers(c, DM_BUFIO_MIN_BUFFERS);
init_waitqueue_head(&c->free_buffer_wait);
c->async_write_error = 0;
......@@ -1705,29 +1679,26 @@ struct dm_bufio_client *dm_bufio_client_create(struct block_device *bdev, unsign
goto bad_dm_io;
}
mutex_lock(&dm_bufio_clients_lock);
if (c->blocks_per_page_bits) {
if (!DM_BUFIO_CACHE_NAME(c)) {
DM_BUFIO_CACHE_NAME(c) = kasprintf(GFP_KERNEL, "dm_bufio_cache-%u", c->block_size);
if (!DM_BUFIO_CACHE_NAME(c)) {
r = -ENOMEM;
mutex_unlock(&dm_bufio_clients_lock);
goto bad;
}
}
if (!DM_BUFIO_CACHE(c)) {
DM_BUFIO_CACHE(c) = kmem_cache_create(DM_BUFIO_CACHE_NAME(c),
c->block_size,
c->block_size, 0, NULL);
if (!DM_BUFIO_CACHE(c)) {
r = -ENOMEM;
mutex_unlock(&dm_bufio_clients_lock);
goto bad;
}
if (block_size <= KMALLOC_MAX_SIZE &&
(block_size < PAGE_SIZE || !is_power_of_2(block_size))) {
snprintf(slab_name, sizeof slab_name, "dm_bufio_cache-%u", c->block_size);
c->slab_cache = kmem_cache_create(slab_name, c->block_size, ARCH_KMALLOC_MINALIGN,
SLAB_RECLAIM_ACCOUNT, NULL);
if (!c->slab_cache) {
r = -ENOMEM;
goto bad;
}
}
mutex_unlock(&dm_bufio_clients_lock);
if (aux_size)
snprintf(slab_name, sizeof slab_name, "dm_bufio_buffer-%u", aux_size);
else
snprintf(slab_name, sizeof slab_name, "dm_bufio_buffer");
c->slab_buffer = kmem_cache_create(slab_name, sizeof(struct dm_buffer) + aux_size,
0, SLAB_RECLAIM_ACCOUNT, NULL);
if (!c->slab_buffer) {
r = -ENOMEM;
goto bad;
}
while (c->need_reserved_buffers) {
struct dm_buffer *b = alloc_buffer(c, GFP_KERNEL);
......@@ -1762,6 +1733,8 @@ struct dm_bufio_client *dm_bufio_client_create(struct block_device *bdev, unsign
list_del(&b->lru_list);
free_buffer(b);
}
kmem_cache_destroy(c->slab_cache);
kmem_cache_destroy(c->slab_buffer);
dm_io_client_destroy(c->dm_io);
bad_dm_io:
mutex_destroy(&c->lock);
......@@ -1808,6 +1781,8 @@ void dm_bufio_client_destroy(struct dm_bufio_client *c)
for (i = 0; i < LIST_SIZE; i++)
BUG_ON(c->n_buffers[i]);
kmem_cache_destroy(c->slab_cache);
kmem_cache_destroy(c->slab_buffer);
dm_io_client_destroy(c->dm_io);
mutex_destroy(&c->lock);
kfree(c);
......@@ -1911,9 +1886,6 @@ static int __init dm_bufio_init(void)
dm_bufio_allocated_vmalloc = 0;
dm_bufio_current_allocated = 0;
memset(&dm_bufio_caches, 0, sizeof dm_bufio_caches);
memset(&dm_bufio_cache_names, 0, sizeof dm_bufio_cache_names);
mem = (__u64)mult_frac(totalram_pages - totalhigh_pages,
DM_BUFIO_MEMORY_PERCENT, 100) << PAGE_SHIFT;
......@@ -1948,17 +1920,10 @@ static int __init dm_bufio_init(void)
static void __exit dm_bufio_exit(void)
{
int bug = 0;
int i;
cancel_delayed_work_sync(&dm_bufio_work);
destroy_workqueue(dm_bufio_wq);
for (i = 0; i < ARRAY_SIZE(dm_bufio_caches); i++)
kmem_cache_destroy(dm_bufio_caches[i]);
for (i = 0; i < ARRAY_SIZE(dm_bufio_cache_names); i++)
kfree(dm_bufio_cache_names[i]);
if (dm_bufio_client_count) {
DMCRIT("%s: dm_bufio_client_count leaked: %d",
__func__, dm_bufio_client_count);
......
......@@ -3387,7 +3387,8 @@ static int process_invalidate_cblocks_message(struct cache *cache, unsigned coun
*
* The key migration_threshold is supported by the cache target core.
*/
static int cache_message(struct dm_target *ti, unsigned argc, char **argv)
static int cache_message(struct dm_target *ti, unsigned argc, char **argv,
char *result, unsigned maxlen)
{
struct cache *cache = ti->private;
......
......@@ -148,6 +148,8 @@ struct crypt_config {
mempool_t *tag_pool;
unsigned tag_pool_max_sectors;
struct percpu_counter n_allocated_pages;
struct bio_set *bs;
struct mutex bio_alloc_lock;
......@@ -219,6 +221,12 @@ struct crypt_config {
#define MAX_TAG_SIZE 480
#define POOL_ENTRY_SIZE 512
static DEFINE_SPINLOCK(dm_crypt_clients_lock);
static unsigned dm_crypt_clients_n = 0;
static volatile unsigned long dm_crypt_pages_per_client;
#define DM_CRYPT_MEMORY_PERCENT 2
#define DM_CRYPT_MIN_PAGES_PER_CLIENT (BIO_MAX_PAGES * 16)
static void clone_init(struct dm_crypt_io *, struct bio *);
static void kcryptd_queue_crypt(struct dm_crypt_io *io);
static struct scatterlist *crypt_get_sg_data(struct crypt_config *cc,
......@@ -2155,6 +2163,43 @@ static int crypt_wipe_key(struct crypt_config *cc)
return r;
}
static void crypt_calculate_pages_per_client(void)
{
unsigned long pages = (totalram_pages - totalhigh_pages) * DM_CRYPT_MEMORY_PERCENT / 100;
if (!dm_crypt_clients_n)
return;
pages /= dm_crypt_clients_n;
if (pages < DM_CRYPT_MIN_PAGES_PER_CLIENT)
pages = DM_CRYPT_MIN_PAGES_PER_CLIENT;
dm_crypt_pages_per_client = pages;
}
static void *crypt_page_alloc(gfp_t gfp_mask, void *pool_data)
{
struct crypt_config *cc = pool_data;
struct page *page;
if (unlikely(percpu_counter_compare(&cc->n_allocated_pages, dm_crypt_pages_per_client) >= 0) &&
likely(gfp_mask & __GFP_NORETRY))
return NULL;
page = alloc_page(gfp_mask);
if (likely(page != NULL))
percpu_counter_add(&cc->n_allocated_pages, 1);
return page;
}
static void crypt_page_free(void *page, void *pool_data)
{
struct crypt_config *cc = pool_data;
__free_page(page);
percpu_counter_sub(&cc->n_allocated_pages, 1);
}
static void crypt_dtr(struct dm_target *ti)
{
struct crypt_config *cc = ti->private;
......@@ -2181,6 +2226,10 @@ static void crypt_dtr(struct dm_target *ti)
mempool_destroy(cc->req_pool);
mempool_destroy(cc->tag_pool);
if (cc->page_pool)
WARN_ON(percpu_counter_sum(&cc->n_allocated_pages) != 0);
percpu_counter_destroy(&cc->n_allocated_pages);
if (cc->iv_gen_ops && cc->iv_gen_ops->dtr)
cc->iv_gen_ops->dtr(cc);
......@@ -2197,6 +2246,12 @@ static void crypt_dtr(struct dm_target *ti)
/* Must zero key material before freeing */
kzfree(cc);
spin_lock(&dm_crypt_clients_lock);
WARN_ON(!dm_crypt_clients_n);
dm_crypt_clients_n--;
crypt_calculate_pages_per_client();
spin_unlock(&dm_crypt_clients_lock);
}
static int crypt_ctr_ivmode(struct dm_target *ti, const char *ivmode)
......@@ -2644,6 +2699,15 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
ti->private = cc;
spin_lock(&dm_crypt_clients_lock);
dm_crypt_clients_n++;
crypt_calculate_pages_per_client();
spin_unlock(&dm_crypt_clients_lock);
ret = percpu_counter_init(&cc->n_allocated_pages, 0, GFP_KERNEL);
if (ret < 0)
goto bad;
/* Optional parameters need to be read before cipher constructor */
if (argc > 5) {
ret = crypt_ctr_optional(ti, argc - 5, &argv[5]);
......@@ -2698,7 +2762,7 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
ALIGN(sizeof(struct dm_crypt_io) + cc->dmreq_start + additional_req_size,
ARCH_KMALLOC_MINALIGN);
cc->page_pool = mempool_create_page_pool(BIO_MAX_PAGES, 0);
cc->page_pool = mempool_create(BIO_MAX_PAGES, crypt_page_alloc, crypt_page_free, cc);
if (!cc->page_pool) {
ti->error = "Cannot allocate page mempool";
goto bad;
......@@ -2942,7 +3006,8 @@ static void crypt_resume(struct dm_target *ti)
* key set <key>
* key wipe
*/
static int crypt_message(struct dm_target *ti, unsigned argc, char **argv)
static int crypt_message(struct dm_target *ti, unsigned argc, char **argv,
char *result, unsigned maxlen)
{
struct crypt_config *cc = ti->private;
int key_size, ret = -EINVAL;
......
......@@ -1635,7 +1635,8 @@ static void era_status(struct dm_target *ti, status_type_t type,
DMEMIT("Error");
}
static int era_message(struct dm_target *ti, unsigned argc, char **argv)
static int era_message(struct dm_target *ti, unsigned argc, char **argv,
char *result, unsigned maxlen)
{
struct era *era = ti->private;
......
......@@ -442,8 +442,7 @@ static void flakey_status(struct dm_target *ti, status_type_t type,
}
}
static int flakey_prepare_ioctl(struct dm_target *ti,
struct block_device **bdev, fmode_t *mode)
static int flakey_prepare_ioctl(struct dm_target *ti, struct block_device **bdev)
{
struct flakey_c *fc = ti->private;
......
......@@ -18,7 +18,7 @@
#include <crypto/hash.h>
#include <crypto/skcipher.h>
#include <linux/async_tx.h>
#include "dm-bufio.h"
#include <linux/dm-bufio.h>
#define DM_MSG_PREFIX "integrity"
......@@ -2548,6 +2548,9 @@ static int get_mac(struct crypto_shash **hash, struct alg_spec *a, char **error,
*error = error_key;
return r;
}
} else if (crypto_shash_get_flags(*hash) & CRYPTO_TFM_NEED_KEY) {
*error = error_key;
return -ENOKEY;
}
}
......
......@@ -1595,7 +1595,7 @@ static int target_message(struct file *filp, struct dm_ioctl *param, size_t para
DMWARN("Target message sector outside device.");
r = -EINVAL;
} else if (ti->type->message)
r = ti->type->message(ti, argc, argv);
r = ti->type->message(ti, argc, argv, result, maxlen);
else {
DMWARN("Target type does not support messages");
r = -EINVAL;
......
......@@ -59,6 +59,7 @@ static int linear_ctr(struct dm_target *ti, unsigned int argc, char **argv)
ti->num_flush_bios = 1;
ti->num_discard_bios = 1;
ti->num_secure_erase_bios = 1;
ti->num_write_same_bios = 1;
ti->num_write_zeroes_bios = 1;
ti->private = lc;
......@@ -129,8 +130,7 @@ static void linear_status(struct dm_target *ti, status_type_t type,
}
}
static int linear_prepare_ioctl(struct dm_target *ti,
struct block_device **bdev, fmode_t *mode)
static int linear_prepare_ioctl(struct dm_target *ti, struct block_device **bdev)
{
struct linear_c *lc = (struct linear_c *) ti->private;
struct dm_dev *dev = lc->dev;
......
......@@ -52,10 +52,11 @@
* in fact we want to do the data and the discard in the order that they
* completed.
*/
#define LOG_FLUSH_FLAG (1 << 0)
#define LOG_FUA_FLAG (1 << 1)
#define LOG_DISCARD_FLAG (1 << 2)
#define LOG_MARK_FLAG (1 << 3)
#define LOG_FLUSH_FLAG (1 << 0)
#define LOG_FUA_FLAG (1 << 1)
#define LOG_DISCARD_FLAG (1 << 2)
#define LOG_MARK_FLAG (1 << 3)
#define LOG_METADATA_FLAG (1 << 4)
#define WRITE_LOG_VERSION 1ULL
#define WRITE_LOG_MAGIC 0x6a736677736872ULL
......@@ -699,6 +700,7 @@ static int log_writes_map(struct dm_target *ti, struct bio *bio)
bool flush_bio = (bio->bi_opf & REQ_PREFLUSH);
bool fua_bio = (bio->bi_opf & REQ_FUA);
bool discard_bio = (bio_op(bio) == REQ_OP_DISCARD);
bool meta_bio = (bio->bi_opf & REQ_META);
pb->block = NULL;
......@@ -743,6 +745,8 @@ static int log_writes_map(struct dm_target *ti, struct bio *bio)
block->flags |= LOG_FUA_FLAG;
if (discard_bio)
block->flags |= LOG_DISCARD_FLAG;
if (meta_bio)
block->flags |= LOG_METADATA_FLAG;
block->sector = bio_to_dev_sectors(lc, bio->bi_iter.bi_sector);
block->nr_sectors = bio_to_dev_sectors(lc, bio_sectors(bio));
......@@ -860,7 +864,7 @@ static void log_writes_status(struct dm_target *ti, status_type_t type,
}
static int log_writes_prepare_ioctl(struct dm_target *ti,
struct block_device **bdev, fmode_t *mode)
struct block_device **bdev)
{
struct log_writes_c *lc = ti->private;
struct dm_dev *dev = lc->dev;
......@@ -887,7 +891,8 @@ static int log_writes_iterate_devices(struct dm_target *ti,
* Messages supported:
* mark <mark data> - specify the marked data.
*/
static int log_writes_message(struct dm_target *ti, unsigned argc, char **argv)
static int log_writes_message(struct dm_target *ti, unsigned argc, char **argv,
char *result, unsigned maxlen)
{
int r = -EINVAL;
struct log_writes_c *lc = ti->private;
......
......@@ -714,7 +714,7 @@ static void process_queued_bios(struct work_struct *work)
case DM_MAPIO_REMAPPED:
generic_make_request(bio);
break;
case 0:
case DM_MAPIO_SUBMITTED:
break;
default:
WARN_ONCE(true, "__multipath_map_bio() returned %d\n", r);
......@@ -1811,7 +1811,8 @@ static void multipath_status(struct dm_target *ti, status_type_t type,
spin_unlock_irqrestore(&m->lock, flags);
}
static int multipath_message(struct dm_target *ti, unsigned argc, char **argv)
static int multipath_message(struct dm_target *ti, unsigned argc, char **argv,
char *result, unsigned maxlen)
{
int r = -EINVAL;
struct dm_dev *dev;
......@@ -1875,7 +1876,7 @@ static int multipath_message(struct dm_target *ti, unsigned argc, char **argv)
}
static int multipath_prepare_ioctl(struct dm_target *ti,
struct block_device **bdev, fmode_t *mode)
struct block_device **bdev)
{
struct multipath *m = ti->private;
struct pgpath *current_pgpath;
......@@ -1888,7 +1889,6 @@ static int multipath_prepare_ioctl(struct dm_target *ti,
if (current_pgpath) {
if (!test_bit(MPATHF_QUEUE_IO, &m->flags)) {
*bdev = current_pgpath->path.dev->bdev;
*mode = current_pgpath->path.dev->mode;
r = 0;
} else {
/* pg_init has not started or completed */
......
......@@ -1370,19 +1370,18 @@ static int parse_raid_params(struct raid_set *rs, struct dm_arg_set *as,
* In device-mapper, we specify things in sectors, but
* MD records this value in kB
*/
value /= 2;
if (value > COUNTER_MAX) {
if (value < 0 || value / 2 > COUNTER_MAX) {
rs->ti->error = "Max write-behind limit out of range";
return -EINVAL;
}
rs->md.bitmap_info.max_write_behind = value;
rs->md.bitmap_info.max_write_behind = value / 2;
} else if (!strcasecmp(key, dm_raid_arg_name_by_flag(CTR_FLAG_DAEMON_SLEEP))) {
if (test_and_set_bit(__CTR_FLAG_DAEMON_SLEEP, &rs->ctr_flags)) {
rs->ti->error = "Only one daemon_sleep argument pair allowed";
return -EINVAL;
}
if (!value || (value > MAX_SCHEDULE_TIMEOUT)) {
if (value < 0) {
rs->ti->error = "daemon sleep period out of range";
return -EINVAL;
}
......@@ -1424,27 +1423,33 @@ static int parse_raid_params(struct raid_set *rs, struct dm_arg_set *as,
return -EINVAL;
}
if (value < 0) {
rs->ti->error = "Bogus stripe cache entries value";
return -EINVAL;
}
rs->stripe_cache_entries = value;
} else if (!strcasecmp(key, dm_raid_arg_name_by_flag(CTR_FLAG_MIN_RECOVERY_RATE))) {
if (test_and_set_bit(__CTR_FLAG_MIN_RECOVERY_RATE, &rs->ctr_flags)) {
rs->ti->error = "Only one min_recovery_rate argument pair allowed";
return -EINVAL;
}
if (value > INT_MAX) {
if (value < 0) {
rs->ti->error = "min_recovery_rate out of range";
return -EINVAL;
}
rs->md.sync_speed_min = (int)value;
rs->md.sync_speed_min = value;
} else if (!strcasecmp(key, dm_raid_arg_name_by_flag(CTR_FLAG_MAX_RECOVERY_RATE))) {
if (test_and_set_bit(__CTR_FLAG_MAX_RECOVERY_RATE, &rs->ctr_flags)) {
rs->ti->error = "Only one max_recovery_rate argument pair allowed";
return -EINVAL;
}
if (value > INT_MAX) {
if (value < 0) {
rs->ti->error = "max_recovery_rate out of range";
return -EINVAL;
}
rs->md.sync_speed_max = (int)value;
rs->md.sync_speed_max = value;
} else if (!strcasecmp(key, dm_raid_arg_name_by_flag(CTR_FLAG_REGION_SIZE))) {
if (test_and_set_bit(__CTR_FLAG_REGION_SIZE, &rs->ctr_flags)) {
rs->ti->error = "Only one region_size argument pair allowed";
......@@ -1490,6 +1495,12 @@ static int parse_raid_params(struct raid_set *rs, struct dm_arg_set *as,
return -EINVAL;
}
if (rs->md.sync_speed_max &&
rs->md.sync_speed_min > rs->md.sync_speed_max) {
rs->ti->error = "Bogus recovery rates";
return -EINVAL;
}
if (validate_region_size(rs, region_size))
return -EINVAL;
......@@ -3408,7 +3419,8 @@ static sector_t rs_get_progress(struct raid_set *rs, unsigned long recovery,
set_bit(RT_FLAG_RS_IN_SYNC, &rs->runtime_flags);
} else {
if (!test_bit(MD_RECOVERY_INTR, &recovery) &&
if (!test_bit(__CTR_FLAG_NOSYNC, &rs->ctr_flags) &&
!test_bit(MD_RECOVERY_INTR, &recovery) &&
(test_bit(MD_RECOVERY_NEEDED, &recovery) ||
test_bit(MD_RECOVERY_RESHAPE, &recovery) ||
test_bit(MD_RECOVERY_RUNNING, &recovery)))
......@@ -3663,7 +3675,8 @@ static void raid_status(struct dm_target *ti, status_type_t type,
}
}
static int raid_message(struct dm_target *ti, unsigned int argc, char **argv)
static int raid_message(struct dm_target *ti, unsigned int argc, char **argv,
char *result, unsigned maxlen)
{
struct raid_set *rs = ti->private;
struct mddev *mddev = &rs->md;
......
......@@ -14,7 +14,7 @@
#include <linux/export.h>
#include <linux/slab.h>
#include <linux/dm-io.h>
#include "dm-bufio.h"
#include <linux/dm-bufio.h>
#define DM_MSG_PREFIX "persistent snapshot"
#define DM_CHUNK_SIZE_DEFAULT_SECTORS 32 /* 16KB */
......
......@@ -169,6 +169,7 @@ static int stripe_ctr(struct dm_target *ti, unsigned int argc, char **argv)
ti->num_flush_bios = stripes;
ti->num_discard_bios = stripes;
ti->num_secure_erase_bios = stripes;
ti->num_write_same_bios = stripes;
ti->num_write_zeroes_bios = stripes;
......@@ -295,6 +296,7 @@ static int stripe_map(struct dm_target *ti, struct bio *bio)
return DM_MAPIO_REMAPPED;
}
if (unlikely(bio_op(bio) == REQ_OP_DISCARD) ||
unlikely(bio_op(bio) == REQ_OP_SECURE_ERASE) ||
unlikely(bio_op(bio) == REQ_OP_WRITE_ZEROES) ||
unlikely(bio_op(bio) == REQ_OP_WRITE_SAME)) {
target_bio_nr = dm_bio_get_target_bio_nr(bio);
......@@ -368,7 +370,6 @@ static void stripe_status(struct dm_target *ti, status_type_t type,
unsigned status_flags, char *result, unsigned maxlen)
{
struct stripe_c *sc = (struct stripe_c *) ti->private;
char buffer[sc->stripes + 1];
unsigned int sz = 0;
unsigned int i;
......@@ -377,11 +378,12 @@ static void stripe_status(struct dm_target *ti, status_type_t type,
DMEMIT("%d ", sc->stripes);
for (i = 0; i < sc->stripes; i++) {
DMEMIT("%s ", sc->stripe[i].dev->name);
buffer[i] = atomic_read(&(sc->stripe[i].error_count)) ?
'D' : 'A';
}
buffer[i] = '\0';
DMEMIT("1 %s", buffer);
DMEMIT("1 ");
for (i = 0; i < sc->stripes; i++) {
DMEMIT("%c", atomic_read(&(sc->stripe[i].error_count)) ?
'D' : 'A');
}
break;
case STATUSTYPE_TABLE:
......
......@@ -466,7 +466,8 @@ static int process_set_region_mappings(struct switch_ctx *sctx,
*
* Only set_region_mappings is supported.
*/
static int switch_message(struct dm_target *ti, unsigned argc, char **argv)
static int switch_message(struct dm_target *ti, unsigned argc, char **argv,
char *result, unsigned maxlen)
{
static DEFINE_MUTEX(message_mutex);
......@@ -511,8 +512,7 @@ static void switch_status(struct dm_target *ti, status_type_t type,
*
* Passthrough all ioctls to the path for sector 0
*/
static int switch_prepare_ioctl(struct dm_target *ti,
struct block_device **bdev, fmode_t *mode)
static int switch_prepare_ioctl(struct dm_target *ti, struct block_device **bdev)
{
struct switch_ctx *sctx = ti->private;
unsigned path_nr;
......@@ -520,7 +520,6 @@ static int switch_prepare_ioctl(struct dm_target *ti,
path_nr = switch_get_path_nr(sctx, 0);
*bdev = sctx->path_list[path_nr].dmdev->bdev;
*mode = sctx->path_list[path_nr].dmdev->mode;
/*
* Only pass ioctls through if the device sizes match exactly.
......
......@@ -1846,6 +1846,34 @@ static bool dm_table_supports_discards(struct dm_table *t)
return true;
}
static int device_not_secure_erase_capable(struct dm_target *ti,
struct dm_dev *dev, sector_t start,
sector_t len, void *data)
{
struct request_queue *q = bdev_get_queue(dev->bdev);
return q && !blk_queue_secure_erase(q);
}
static bool dm_table_supports_secure_erase(struct dm_table *t)
{
struct dm_target *ti;
unsigned int i;
for (i = 0; i < dm_table_get_num_targets(t); i++) {
ti = dm_table_get_target(t, i);
if (!ti->num_secure_erase_bios)
return false;
if (!ti->type->iterate_devices ||
ti->type->iterate_devices(ti, device_not_secure_erase_capable, NULL))
return false;
}
return true;
}
void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
struct queue_limits *limits)
{
......@@ -1867,6 +1895,9 @@ void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
} else
blk_queue_flag_set(QUEUE_FLAG_DISCARD, q);
if (dm_table_supports_secure_erase(t))
blk_queue_flag_set(QUEUE_FLAG_SECERASE, q);
if (dm_table_supports_flush(t, (1UL << QUEUE_FLAG_WC))) {
wc = true;
if (dm_table_supports_flush(t, (1UL << QUEUE_FLAG_FUA)))
......
......@@ -16,8 +16,6 @@
static LIST_HEAD(_targets);
static DECLARE_RWSEM(_lock);
#define DM_MOD_NAME_SIZE 32
static inline struct target_type *__find_target_type(const char *name)
{
struct target_type *tt;
......
......@@ -3705,7 +3705,8 @@ static int process_release_metadata_snap_mesg(unsigned argc, char **argv, struct
* reserve_metadata_snap
* release_metadata_snap
*/
static int pool_message(struct dm_target *ti, unsigned argc, char **argv)
static int pool_message(struct dm_target *ti, unsigned argc, char **argv,
char *result, unsigned maxlen)
{
int r = -EINVAL;
struct pool_c *pt = ti->private;
......
......@@ -7,12 +7,6 @@
#include "dm.h"
#include <linux/module.h>
#include <linux/init.h>
#include <linux/blkdev.h>
#include <linux/bio.h>
#include <linux/slab.h>
#include <linux/bitops.h>
#include <linux/device-mapper.h>
struct unstripe_c {
struct dm_dev *dev;
......@@ -69,12 +63,6 @@ static int unstripe_ctr(struct dm_target *ti, unsigned int argc, char **argv)
goto err;
}
// FIXME: must support non power of 2 chunk_size, dm-stripe.c does
if (!is_power_of_2(uc->chunk_size)) {
ti->error = "Non power of 2 chunk_size is not supported yet";
goto err;
}
if (kstrtouint(argv[2], 10, &uc->unstripe)) {
ti->error = "Invalid stripe number";
goto err;
......@@ -98,7 +86,7 @@ static int unstripe_ctr(struct dm_target *ti, unsigned int argc, char **argv)
uc->unstripe_offset = uc->unstripe * uc->chunk_size;
uc->unstripe_width = (uc->stripes - 1) * uc->chunk_size;
uc->chunk_shift = fls(uc->chunk_size) - 1;
uc->chunk_shift = is_power_of_2(uc->chunk_size) ? fls(uc->chunk_size) - 1 : 0;
tmp_len = ti->len;
if (sector_div(tmp_len, uc->chunk_size)) {
......@@ -129,14 +117,18 @@ static sector_t map_to_core(struct dm_target *ti, struct bio *bio)
{
struct unstripe_c *uc = ti->private;
sector_t sector = bio->bi_iter.bi_sector;
sector_t tmp_sector = sector;
/* Shift us up to the right "row" on the stripe */
sector += uc->unstripe_width * (sector >> uc->chunk_shift);
if (uc->chunk_shift)
tmp_sector >>= uc->chunk_shift;
else
sector_div(tmp_sector, uc->chunk_size);
/* Account for what stripe we're operating on */
sector += uc->unstripe_offset;
sector += uc->unstripe_width * tmp_sector;
return sector;
/* Account for what stripe we're operating on */
return sector + uc->unstripe_offset;
}
static int unstripe_map(struct dm_target *ti, struct bio *bio)
......@@ -185,7 +177,7 @@ static void unstripe_io_hints(struct dm_target *ti,
static struct target_type unstripe_target = {
.name = "unstriped",
.version = {1, 0, 0},
.version = {1, 1, 0},
.module = THIS_MODULE,
.ctr = unstripe_ctr,
.dtr = unstripe_dtr,
......@@ -197,13 +189,7 @@ static struct target_type unstripe_target = {
static int __init dm_unstripe_init(void)
{
int r;
r = dm_register_target(&unstripe_target);
if (r < 0)
DMERR("target registration failed");
return r;
return dm_register_target(&unstripe_target);
}
static void __exit dm_unstripe_exit(void)
......@@ -215,5 +201,6 @@ module_init(dm_unstripe_init);
module_exit(dm_unstripe_exit);
MODULE_DESCRIPTION(DM_NAME " unstriped target");
MODULE_ALIAS("dm-unstriped");
MODULE_AUTHOR("Scott Bauer <scott.bauer@intel.com>");
MODULE_LICENSE("GPL");
......@@ -32,6 +32,7 @@
#define DM_VERITY_OPT_LOGGING "ignore_corruption"
#define DM_VERITY_OPT_RESTART "restart_on_corruption"
#define DM_VERITY_OPT_IGN_ZEROES "ignore_zero_blocks"
#define DM_VERITY_OPT_AT_MOST_ONCE "check_at_most_once"
#define DM_VERITY_OPTS_MAX (2 + DM_VERITY_OPTS_FEC)
......@@ -347,8 +348,8 @@ int verity_hash_for_block(struct dm_verity *v, struct dm_verity_io *io,
/*
* Calculates the digest for the given bio
*/
int verity_for_io_block(struct dm_verity *v, struct dm_verity_io *io,
struct bvec_iter *iter, struct crypto_wait *wait)
static int verity_for_io_block(struct dm_verity *v, struct dm_verity_io *io,
struct bvec_iter *iter, struct crypto_wait *wait)
{
unsigned int todo = 1 << v->data_dev_block_bits;
struct bio *bio = dm_bio_from_per_bio_data(io, v->ti->per_io_data_size);
......@@ -432,6 +433,18 @@ static int verity_bv_zero(struct dm_verity *v, struct dm_verity_io *io,
return 0;
}
/*
* Moves the bio iter one data block forward.
*/
static inline void verity_bv_skip_block(struct dm_verity *v,
struct dm_verity_io *io,
struct bvec_iter *iter)
{
struct bio *bio = dm_bio_from_per_bio_data(io, v->ti->per_io_data_size);
bio_advance_iter(bio, iter, 1 << v->data_dev_block_bits);
}
/*
* Verify one "dm_verity_io" structure.
*/
......@@ -445,9 +458,16 @@ static int verity_verify_io(struct dm_verity_io *io)
for (b = 0; b < io->n_blocks; b++) {
int r;
sector_t cur_block = io->block + b;
struct ahash_request *req = verity_io_hash_req(v, io);
r = verity_hash_for_block(v, io, io->block + b,
if (v->validated_blocks &&
likely(test_bit(cur_block, v->validated_blocks))) {
verity_bv_skip_block(v, io, &io->iter);
continue;
}
r = verity_hash_for_block(v, io, cur_block,
verity_io_want_digest(v, io),
&is_zero);
if (unlikely(r < 0))
......@@ -481,13 +501,16 @@ static int verity_verify_io(struct dm_verity_io *io)
return r;
if (likely(memcmp(verity_io_real_digest(v, io),
verity_io_want_digest(v, io), v->digest_size) == 0))
verity_io_want_digest(v, io), v->digest_size) == 0)) {
if (v->validated_blocks)
set_bit(cur_block, v->validated_blocks);
continue;
}
else if (verity_fec_decode(v, io, DM_VERITY_BLOCK_TYPE_DATA,
io->block + b, NULL, &start) == 0)
cur_block, NULL, &start) == 0)
continue;
else if (verity_handle_err(v, DM_VERITY_BLOCK_TYPE_DATA,
io->block + b))
cur_block))
return -EIO;
}
......@@ -673,6 +696,8 @@ static void verity_status(struct dm_target *ti, status_type_t type,
args += DM_VERITY_OPTS_FEC;
if (v->zero_digest)
args++;
if (v->validated_blocks)
args++;
if (!args)
return;
DMEMIT(" %u", args);
......@@ -691,13 +716,14 @@ static void verity_status(struct dm_target *ti, status_type_t type,
}
if (v->zero_digest)
DMEMIT(" " DM_VERITY_OPT_IGN_ZEROES);
if (v->validated_blocks)
DMEMIT(" " DM_VERITY_OPT_AT_MOST_ONCE);
sz = verity_fec_status_table(v, sz, result, maxlen);
break;
}
}
static int verity_prepare_ioctl(struct dm_target *ti,
struct block_device **bdev, fmode_t *mode)
static int verity_prepare_ioctl(struct dm_target *ti, struct block_device **bdev)
{
struct dm_verity *v = ti->private;
......@@ -740,6 +766,7 @@ static void verity_dtr(struct dm_target *ti)
if (v->bufio)
dm_bufio_client_destroy(v->bufio);
kvfree(v->validated_blocks);
kfree(v->salt);
kfree(v->root_digest);
kfree(v->zero_digest);
......@@ -760,6 +787,26 @@ static void verity_dtr(struct dm_target *ti)
kfree(v);
}
static int verity_alloc_most_once(struct dm_verity *v)
{
struct dm_target *ti = v->ti;
/* the bitset can only handle INT_MAX blocks */
if (v->data_blocks > INT_MAX) {
ti->error = "device too large to use check_at_most_once";
return -E2BIG;
}
v->validated_blocks = kvzalloc(BITS_TO_LONGS(v->data_blocks) *
sizeof(unsigned long), GFP_KERNEL);
if (!v->validated_blocks) {
ti->error = "failed to allocate bitset for check_at_most_once";
return -ENOMEM;
}
return 0;
}
static int verity_alloc_zero_digest(struct dm_verity *v)
{
int r = -ENOMEM;
......@@ -829,6 +876,12 @@ static int verity_parse_opt_args(struct dm_arg_set *as, struct dm_verity *v)
}
continue;
} else if (!strcasecmp(arg_name, DM_VERITY_OPT_AT_MOST_ONCE)) {
r = verity_alloc_most_once(v);
if (r)
return r;
continue;
} else if (verity_is_fec_opt_arg(arg_name)) {
r = verity_fec_parse_opt_args(as, v, &argc, arg_name);
if (r)
......@@ -1096,7 +1149,7 @@ static int verity_ctr(struct dm_target *ti, unsigned argc, char **argv)
static struct target_type verity_target = {
.name = "verity",
.version = {1, 3, 0},
.version = {1, 4, 0},
.module = THIS_MODULE,
.ctr = verity_ctr,
.dtr = verity_dtr,
......
......@@ -12,7 +12,7 @@
#ifndef DM_VERITY_H
#define DM_VERITY_H
#include "dm-bufio.h"
#include <linux/dm-bufio.h>
#include <linux/device-mapper.h>
#include <crypto/hash.h>
......@@ -63,6 +63,7 @@ struct dm_verity {
sector_t hash_level_block[DM_VERITY_MAX_LEVELS];
struct dm_verity_fec *fec; /* forward error correction */
unsigned long *validated_blocks; /* bitset blocks validated */
};
struct dm_verity_io {
......
......@@ -898,8 +898,7 @@ static void dmz_io_hints(struct dm_target *ti, struct queue_limits *limits)
/*
* Pass on ioctl to the backend device.
*/
static int dmz_prepare_ioctl(struct dm_target *ti,
struct block_device **bdev, fmode_t *mode)
static int dmz_prepare_ioctl(struct dm_target *ti, struct block_device **bdev)
{
struct dmz_target *dmz = ti->private;
......
......@@ -458,67 +458,56 @@ static int dm_blk_getgeo(struct block_device *bdev, struct hd_geometry *geo)
return dm_get_geometry(md, geo);
}
static char *_dm_claim_ptr = "I belong to device-mapper";
static int dm_get_bdev_for_ioctl(struct mapped_device *md,
struct block_device **bdev,
fmode_t *mode)
static int dm_prepare_ioctl(struct mapped_device *md, int *srcu_idx,
struct block_device **bdev)
__acquires(md->io_barrier)
{
struct dm_target *tgt;
struct dm_table *map;
int srcu_idx, r, r2;
int r;
retry:
r = -ENOTTY;
map = dm_get_live_table(md, &srcu_idx);
map = dm_get_live_table(md, srcu_idx);
if (!map || !dm_table_get_size(map))
goto out;
return r;
/* We only support devices that have a single target */
if (dm_table_get_num_targets(map) != 1)
goto out;
return r;
tgt = dm_table_get_target(map, 0);
if (!tgt->type->prepare_ioctl)
goto out;
if (dm_suspended_md(md)) {
r = -EAGAIN;
goto out;
}
r = tgt->type->prepare_ioctl(tgt, bdev, mode);
if (r < 0)
goto out;
bdgrab(*bdev);
r2 = blkdev_get(*bdev, *mode, _dm_claim_ptr);
if (r2 < 0) {
r = r2;
goto out;
}
return r;
dm_put_live_table(md, srcu_idx);
return r;
if (dm_suspended_md(md))
return -EAGAIN;
out:
dm_put_live_table(md, srcu_idx);
r = tgt->type->prepare_ioctl(tgt, bdev);
if (r == -ENOTCONN && !fatal_signal_pending(current)) {
dm_put_live_table(md, *srcu_idx);
msleep(10);
goto retry;
}
return r;
}
static void dm_unprepare_ioctl(struct mapped_device *md, int srcu_idx)
__releases(md->io_barrier)
{
dm_put_live_table(md, srcu_idx);
}
static int dm_blk_ioctl(struct block_device *bdev, fmode_t mode,
unsigned int cmd, unsigned long arg)
{
struct mapped_device *md = bdev->bd_disk->private_data;
int r;
int r, srcu_idx;
r = dm_get_bdev_for_ioctl(md, &bdev, &mode);
r = dm_prepare_ioctl(md, &srcu_idx, &bdev);
if (r < 0)
return r;
goto out;
if (r > 0) {
/*
......@@ -536,7 +525,7 @@ static int dm_blk_ioctl(struct block_device *bdev, fmode_t mode,
r = __blkdev_driver_ioctl(bdev, mode, cmd, arg);
out:
blkdev_put(bdev, mode);
dm_unprepare_ioctl(md, srcu_idx);
return r;
}
......@@ -710,6 +699,8 @@ static void dm_put_live_table_fast(struct mapped_device *md) __releases(RCU)
rcu_read_unlock();
}
static char *_dm_claim_ptr = "I belong to device-mapper";
/*
* Open a table device so we can use it as a map destination.
*/
......@@ -1414,6 +1405,11 @@ static unsigned get_num_discard_bios(struct dm_target *ti)
return ti->num_discard_bios;
}
static unsigned get_num_secure_erase_bios(struct dm_target *ti)
{
return ti->num_secure_erase_bios;
}
static unsigned get_num_write_same_bios(struct dm_target *ti)
{
return ti->num_write_same_bios;
......@@ -1467,6 +1463,11 @@ static int __send_discard(struct clone_info *ci, struct dm_target *ti)
is_split_required_for_discard);
}
static int __send_secure_erase(struct clone_info *ci, struct dm_target *ti)
{
return __send_changing_extent_only(ci, ti, get_num_secure_erase_bios, NULL);
}
static int __send_write_same(struct clone_info *ci, struct dm_target *ti)
{
return __send_changing_extent_only(ci, ti, get_num_write_same_bios, NULL);
......@@ -1477,6 +1478,25 @@ static int __send_write_zeroes(struct clone_info *ci, struct dm_target *ti)
return __send_changing_extent_only(ci, ti, get_num_write_zeroes_bios, NULL);
}
static bool __process_abnormal_io(struct clone_info *ci, struct dm_target *ti,
int *result)
{
struct bio *bio = ci->bio;
if (bio_op(bio) == REQ_OP_DISCARD)
*result = __send_discard(ci, ti);
else if (bio_op(bio) == REQ_OP_SECURE_ERASE)
*result = __send_secure_erase(ci, ti);
else if (bio_op(bio) == REQ_OP_WRITE_SAME)
*result = __send_write_same(ci, ti);
else if (bio_op(bio) == REQ_OP_WRITE_ZEROES)
*result = __send_write_zeroes(ci, ti);
else
return false;
return true;
}
/*
* Select the correct strategy for processing a non-flush bio.
*/
......@@ -1491,12 +1511,8 @@ static int __split_and_process_non_flush(struct clone_info *ci)
if (!dm_target_is_valid(ti))
return -EIO;
if (unlikely(bio_op(bio) == REQ_OP_DISCARD))
return __send_discard(ci, ti);
else if (unlikely(bio_op(bio) == REQ_OP_WRITE_SAME))
return __send_write_same(ci, ti);
else if (unlikely(bio_op(bio) == REQ_OP_WRITE_ZEROES))
return __send_write_zeroes(ci, ti);
if (unlikely(__process_abnormal_io(ci, ti, &r)))
return r;
if (bio_op(bio) == REQ_OP_ZONE_REPORT)
len = ci->sector_count;
......@@ -1617,9 +1633,12 @@ static blk_qc_t __process_bio(struct mapped_device *md,
goto out;
}
tio = alloc_tio(&ci, ti, 0, GFP_NOIO);
ci.bio = bio;
ci.sector_count = bio_sectors(bio);
if (unlikely(__process_abnormal_io(&ci, ti, &error)))
goto out;
tio = alloc_tio(&ci, ti, 0, GFP_NOIO);
ret = __clone_and_map_simple_bio(&ci, tio, NULL);
}
out:
......@@ -3015,20 +3034,19 @@ static int dm_pr_reserve(struct block_device *bdev, u64 key, enum pr_type type,
{
struct mapped_device *md = bdev->bd_disk->private_data;
const struct pr_ops *ops;
fmode_t mode;
int r;
int r, srcu_idx;
r = dm_get_bdev_for_ioctl(md, &bdev, &mode);
r = dm_prepare_ioctl(md, &srcu_idx, &bdev);
if (r < 0)
return r;
goto out;
ops = bdev->bd_disk->fops->pr_ops;
if (ops && ops->pr_reserve)
r = ops->pr_reserve(bdev, key, type, flags);
else
r = -EOPNOTSUPP;
blkdev_put(bdev, mode);
out:
dm_unprepare_ioctl(md, srcu_idx);
return r;
}
......@@ -3036,20 +3054,19 @@ static int dm_pr_release(struct block_device *bdev, u64 key, enum pr_type type)
{
struct mapped_device *md = bdev->bd_disk->private_data;
const struct pr_ops *ops;
fmode_t mode;
int r;
int r, srcu_idx;
r = dm_get_bdev_for_ioctl(md, &bdev, &mode);
r = dm_prepare_ioctl(md, &srcu_idx, &bdev);
if (r < 0)
return r;
goto out;
ops = bdev->bd_disk->fops->pr_ops;
if (ops && ops->pr_release)
r = ops->pr_release(bdev, key, type);
else
r = -EOPNOTSUPP;
blkdev_put(bdev, mode);
out:
dm_unprepare_ioctl(md, srcu_idx);
return r;
}
......@@ -3058,20 +3075,19 @@ static int dm_pr_preempt(struct block_device *bdev, u64 old_key, u64 new_key,
{
struct mapped_device *md = bdev->bd_disk->private_data;
const struct pr_ops *ops;
fmode_t mode;
int r;
int r, srcu_idx;
r = dm_get_bdev_for_ioctl(md, &bdev, &mode);
r = dm_prepare_ioctl(md, &srcu_idx, &bdev);
if (r < 0)
return r;
goto out;
ops = bdev->bd_disk->fops->pr_ops;
if (ops && ops->pr_preempt)
r = ops->pr_preempt(bdev, old_key, new_key, type, abort);
else
r = -EOPNOTSUPP;
blkdev_put(bdev, mode);
out:
dm_unprepare_ioctl(md, srcu_idx);
return r;
}
......@@ -3079,20 +3095,19 @@ static int dm_pr_clear(struct block_device *bdev, u64 key)
{
struct mapped_device *md = bdev->bd_disk->private_data;
const struct pr_ops *ops;
fmode_t mode;
int r;
int r, srcu_idx;
r = dm_get_bdev_for_ioctl(md, &bdev, &mode);
r = dm_prepare_ioctl(md, &srcu_idx, &bdev);
if (r < 0)
return r;
goto out;
ops = bdev->bd_disk->fops->pr_ops;
if (ops && ops->pr_clear)
r = ops->pr_clear(bdev, key);
else
r = -EOPNOTSUPP;
blkdev_put(bdev, mode);
out:
dm_unprepare_ioctl(md, srcu_idx);
return r;
}
......
......@@ -5,8 +5,8 @@
*/
#include "dm-block-manager.h"
#include "dm-persistent-data-internal.h"
#include "../dm-bufio.h"
#include <linux/dm-bufio.h>
#include <linux/crc32c.h>
#include <linux/module.h>
#include <linux/slab.h>
......
......@@ -87,10 +87,10 @@ typedef void (*dm_resume_fn) (struct dm_target *ti);
typedef void (*dm_status_fn) (struct dm_target *ti, status_type_t status_type,
unsigned status_flags, char *result, unsigned maxlen);
typedef int (*dm_message_fn) (struct dm_target *ti, unsigned argc, char **argv);
typedef int (*dm_message_fn) (struct dm_target *ti, unsigned argc, char **argv,
char *result, unsigned maxlen);
typedef int (*dm_prepare_ioctl_fn) (struct dm_target *ti,
struct block_device **bdev, fmode_t *mode);
typedef int (*dm_prepare_ioctl_fn) (struct dm_target *ti, struct block_device **bdev);
/*
* These iteration functions are typically used to check (and combine)
......@@ -266,6 +266,12 @@ struct dm_target {
*/
unsigned num_discard_bios;
/*
* The number of secure erase bios that will be submitted to the target.
* The bio number can be accessed with dm_bio_get_target_bio_nr.
*/
unsigned num_secure_erase_bios;
/*
* The number of WRITE SAME bios that will be submitted to the target.
* The bio number can be accessed with dm_bio_get_target_bio_nr.
......
......@@ -6,8 +6,8 @@
* This file is released under the GPL.
*/
#ifndef DM_BUFIO_H
#define DM_BUFIO_H
#ifndef _LINUX_DM_BUFIO_H
#define _LINUX_DM_BUFIO_H
#include <linux/blkdev.h>
#include <linux/types.h>
......
......@@ -270,9 +270,9 @@ enum {
#define DM_DEV_SET_GEOMETRY _IOWR(DM_IOCTL, DM_DEV_SET_GEOMETRY_CMD, struct dm_ioctl)
#define DM_VERSION_MAJOR 4
#define DM_VERSION_MINOR 37
#define DM_VERSION_MINOR 39
#define DM_VERSION_PATCHLEVEL 0
#define DM_VERSION_EXTRA "-ioctl (2017-09-20)"
#define DM_VERSION_EXTRA "-ioctl (2018-04-03)"
/* Status bits */
#define DM_READONLY_FLAG (1 << 0) /* In/Out */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment