Commit 9d609f85 authored by Mikulas Patocka, committed by Mike Snitzer

dm integrity: support larger block sizes

The DM integrity block size can now be 512, 1k, 2k or 4k.  Using larger
blocks reduces metadata handling overhead.  The block size can be
configured at table load time using the "block_size:<value>" option;
where <value> is expressed in bytes (default is still 512 bytes).

It is safe to use larger block sizes with DM integrity, because the
DM integrity journal makes sure that the whole block is updated
atomically even if the underlying device doesn't support atomic writes
of that size (e.g. 4k block on top of a 512b device).

Depends-on: 2859323e ("block: fix blk_integrity_register to use template's interval_exp if not 0")
Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
parent 56b67a4f
...@@ -136,6 +136,11 @@ journal_mac:algorithm(:key) (the key is optional) ...@@ -136,6 +136,11 @@ journal_mac:algorithm(:key) (the key is optional)
the journal. Thus, modified sector number would be detected at the journal. Thus, modified sector number would be detected at
this stage. this stage.
block_size:number
The size of a data block in bytes. The larger the block size the
less overhead there is for per-block integrity metadata.
Supported values are 512, 1024, 2048 and 4096 bytes. If not
specified the default block size is 512 bytes.
The journal mode (D/J), buffer_sectors, journal_watermark, commit_time can The journal mode (D/J), buffer_sectors, journal_watermark, commit_time can
be changed when reloading the target (load an inactive table and swap the be changed when reloading the target (load an inactive table and swap the
......
...@@ -14,6 +14,7 @@ ...@@ -14,6 +14,7 @@
#include <linux/rbtree.h> #include <linux/rbtree.h>
#include <linux/delay.h> #include <linux/delay.h>
#include <linux/random.h> #include <linux/random.h>
#include <linux/log2.h>
#include <crypto/hash.h> #include <crypto/hash.h>
#include <crypto/skcipher.h> #include <crypto/skcipher.h>
#include <linux/async_tx.h> #include <linux/async_tx.h>
...@@ -45,6 +46,7 @@ ...@@ -45,6 +46,7 @@
#define SB_MAGIC "integrt" #define SB_MAGIC "integrt"
#define SB_VERSION 1 #define SB_VERSION 1
#define SB_SECTORS 8 #define SB_SECTORS 8
#define MAX_SECTORS_PER_BLOCK 8
struct superblock { struct superblock {
__u8 magic[8]; __u8 magic[8];
...@@ -54,6 +56,7 @@ struct superblock { ...@@ -54,6 +56,7 @@ struct superblock {
__u32 journal_sections; __u32 journal_sections;
__u64 provided_data_sectors; /* userspace uses this value */ __u64 provided_data_sectors; /* userspace uses this value */
__u32 flags; __u32 flags;
__u8 log2_sectors_per_block;
}; };
#define SB_FLAG_HAVE_JOURNAL_MAC 0x1 #define SB_FLAG_HAVE_JOURNAL_MAC 0x1
...@@ -71,10 +74,12 @@ struct journal_entry { ...@@ -71,10 +74,12 @@ struct journal_entry {
} s; } s;
__u64 sector; __u64 sector;
} u; } u;
commit_id_t last_bytes; commit_id_t last_bytes[0];
__u8 tag[0]; /* __u8 tag[0]; */
}; };
/*
 * Pointer (as __u8 *) to the tag bytes of journal entry `je`.  The tag is
 * addressed as the location of last_bytes[(ic)->sectors_per_block], i.e. it
 * begins immediately after the first sectors_per_block elements of the
 * last_bytes[] array.
 */
#define journal_entry_tag(ic, je) ((__u8 *)&(je)->last_bytes[(ic)->sectors_per_block])
#if BITS_PER_LONG == 64 #if BITS_PER_LONG == 64
#define journal_entry_set_sector(je, x) do { smp_wmb(); ACCESS_ONCE((je)->u.sector) = cpu_to_le64(x); } while (0) #define journal_entry_set_sector(je, x) do { smp_wmb(); ACCESS_ONCE((je)->u.sector) = cpu_to_le64(x); } while (0)
#define journal_entry_get_sector(je) le64_to_cpu((je)->u.sector) #define journal_entry_get_sector(je) le64_to_cpu((je)->u.sector)
...@@ -100,7 +105,7 @@ struct journal_sector { ...@@ -100,7 +105,7 @@ struct journal_sector {
commit_id_t commit_id; commit_id_t commit_id;
}; };
#define MAX_TAG_SIZE (JOURNAL_SECTOR_DATA - JOURNAL_MAC_PER_SECTOR - offsetof(struct journal_entry, tag)) #define MAX_TAG_SIZE (JOURNAL_SECTOR_DATA - JOURNAL_MAC_PER_SECTOR - offsetof(struct journal_entry, last_bytes[MAX_SECTORS_PER_BLOCK]))
#define METADATA_PADDING_SECTORS 8 #define METADATA_PADDING_SECTORS 8
...@@ -162,7 +167,7 @@ struct dm_integrity_c { ...@@ -162,7 +167,7 @@ struct dm_integrity_c {
unsigned short journal_entry_size; unsigned short journal_entry_size;
unsigned char journal_entries_per_sector; unsigned char journal_entries_per_sector;
unsigned char journal_section_entries; unsigned char journal_section_entries;
unsigned char journal_section_sectors; unsigned short journal_section_sectors;
unsigned journal_sections; unsigned journal_sections;
unsigned journal_entries; unsigned journal_entries;
sector_t device_sectors; sector_t device_sectors;
...@@ -170,6 +175,7 @@ struct dm_integrity_c { ...@@ -170,6 +175,7 @@ struct dm_integrity_c {
unsigned metadata_run; unsigned metadata_run;
__s8 log2_metadata_run; __s8 log2_metadata_run;
__u8 log2_buffer_sectors; __u8 log2_buffer_sectors;
__u8 sectors_per_block;
unsigned char mode; unsigned char mode;
bool suspending; bool suspending;
...@@ -332,6 +338,12 @@ static void get_area_and_offset(struct dm_integrity_c *ic, sector_t data_sector, ...@@ -332,6 +338,12 @@ static void get_area_and_offset(struct dm_integrity_c *ic, sector_t data_sector,
*offset = (unsigned)data_sector & ((1U << log2_interleave_sectors) - 1); *offset = (unsigned)data_sector & ((1U << log2_interleave_sectors) - 1);
} }
/*
 * Scale `n` down in place by shifting it right by
 * (ic)->sb->log2_sectors_per_block.
 *
 * `n` must be a modifiable lvalue; it is evaluated more than once, so it
 * must not have side effects.  BUG_ON() fires if any bit of `n` selected by
 * the mask (sectors_per_block - 1) is set.
 */
#define sector_to_block(ic, n) \
do { \
BUG_ON((n) & (unsigned)((ic)->sectors_per_block - 1)); \
(n) >>= (ic)->sb->log2_sectors_per_block; \
} while (0)
static __u64 get_metadata_sector_and_offset(struct dm_integrity_c *ic, sector_t area, static __u64 get_metadata_sector_and_offset(struct dm_integrity_c *ic, sector_t area,
sector_t offset, unsigned *metadata_offset) sector_t offset, unsigned *metadata_offset)
{ {
...@@ -345,6 +357,8 @@ static __u64 get_metadata_sector_and_offset(struct dm_integrity_c *ic, sector_t ...@@ -345,6 +357,8 @@ static __u64 get_metadata_sector_and_offset(struct dm_integrity_c *ic, sector_t
ms += area * ic->metadata_run; ms += area * ic->metadata_run;
ms >>= ic->log2_buffer_sectors; ms >>= ic->log2_buffer_sectors;
sector_to_block(ic, offset);
if (likely(ic->log2_tag_size >= 0)) { if (likely(ic->log2_tag_size >= 0)) {
ms += offset >> (SECTOR_SHIFT + ic->log2_buffer_sectors - ic->log2_tag_size); ms += offset >> (SECTOR_SHIFT + ic->log2_buffer_sectors - ic->log2_tag_size);
mo = (offset << ic->log2_tag_size) & ((1U << SECTOR_SHIFT << ic->log2_buffer_sectors) - 1); mo = (offset << ic->log2_tag_size) & ((1U << SECTOR_SHIFT << ic->log2_buffer_sectors) - 1);
...@@ -459,9 +473,13 @@ static struct journal_entry *access_journal_entry(struct dm_integrity_c *ic, uns ...@@ -459,9 +473,13 @@ static struct journal_entry *access_journal_entry(struct dm_integrity_c *ic, uns
static struct journal_sector *access_journal_data(struct dm_integrity_c *ic, unsigned section, unsigned n) static struct journal_sector *access_journal_data(struct dm_integrity_c *ic, unsigned section, unsigned n)
{ {
access_journal_check(ic, section, n, true, "access_journal_data"); n <<= ic->sb->log2_sectors_per_block;
n += JOURNAL_BLOCK_SECTORS;
return access_journal(ic, section, n + JOURNAL_BLOCK_SECTORS); access_journal_check(ic, section, n, false, "access_journal_data");
return access_journal(ic, section, n);
} }
static void section_mac(struct dm_integrity_c *ic, unsigned section, __u8 result[JOURNAL_MAC_SIZE]) static void section_mac(struct dm_integrity_c *ic, unsigned section, __u8 result[JOURNAL_MAC_SIZE])
...@@ -812,6 +830,8 @@ static void copy_from_journal(struct dm_integrity_c *ic, unsigned section, unsig ...@@ -812,6 +830,8 @@ static void copy_from_journal(struct dm_integrity_c *ic, unsigned section, unsig
int r; int r;
unsigned sector, pl_index, pl_offset; unsigned sector, pl_index, pl_offset;
BUG_ON((target | n_sectors | offset) & (unsigned)(ic->sectors_per_block - 1));
if (unlikely(dm_integrity_failed(ic))) { if (unlikely(dm_integrity_failed(ic))) {
fn(-1UL, data); fn(-1UL, data);
return; return;
...@@ -846,6 +866,8 @@ static bool add_new_range(struct dm_integrity_c *ic, struct dm_integrity_range * ...@@ -846,6 +866,8 @@ static bool add_new_range(struct dm_integrity_c *ic, struct dm_integrity_range *
struct rb_node **n = &ic->in_progress.rb_node; struct rb_node **n = &ic->in_progress.rb_node;
struct rb_node *parent; struct rb_node *parent;
BUG_ON((new_range->logical_sector | new_range->n_sectors) & (unsigned)(ic->sectors_per_block - 1));
parent = NULL; parent = NULL;
while (*n) { while (*n) {
...@@ -1175,7 +1197,7 @@ static void integrity_sector_checksum(struct dm_integrity_c *ic, sector_t sector ...@@ -1175,7 +1197,7 @@ static void integrity_sector_checksum(struct dm_integrity_c *ic, sector_t sector
goto failed; goto failed;
} }
r = crypto_shash_update(req, data, 1 << SECTOR_SHIFT); r = crypto_shash_update(req, data, ic->sectors_per_block << SECTOR_SHIFT);
if (unlikely(r < 0)) { if (unlikely(r < 0)) {
dm_integrity_io_error(ic, "crypto_shash_update", r); dm_integrity_io_error(ic, "crypto_shash_update", r);
goto failed; goto failed;
...@@ -1219,7 +1241,7 @@ static void integrity_metadata(struct work_struct *w) ...@@ -1219,7 +1241,7 @@ static void integrity_metadata(struct work_struct *w)
if (unlikely(ic->mode == 'R')) if (unlikely(ic->mode == 'R'))
goto skip_io; goto skip_io;
checksums = kmalloc((PAGE_SIZE >> SECTOR_SHIFT) * ic->tag_size + extra_space, checksums = kmalloc((PAGE_SIZE >> SECTOR_SHIFT >> ic->sb->log2_sectors_per_block) * ic->tag_size + extra_space,
GFP_NOIO | __GFP_NORETRY | __GFP_NOWARN); GFP_NOIO | __GFP_NORETRY | __GFP_NOWARN);
if (!checksums) if (!checksums)
checksums = checksums_onstack; checksums = checksums_onstack;
...@@ -1235,9 +1257,9 @@ static void integrity_metadata(struct work_struct *w) ...@@ -1235,9 +1257,9 @@ static void integrity_metadata(struct work_struct *w)
do { do {
integrity_sector_checksum(ic, sector, mem + pos, checksums_ptr); integrity_sector_checksum(ic, sector, mem + pos, checksums_ptr);
checksums_ptr += ic->tag_size; checksums_ptr += ic->tag_size;
sectors_to_process--; sectors_to_process -= ic->sectors_per_block;
pos += 1 << SECTOR_SHIFT; pos += ic->sectors_per_block << SECTOR_SHIFT;
sector++; sector += ic->sectors_per_block;
} while (pos < bv.bv_len && sectors_to_process && checksums != checksums_onstack); } while (pos < bv.bv_len && sectors_to_process && checksums != checksums_onstack);
kunmap_atomic(mem); kunmap_atomic(mem);
...@@ -1272,7 +1294,9 @@ static void integrity_metadata(struct work_struct *w) ...@@ -1272,7 +1294,9 @@ static void integrity_metadata(struct work_struct *w)
if (bip) { if (bip) {
struct bio_vec biv; struct bio_vec biv;
struct bvec_iter iter; struct bvec_iter iter;
unsigned data_to_process = dio->range.n_sectors * ic->tag_size; unsigned data_to_process = dio->range.n_sectors;
sector_to_block(ic, data_to_process);
data_to_process *= ic->tag_size;
bip_for_each_vec(biv, bip, iter) { bip_for_each_vec(biv, bip, iter) {
unsigned char *tag; unsigned char *tag;
...@@ -1303,6 +1327,7 @@ static int dm_integrity_map(struct dm_target *ti, struct bio *bio) ...@@ -1303,6 +1327,7 @@ static int dm_integrity_map(struct dm_target *ti, struct bio *bio)
{ {
struct dm_integrity_c *ic = ti->private; struct dm_integrity_c *ic = ti->private;
struct dm_integrity_io *dio = dm_per_bio_data(bio, sizeof(struct dm_integrity_io)); struct dm_integrity_io *dio = dm_per_bio_data(bio, sizeof(struct dm_integrity_io));
struct bio_integrity_payload *bip;
sector_t area, offset; sector_t area, offset;
...@@ -1330,6 +1355,44 @@ static int dm_integrity_map(struct dm_target *ti, struct bio *bio) ...@@ -1330,6 +1355,44 @@ static int dm_integrity_map(struct dm_target *ti, struct bio *bio)
(unsigned long long)ic->provided_data_sectors); (unsigned long long)ic->provided_data_sectors);
return -EIO; return -EIO;
} }
if (unlikely((dio->range.logical_sector | bio_sectors(bio)) & (unsigned)(ic->sectors_per_block - 1))) {
DMERR("Bio not aligned on %u sectors: 0x%llx, 0x%x",
ic->sectors_per_block,
(unsigned long long)dio->range.logical_sector, bio_sectors(bio));
return -EIO;
}
if (ic->sectors_per_block > 1) {
struct bvec_iter iter;
struct bio_vec bv;
bio_for_each_segment(bv, bio, iter) {
if (unlikely((bv.bv_offset | bv.bv_len) & ((ic->sectors_per_block << SECTOR_SHIFT) - 1))) {
DMERR("Bio vector (%u,%u) is not aligned on %u-sector boundary",
bv.bv_offset, bv.bv_len, ic->sectors_per_block);
return -EIO;
}
}
}
bip = bio_integrity(bio);
if (!ic->internal_hash) {
if (bip) {
unsigned wanted_tag_size = bio_sectors(bio) >> ic->sb->log2_sectors_per_block;
if (ic->log2_tag_size >= 0)
wanted_tag_size <<= ic->log2_tag_size;
else
wanted_tag_size *= ic->tag_size;
if (unlikely(wanted_tag_size != bip->bip_iter.bi_size)) {
DMERR("Invalid integrity data size %u, expected %u", bip->bip_iter.bi_size, wanted_tag_size);
return -EIO;
}
}
} else {
if (unlikely(bip != NULL)) {
DMERR("Unexpected integrity data when using internal hash");
return -EIO;
}
}
if (unlikely(ic->mode == 'R') && unlikely(dio->write)) if (unlikely(ic->mode == 'R') && unlikely(dio->write))
return -EIO; return -EIO;
...@@ -1369,6 +1432,8 @@ static bool __journal_read_write(struct dm_integrity_io *dio, struct bio *bio, ...@@ -1369,6 +1432,8 @@ static bool __journal_read_write(struct dm_integrity_io *dio, struct bio *bio,
if (unlikely(!dio->write)) { if (unlikely(!dio->write)) {
struct journal_sector *js; struct journal_sector *js;
char *mem_ptr;
unsigned s;
if (unlikely(journal_entry_is_inprogress(je))) { if (unlikely(journal_entry_is_inprogress(je))) {
flush_dcache_page(bv.bv_page); flush_dcache_page(bv.bv_page);
...@@ -1380,14 +1445,20 @@ static bool __journal_read_write(struct dm_integrity_io *dio, struct bio *bio, ...@@ -1380,14 +1445,20 @@ static bool __journal_read_write(struct dm_integrity_io *dio, struct bio *bio,
smp_rmb(); smp_rmb();
BUG_ON(journal_entry_get_sector(je) != logical_sector); BUG_ON(journal_entry_get_sector(je) != logical_sector);
js = access_journal_data(ic, journal_section, journal_entry); js = access_journal_data(ic, journal_section, journal_entry);
memcpy(mem + bv.bv_offset, js, JOURNAL_SECTOR_DATA); mem_ptr = mem + bv.bv_offset;
memcpy(mem + bv.bv_offset + JOURNAL_SECTOR_DATA, &je->last_bytes, sizeof je->last_bytes); s = 0;
do {
memcpy(mem_ptr, js, JOURNAL_SECTOR_DATA);
*(commit_id_t *)(mem_ptr + JOURNAL_SECTOR_DATA) = je->last_bytes[s];
js++;
mem_ptr += 1 << SECTOR_SHIFT;
} while (++s < ic->sectors_per_block);
#ifdef INTERNAL_VERIFY #ifdef INTERNAL_VERIFY
if (ic->internal_hash) { if (ic->internal_hash) {
char checksums_onstack[max(crypto_shash_digestsize(ic->internal_hash), ic->tag_size)]; char checksums_onstack[max(crypto_shash_digestsize(ic->internal_hash), ic->tag_size)];
integrity_sector_checksum(ic, logical_sector, mem + bv.bv_offset, checksums_onstack); integrity_sector_checksum(ic, logical_sector, mem + bv.bv_offset, checksums_onstack);
if (unlikely(memcmp(checksums_onstack, je->tag, ic->tag_size))) { if (unlikely(memcmp(checksums_onstack, journal_entry_tag(ic, je), ic->tag_size))) {
DMERR("Checksum failed when reading from journal, at sector 0x%llx", DMERR("Checksum failed when reading from journal, at sector 0x%llx",
(unsigned long long)logical_sector); (unsigned long long)logical_sector);
} }
...@@ -1398,7 +1469,7 @@ static bool __journal_read_write(struct dm_integrity_io *dio, struct bio *bio, ...@@ -1398,7 +1469,7 @@ static bool __journal_read_write(struct dm_integrity_io *dio, struct bio *bio,
if (!ic->internal_hash) { if (!ic->internal_hash) {
struct bio_integrity_payload *bip = bio_integrity(bio); struct bio_integrity_payload *bip = bio_integrity(bio);
unsigned tag_todo = ic->tag_size; unsigned tag_todo = ic->tag_size;
char *tag_ptr = je->tag; char *tag_ptr = journal_entry_tag(ic, je);
if (bip) do { if (bip) do {
struct bio_vec biv = bvec_iter_bvec(bip->bip_vec, bip->bip_iter); struct bio_vec biv = bvec_iter_bvec(bip->bip_vec, bip->bip_iter);
...@@ -1421,24 +1492,29 @@ static bool __journal_read_write(struct dm_integrity_io *dio, struct bio *bio, ...@@ -1421,24 +1492,29 @@ static bool __journal_read_write(struct dm_integrity_io *dio, struct bio *bio,
if (likely(dio->write)) { if (likely(dio->write)) {
struct journal_sector *js; struct journal_sector *js;
unsigned s;
js = access_journal_data(ic, journal_section, journal_entry); js = access_journal_data(ic, journal_section, journal_entry);
memcpy(js, mem + bv.bv_offset, 1 << SECTOR_SHIFT); memcpy(js, mem + bv.bv_offset, ic->sectors_per_block << SECTOR_SHIFT);
je->last_bytes = js->commit_id;
s = 0;
do {
je->last_bytes[s] = js[s].commit_id;
} while (++s < ic->sectors_per_block);
if (ic->internal_hash) { if (ic->internal_hash) {
unsigned digest_size = crypto_shash_digestsize(ic->internal_hash); unsigned digest_size = crypto_shash_digestsize(ic->internal_hash);
if (unlikely(digest_size > ic->tag_size)) { if (unlikely(digest_size > ic->tag_size)) {
char checksums_onstack[digest_size]; char checksums_onstack[digest_size];
integrity_sector_checksum(ic, logical_sector, (char *)js, checksums_onstack); integrity_sector_checksum(ic, logical_sector, (char *)js, checksums_onstack);
memcpy(je->tag, checksums_onstack, ic->tag_size); memcpy(journal_entry_tag(ic, je), checksums_onstack, ic->tag_size);
} else } else
integrity_sector_checksum(ic, logical_sector, (char *)js, je->tag); integrity_sector_checksum(ic, logical_sector, (char *)js, journal_entry_tag(ic, je));
} }
journal_entry_set_sector(je, logical_sector); journal_entry_set_sector(je, logical_sector);
} }
logical_sector++; logical_sector += ic->sectors_per_block;
journal_entry++; journal_entry++;
if (unlikely(journal_entry == ic->journal_section_entries)) { if (unlikely(journal_entry == ic->journal_section_entries)) {
...@@ -1447,8 +1523,8 @@ static bool __journal_read_write(struct dm_integrity_io *dio, struct bio *bio, ...@@ -1447,8 +1523,8 @@ static bool __journal_read_write(struct dm_integrity_io *dio, struct bio *bio,
wraparound_section(ic, &journal_section); wraparound_section(ic, &journal_section);
} }
bv.bv_offset += 1 << SECTOR_SHIFT; bv.bv_offset += ic->sectors_per_block << SECTOR_SHIFT;
} while (bv.bv_len -= 1 << SECTOR_SHIFT); } while (bv.bv_len -= ic->sectors_per_block << SECTOR_SHIFT);
if (unlikely(!dio->write)) if (unlikely(!dio->write))
flush_dcache_page(bv.bv_page); flush_dcache_page(bv.bv_page);
...@@ -1526,7 +1602,8 @@ static void dm_integrity_map_continue(struct dm_integrity_io *dio, bool from_map ...@@ -1526,7 +1602,8 @@ static void dm_integrity_map_continue(struct dm_integrity_io *dio, bool from_map
pos = journal_section * ic->journal_section_entries + journal_entry; pos = journal_section * ic->journal_section_entries + journal_entry;
ws = journal_section; ws = journal_section;
we = journal_entry; we = journal_entry;
for (i = 0; i < dio->range.n_sectors; i++) { i = 0;
do {
struct journal_entry *je; struct journal_entry *je;
add_journal_node(ic, &ic->journal_tree[pos], dio->range.logical_sector + i); add_journal_node(ic, &ic->journal_tree[pos], dio->range.logical_sector + i);
...@@ -1543,7 +1620,7 @@ static void dm_integrity_map_continue(struct dm_integrity_io *dio, bool from_map ...@@ -1543,7 +1620,7 @@ static void dm_integrity_map_continue(struct dm_integrity_io *dio, bool from_map
ws++; ws++;
wraparound_section(ic, &ws); wraparound_section(ic, &ws);
} }
} } while ((i += ic->sectors_per_block) < dio->range.n_sectors);
spin_unlock_irq(&ic->endio_wait.lock); spin_unlock_irq(&ic->endio_wait.lock);
goto journal_read_write; goto journal_read_write;
...@@ -1555,8 +1632,9 @@ static void dm_integrity_map_continue(struct dm_integrity_io *dio, bool from_map ...@@ -1555,8 +1632,9 @@ static void dm_integrity_map_continue(struct dm_integrity_io *dio, bool from_map
dio->range.n_sectors = next_sector - dio->range.logical_sector; dio->range.n_sectors = next_sector - dio->range.logical_sector;
} else { } else {
unsigned i; unsigned i;
for (i = 1; i < dio->range.n_sectors; i++) { unsigned jp = journal_read_pos + 1;
if (!test_journal_node(ic, journal_read_pos + i, dio->range.logical_sector + i)) for (i = ic->sectors_per_block; i < dio->range.n_sectors; i += ic->sectors_per_block, jp++) {
if (!test_journal_node(ic, jp, dio->range.logical_sector + i))
break; break;
} }
dio->range.n_sectors = i; dio->range.n_sectors = i;
...@@ -1725,6 +1803,16 @@ static void complete_copy_from_journal(unsigned long error, void *context) ...@@ -1725,6 +1803,16 @@ static void complete_copy_from_journal(unsigned long error, void *context)
complete_journal_op(comp); complete_journal_op(comp);
} }
/*
 * Copy the saved last_bytes[] values of journal entry `je` into the
 * commit_id field of consecutive journal sectors starting at `js`.
 *
 * One sector is written per element, for indices 0 up to (but not
 * including) ic->sectors_per_block.  The loop body always runs at least
 * once, so js[0] is written even before the bound is tested.
 */
static void restore_last_bytes(struct dm_integrity_c *ic, struct journal_sector *js,
			       struct journal_entry *je)
{
	unsigned idx = 0;

	do {
		js[idx].commit_id = je->last_bytes[idx];
		idx++;
	} while (idx < ic->sectors_per_block);
}
static void do_journal_write(struct dm_integrity_c *ic, unsigned write_start, static void do_journal_write(struct dm_integrity_c *ic, unsigned write_start,
unsigned write_sections, bool from_replay) unsigned write_sections, bool from_replay)
{ {
...@@ -1753,8 +1841,14 @@ static void do_journal_write(struct dm_integrity_c *ic, unsigned write_start, ...@@ -1753,8 +1841,14 @@ static void do_journal_write(struct dm_integrity_c *ic, unsigned write_start,
continue; continue;
BUG_ON(unlikely(journal_entry_is_inprogress(je)) && !from_replay); BUG_ON(unlikely(journal_entry_is_inprogress(je)) && !from_replay);
sec = journal_entry_get_sector(je); sec = journal_entry_get_sector(je);
if (unlikely(from_replay)) {
if (unlikely(sec & (unsigned)(ic->sectors_per_block - 1))) {
dm_integrity_io_error(ic, "invalid sector in journal", -EIO);
sec &= ~(sector_t)(ic->sectors_per_block - 1);
}
}
get_area_and_offset(ic, sec, &area, &offset); get_area_and_offset(ic, sec, &area, &offset);
access_journal_data(ic, i, j)->commit_id = je->last_bytes; restore_last_bytes(ic, access_journal_data(ic, i, j), je);
for (k = j + 1; k < ic->journal_section_entries; k++) { for (k = j + 1; k < ic->journal_section_entries; k++) {
struct journal_entry *je2 = access_journal_entry(ic, i, k); struct journal_entry *je2 = access_journal_entry(ic, i, k);
sector_t sec2, area2, offset2; sector_t sec2, area2, offset2;
...@@ -1763,16 +1857,16 @@ static void do_journal_write(struct dm_integrity_c *ic, unsigned write_start, ...@@ -1763,16 +1857,16 @@ static void do_journal_write(struct dm_integrity_c *ic, unsigned write_start,
BUG_ON(unlikely(journal_entry_is_inprogress(je2)) && !from_replay); BUG_ON(unlikely(journal_entry_is_inprogress(je2)) && !from_replay);
sec2 = journal_entry_get_sector(je2); sec2 = journal_entry_get_sector(je2);
get_area_and_offset(ic, sec2, &area2, &offset2); get_area_and_offset(ic, sec2, &area2, &offset2);
if (area2 != area || offset2 != offset + (k - j)) if (area2 != area || offset2 != offset + ((k - j) << ic->sb->log2_sectors_per_block))
break; break;
access_journal_data(ic, i, k)->commit_id = je2->last_bytes; restore_last_bytes(ic, access_journal_data(ic, i, k), je2);
} }
next_loop = k - 1; next_loop = k - 1;
io = mempool_alloc(ic->journal_io_mempool, GFP_NOIO); io = mempool_alloc(ic->journal_io_mempool, GFP_NOIO);
io->comp = &comp; io->comp = &comp;
io->range.logical_sector = sec; io->range.logical_sector = sec;
io->range.n_sectors = k - j; io->range.n_sectors = (k - j) << ic->sb->log2_sectors_per_block;
spin_lock_irq(&ic->endio_wait.lock); spin_lock_irq(&ic->endio_wait.lock);
while (unlikely(!add_new_range(ic, &io->range))) while (unlikely(!add_new_range(ic, &io->range)))
...@@ -1788,8 +1882,8 @@ static void do_journal_write(struct dm_integrity_c *ic, unsigned write_start, ...@@ -1788,8 +1882,8 @@ static void do_journal_write(struct dm_integrity_c *ic, unsigned write_start,
journal_entry_set_unused(je2); journal_entry_set_unused(je2);
remove_journal_node(ic, &section_node[j]); remove_journal_node(ic, &section_node[j]);
j++; j++;
sec++; sec += ic->sectors_per_block;
offset++; offset += ic->sectors_per_block;
} }
while (j < k && find_newer_committed_node(ic, &section_node[k - 1])) { while (j < k && find_newer_committed_node(ic, &section_node[k - 1])) {
struct journal_entry *je2 = access_journal_entry(ic, i, k - 1); struct journal_entry *je2 = access_journal_entry(ic, i, k - 1);
...@@ -1822,14 +1916,14 @@ static void do_journal_write(struct dm_integrity_c *ic, unsigned write_start, ...@@ -1822,14 +1916,14 @@ static void do_journal_write(struct dm_integrity_c *ic, unsigned write_start,
ic->internal_hash) { ic->internal_hash) {
char test_tag[max(crypto_shash_digestsize(ic->internal_hash), ic->tag_size)]; char test_tag[max(crypto_shash_digestsize(ic->internal_hash), ic->tag_size)];
integrity_sector_checksum(ic, sec + (l - j), integrity_sector_checksum(ic, sec + ((l - j) << ic->sb->log2_sectors_per_block),
(char *)access_journal_data(ic, i, l), test_tag); (char *)access_journal_data(ic, i, l), test_tag);
if (unlikely(memcmp(test_tag, je2->tag, ic->tag_size))) if (unlikely(memcmp(test_tag, journal_entry_tag(ic, je2), ic->tag_size)))
dm_integrity_io_error(ic, "tag mismatch when replaying journal", -EILSEQ); dm_integrity_io_error(ic, "tag mismatch when replaying journal", -EILSEQ);
} }
journal_entry_set_unused(je2); journal_entry_set_unused(je2);
r = dm_integrity_rw_tag(ic, je2->tag, &metadata_block, &metadata_offset, r = dm_integrity_rw_tag(ic, journal_entry_tag(ic, je2), &metadata_block, &metadata_offset,
ic->tag_size, TAG_WRITE); ic->tag_size, TAG_WRITE);
if (unlikely(r)) { if (unlikely(r)) {
dm_integrity_io_error(ic, "reading tags", r); dm_integrity_io_error(ic, "reading tags", r);
...@@ -1837,7 +1931,9 @@ static void do_journal_write(struct dm_integrity_c *ic, unsigned write_start, ...@@ -1837,7 +1931,9 @@ static void do_journal_write(struct dm_integrity_c *ic, unsigned write_start,
} }
atomic_inc(&comp.in_flight); atomic_inc(&comp.in_flight);
copy_from_journal(ic, i, j, k - j, get_data_sector(ic, area, offset), copy_from_journal(ic, i, j << ic->sb->log2_sectors_per_block,
(k - j) << ic->sb->log2_sectors_per_block,
get_data_sector(ic, area, offset),
complete_copy_from_journal, io); complete_copy_from_journal, io);
skip_io: skip_io:
j = next_loop; j = next_loop;
...@@ -2130,6 +2226,7 @@ static void dm_integrity_status(struct dm_target *ti, status_type_t type, ...@@ -2130,6 +2226,7 @@ static void dm_integrity_status(struct dm_target *ti, status_type_t type,
watermark_percentage += ic->journal_entries / 2; watermark_percentage += ic->journal_entries / 2;
do_div(watermark_percentage, ic->journal_entries); do_div(watermark_percentage, ic->journal_entries);
arg_count = 5; arg_count = 5;
arg_count += ic->sectors_per_block != 1;
arg_count += !!ic->internal_hash_alg.alg_string; arg_count += !!ic->internal_hash_alg.alg_string;
arg_count += !!ic->journal_crypt_alg.alg_string; arg_count += !!ic->journal_crypt_alg.alg_string;
arg_count += !!ic->journal_mac_alg.alg_string; arg_count += !!ic->journal_mac_alg.alg_string;
...@@ -2140,6 +2237,8 @@ static void dm_integrity_status(struct dm_target *ti, status_type_t type, ...@@ -2140,6 +2237,8 @@ static void dm_integrity_status(struct dm_target *ti, status_type_t type,
DMEMIT(" buffer_sectors:%u", 1U << ic->log2_buffer_sectors); DMEMIT(" buffer_sectors:%u", 1U << ic->log2_buffer_sectors);
DMEMIT(" journal_watermark:%u", (unsigned)watermark_percentage); DMEMIT(" journal_watermark:%u", (unsigned)watermark_percentage);
DMEMIT(" commit_time:%u", ic->autocommit_msec); DMEMIT(" commit_time:%u", ic->autocommit_msec);
if (ic->sectors_per_block != 1)
DMEMIT(" block_size:%u", ic->sectors_per_block << SECTOR_SHIFT);
#define EMIT_ALG(a, n) \ #define EMIT_ALG(a, n) \
do { \ do { \
...@@ -2165,19 +2264,30 @@ static int dm_integrity_iterate_devices(struct dm_target *ti, ...@@ -2165,19 +2264,30 @@ static int dm_integrity_iterate_devices(struct dm_target *ti,
return fn(ti, ic->dev, ic->start + ic->initial_sectors + ic->metadata_run, ti->len, data); return fn(ti, ic->dev, ic->start + ic->initial_sectors + ic->metadata_run, ti->len, data);
} }
/*
 * Set the queue limits from the target's block size.
 *
 * When ic->sectors_per_block is greater than one, the logical block size,
 * the physical block size and the minimum I/O size are all set to
 * sectors_per_block << SECTOR_SHIFT bytes.  Otherwise the limits are left
 * untouched.
 */
static void dm_integrity_io_hints(struct dm_target *ti, struct queue_limits *limits)
{
	struct dm_integrity_c *ic = ti->private;
	unsigned block_bytes;

	/* Single-sector blocks need no adjustment of the limits. */
	if (ic->sectors_per_block <= 1)
		return;

	block_bytes = ic->sectors_per_block << SECTOR_SHIFT;

	limits->logical_block_size = block_bytes;
	limits->physical_block_size = block_bytes;
	blk_limits_io_min(limits, block_bytes);
}
static void calculate_journal_section_size(struct dm_integrity_c *ic) static void calculate_journal_section_size(struct dm_integrity_c *ic)
{ {
unsigned sector_space = JOURNAL_SECTOR_DATA; unsigned sector_space = JOURNAL_SECTOR_DATA;
ic->journal_sections = le32_to_cpu(ic->sb->journal_sections); ic->journal_sections = le32_to_cpu(ic->sb->journal_sections);
ic->journal_entry_size = roundup(offsetof(struct journal_entry, tag) + ic->tag_size, ic->journal_entry_size = roundup(offsetof(struct journal_entry, last_bytes[ic->sectors_per_block]) + ic->tag_size,
JOURNAL_ENTRY_ROUNDUP); JOURNAL_ENTRY_ROUNDUP);
if (ic->sb->flags & cpu_to_le32(SB_FLAG_HAVE_JOURNAL_MAC)) if (ic->sb->flags & cpu_to_le32(SB_FLAG_HAVE_JOURNAL_MAC))
sector_space -= JOURNAL_MAC_PER_SECTOR; sector_space -= JOURNAL_MAC_PER_SECTOR;
ic->journal_entries_per_sector = sector_space / ic->journal_entry_size; ic->journal_entries_per_sector = sector_space / ic->journal_entry_size;
ic->journal_section_entries = ic->journal_entries_per_sector * JOURNAL_BLOCK_SECTORS; ic->journal_section_entries = ic->journal_entries_per_sector * JOURNAL_BLOCK_SECTORS;
ic->journal_section_sectors = ic->journal_section_entries + JOURNAL_BLOCK_SECTORS; ic->journal_section_sectors = (ic->journal_section_entries << ic->sb->log2_sectors_per_block) + JOURNAL_BLOCK_SECTORS;
ic->journal_entries = ic->journal_section_entries * ic->journal_sections; ic->journal_entries = ic->journal_section_entries * ic->journal_sections;
} }
...@@ -2192,7 +2302,7 @@ static int calculate_device_limits(struct dm_integrity_c *ic) ...@@ -2192,7 +2302,7 @@ static int calculate_device_limits(struct dm_integrity_c *ic)
return -EINVAL; return -EINVAL;
ic->initial_sectors = initial_sectors; ic->initial_sectors = initial_sectors;
ic->metadata_run = roundup((__u64)ic->tag_size << ic->sb->log2_interleave_sectors, ic->metadata_run = roundup((__u64)ic->tag_size << (ic->sb->log2_interleave_sectors - ic->sb->log2_sectors_per_block),
(__u64)(1 << SECTOR_SHIFT << METADATA_PADDING_SECTORS)) >> SECTOR_SHIFT; (__u64)(1 << SECTOR_SHIFT << METADATA_PADDING_SECTORS)) >> SECTOR_SHIFT;
if (!(ic->metadata_run & (ic->metadata_run - 1))) if (!(ic->metadata_run & (ic->metadata_run - 1)))
ic->log2_metadata_run = __ffs(ic->metadata_run); ic->log2_metadata_run = __ffs(ic->metadata_run);
...@@ -2217,6 +2327,7 @@ static int initialize_superblock(struct dm_integrity_c *ic, unsigned journal_sec ...@@ -2217,6 +2327,7 @@ static int initialize_superblock(struct dm_integrity_c *ic, unsigned journal_sec
memcpy(ic->sb->magic, SB_MAGIC, 8); memcpy(ic->sb->magic, SB_MAGIC, 8);
ic->sb->version = SB_VERSION; ic->sb->version = SB_VERSION;
ic->sb->integrity_tag_size = cpu_to_le16(ic->tag_size); ic->sb->integrity_tag_size = cpu_to_le16(ic->tag_size);
ic->sb->log2_sectors_per_block = __ffs(ic->sectors_per_block);
if (ic->journal_mac_alg.alg_string) if (ic->journal_mac_alg.alg_string)
ic->sb->flags |= cpu_to_le32(SB_FLAG_HAVE_JOURNAL_MAC); ic->sb->flags |= cpu_to_le32(SB_FLAG_HAVE_JOURNAL_MAC);
...@@ -2256,8 +2367,9 @@ static void dm_integrity_set(struct dm_target *ti, struct dm_integrity_c *ic) ...@@ -2256,8 +2367,9 @@ static void dm_integrity_set(struct dm_target *ti, struct dm_integrity_c *ic)
memset(&bi, 0, sizeof(bi)); memset(&bi, 0, sizeof(bi));
bi.profile = &dm_integrity_profile; bi.profile = &dm_integrity_profile;
bi.tuple_size = ic->tag_size * (queue_logical_block_size(disk->queue) >> SECTOR_SHIFT); bi.tuple_size = ic->tag_size;
bi.tag_size = ic->tag_size; bi.tag_size = bi.tuple_size;
bi.interval_exp = ilog2(ic->sectors_per_block << SECTOR_SHIFT);
blk_integrity_register(disk, &bi); blk_integrity_register(disk, &bi);
blk_queue_max_integrity_segments(disk->queue, UINT_MAX); blk_queue_max_integrity_segments(disk->queue, UINT_MAX);
...@@ -2667,6 +2779,7 @@ static int create_journal(struct dm_integrity_c *ic, char **error) ...@@ -2667,6 +2779,7 @@ static int create_journal(struct dm_integrity_c *ic, char **error)
* internal_hash * internal_hash
* journal_crypt * journal_crypt
* journal_mac * journal_mac
* block_size
*/ */
static int dm_integrity_ctr(struct dm_target *ti, unsigned argc, char **argv) static int dm_integrity_ctr(struct dm_target *ti, unsigned argc, char **argv)
{ {
...@@ -2676,7 +2789,7 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned argc, char **argv) ...@@ -2676,7 +2789,7 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned argc, char **argv)
unsigned extra_args; unsigned extra_args;
struct dm_arg_set as; struct dm_arg_set as;
static struct dm_arg _args[] = { static struct dm_arg _args[] = {
{0, 8, "Invalid number of feature args"}, {0, 9, "Invalid number of feature args"},
}; };
unsigned journal_sectors, interleave_sectors, buffer_sectors, journal_watermark, sync_msec; unsigned journal_sectors, interleave_sectors, buffer_sectors, journal_watermark, sync_msec;
bool should_write_sb; bool should_write_sb;
...@@ -2740,6 +2853,7 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned argc, char **argv) ...@@ -2740,6 +2853,7 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned argc, char **argv)
buffer_sectors = DEFAULT_BUFFER_SECTORS; buffer_sectors = DEFAULT_BUFFER_SECTORS;
journal_watermark = DEFAULT_JOURNAL_WATERMARK; journal_watermark = DEFAULT_JOURNAL_WATERMARK;
sync_msec = DEFAULT_SYNC_MSEC; sync_msec = DEFAULT_SYNC_MSEC;
ic->sectors_per_block = 1;
as.argc = argc - DIRECT_ARGUMENTS; as.argc = argc - DIRECT_ARGUMENTS;
as.argv = argv + DIRECT_ARGUMENTS; as.argv = argv + DIRECT_ARGUMENTS;
...@@ -2766,7 +2880,16 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned argc, char **argv) ...@@ -2766,7 +2880,16 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned argc, char **argv)
journal_watermark = val; journal_watermark = val;
else if (sscanf(opt_string, "commit_time:%u%c", &val, &dummy) == 1) else if (sscanf(opt_string, "commit_time:%u%c", &val, &dummy) == 1)
sync_msec = val; sync_msec = val;
else if (!memcmp(opt_string, "internal_hash:", strlen("internal_hash:"))) { else if (sscanf(opt_string, "block_size:%u%c", &val, &dummy) == 1) {
if (val < 1 << SECTOR_SHIFT ||
val > MAX_SECTORS_PER_BLOCK << SECTOR_SHIFT ||
(val & (val -1))) {
r = -EINVAL;
ti->error = "Invalid block_size argument";
goto bad;
}
ic->sectors_per_block = val >> SECTOR_SHIFT;
} else if (!memcmp(opt_string, "internal_hash:", strlen("internal_hash:"))) {
r = get_alg_and_key(opt_string, &ic->internal_hash_alg, &ti->error, r = get_alg_and_key(opt_string, &ic->internal_hash_alg, &ti->error,
"Invalid internal_hash argument"); "Invalid internal_hash argument");
if (r) if (r)
...@@ -2910,7 +3033,12 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned argc, char **argv) ...@@ -2910,7 +3033,12 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned argc, char **argv)
} }
if (le16_to_cpu(ic->sb->integrity_tag_size) != ic->tag_size) { if (le16_to_cpu(ic->sb->integrity_tag_size) != ic->tag_size) {
r = -EINVAL; r = -EINVAL;
ti->error = "Invalid tag size"; ti->error = "Tag size doesn't match the information in superblock";
goto bad;
}
if (ic->sb->log2_sectors_per_block != __ffs(ic->sectors_per_block)) {
r = -EINVAL;
ti->error = "Block size doesn't match the information in superblock";
goto bad; goto bad;
} }
/* make sure that ti->max_io_len doesn't overflow */ /* make sure that ti->max_io_len doesn't overflow */
...@@ -3084,6 +3212,7 @@ static struct target_type integrity_target = { ...@@ -3084,6 +3212,7 @@ static struct target_type integrity_target = {
.resume = dm_integrity_resume, .resume = dm_integrity_resume,
.status = dm_integrity_status, .status = dm_integrity_status,
.iterate_devices = dm_integrity_iterate_devices, .iterate_devices = dm_integrity_iterate_devices,
.io_hints = dm_integrity_io_hints,
}; };
int __init dm_integrity_init(void) int __init dm_integrity_init(void)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment