Commit de89afc1, authored by Nikos Tsironis, committed by Mike Snitzer

dm era: Recover committed writeset after crash

Following a system crash, dm-era fails to recover the committed writeset
for the current era, leading to lost writes. That is, we lose the
information about what blocks were written during the affected era.

dm-era assumes that the writeset of the current era is archived when the
device is suspended. So, when resuming the device, it just moves on to
the next era, ignoring the committed writeset.

This assumption holds when the device is properly shut down. But, when
the system crashes, the code that suspends the target never runs, so the
writeset for the current era is not archived.

There are three issues that cause the committed writeset to get lost:

1. dm-era doesn't load the committed writeset when opening the metadata
2. The code that resizes the metadata wipes the information about the
   committed writeset (assuming it was loaded at step 1)
3. era_preresume() starts a new era, without taking into account that
   the current era might not have been archived, due to a system crash.

To fix this:

1. Load the committed writeset when opening the metadata
2. Fix the code that resizes the metadata to make sure it doesn't wipe
   the loaded writeset
3. Fix era_preresume() to check for a loaded writeset and archive it,
   before starting a new era.

Fixes: eec40579 ("dm: add era target")
Cc: stable@vger.kernel.org # v3.15+
Signed-off-by: Nikos Tsironis <ntsironis@arrikto.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
parent d9928ac5
...@@ -71,8 +71,6 @@ static size_t bitset_size(unsigned nr_bits) ...@@ -71,8 +71,6 @@ static size_t bitset_size(unsigned nr_bits)
*/ */
static int writeset_alloc(struct writeset *ws, dm_block_t nr_blocks) static int writeset_alloc(struct writeset *ws, dm_block_t nr_blocks)
{ {
ws->md.nr_bits = nr_blocks;
ws->md.root = INVALID_WRITESET_ROOT;
ws->bits = vzalloc(bitset_size(nr_blocks)); ws->bits = vzalloc(bitset_size(nr_blocks));
if (!ws->bits) { if (!ws->bits) {
DMERR("%s: couldn't allocate in memory bitset", __func__); DMERR("%s: couldn't allocate in memory bitset", __func__);
...@@ -85,12 +83,14 @@ static int writeset_alloc(struct writeset *ws, dm_block_t nr_blocks) ...@@ -85,12 +83,14 @@ static int writeset_alloc(struct writeset *ws, dm_block_t nr_blocks)
/* /*
* Wipes the in-core bitset, and creates a new on disk bitset. * Wipes the in-core bitset, and creates a new on disk bitset.
*/ */
static int writeset_init(struct dm_disk_bitset *info, struct writeset *ws) static int writeset_init(struct dm_disk_bitset *info, struct writeset *ws,
dm_block_t nr_blocks)
{ {
int r; int r;
memset(ws->bits, 0, bitset_size(ws->md.nr_bits)); memset(ws->bits, 0, bitset_size(nr_blocks));
ws->md.nr_bits = nr_blocks;
r = setup_on_disk_bitset(info, ws->md.nr_bits, &ws->md.root); r = setup_on_disk_bitset(info, ws->md.nr_bits, &ws->md.root);
if (r) { if (r) {
DMERR("%s: setup_on_disk_bitset failed", __func__); DMERR("%s: setup_on_disk_bitset failed", __func__);
...@@ -579,6 +579,7 @@ static int open_metadata(struct era_metadata *md) ...@@ -579,6 +579,7 @@ static int open_metadata(struct era_metadata *md)
md->nr_blocks = le32_to_cpu(disk->nr_blocks); md->nr_blocks = le32_to_cpu(disk->nr_blocks);
md->current_era = le32_to_cpu(disk->current_era); md->current_era = le32_to_cpu(disk->current_era);
ws_unpack(&disk->current_writeset, &md->current_writeset->md);
md->writeset_tree_root = le64_to_cpu(disk->writeset_tree_root); md->writeset_tree_root = le64_to_cpu(disk->writeset_tree_root);
md->era_array_root = le64_to_cpu(disk->era_array_root); md->era_array_root = le64_to_cpu(disk->era_array_root);
md->metadata_snap = le64_to_cpu(disk->metadata_snap); md->metadata_snap = le64_to_cpu(disk->metadata_snap);
...@@ -870,7 +871,6 @@ static int metadata_era_archive(struct era_metadata *md) ...@@ -870,7 +871,6 @@ static int metadata_era_archive(struct era_metadata *md)
} }
ws_pack(&md->current_writeset->md, &value); ws_pack(&md->current_writeset->md, &value);
md->current_writeset->md.root = INVALID_WRITESET_ROOT;
keys[0] = md->current_era; keys[0] = md->current_era;
__dm_bless_for_disk(&value); __dm_bless_for_disk(&value);
...@@ -882,6 +882,7 @@ static int metadata_era_archive(struct era_metadata *md) ...@@ -882,6 +882,7 @@ static int metadata_era_archive(struct era_metadata *md)
return r; return r;
} }
md->current_writeset->md.root = INVALID_WRITESET_ROOT;
md->archived_writesets = true; md->archived_writesets = true;
return 0; return 0;
...@@ -898,7 +899,7 @@ static int metadata_new_era(struct era_metadata *md) ...@@ -898,7 +899,7 @@ static int metadata_new_era(struct era_metadata *md)
int r; int r;
struct writeset *new_writeset = next_writeset(md); struct writeset *new_writeset = next_writeset(md);
r = writeset_init(&md->bitset_info, new_writeset); r = writeset_init(&md->bitset_info, new_writeset, md->nr_blocks);
if (r) { if (r) {
DMERR("%s: writeset_init failed", __func__); DMERR("%s: writeset_init failed", __func__);
return r; return r;
...@@ -951,7 +952,7 @@ static int metadata_commit(struct era_metadata *md) ...@@ -951,7 +952,7 @@ static int metadata_commit(struct era_metadata *md)
int r; int r;
struct dm_block *sblock; struct dm_block *sblock;
if (md->current_writeset->md.root != SUPERBLOCK_LOCATION) { if (md->current_writeset->md.root != INVALID_WRITESET_ROOT) {
r = dm_bitset_flush(&md->bitset_info, md->current_writeset->md.root, r = dm_bitset_flush(&md->bitset_info, md->current_writeset->md.root,
&md->current_writeset->md.root); &md->current_writeset->md.root);
if (r) { if (r) {
...@@ -1565,7 +1566,7 @@ static int era_preresume(struct dm_target *ti) ...@@ -1565,7 +1566,7 @@ static int era_preresume(struct dm_target *ti)
start_worker(era); start_worker(era);
r = in_worker0(era, metadata_new_era); r = in_worker0(era, metadata_era_rollover);
if (r) { if (r) {
DMERR("%s: metadata_era_rollover failed", __func__); DMERR("%s: metadata_era_rollover failed", __func__);
return r; return r;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment