Commit 5aabf7c4 authored by Song Liu's avatar Song Liu Committed by Shaohua Li

md/r5cache: r5cache recovery: part 2

1. In previous patch, we:
      - add new data to r5l_recovery_ctx
      - add new functions to recovery write-back cache
   The new functions are not used in this patch, so this patch does not
   change the behavior of recovery.

2. In this patchpatch, we:
      - modify main recovery procedure r5l_recovery_log() to call new
        functions
      - remove old functions
Signed-off-by: default avatarSong Liu <songliubraving@fb.com>
Signed-off-by: default avatarShaohua Li <shli@fb.com>
parent b4c625c6
...@@ -1393,156 +1393,6 @@ static int r5l_recovery_read_meta_block(struct r5l_log *log, ...@@ -1393,156 +1393,6 @@ static int r5l_recovery_read_meta_block(struct r5l_log *log,
return 0; return 0;
} }
static int r5l_recovery_flush_one_stripe(struct r5l_log *log,
struct r5l_recovery_ctx *ctx,
sector_t stripe_sect,
int *offset)
{
struct r5conf *conf = log->rdev->mddev->private;
struct stripe_head *sh;
struct r5l_payload_data_parity *payload;
int disk_index;
sh = raid5_get_active_stripe(conf, stripe_sect, 0, 0, 0);
while (1) {
sector_t log_offset = r5l_ring_add(log, ctx->pos,
ctx->meta_total_blocks);
payload = page_address(ctx->meta_page) + *offset;
if (le16_to_cpu(payload->header.type) == R5LOG_PAYLOAD_DATA) {
raid5_compute_sector(conf,
le64_to_cpu(payload->location), 0,
&disk_index, sh);
sync_page_io(log->rdev, log_offset, PAGE_SIZE,
sh->dev[disk_index].page, REQ_OP_READ, 0,
false);
sh->dev[disk_index].log_checksum =
le32_to_cpu(payload->checksum[0]);
set_bit(R5_Wantwrite, &sh->dev[disk_index].flags);
} else {
disk_index = sh->pd_idx;
sync_page_io(log->rdev, log_offset, PAGE_SIZE,
sh->dev[disk_index].page, REQ_OP_READ, 0,
false);
sh->dev[disk_index].log_checksum =
le32_to_cpu(payload->checksum[0]);
set_bit(R5_Wantwrite, &sh->dev[disk_index].flags);
if (sh->qd_idx >= 0) {
disk_index = sh->qd_idx;
sync_page_io(log->rdev,
r5l_ring_add(log, log_offset, BLOCK_SECTORS),
PAGE_SIZE, sh->dev[disk_index].page,
REQ_OP_READ, 0, false);
sh->dev[disk_index].log_checksum =
le32_to_cpu(payload->checksum[1]);
set_bit(R5_Wantwrite,
&sh->dev[disk_index].flags);
}
}
ctx->meta_total_blocks += le32_to_cpu(payload->size);
*offset += sizeof(struct r5l_payload_data_parity) +
sizeof(__le32) *
(le32_to_cpu(payload->size) >> (PAGE_SHIFT - 9));
if (le16_to_cpu(payload->header.type) == R5LOG_PAYLOAD_PARITY)
break;
}
for (disk_index = 0; disk_index < sh->disks; disk_index++) {
void *addr;
u32 checksum;
if (!test_bit(R5_Wantwrite, &sh->dev[disk_index].flags))
continue;
addr = kmap_atomic(sh->dev[disk_index].page);
checksum = crc32c_le(log->uuid_checksum, addr, PAGE_SIZE);
kunmap_atomic(addr);
if (checksum != sh->dev[disk_index].log_checksum)
goto error;
}
for (disk_index = 0; disk_index < sh->disks; disk_index++) {
struct md_rdev *rdev, *rrdev;
if (!test_and_clear_bit(R5_Wantwrite,
&sh->dev[disk_index].flags))
continue;
/* in case device is broken */
rcu_read_lock();
rdev = rcu_dereference(conf->disks[disk_index].rdev);
if (rdev) {
atomic_inc(&rdev->nr_pending);
rcu_read_unlock();
sync_page_io(rdev, stripe_sect, PAGE_SIZE,
sh->dev[disk_index].page, REQ_OP_WRITE, 0,
false);
rdev_dec_pending(rdev, rdev->mddev);
rcu_read_lock();
}
rrdev = rcu_dereference(conf->disks[disk_index].replacement);
if (rrdev) {
atomic_inc(&rrdev->nr_pending);
rcu_read_unlock();
sync_page_io(rrdev, stripe_sect, PAGE_SIZE,
sh->dev[disk_index].page, REQ_OP_WRITE, 0,
false);
rdev_dec_pending(rrdev, rrdev->mddev);
rcu_read_lock();
}
rcu_read_unlock();
}
raid5_release_stripe(sh);
return 0;
error:
for (disk_index = 0; disk_index < sh->disks; disk_index++)
sh->dev[disk_index].flags = 0;
raid5_release_stripe(sh);
return -EINVAL;
}
static int r5l_recovery_flush_one_meta(struct r5l_log *log,
struct r5l_recovery_ctx *ctx)
{
struct r5conf *conf = log->rdev->mddev->private;
struct r5l_payload_data_parity *payload;
struct r5l_meta_block *mb;
int offset;
sector_t stripe_sector;
mb = page_address(ctx->meta_page);
offset = sizeof(struct r5l_meta_block);
while (offset < le32_to_cpu(mb->meta_size)) {
int dd;
payload = (void *)mb + offset;
stripe_sector = raid5_compute_sector(conf,
le64_to_cpu(payload->location), 0, &dd, NULL);
if (r5l_recovery_flush_one_stripe(log, ctx, stripe_sector,
&offset))
return -EINVAL;
}
return 0;
}
/* copy data/parity from log to raid disks */
static void r5l_recovery_flush_log(struct r5l_log *log,
struct r5l_recovery_ctx *ctx)
{
while (1) {
if (r5l_recovery_read_meta_block(log, ctx))
return;
if (r5l_recovery_flush_one_meta(log, ctx))
return;
ctx->seq++;
ctx->pos = r5l_ring_add(log, ctx->pos, ctx->meta_total_blocks);
}
}
static void static void
r5l_recovery_create_empty_meta_block(struct r5l_log *log, r5l_recovery_create_empty_meta_block(struct r5l_log *log,
struct page *page, struct page *page,
...@@ -2166,7 +2016,9 @@ r5c_recovery_rewrite_data_only_stripes(struct r5l_log *log, ...@@ -2166,7 +2016,9 @@ r5c_recovery_rewrite_data_only_stripes(struct r5l_log *log,
static int r5l_recovery_log(struct r5l_log *log) static int r5l_recovery_log(struct r5l_log *log)
{ {
struct mddev *mddev = log->rdev->mddev;
struct r5l_recovery_ctx ctx; struct r5l_recovery_ctx ctx;
int ret;
ctx.pos = log->last_checkpoint; ctx.pos = log->last_checkpoint;
ctx.seq = log->last_cp_seq; ctx.seq = log->last_cp_seq;
...@@ -2178,47 +2030,33 @@ static int r5l_recovery_log(struct r5l_log *log) ...@@ -2178,47 +2030,33 @@ static int r5l_recovery_log(struct r5l_log *log)
if (!ctx.meta_page) if (!ctx.meta_page)
return -ENOMEM; return -ENOMEM;
r5l_recovery_flush_log(log, &ctx); ret = r5c_recovery_flush_log(log, &ctx);
__free_page(ctx.meta_page); __free_page(ctx.meta_page);
/*
* we did a recovery. Now ctx.pos points to an invalid meta block. New
* log will start here. but we can't let superblock point to last valid
* meta block. The log might looks like:
* | meta 1| meta 2| meta 3|
* meta 1 is valid, meta 2 is invalid. meta 3 could be valid. If
* superblock points to meta 1, we write a new valid meta 2n. if crash
* happens again, new recovery will start from meta 1. Since meta 2n is
* valid now, recovery will think meta 3 is valid, which is wrong.
* The solution is we create a new meta in meta2 with its seq == meta
* 1's seq + 10 and let superblock points to meta2. The same recovery will
* not think meta 3 is a valid meta, because its seq doesn't match
*/
if (ctx.seq > log->last_cp_seq) {
int ret;
ret = r5l_log_write_empty_meta_block(log, ctx.pos, ctx.seq + 10);
if (ret) if (ret)
return ret; return ret;
log->seq = ctx.seq + 11;
log->log_start = r5l_ring_add(log, ctx.pos, BLOCK_SECTORS);
r5l_write_super(log, ctx.pos);
log->last_checkpoint = ctx.pos;
log->next_checkpoint = ctx.pos;
} else {
log->log_start = ctx.pos;
log->seq = ctx.seq;
}
/* if ((ctx.data_only_stripes == 0) && (ctx.data_parity_stripes == 0))
* This is to suppress "function defined but not used" warning. pr_debug("md/raid:%s: starting from clean shutdown\n",
* It will be removed when the two functions are used (next patch). mdname(mddev));
*/ else {
if (!log) { pr_debug("md/raid:%s: recoverying %d data-only stripes and %d data-parity stripes\n",
r5c_recovery_flush_log(log, &ctx); mdname(mddev), ctx.data_only_stripes,
r5c_recovery_rewrite_data_only_stripes(log, &ctx); ctx.data_parity_stripes);
if (ctx.data_only_stripes > 0)
if (r5c_recovery_rewrite_data_only_stripes(log, &ctx)) {
pr_err("md/raid:%s: failed to rewrite stripes to journal\n",
mdname(mddev));
return -EIO;
}
} }
log->log_start = ctx.pos;
log->next_checkpoint = ctx.pos;
log->seq = ctx.seq;
r5l_log_write_empty_meta_block(log, ctx.pos, ctx.seq);
r5l_write_super(log, ctx.pos);
return 0; return 0;
} }
......
...@@ -7100,7 +7100,8 @@ static int raid5_run(struct mddev *mddev) ...@@ -7100,7 +7100,8 @@ static int raid5_run(struct mddev *mddev)
pr_debug("md/raid:%s: using device %s as journal\n", pr_debug("md/raid:%s: using device %s as journal\n",
mdname(mddev), bdevname(journal_dev->bdev, b)); mdname(mddev), bdevname(journal_dev->bdev, b));
r5l_init_log(conf, journal_dev); if (r5l_init_log(conf, journal_dev))
goto abort;
} }
return 0; return 0;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment