Commit 13c1e583 authored by Kent Overstreet's avatar Kent Overstreet

bcachefs: Improve -o norecovery; opts.recovery_pass_limit

This adds opts.recovery_pass_limit, and redoes -o norecovery to make use
of it; this fixes some issues with -o norecovery so it can be safely
used for data recovery.

Norecovery means "don't do journal replay"; it's an important data
recovery tool when we're getting stuck in journal replay.

When using it this way we need to make sure we don't free journal keys
after startup, so we continue to overlay them: thus it needs to imply
retain_recovery_info, as well as nochanges.

recovery_pass_limit is an explicit option for telling recovery to exit
after a specific recovery pass; this is a much cleaner way of
implementing -o norecovery, as well as being a useful debug feature in
its own right.
Signed-off-by: default avatarKent Overstreet <kent.overstreet@linux.dev>
parent 060ff30a
......@@ -7,6 +7,7 @@
#include "disk_groups.h"
#include "error.h"
#include "opts.h"
#include "recovery_passes.h"
#include "super-io.h"
#include "util.h"
......@@ -205,6 +206,9 @@ const struct bch_option bch2_opt_table[] = {
#define OPT_STR(_choices) .type = BCH_OPT_STR, \
.min = 0, .max = ARRAY_SIZE(_choices), \
.choices = _choices
#define OPT_STR_NOLIMIT(_choices) .type = BCH_OPT_STR, \
.min = 0, .max = U64_MAX, \
.choices = _choices
#define OPT_FN(_fn) .type = BCH_OPT_FN, .fn = _fn
#define x(_name, _bits, _flags, _type, _sb_opt, _default, _hint, _help) \
......
......@@ -362,7 +362,12 @@ enum fsck_err_opts {
OPT_FS|OPT_MOUNT, \
OPT_BOOL(), \
BCH2_NO_SB_OPT, false, \
NULL, "Don't replay the journal") \
NULL, "Exit recovery immediately prior to journal replay")\
x(recovery_pass_last, u8, \
OPT_FS|OPT_MOUNT, \
OPT_STR_NOLIMIT(bch2_recovery_passes), \
BCH2_NO_SB_OPT, 0, \
NULL, "Exit recovery after specified pass") \
x(keep_journal, u8, \
0, \
OPT_BOOL(), \
......
......@@ -269,7 +269,8 @@ int bch2_journal_replay(struct bch_fs *c)
bch2_trans_put(trans);
trans = NULL;
if (!c->opts.keep_journal)
if (!c->opts.keep_journal &&
c->recovery_pass_done >= BCH_RECOVERY_PASS_journal_replay)
bch2_journal_keys_put_initial(c);
replay_now_at(j, j->replay_journal_seq_end);
......@@ -584,11 +585,8 @@ int bch2_fs_recovery(struct bch_fs *c)
goto err;
}
if (c->opts.fsck && c->opts.norecovery) {
bch_err(c, "cannot select both norecovery and fsck");
ret = -EINVAL;
goto err;
}
if (c->opts.norecovery)
c->opts.recovery_pass_last = BCH_RECOVERY_PASS_journal_replay - 1;
if (!c->opts.nochanges) {
mutex_lock(&c->sb_lock);
......
......@@ -27,7 +27,7 @@ const char * const bch2_recovery_passes[] = {
static int bch2_check_allocations(struct bch_fs *c)
{
return bch2_gc(c, true, c->opts.norecovery);
return bch2_gc(c, true, false);
}
static int bch2_set_may_go_rw(struct bch_fs *c)
......@@ -144,8 +144,6 @@ static bool should_run_recovery_pass(struct bch_fs *c, enum bch_recovery_pass pa
{
struct recovery_pass_fn *p = recovery_pass_fns + pass;
if (c->opts.norecovery && pass > BCH_RECOVERY_PASS_snapshots_read)
return false;
if (c->recovery_passes_explicit & BIT_ULL(pass))
return true;
if ((p->when & PASS_FSCK) && c->opts.fsck)
......@@ -201,6 +199,10 @@ int bch2_run_recovery_passes(struct bch_fs *c)
int ret = 0;
while (c->curr_recovery_pass < ARRAY_SIZE(recovery_pass_fns)) {
if (c->opts.recovery_pass_last &&
c->curr_recovery_pass > c->opts.recovery_pass_last)
break;
if (should_run_recovery_pass(c, c->curr_recovery_pass)) {
unsigned pass = c->curr_recovery_pass;
......@@ -213,8 +215,10 @@ int bch2_run_recovery_passes(struct bch_fs *c)
c->recovery_passes_complete |= BIT_ULL(c->curr_recovery_pass);
}
c->curr_recovery_pass++;
c->recovery_pass_done = max(c->recovery_pass_done, c->curr_recovery_pass);
c->curr_recovery_pass++;
}
return ret;
......
......@@ -131,7 +131,7 @@ bool __bch2_snapshot_is_ancestor(struct bch_fs *c, u32 id, u32 ancestor)
rcu_read_lock();
struct snapshot_table *t = rcu_dereference(c->snapshots);
if (unlikely(c->recovery_pass_done <= BCH_RECOVERY_PASS_check_snapshots)) {
if (unlikely(c->recovery_pass_done < BCH_RECOVERY_PASS_check_snapshots)) {
ret = __bch2_snapshot_is_ancestor_early(t, id, ancestor);
goto out;
}
......
......@@ -365,7 +365,7 @@ void bch2_fs_read_only(struct bch_fs *c)
!test_bit(BCH_FS_emergency_ro, &c->flags) &&
test_bit(BCH_FS_started, &c->flags) &&
test_bit(BCH_FS_clean_shutdown, &c->flags) &&
!c->opts.norecovery) {
c->recovery_pass_done >= BCH_RECOVERY_PASS_journal_replay) {
BUG_ON(c->journal.last_empty_seq != journal_cur_seq(&c->journal));
BUG_ON(atomic_read(&c->btree_cache.dirty));
BUG_ON(atomic_long_read(&c->btree_key_cache.nr_dirty));
......@@ -510,7 +510,8 @@ static int __bch2_fs_read_write(struct bch_fs *c, bool early)
int bch2_fs_read_write(struct bch_fs *c)
{
if (c->opts.norecovery)
if (c->opts.recovery_pass_last &&
c->opts.recovery_pass_last < BCH_RECOVERY_PASS_journal_replay)
return -BCH_ERR_erofs_norecovery;
if (c->opts.nochanges)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment