Commit 922bc5a0 authored by Kent Overstreet

bcachefs: Make topology repair a normal recovery pass

This adds bch2_run_explicit_recovery_pass(), for rewinding recovery and
explicitly running a specific recovery pass - this is a more general
replacement for how we were running topology repair before.
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
parent ae2e13d7
......@@ -563,7 +563,6 @@ enum {
BCH_FS_CLEAN_SHUTDOWN,
/* fsck passes: */
BCH_FS_TOPOLOGY_REPAIR_DONE,
BCH_FS_FSCK_DONE,
BCH_FS_INITIAL_GC_UNFIXED, /* kill when we enumerate fsck errors */
BCH_FS_NEED_ANOTHER_GC,
......@@ -666,6 +665,7 @@ enum bch_write_ref {
x(stripes_read, PASS_ALWAYS) \
x(initialize_subvolumes, 0) \
x(snapshots_read, PASS_ALWAYS) \
x(check_topology, 0) \
x(check_allocations, PASS_FSCK) \
x(set_may_go_rw, PASS_ALWAYS|PASS_SILENT) \
x(journal_replay, PASS_ALWAYS) \
......@@ -1185,11 +1185,14 @@ static inline bool bch2_dev_exists2(const struct bch_fs *c, unsigned dev)
/*
 * bch2_run_explicit_recovery_pass - explicitly request a recovery pass
 * @c:		filesystem
 * @pass:	recovery pass being requested
 *
 * Records @pass in c->recovery_passes_explicit.
 *
 * If recovery is currently at or past @pass, c->curr_recovery_pass is rewound
 * to @pass and -BCH_ERR_restart_recovery is returned so the caller can unwind.
 * If recovery has not reached @pass yet, nothing is rewound and 0 is returned.
 *
 * Callers must handle the 0 return themselves (e.g. by substituting their own
 * error code).
 */
static inline int bch2_run_explicit_recovery_pass(struct bch_fs *c,
						  enum bch_recovery_pass pass)
{
	c->recovery_passes_explicit |= BIT_ULL(pass);

	/*
	 * Deliberately no BUG_ON(c->curr_recovery_pass < pass): requesting a
	 * pass that is still ahead of us is legal and simply needs no rewind.
	 */
	if (c->curr_recovery_pass < pass)
		return 0;

	c->curr_recovery_pass = pass;
	return -BCH_ERR_restart_recovery;
}
#define BKEY_PADDED_ONSTACK(key, pad) \
......
......@@ -40,6 +40,12 @@
#define DROP_THIS_NODE 10
#define DROP_PREV_NODE 11
/*
 * Should the caller stop and request the check_topology recovery pass?
 *
 * Returns true only when error fixing is not disabled (fix_errors is not
 * FSCK_FIX_no) and check_topology is not already set in
 * c->recovery_passes_explicit.
 */
static bool should_restart_for_topology_repair(struct bch_fs *c)
{
	/* Fixing is turned off: never restart for repair. */
	if (c->opts.fix_errors == FSCK_FIX_no)
		return false;

	/* check_topology was already explicitly requested: don't ask again. */
	if (c->recovery_passes_explicit & BIT_ULL(BCH_RECOVERY_PASS_check_topology))
		return false;

	return true;
}
static inline void __gc_pos_set(struct bch_fs *c, struct gc_pos new_pos)
{
preempt_disable();
......@@ -96,9 +102,9 @@ static int bch2_gc_check_topology(struct bch_fs *c,
" cur %s",
bch2_btree_ids[b->c.btree_id], b->c.level,
buf1.buf, buf2.buf) &&
!test_bit(BCH_FS_TOPOLOGY_REPAIR_DONE, &c->flags)) {
should_restart_for_topology_repair(c)) {
bch_info(c, "Halting mark and sweep to start topology repair pass");
ret = -BCH_ERR_need_topology_repair;
ret = bch2_run_explicit_recovery_pass(c, BCH_RECOVERY_PASS_check_topology);
goto err;
} else {
set_bit(BCH_FS_INITIAL_GC_UNFIXED, &c->flags);
......@@ -124,9 +130,9 @@ static int bch2_gc_check_topology(struct bch_fs *c,
" expected %s",
bch2_btree_ids[b->c.btree_id], b->c.level,
buf1.buf, buf2.buf) &&
!test_bit(BCH_FS_TOPOLOGY_REPAIR_DONE, &c->flags)) {
should_restart_for_topology_repair(c)) {
bch_info(c, "Halting mark and sweep to start topology repair pass");
ret = -BCH_ERR_need_topology_repair;
ret = bch2_run_explicit_recovery_pass(c, BCH_RECOVERY_PASS_check_topology);
goto err;
} else {
set_bit(BCH_FS_INITIAL_GC_UNFIXED, &c->flags);
......@@ -520,7 +526,7 @@ static int bch2_btree_repair_topology_recurse(struct btree_trans *trans, struct
return ret;
}
static int bch2_repair_topology(struct bch_fs *c)
int bch2_check_topology(struct bch_fs *c)
{
struct btree_trans trans;
struct btree *b;
......@@ -969,9 +975,9 @@ static int bch2_gc_btree_init_recurse(struct btree_trans *trans, struct btree *b
b->c.level - 1,
(printbuf_reset(&buf),
bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(cur.k)), buf.buf)) &&
!test_bit(BCH_FS_TOPOLOGY_REPAIR_DONE, &c->flags)) {
ret = -BCH_ERR_need_topology_repair;
should_restart_for_topology_repair(c)) {
bch_info(c, "Halting mark and sweep to start topology repair pass");
ret = bch2_run_explicit_recovery_pass(c, BCH_RECOVERY_PASS_check_topology);
goto fsck_err;
} else {
/* Continue marking when opted to not
......@@ -1805,32 +1811,8 @@ int bch2_gc(struct bch_fs *c, bool initial, bool metadata_only)
bch2_mark_superblocks(c);
if (IS_ENABLED(CONFIG_BCACHEFS_DEBUG) ||
(BCH_SB_HAS_TOPOLOGY_ERRORS(c->disk_sb.sb) &&
c->curr_recovery_pass <= BCH_RECOVERY_PASS_check_allocations &&
c->opts.fix_errors != FSCK_FIX_no)) {
bch_info(c, "Starting topology repair pass");
ret = bch2_repair_topology(c);
if (ret)
goto out;
bch_info(c, "Topology repair pass done");
set_bit(BCH_FS_TOPOLOGY_REPAIR_DONE, &c->flags);
}
ret = bch2_gc_btrees(c, initial, metadata_only);
if (ret == -BCH_ERR_need_topology_repair &&
!test_bit(BCH_FS_TOPOLOGY_REPAIR_DONE, &c->flags) &&
c->curr_recovery_pass <= BCH_RECOVERY_PASS_check_allocations) {
set_bit(BCH_FS_NEED_ANOTHER_GC, &c->flags);
SET_BCH_SB_HAS_TOPOLOGY_ERRORS(c->disk_sb.sb, true);
ret = 0;
}
if (ret == -BCH_ERR_need_topology_repair)
ret = -BCH_ERR_fsck_errors_not_fixed;
if (ret)
goto out;
......
......@@ -4,6 +4,7 @@
#include "btree_types.h"
/*
 * Non-static so it can be called from outside the file that defines it.
 * NOTE(review): presumably the entry point for the check_topology recovery
 * pass - confirm against the recovery pass table.
 */
int bch2_check_topology(struct bch_fs *);
/* The two bool arguments are, in order, (initial, metadata_only). */
int bch2_gc(struct bch_fs *, bool, bool);
int bch2_gc_gens(struct bch_fs *);
void bch2_gc_thread_stop(struct bch_fs *);
......
......@@ -610,7 +610,7 @@ static int __btree_err(enum btree_err_type type,
case BTREE_ERR_BAD_NODE:
bch2_print_string_as_lines(KERN_ERR, out.buf);
bch2_topology_error(c);
ret = -BCH_ERR_need_topology_repair;
ret = bch2_run_explicit_recovery_pass(c, BCH_RECOVERY_PASS_check_topology) ?: -EIO;
break;
case BTREE_ERR_INCOMPATIBLE:
bch2_print_string_as_lines(KERN_ERR, out.buf);
......@@ -1566,7 +1566,8 @@ void bch2_btree_node_read(struct bch_fs *c, struct btree *b,
btree_pos_to_text(&buf, c, b);
bch_err(c, "%s", buf.buf);
if (test_bit(BCH_FS_TOPOLOGY_REPAIR_DONE, &c->flags))
if (c->recovery_passes_explicit & BIT_ULL(BCH_RECOVERY_PASS_check_topology) &&
c->curr_recovery_pass > BCH_RECOVERY_PASS_check_topology)
bch2_fatal_error(c);
set_btree_node_read_error(b);
......
......@@ -158,7 +158,6 @@
x(BCH_ERR_fsck, fsck_repair_unimplemented) \
x(BCH_ERR_fsck, fsck_repair_impossible) \
x(0, restart_recovery) \
x(0, need_topology_repair) \
x(0, unwritten_extent_update) \
x(EINVAL, device_state_not_allowed) \
x(EINVAL, member_info_missing) \
......
......@@ -27,9 +27,6 @@ bool bch2_inconsistent_error(struct bch_fs *c)
void bch2_topology_error(struct bch_fs *c)
{
if (!test_bit(BCH_FS_TOPOLOGY_REPAIR_DONE, &c->flags))
return;
set_bit(BCH_FS_TOPOLOGY_ERROR, &c->flags);
if (test_bit(BCH_FS_FSCK_DONE, &c->flags))
bch2_inconsistent_error(c);
......
......@@ -1262,17 +1262,16 @@ static int bch2_run_recovery_pass(struct bch_fs *c, enum bch_recovery_pass pass)
/*
 * Run recovery passes in order, starting at c->curr_recovery_pass and
 * continuing until the end of recovery_passes[].
 *
 * When a pass returns an error matching BCH_ERR_restart_recovery, the loop
 * does not advance; it re-reads c->curr_recovery_pass and runs whichever pass
 * that now names. Any other nonzero return stops the loop.
 *
 * Returns the result of the last pass run: 0 if every pass succeeded (or none
 * ran), otherwise the error that ended the loop.
 */
static int bch2_run_recovery_passes(struct bch_fs *c)
{
	int ret = 0;

	while (c->curr_recovery_pass < ARRAY_SIZE(recovery_passes)) {
		ret = bch2_run_recovery_pass(c, c->curr_recovery_pass);

		/*
		 * Restart requests are handled here; no outer retry label is
		 * needed (one would spin forever if the loop condition were
		 * false while ret still held a restart error).
		 */
		if (bch2_err_matches(ret, BCH_ERR_restart_recovery))
			continue;
		if (ret)
			break;

		c->curr_recovery_pass++;
	}

	return ret;
}
......@@ -1450,6 +1449,11 @@ int bch2_fs_recovery(struct bch_fs *c)
if (ret)
goto err;
if (c->opts.fsck &&
(IS_ENABLED(CONFIG_BCACHEFS_DEBUG) ||
BCH_SB_HAS_TOPOLOGY_ERRORS(c->disk_sb.sb)))
c->recovery_passes_explicit |= BIT_ULL(BCH_RECOVERY_PASS_check_topology);
ret = bch2_run_recovery_passes(c);
if (ret)
goto err;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment