Commit f26c67f4 authored by Kent Overstreet

bcachefs: Snapshot depth, skiplist fields

This extends KEY_TYPE_snapshot to include some new fields:
 - depth, to indicate depth of this particular node from the root
 - skip[3], skiplist entries for quickly walking back up to the root

These are to improve bch2_snapshot_is_ancestor(), making it O(ln(n))
instead of O(n) in the snapshot tree depth.

Skiplist nodes are picked at random from the set of ancestor nodes, not
some fixed fraction.

This introduces bcachefs_metadata_version 1.1, snapshot_skiplists.
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
parent 065bd335
......@@ -1148,6 +1148,8 @@ struct bch_snapshot {
__le32 children[2];
__le32 subvol;
__le32 tree;
__le32 depth;
__le32 skip[3];
};
LE32_BITMASK(BCH_SNAPSHOT_DELETED, struct bch_snapshot, flags, 0, 1)
......@@ -1625,7 +1627,9 @@ struct bch_sb_field_journal_seq_blacklist {
x(snapshot_trees, BCH_VERSION(0, 29), \
RECOVERY_PASS_ALL_FSCK) \
x(major_minor, BCH_VERSION(1, 0), \
0)
0) \
x(snapshot_skiplists, BCH_VERSION(1, 1), \
BIT_ULL(BCH_RECOVERY_PASS_check_snapshots))
enum bcachefs_metadata_version {
bcachefs_metadata_version_min = 9,
......
......@@ -795,6 +795,14 @@ __bch2_btree_iter_peek_upto_and_restart(struct btree_trans *trans,
(_do) ?: bch2_trans_commit(_trans, (_disk_res),\
(_journal_seq), (_commit_flags)))
/*
 * for_each_btree_key_reverse_commit - for_each_btree_key_reverse() wrapper
 * that commits the transaction after each key.
 *
 * Expands to for_each_btree_key_reverse() with the same iterator arguments
 * and a per-key expression of
 *
 *	(_do) ?: bch2_trans_commit(_trans, _disk_res, _journal_seq, _commit_flags)
 *
 * The GNU `?:` operator with an omitted middle operand means a nonzero
 * result from _do is used as-is, and bch2_trans_commit() is evaluated only
 * when _do evaluates to 0.
 */
#define for_each_btree_key_reverse_commit(_trans, _iter, _btree_id, \
_start, _iter_flags, _k, \
_disk_res, _journal_seq, _commit_flags,\
_do) \
for_each_btree_key_reverse(_trans, _iter, _btree_id, _start, _iter_flags, _k,\
(_do) ?: bch2_trans_commit(_trans, (_disk_res),\
(_journal_seq), (_commit_flags)))
#define for_each_btree_key_upto_commit(_trans, _iter, _btree_id, \
_start, _end, _iter_flags, _k, \
_disk_res, _journal_seq, _commit_flags,\
......
......@@ -594,10 +594,21 @@ static int bch2_journal_replay_key(struct btree_trans *trans,
unsigned iter_flags =
BTREE_ITER_INTENT|
BTREE_ITER_NOT_EXTENTS;
unsigned update_flags = BTREE_TRIGGER_NORUN;
int ret;
/*
* BTREE_UPDATE_KEY_CACHE_RECLAIM disables key cache lookup/update to
* keep the key cache coherent with the underlying btree. Nothing
* besides the allocator is doing updates yet so we don't need key cache
* coherency for non-alloc btrees, and key cache fills for snapshots
* btrees use BTREE_ITER_FILTER_SNAPSHOTS, which isn't available until
* the snapshots recovery pass runs.
*/
if (!k->level && k->btree_id == BTREE_ID_alloc)
iter_flags |= BTREE_ITER_CACHED;
else
update_flags |= BTREE_UPDATE_KEY_CACHE_RECLAIM;
bch2_trans_node_iter_init(trans, &iter, k->btree_id, k->k->k.p,
BTREE_MAX_DEPTH, k->level,
......@@ -610,7 +621,7 @@ static int bch2_journal_replay_key(struct btree_trans *trans,
if (k->overwritten)
goto out;
ret = bch2_trans_update(trans, &iter, k->k, BTREE_TRIGGER_NORUN);
ret = bch2_trans_update(trans, &iter, k->k, update_flags);
out:
bch2_trans_iter_exit(trans, &iter);
return ret;
......
This diff is collapsed.
......@@ -37,9 +37,34 @@ static inline struct snapshot_t *snapshot_t(struct bch_fs *c, u32 id)
return genradix_ptr(&c->snapshots, U32_MAX - id);
}
/*
 * Return the parent id recorded in snapshot @id's in-memory snapshot_t
 * entry. No consistency checking is performed on the entry.
 */
static inline u32 bch2_snapshot_parent_early(struct bch_fs *c, u32 id)
{
	struct snapshot_t *s = snapshot_t(c, id);

	return s->parent;
}
/*
 * Return the parent id recorded for snapshot @id.
 *
 * With CONFIG_BCACHEFS_DEBUG, additionally verify that a node with a
 * nonzero parent has depth exactly one greater than that parent, and
 * panic() if it does not.
 */
static inline u32 bch2_snapshot_parent(struct bch_fs *c, u32 id)
{
	struct snapshot_t *child = snapshot_t(c, id);
	u32 parent_id = child->parent;

#ifdef CONFIG_BCACHEFS_DEBUG
	if (parent_id) {
		struct snapshot_t *up = snapshot_t(c, parent_id);

		/* depth must grow by exactly one per parent->child step */
		if (child->depth != up->depth + 1)
			panic("id %u depth=%u parent %u depth=%u\n",
			      id, child->depth,
			      parent_id, up->depth);
	}
#endif
	return parent_id;
}
/*
 * Apply bch2_snapshot_parent() @n times starting from @id and return the
 * resulting id. With @n == 0, @id is returned unchanged.
 */
static inline u32 bch2_snapshot_nth_parent(struct bch_fs *c, u32 id, u32 n)
{
	u32 step;

	for (step = 0; step < n; step++)
		id = bch2_snapshot_parent(c, id);

	return id;
}
static inline u32 bch2_snapshot_root(struct bch_fs *c, u32 id)
......@@ -84,13 +109,7 @@ static inline u32 bch2_snapshot_sibling(struct bch_fs *c, u32 id)
return 0;
}
/*
 * Follow parent links upward from @id for as long as the current id is
 * nonzero and numerically smaller than @ancestor, then report whether the
 * walk stopped exactly on @ancestor.
 *
 * When @id == @ancestor on entry the loop body never runs and the result
 * is true.
 */
static inline bool bch2_snapshot_is_ancestor(struct bch_fs *c, u32 id, u32 ancestor)
{
	for (;;) {
		if (!id || id >= ancestor)
			break;

		id = bch2_snapshot_parent(c, id);
	}

	return id == ancestor;
}
bool bch2_snapshot_is_ancestor(struct bch_fs *, u32, u32);
static inline bool bch2_snapshot_has_children(struct bch_fs *c, u32 id)
{
......
......@@ -8,6 +8,8 @@ typedef DARRAY(u32) snapshot_id_list;
struct snapshot_t {
u32 parent;
u32 skip[3];
u32 depth;
u32 children[2];
u32 subvol; /* Nonzero only if a subvolume points to this node: */
u32 tree;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment