Commit d3ff7fec authored by Kent Overstreet's avatar Kent Overstreet Committed by Kent Overstreet

bcachefs: Improved check_directory_structure()

Now that we have inode backpointers, we can simplify checking directory
structure: instead of doing a DFS from the filesystem root and then
checking if we found everything, we can iterate over every inode and see
if we can go up until we get to the root.

This patch also has a number of fixes and simplifications for the inode
backpointer checks. Also, it turns out we don't actually need the
BCH_INODE_BACKPTR_UNTRUSTED flag.
Signed-off-by: default avatarKent Overstreet <kent.overstreet@gmail.com>
Signed-off-by: default avatarKent Overstreet <kent.overstreet@linux.dev>
parent 176cf4bf
......@@ -103,13 +103,12 @@ static inline int bch2_trans_commit(struct btree_trans *trans,
return __bch2_trans_commit(trans);
}
#define __bch2_trans_do(_trans, _disk_res, _journal_seq, _flags, _do) \
#define lockrestart_do(_trans, _do) \
({ \
int _ret; \
\
while (1) { \
_ret = (_do) ?: bch2_trans_commit(_trans, (_disk_res), \
(_journal_seq), (_flags)); \
_ret = (_do); \
if (_ret != -EINTR) \
break; \
bch2_trans_reset(_trans, 0); \
......@@ -118,6 +117,10 @@ static inline int bch2_trans_commit(struct btree_trans *trans,
_ret; \
})
#define __bch2_trans_do(_trans, _disk_res, _journal_seq, _flags, _do) \
lockrestart_do(_trans, _do ?: bch2_trans_commit(_trans, (_disk_res),\
(_journal_seq), (_flags)))
#define bch2_trans_do(_c, _disk_res, _journal_seq, _flags, _do) \
({ \
struct btree_trans trans; \
......
......@@ -110,8 +110,6 @@ int bch2_link_trans(struct btree_trans *trans, u64 dir_inum,
inode_u->bi_ctime = now;
bch2_inode_nlink_inc(inode_u);
inode_u->bi_flags |= BCH_INODE_BACKPTR_UNTRUSTED;
dir_iter = bch2_inode_peek(trans, dir_u, dir_inum, 0);
ret = PTR_ERR_OR_ZERO(dir_iter);
if (ret)
......@@ -175,6 +173,12 @@ int bch2_unlink_trans(struct btree_trans *trans,
if (ret)
goto err;
if (inode_u->bi_dir == k.k->p.inode &&
inode_u->bi_dir_offset == k.k->p.offset) {
inode_u->bi_dir = 0;
inode_u->bi_dir_offset = 0;
}
dir_u->bi_mtime = dir_u->bi_ctime = inode_u->bi_ctime = now;
dir_u->bi_nlink -= S_ISDIR(inode_u->bi_mode);
bch2_inode_nlink_dec(inode_u);
......
......@@ -59,7 +59,7 @@ static int lookup_inode(struct btree_trans *trans, u64 inode_nr,
? bch2_inode_unpack(bkey_s_c_to_inode(k), inode)
: -ENOENT;
err:
bch2_trans_iter_put(trans, iter);
bch2_trans_iter_free(trans, iter);
return ret;
}
......@@ -134,27 +134,26 @@ static int __reattach_inode(struct btree_trans *trans,
{
struct bch_hash_info dir_hash =
bch2_hash_info_init(trans->c, lostfound);
struct btree_iter *dir_iter = NULL, *inode_iter = NULL;
struct bch_inode_unpacked inode_u;
char name_buf[20];
struct qstr name;
u64 dir_offset = 0;
u32 snapshot;
int ret;
snprintf(name_buf, sizeof(name_buf), "%llu", inum);
name = (struct qstr) QSTR(name_buf);
inode_iter = bch2_inode_peek(trans, &inode_u, inum, 0);
ret = PTR_ERR_OR_ZERO(inode_iter);
ret = lookup_inode(trans, inum, &inode_u, &snapshot);
if (ret)
goto err;
return ret;
if (S_ISDIR(inode_u.bi_mode)) {
lostfound->bi_nlink++;
ret = write_inode(trans, lostfound, U32_MAX);
if (ret)
goto err;
return ret;
}
ret = bch2_dirent_create(trans, lostfound->bi_inum, &dir_hash,
......@@ -162,18 +161,12 @@ static int __reattach_inode(struct btree_trans *trans,
&name, inum, &dir_offset,
BCH_HASH_SET_MUST_CREATE);
if (ret)
goto err;
return ret;
inode_u.bi_dir = lostfound->bi_inum;
inode_u.bi_dir_offset = dir_offset;
ret = write_inode(trans, &inode_u, U32_MAX);
if (ret)
goto err;
err:
bch2_trans_iter_put(trans, dir_iter);
bch2_trans_iter_put(trans, inode_iter);
return ret;
return write_inode(trans, &inode_u, U32_MAX);
}
static int reattach_inode(struct btree_trans *trans,
......@@ -191,6 +184,30 @@ static int reattach_inode(struct btree_trans *trans,
return ret;
}
static int remove_backpointer(struct btree_trans *trans,
struct bch_inode_unpacked *inode)
{
struct btree_iter *iter;
struct bkey_s_c k;
int ret;
iter = bch2_trans_get_iter(trans, BTREE_ID_dirents,
POS(inode->bi_dir, inode->bi_dir_offset), 0);
k = bch2_btree_iter_peek_slot(iter);
ret = bkey_err(k);
if (ret)
goto out;
if (k.k->type != KEY_TYPE_dirent) {
ret = -ENOENT;
goto out;
}
ret = remove_dirent(trans, bkey_s_c_to_dirent(k));
out:
bch2_trans_iter_put(trans, iter);
return ret;
}
struct inode_walker {
bool first_this_inode;
bool have_inode;
......@@ -420,26 +437,18 @@ static int check_inode(struct btree_trans *trans,
do_update = true;
}
if (!S_ISDIR(u.bi_mode) &&
u.bi_nlink &&
!(u.bi_flags & BCH_INODE_BACKPTR_UNTRUSTED) &&
(fsck_err_on(c->sb.version >= bcachefs_metadata_version_inode_backpointers, c,
"inode missing BCH_INODE_BACKPTR_UNTRUSTED flags") ||
c->opts.version_upgrade)) {
u.bi_flags |= BCH_INODE_BACKPTR_UNTRUSTED;
if (u.bi_flags & BCH_INODE_BACKPTR_UNTRUSTED) {
u.bi_dir = 0;
u.bi_dir_offset = 0;
u.bi_flags &= ~BCH_INODE_BACKPTR_UNTRUSTED;
do_update = true;
}
if (do_update) {
struct bkey_inode_buf p;
bch2_inode_pack(c, &p, &u);
p.inode.k.p = iter->pos;
ret = __bch2_trans_do(trans, NULL, NULL,
BTREE_INSERT_NOFAIL|
BTREE_INSERT_LAZY_RW,
(bch2_trans_update(trans, iter, &p.inode.k_i, 0), 0));
bch2_inode_write(trans, iter, &u));
if (ret)
bch_err(c, "error in fsck: error %i "
"updating inode", ret);
......@@ -704,7 +713,8 @@ static int check_dirents(struct bch_fs *c)
mode_to_type(w.inode.bi_mode),
(bch2_bkey_val_to_text(&PBUF(buf), c,
k), buf))) {
ret = bch2_btree_delete_at(&trans, iter, 0);
ret = lockrestart_do(&trans,
bch2_btree_delete_at(&trans, iter, 0));
if (ret)
goto err;
goto next;
......@@ -751,6 +761,16 @@ static int check_dirents(struct bch_fs *c)
if (!have_target)
goto next;
if (!target.bi_dir &&
!target.bi_dir_offset) {
target.bi_dir = k.k->p.inode;
target.bi_dir_offset = k.k->p.offset;
ret = write_inode(&trans, &target, target_snapshot);
if (ret)
goto err;
}
if (!inode_backpointer_matches(d, &target)) {
ret = inode_backpointer_exists(&trans, &target);
if (ret < 0)
......@@ -758,52 +778,47 @@ static int check_dirents(struct bch_fs *c)
backpointer_exists = ret;
ret = 0;
}
if (fsck_err_on(S_ISDIR(target.bi_mode) &&
!inode_backpointer_matches(d, &target) &&
backpointer_exists, c,
"directory %llu with multiple links",
target.bi_inum)) {
ret = remove_dirent(&trans, d);
if (ret)
goto err;
continue;
}
if (!inode_backpointer_matches(d, &target) &&
(S_ISDIR(target.bi_mode) || !target.bi_nlink)) {
if (backpointer_exists) {
if (!fsck_err(c, "inode %llu has multiple links but i_nlink 0",
d_inum))
goto check_type;
if (fsck_err_on(S_ISDIR(target.bi_mode) &&
backpointer_exists, c,
"directory %llu with multiple links",
target.bi_inum)) {
ret = remove_dirent(&trans, d);
if (ret)
goto err;
continue;
}
if (fsck_err_on(backpointer_exists &&
!target.bi_nlink, c,
"inode %llu has multiple links but i_nlink 0",
d_inum)) {
target.bi_nlink++;
target.bi_flags |= BCH_INODE_BACKPTR_UNTRUSTED;
} else {
if (c->sb.version >= bcachefs_metadata_version_inode_backpointers &&
!(target.bi_flags & BCH_INODE_BACKPTR_UNTRUSTED) &&
!fsck_err(c, "inode %llu has wrong backpointer:\n"
"got %llu:%llu\n"
"should be %llu:%llu",
d_inum,
target.bi_dir,
target.bi_dir_offset,
k.k->p.inode,
k.k->p.offset))
goto check_type;
target.bi_flags &= ~BCH_INODE_UNLINKED;
ret = write_inode(&trans, &target, target_snapshot);
if (ret)
goto err;
}
if (fsck_err_on(!backpointer_exists, c,
"inode %llu has wrong backpointer:\n"
"got %llu:%llu\n"
"should be %llu:%llu",
d_inum,
target.bi_dir,
target.bi_dir_offset,
k.k->p.inode,
k.k->p.offset)) {
target.bi_dir = k.k->p.inode;
target.bi_dir_offset = k.k->p.offset;
target.bi_flags &= ~BCH_INODE_BACKPTR_UNTRUSTED;
}
ret = write_inode(&trans, &target, target_snapshot);
if (ret)
goto err;
continue;
ret = write_inode(&trans, &target, target_snapshot);
if (ret)
goto err;
}
}
check_type:
if (fsck_err_on(d.v->d_type != mode_to_type(target.bi_mode), c,
"incorrect d_type: should be %u:\n%s",
mode_to_type(target.bi_mode),
......@@ -900,13 +915,13 @@ static int check_xattrs(struct bch_fs *c)
static int check_root(struct bch_fs *c, struct bch_inode_unpacked *root_inode)
{
struct bkey_inode_buf packed;
u32 snapshot;
int ret;
bch_verbose(c, "checking root directory");
ret = bch2_trans_do(c, NULL, NULL, 0,
__bch2_inode_find_by_inum_trans(&trans, BCACHEFS_ROOT_INO,
root_inode, 0));
lookup_inode(&trans, BCACHEFS_ROOT_INO, root_inode, &snapshot));
if (ret && ret != -ENOENT)
return ret;
......@@ -942,6 +957,7 @@ static int check_lostfound(struct bch_fs *c,
struct bch_hash_info root_hash_info =
bch2_hash_info_init(c, root_inode);
u64 inum;
u32 snapshot;
int ret;
bch_verbose(c, "checking lost+found");
......@@ -954,7 +970,7 @@ static int check_lostfound(struct bch_fs *c,
}
ret = bch2_trans_do(c, NULL, NULL, 0,
__bch2_inode_find_by_inum_trans(&trans, inum, lostfound_inode, 0));
lookup_inode(&trans, inum, lostfound_inode, &snapshot));
if (ret && ret != -ENOENT)
return ret;
......@@ -984,32 +1000,12 @@ static int check_lostfound(struct bch_fs *c,
return ret;
}
typedef GENRADIX(unsigned long) inode_bitmap;
static inline bool inode_bitmap_test(inode_bitmap *b, size_t nr)
{
unsigned long *w = genradix_ptr(b, nr / BITS_PER_LONG);
return w ? test_bit(nr & (BITS_PER_LONG - 1), w) : false;
}
static inline int inode_bitmap_set(inode_bitmap *b, size_t nr)
{
unsigned long *w = genradix_ptr_alloc(b, nr / BITS_PER_LONG, GFP_KERNEL);
if (!w)
return -ENOMEM;
*w |= 1UL << (nr & (BITS_PER_LONG - 1));
return 0;
}
struct pathbuf {
size_t nr;
size_t size;
struct pathbuf_entry {
u64 inum;
u64 offset;
} *entries;
};
......@@ -1020,8 +1016,9 @@ static int path_down(struct pathbuf *p, u64 inum)
void *n = krealloc(p->entries,
new_size * sizeof(p->entries[0]),
GFP_KERNEL);
if (!n)
if (!n) {
return -ENOMEM;
}
p->entries = n;
p->size = new_size;
......@@ -1029,149 +1026,119 @@ static int path_down(struct pathbuf *p, u64 inum)
p->entries[p->nr++] = (struct pathbuf_entry) {
.inum = inum,
.offset = 0,
};
return 0;
}
noinline_for_stack
static int check_directory_structure(struct bch_fs *c,
struct bch_inode_unpacked *lostfound_inode)
static int check_path(struct btree_trans *trans,
struct bch_inode_unpacked *lostfound,
struct pathbuf *p,
struct bch_inode_unpacked *inode)
{
inode_bitmap dirs_done;
struct pathbuf path = { 0, 0, NULL };
struct pathbuf_entry *e;
struct btree_trans trans;
struct btree_iter *iter;
struct bkey_s_c k;
struct bkey_s_c_dirent dirent;
bool had_unreachable;
u64 d_inum;
struct bch_fs *c = trans->c;
u32 snapshot;
size_t i;
int ret = 0;
bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0);
p->nr = 0;
bch_verbose(c, "checking directory structure");
/* DFS: */
restart_dfs:
genradix_init(&dirs_done);
had_unreachable = false;
ret = inode_bitmap_set(&dirs_done, BCACHEFS_ROOT_INO);
if (ret) {
bch_err(c, "memory allocation failure in inode_bitmap_set()");
goto err;
}
ret = path_down(&path, BCACHEFS_ROOT_INO);
if (ret)
goto err;
while (path.nr) {
next:
e = &path.entries[path.nr - 1];
if (e->offset == U64_MAX)
goto up;
for_each_btree_key(&trans, iter, BTREE_ID_dirents,
POS(e->inum, e->offset + 1), 0, k, ret) {
if (k.k->p.inode != e->inum)
break;
e->offset = k.k->p.offset;
if (k.k->type != KEY_TYPE_dirent)
continue;
while (inode->bi_inum != BCACHEFS_ROOT_INO) {
ret = lockrestart_do(trans,
inode_backpointer_exists(trans, inode));
if (ret < 0)
break;
dirent = bkey_s_c_to_dirent(k);
if (!ret) {
if (fsck_err(c, "unreachable inode %llu, type %u nlink %u backptr %llu:%llu",
inode->bi_inum,
mode_to_type(inode->bi_mode),
inode->bi_nlink,
inode->bi_dir,
inode->bi_dir_offset))
ret = reattach_inode(trans, lostfound, inode->bi_inum);
break;
}
ret = 0;
if (dirent.v->d_type != DT_DIR)
continue;
if (!S_ISDIR(inode->bi_mode))
break;
d_inum = le64_to_cpu(dirent.v->d_inum);
ret = path_down(p, inode->bi_inum);
if (ret) {
bch_err(c, "memory allocation failure");
return ret;
}
if (fsck_err_on(inode_bitmap_test(&dirs_done, d_inum), c,
"directory %llu has multiple hardlinks",
d_inum)) {
ret = remove_dirent(&trans, dirent);
if (ret)
goto err;
for (i = 0; i < p->nr; i++) {
if (inode->bi_dir != p->entries[i].inum)
continue;
}
ret = inode_bitmap_set(&dirs_done, d_inum);
if (ret) {
bch_err(c, "memory allocation failure in inode_bitmap_set()");
goto err;
}
/* XXX print path */
if (!fsck_err(c, "directory structure loop"))
return 0;
ret = path_down(&path, d_inum);
ret = lockrestart_do(trans,
remove_backpointer(trans, inode));
if (ret) {
goto err;
bch_err(c, "error removing dirent: %i", ret);
break;
}
ret = bch2_trans_iter_free(&trans, iter);
if (ret) {
bch_err(c, "btree error %i in fsck", ret);
goto err;
}
goto next;
ret = reattach_inode(trans, lostfound, inode->bi_inum);
break;
}
ret = bch2_trans_iter_free(&trans, iter) ?: ret;
ret = lockrestart_do(trans,
lookup_inode(trans, inode->bi_dir, inode, &snapshot));
if (ret) {
bch_err(c, "btree error %i in fsck", ret);
goto err;
/* Should have been caught in dirents pass */
bch_err(c, "error looking up parent directory: %i", ret);
break;
}
up:
path.nr--;
}
fsck_err:
if (ret)
bch_err(c, "%s: err %i", __func__, ret);
return ret;
}
iter = bch2_trans_get_iter(&trans, BTREE_ID_inodes, POS_MIN, 0);
retry:
for_each_btree_key_continue(iter, 0, k, ret) {
if (k.k->type != KEY_TYPE_inode)
continue;
/*
* Check for unreachable inodes, as well as loops in the directory structure:
* After check_dirents(), if an inode backpointer doesn't exist that means it's
* unreachable:
*/
static int check_directory_structure(struct bch_fs *c,
struct bch_inode_unpacked *lostfound)
{
struct btree_trans trans;
struct btree_iter *iter;
struct bkey_s_c k;
struct bch_inode_unpacked u;
struct pathbuf path = { 0, 0, NULL };
int ret;
if (!S_ISDIR(le16_to_cpu(bkey_s_c_to_inode(k).v->bi_mode)))
continue;
bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0);
ret = bch2_empty_dir_trans(&trans, k.k->p.inode);
if (ret == -EINTR)
goto retry;
if (!ret)
for_each_btree_key(&trans, iter, BTREE_ID_inodes, POS_MIN, 0, k, ret) {
if (k.k->type != KEY_TYPE_inode)
continue;
if (fsck_err_on(!inode_bitmap_test(&dirs_done, k.k->p.offset), c,
"unreachable directory found (inum %llu)",
k.k->p.offset)) {
ret = reattach_inode(&trans, lostfound_inode, k.k->p.offset);
if (ret) {
goto err;
}
had_unreachable = true;
ret = bch2_inode_unpack(bkey_s_c_to_inode(k), &u);
if (ret) {
/* Should have been caught earlier in fsck: */
bch_err(c, "error unpacking inode %llu: %i", k.k->p.offset, ret);
break;
}
}
bch2_trans_iter_free(&trans, iter);
if (ret)
goto err;
if (had_unreachable) {
bch_info(c, "reattached unreachable directories, restarting pass to check for loops");
genradix_free(&dirs_done);
kfree(path.entries);
memset(&dirs_done, 0, sizeof(dirs_done));
memset(&path, 0, sizeof(path));
goto restart_dfs;
ret = check_path(&trans, lostfound, &path, &u);
if (ret)
break;
}
err:
fsck_err:
ret = bch2_trans_exit(&trans) ?: ret;
genradix_free(&dirs_done);
kfree(path.entries);
return ret;
bch2_trans_iter_put(&trans, iter);
BUG_ON(ret == -EINTR);
return bch2_trans_exit(&trans) ?: ret;
}
struct nlink {
......@@ -1254,6 +1221,11 @@ static int check_inode_nlink(struct btree_trans *trans,
if (S_ISDIR(le16_to_cpu(inode.v->bi_mode)))
return 0;
if (!nlink) {
bch_err(c, "no links found to inode %llu", inode.k->p.offset);
return -EINVAL;
}
ret = bch2_inode_unpack(inode, &u);
/* Should never happen, checked by bch2_inode_invalid: */
......@@ -1262,34 +1234,16 @@ static int check_inode_nlink(struct btree_trans *trans,
inode.k->p.inode))
return ret;
/* Improved directory structure pass will catch this: */
if (fsck_err_on(!nlink, c,
"unreachable inode %llu not marked as unlinked (type %u)",
u.bi_inum, mode_to_type(u.bi_mode))) {
ret = reattach_inode(trans, lostfound_inode, u.bi_inum);
if (ret)
return ret;
nlink = 1;
}
if (fsck_err_on(bch2_inode_nlink_get(&u) != nlink, c,
"inode %llu has wrong i_nlink (type %u i_nlink %u, should be %u)",
u.bi_inum, mode_to_type(u.bi_mode),
bch2_inode_nlink_get(&u), nlink)) {
struct bkey_inode_buf p;
if (nlink > 1)
u.bi_flags |= BCH_INODE_BACKPTR_UNTRUSTED;
bch2_inode_nlink_set(&u, nlink);
bch2_inode_pack(c, &p, &u);
p.inode.k.p = iter->pos;
ret = __bch2_trans_do(trans, NULL, NULL,
BTREE_INSERT_NOFAIL|
BTREE_INSERT_LAZY_RW,
(bch2_trans_update(trans, iter, &p.inode.k_i, 0), 0));
bch2_inode_write(trans, iter, &u));
if (ret)
bch_err(c, "error in fsck: error %i updating inode", ret);
}
......
......@@ -307,7 +307,7 @@ struct btree_iter *bch2_inode_peek(struct btree_trans *trans,
if (ret)
goto err;
ret = k.k->type == KEY_TYPE_inode ? 0 : -EIO;
ret = k.k->type == KEY_TYPE_inode ? 0 : -ENOENT;
if (ret)
goto err;
......@@ -637,39 +637,18 @@ int bch2_inode_rm(struct bch_fs *c, u64 inode_nr, bool cached)
return ret;
}
int __bch2_inode_find_by_inum_trans(struct btree_trans *trans, u64 inode_nr,
struct bch_inode_unpacked *inode,
unsigned flags)
static int bch2_inode_find_by_inum_trans(struct btree_trans *trans, u64 inode_nr,
struct bch_inode_unpacked *inode)
{
struct btree_iter *iter;
struct bkey_s_c k;
int ret;
iter = bch2_trans_get_iter(trans, BTREE_ID_inodes,
POS(0, inode_nr), flags);
k = (flags & BTREE_ITER_TYPE) == BTREE_ITER_CACHED
? bch2_btree_iter_peek_cached(iter)
: bch2_btree_iter_peek_slot(iter);
ret = bkey_err(k);
if (ret)
goto err;
ret = k.k->type == KEY_TYPE_inode
? bch2_inode_unpack(bkey_s_c_to_inode(k), inode)
: -ENOENT;
err:
iter = bch2_inode_peek(trans, inode, inode_nr, 0);
ret = PTR_ERR_OR_ZERO(iter);
bch2_trans_iter_put(trans, iter);
return ret;
}
int bch2_inode_find_by_inum_trans(struct btree_trans *trans, u64 inode_nr,
struct bch_inode_unpacked *inode)
{
return __bch2_inode_find_by_inum_trans(trans, inode_nr,
inode, BTREE_ITER_CACHED);
}
int bch2_inode_find_by_inum(struct bch_fs *c, u64 inode_nr,
struct bch_inode_unpacked *inode)
{
......
......@@ -74,10 +74,6 @@ struct btree_iter *bch2_inode_create(struct btree_trans *,
int bch2_inode_rm(struct bch_fs *, u64, bool);
int __bch2_inode_find_by_inum_trans(struct btree_trans *, u64,
struct bch_inode_unpacked *, unsigned);
int bch2_inode_find_by_inum_trans(struct btree_trans *, u64,
struct bch_inode_unpacked *);
int bch2_inode_find_by_inum(struct bch_fs *, u64, struct bch_inode_unpacked *);
static inline struct bch_io_opts bch2_inode_opts_get(struct bch_inode_unpacked *inode)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment