Commit f96c0df4 authored by Kent Overstreet's avatar Kent Overstreet Committed by Kent Overstreet

bcachefs: Fix a deadlock in bch2_btree_node_get_sibling()

There was a bad interaction with bch2_btree_iter_set_pos_same_leaf(),
which can leave a btree node locked that is just outside iter->pos,
breaking the lock ordering checks in __bch2_btree_node_lock(). Ideally
we should get rid of this corner case, but for now fix it locally with
verbose comments.
Signed-off-by: default avatarKent Overstreet <kent.overstreet@gmail.com>
Signed-off-by: default avatarKent Overstreet <kent.overstreet@linux.dev>
parent 495aabed
...@@ -851,6 +851,18 @@ struct btree *bch2_btree_node_get_sibling(struct bch_fs *c, ...@@ -851,6 +851,18 @@ struct btree *bch2_btree_node_get_sibling(struct bch_fs *c,
if (!parent) if (!parent)
return NULL; return NULL;
/*
* There's a corner case where a btree_iter might have a node locked
* that is just outside its current pos - when
* bch2_btree_iter_set_pos_same_leaf() gets to the end of the node.
*
* But the lock ordering checks in __bch2_btree_node_lock() go off of
* iter->pos, not the node's key: so if the iterator is marked as
* needing to be traversed, we risk deadlock if we don't bail out here:
*/
if (iter->uptodate >= BTREE_ITER_NEED_TRAVERSE)
return ERR_PTR(-EINTR);
if (!bch2_btree_node_relock(iter, level + 1)) { if (!bch2_btree_node_relock(iter, level + 1)) {
ret = ERR_PTR(-EINTR); ret = ERR_PTR(-EINTR);
goto out; goto out;
......
...@@ -205,8 +205,9 @@ bool __bch2_btree_node_lock(struct btree *b, struct bpos pos, ...@@ -205,8 +205,9 @@ bool __bch2_btree_node_lock(struct btree *b, struct bpos pos,
if (!linked->nodes_locked) if (!linked->nodes_locked)
continue; continue;
/* * Must lock btree nodes in key order: */ /* Must lock btree nodes in key order: */
if (__btree_iter_cmp(iter->btree_id, pos, linked) < 0) if ((cmp_int(iter->btree_id, linked->btree_id) ?:
bkey_cmp(pos, linked->pos)) < 0)
ret = false; ret = false;
/* /*
...@@ -1320,6 +1321,16 @@ void bch2_btree_iter_set_pos_same_leaf(struct btree_iter *iter, struct bpos new_ ...@@ -1320,6 +1321,16 @@ void bch2_btree_iter_set_pos_same_leaf(struct btree_iter *iter, struct bpos new_
btree_iter_advance_to_pos(iter, l, -1); btree_iter_advance_to_pos(iter, l, -1);
/*
* XXX:
* keeping a node locked that's outside (even just outside) iter->pos
* breaks __bch2_btree_node_lock(). This seems to only affect
* bch2_btree_node_get_sibling so for now it's fixed there, but we
* should try to get rid of this corner case.
*
* (this behaviour is currently needed for BTREE_INSERT_NOUNLOCK)
*/
if (bch2_btree_node_iter_end(&l->iter) && if (bch2_btree_node_iter_end(&l->iter) &&
btree_iter_pos_after_node(iter, l->b)) btree_iter_pos_after_node(iter, l->b))
btree_iter_set_dirty(iter, BTREE_ITER_NEED_TRAVERSE); btree_iter_set_dirty(iter, BTREE_ITER_NEED_TRAVERSE);
...@@ -2195,6 +2206,7 @@ void bch2_trans_init(struct btree_trans *trans, struct bch_fs *c, ...@@ -2195,6 +2206,7 @@ void bch2_trans_init(struct btree_trans *trans, struct bch_fs *c,
bch2_trans_preload_mem(trans, expected_mem_bytes); bch2_trans_preload_mem(trans, expected_mem_bytes);
#ifdef CONFIG_BCACHEFS_DEBUG #ifdef CONFIG_BCACHEFS_DEBUG
trans->pid = current->pid;
mutex_lock(&c->btree_trans_lock); mutex_lock(&c->btree_trans_lock);
list_add(&trans->list, &c->btree_trans_list); list_add(&trans->list, &c->btree_trans_list);
mutex_unlock(&c->btree_trans_lock); mutex_unlock(&c->btree_trans_lock);
...@@ -2233,7 +2245,7 @@ void bch2_btree_trans_to_text(struct printbuf *out, struct bch_fs *c) ...@@ -2233,7 +2245,7 @@ void bch2_btree_trans_to_text(struct printbuf *out, struct bch_fs *c)
mutex_lock(&c->btree_trans_lock); mutex_lock(&c->btree_trans_lock);
list_for_each_entry(trans, &c->btree_trans_list, list) { list_for_each_entry(trans, &c->btree_trans_list, list) {
pr_buf(out, "%ps\n", (void *) trans->ip); pr_buf(out, "%i %ps\n", trans->pid, (void *) trans->ip);
trans_for_each_iter(trans, iter) { trans_for_each_iter(trans, iter) {
if (!iter->nodes_locked) if (!iter->nodes_locked)
......
...@@ -172,17 +172,10 @@ void bch2_btree_iter_set_pos_same_leaf(struct btree_iter *, struct bpos); ...@@ -172,17 +172,10 @@ void bch2_btree_iter_set_pos_same_leaf(struct btree_iter *, struct bpos);
void __bch2_btree_iter_set_pos(struct btree_iter *, struct bpos, bool); void __bch2_btree_iter_set_pos(struct btree_iter *, struct bpos, bool);
void bch2_btree_iter_set_pos(struct btree_iter *, struct bpos); void bch2_btree_iter_set_pos(struct btree_iter *, struct bpos);
static inline int __btree_iter_cmp(enum btree_id id,
struct bpos pos,
const struct btree_iter *r)
{
return cmp_int(id, r->btree_id) ?: bkey_cmp(pos, r->pos);
}
static inline int btree_iter_cmp(const struct btree_iter *l, static inline int btree_iter_cmp(const struct btree_iter *l,
const struct btree_iter *r) const struct btree_iter *r)
{ {
return __btree_iter_cmp(l->btree_id, l->pos, r); return cmp_int(l->btree_id, r->btree_id) ?: bkey_cmp(l->pos, r->pos);
} }
/* /*
......
...@@ -287,6 +287,7 @@ struct btree_trans { ...@@ -287,6 +287,7 @@ struct btree_trans {
#ifdef CONFIG_BCACHEFS_DEBUG #ifdef CONFIG_BCACHEFS_DEBUG
struct list_head list; struct list_head list;
struct btree *locking; struct btree *locking;
pid_t pid;
#endif #endif
unsigned long ip; unsigned long ip;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment