Commit bf3efff5 authored by Kent Overstreet's avatar Kent Overstreet Committed by Kent Overstreet

bcachefs: Fix race leading to btree node write getting stuck

Checking btree_node_may_write() isn't atomic with the other btree flags,
dirty and need_write in particular. There was a rare race where we'd
unblock a node from writing while __btree_node_flush() was setting
need_write, and no thread would notice that the node was now both able
to write and needed to be written.

Fix this by adding btree node flags for will_make_reachable and
write_blocked that can be checked in the cmpxchg loop in
__bch2_btree_node_write.
Signed-off-by: default avatarKent Overstreet <kent.overstreet@gmail.com>
parent 6f5f747c
...@@ -223,10 +223,9 @@ static int __btree_node_reclaim(struct bch_fs *c, struct btree *b, bool flush) ...@@ -223,10 +223,9 @@ static int __btree_node_reclaim(struct bch_fs *c, struct btree *b, bool flush)
goto wait_on_io; goto wait_on_io;
} }
if (btree_node_noevict(b)) if (btree_node_noevict(b) ||
goto out_unlock; btree_node_write_blocked(b) ||
btree_node_will_make_reachable(b))
if (!btree_node_may_write(b))
goto out_unlock; goto out_unlock;
if (btree_node_dirty(b)) { if (btree_node_dirty(b)) {
......
...@@ -1606,7 +1606,8 @@ static void __btree_node_write_done(struct bch_fs *c, struct btree *b) ...@@ -1606,7 +1606,8 @@ static void __btree_node_write_done(struct bch_fs *c, struct btree *b)
if ((old & (1U << BTREE_NODE_dirty)) && if ((old & (1U << BTREE_NODE_dirty)) &&
(old & (1U << BTREE_NODE_need_write)) && (old & (1U << BTREE_NODE_need_write)) &&
!(old & (1U << BTREE_NODE_never_write)) && !(old & (1U << BTREE_NODE_never_write)) &&
btree_node_may_write(b)) { !(old & (1U << BTREE_NODE_write_blocked)) &&
!(old & (1U << BTREE_NODE_will_make_reachable))) {
new &= ~(1U << BTREE_NODE_dirty); new &= ~(1U << BTREE_NODE_dirty);
new &= ~(1U << BTREE_NODE_need_write); new &= ~(1U << BTREE_NODE_need_write);
new |= (1U << BTREE_NODE_write_in_flight); new |= (1U << BTREE_NODE_write_in_flight);
...@@ -1778,10 +1779,13 @@ void __bch2_btree_node_write(struct bch_fs *c, struct btree *b, unsigned flags) ...@@ -1778,10 +1779,13 @@ void __bch2_btree_node_write(struct bch_fs *c, struct btree *b, unsigned flags)
!(old & (1 << BTREE_NODE_need_write))) !(old & (1 << BTREE_NODE_need_write)))
return; return;
if (!btree_node_may_write(b)) if (old &
((1 << BTREE_NODE_never_write)|
(1 << BTREE_NODE_write_blocked)))
return; return;
if (old & (1 << BTREE_NODE_never_write)) if (b->written &&
(old & (1 << BTREE_NODE_will_make_reachable)))
return; return;
if (old & (1 << BTREE_NODE_write_in_flight)) if (old & (1 << BTREE_NODE_write_in_flight))
......
...@@ -62,12 +62,6 @@ void __bch2_btree_node_wait_on_write(struct btree *); ...@@ -62,12 +62,6 @@ void __bch2_btree_node_wait_on_write(struct btree *);
void bch2_btree_node_wait_on_read(struct btree *); void bch2_btree_node_wait_on_read(struct btree *);
void bch2_btree_node_wait_on_write(struct btree *); void bch2_btree_node_wait_on_write(struct btree *);
static inline bool btree_node_may_write(struct btree *b)
{
return list_empty_careful(&b->write_blocked) &&
(!b->written || !b->will_make_reachable);
}
enum compact_mode { enum compact_mode {
COMPACT_LAZY, COMPACT_LAZY,
COMPACT_ALL, COMPACT_ALL,
......
...@@ -434,6 +434,8 @@ struct btree_trans { ...@@ -434,6 +434,8 @@ struct btree_trans {
x(read_error) \ x(read_error) \
x(dirty) \ x(dirty) \
x(need_write) \ x(need_write) \
x(write_blocked) \
x(will_make_reachable) \
x(noevict) \ x(noevict) \
x(write_idx) \ x(write_idx) \
x(accessed) \ x(accessed) \
......
...@@ -606,6 +606,8 @@ static void btree_update_nodes_written(struct btree_update *as) ...@@ -606,6 +606,8 @@ static void btree_update_nodes_written(struct btree_update *as)
mutex_lock(&c->btree_interior_update_lock); mutex_lock(&c->btree_interior_update_lock);
list_del(&as->write_blocked_list); list_del(&as->write_blocked_list);
if (list_empty(&b->write_blocked))
clear_btree_node_write_blocked(b);
/* /*
* Node might have been freed, recheck under * Node might have been freed, recheck under
...@@ -650,6 +652,7 @@ static void btree_update_nodes_written(struct btree_update *as) ...@@ -650,6 +652,7 @@ static void btree_update_nodes_written(struct btree_update *as)
BUG_ON(b->will_make_reachable != (unsigned long) as); BUG_ON(b->will_make_reachable != (unsigned long) as);
b->will_make_reachable = 0; b->will_make_reachable = 0;
clear_btree_node_will_make_reachable(b);
} }
mutex_unlock(&c->btree_interior_update_lock); mutex_unlock(&c->btree_interior_update_lock);
...@@ -716,6 +719,8 @@ static void btree_update_updated_node(struct btree_update *as, struct btree *b) ...@@ -716,6 +719,8 @@ static void btree_update_updated_node(struct btree_update *as, struct btree *b)
as->mode = BTREE_INTERIOR_UPDATING_NODE; as->mode = BTREE_INTERIOR_UPDATING_NODE;
as->b = b; as->b = b;
set_btree_node_write_blocked(b);
list_add(&as->write_blocked_list, &b->write_blocked); list_add(&as->write_blocked_list, &b->write_blocked);
mutex_unlock(&c->btree_interior_update_lock); mutex_unlock(&c->btree_interior_update_lock);
...@@ -781,6 +786,7 @@ static void bch2_btree_update_add_new_node(struct btree_update *as, struct btree ...@@ -781,6 +786,7 @@ static void bch2_btree_update_add_new_node(struct btree_update *as, struct btree
as->new_nodes[as->nr_new_nodes++] = b; as->new_nodes[as->nr_new_nodes++] = b;
b->will_make_reachable = 1UL|(unsigned long) as; b->will_make_reachable = 1UL|(unsigned long) as;
set_btree_node_will_make_reachable(b);
mutex_unlock(&c->btree_interior_update_lock); mutex_unlock(&c->btree_interior_update_lock);
...@@ -803,6 +809,7 @@ static void btree_update_drop_new_node(struct bch_fs *c, struct btree *b) ...@@ -803,6 +809,7 @@ static void btree_update_drop_new_node(struct bch_fs *c, struct btree *b)
* xchg() is for synchronization with bch2_btree_complete_write: * xchg() is for synchronization with bch2_btree_complete_write:
*/ */
v = xchg(&b->will_make_reachable, 0); v = xchg(&b->will_make_reachable, 0);
clear_btree_node_will_make_reachable(b);
as = (struct btree_update *) (v & ~1UL); as = (struct btree_update *) (v & ~1UL);
if (!as) { if (!as) {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment