Commit e242b92a authored by Kent Overstreet

bcachefs: Fix for long running btree transactions & key cache

While a btree transaction is running, we hold a SRCU read lock on the
btree key cache that prevents btree key cache keys from being freed -
this is so that relock() operations won't access freed memory.

The downside of this is that long running btree transactions prevent
memory from being freed from the key cache. This adds a check in
bch2_trans_begin() - if the transaction has been running longer than 1
second, drop and retake the SRCU read lock and zero out pointers to
unlocked key cache paths.
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
parent 67ace272
...@@ -2756,6 +2756,20 @@ void *__bch2_trans_kmalloc(struct btree_trans *trans, size_t size) ...@@ -2756,6 +2756,20 @@ void *__bch2_trans_kmalloc(struct btree_trans *trans, size_t size)
return p; return p;
} }
static noinline void bch2_trans_reset_srcu_lock(struct btree_trans *trans)
{
struct bch_fs *c = trans->c;
struct btree_path *path;
trans_for_each_path(trans, path)
if (path->cached && !btree_node_locked(path, 0))
path->l[0].b = ERR_PTR(-BCH_ERR_no_btree_node_srcu_reset);
srcu_read_unlock(&c->btree_trans_barrier, trans->srcu_idx);
trans->srcu_idx = srcu_read_lock(&c->btree_trans_barrier);
trans->srcu_lock_time = jiffies;
}
/** /**
* bch2_trans_begin() - reset a transaction after a interrupted attempt * bch2_trans_begin() - reset a transaction after a interrupted attempt
* @trans: transaction to reset * @trans: transaction to reset
...@@ -2811,6 +2825,9 @@ u32 bch2_trans_begin(struct btree_trans *trans) ...@@ -2811,6 +2825,9 @@ u32 bch2_trans_begin(struct btree_trans *trans)
bch2_trans_relock(trans); bch2_trans_relock(trans);
} }
if (unlikely(time_after(jiffies, trans->srcu_lock_time + HZ)))
bch2_trans_reset_srcu_lock(trans);
trans->last_restarted_ip = _RET_IP_; trans->last_restarted_ip = _RET_IP_;
if (trans->restarted) if (trans->restarted)
bch2_btree_path_traverse_all(trans); bch2_btree_path_traverse_all(trans);
...@@ -2897,6 +2914,7 @@ void __bch2_trans_init(struct btree_trans *trans, struct bch_fs *c, unsigned fn_ ...@@ -2897,6 +2914,7 @@ void __bch2_trans_init(struct btree_trans *trans, struct bch_fs *c, unsigned fn_
trans->nr_max_paths = s->nr_max_paths; trans->nr_max_paths = s->nr_max_paths;
trans->srcu_idx = srcu_read_lock(&c->btree_trans_barrier); trans->srcu_idx = srcu_read_lock(&c->btree_trans_barrier);
trans->srcu_lock_time = jiffies;
if (IS_ENABLED(CONFIG_BCACHEFS_DEBUG_TRANSACTIONS)) { if (IS_ENABLED(CONFIG_BCACHEFS_DEBUG_TRANSACTIONS)) {
struct btree_trans *pos; struct btree_trans *pos;
......
...@@ -411,6 +411,7 @@ struct btree_trans { ...@@ -411,6 +411,7 @@ struct btree_trans {
enum bch_errcode restarted:16; enum bch_errcode restarted:16;
u32 restart_count; u32 restart_count;
unsigned long last_restarted_ip; unsigned long last_restarted_ip;
unsigned long srcu_lock_time;
/* /*
* For when bch2_trans_update notices we'll be splitting a compressed * For when bch2_trans_update notices we'll be splitting a compressed
......
...@@ -53,6 +53,7 @@ ...@@ -53,6 +53,7 @@
x(BCH_ERR_no_btree_node, no_btree_node_down) \ x(BCH_ERR_no_btree_node, no_btree_node_down) \
x(BCH_ERR_no_btree_node, no_btree_node_init) \ x(BCH_ERR_no_btree_node, no_btree_node_init) \
x(BCH_ERR_no_btree_node, no_btree_node_cached) \ x(BCH_ERR_no_btree_node, no_btree_node_cached) \
x(BCH_ERR_no_btree_node, no_btree_node_srcu_reset) \
x(0, btree_insert_fail) \ x(0, btree_insert_fail) \
x(BCH_ERR_btree_insert_fail, btree_insert_btree_node_full) \ x(BCH_ERR_btree_insert_fail, btree_insert_btree_node_full) \
x(BCH_ERR_btree_insert_fail, btree_insert_need_mark_replicas) \ x(BCH_ERR_btree_insert_fail, btree_insert_need_mark_replicas) \
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment