Commit e2a316b3 authored by Kent Overstreet

bcachefs: BCH_WATERMARK_interior_updates

This adds a new watermark, higher priority than BCH_WATERMARK_reclaim,
for interior btree updates. We've seen a deadlock where journal replay
triggers a ton of btree node merges, and these use up all available open
buckets and then interior updates get stuck.

One cause of this is that we're currently lacking btree node merging on
write buffer btrees - that needs to be fixed as well.
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
parent ba947ecd
...@@ -188,8 +188,10 @@ long bch2_bucket_alloc_new_fs(struct bch_dev *ca) ...@@ -188,8 +188,10 @@ long bch2_bucket_alloc_new_fs(struct bch_dev *ca)
static inline unsigned open_buckets_reserved(enum bch_watermark watermark) static inline unsigned open_buckets_reserved(enum bch_watermark watermark)
{ {
switch (watermark) { switch (watermark) {
case BCH_WATERMARK_reclaim: case BCH_WATERMARK_interior_updates:
return 0; return 0;
case BCH_WATERMARK_reclaim:
return OPEN_BUCKETS_COUNT / 6;
case BCH_WATERMARK_btree: case BCH_WATERMARK_btree:
case BCH_WATERMARK_btree_copygc: case BCH_WATERMARK_btree_copygc:
return OPEN_BUCKETS_COUNT / 4; return OPEN_BUCKETS_COUNT / 4;
......
...@@ -22,7 +22,8 @@ struct bucket_alloc_state { ...@@ -22,7 +22,8 @@ struct bucket_alloc_state {
x(copygc) \ x(copygc) \
x(btree) \ x(btree) \
x(btree_copygc) \ x(btree_copygc) \
x(reclaim) x(reclaim) \
x(interior_updates)
enum bch_watermark { enum bch_watermark {
#define x(name) BCH_WATERMARK_##name, #define x(name) BCH_WATERMARK_##name,
......
...@@ -1861,7 +1861,7 @@ static void btree_node_write_work(struct work_struct *work) ...@@ -1861,7 +1861,7 @@ static void btree_node_write_work(struct work_struct *work)
} else { } else {
ret = bch2_trans_do(c, NULL, NULL, 0, ret = bch2_trans_do(c, NULL, NULL, 0,
bch2_btree_node_update_key_get_iter(trans, b, &wbio->key, bch2_btree_node_update_key_get_iter(trans, b, &wbio->key,
BCH_WATERMARK_reclaim| BCH_WATERMARK_interior_updates|
BCH_TRANS_COMMIT_journal_reclaim| BCH_TRANS_COMMIT_journal_reclaim|
BCH_TRANS_COMMIT_no_enospc| BCH_TRANS_COMMIT_no_enospc|
BCH_TRANS_COMMIT_no_check_rw, BCH_TRANS_COMMIT_no_check_rw,
......
...@@ -887,6 +887,7 @@ int bch2_trans_commit_error(struct btree_trans *trans, unsigned flags, ...@@ -887,6 +887,7 @@ int bch2_trans_commit_error(struct btree_trans *trans, unsigned flags,
int ret, unsigned long trace_ip) int ret, unsigned long trace_ip)
{ {
struct bch_fs *c = trans->c; struct bch_fs *c = trans->c;
enum bch_watermark watermark = flags & BCH_WATERMARK_MASK;
switch (ret) { switch (ret) {
case -BCH_ERR_btree_insert_btree_node_full: case -BCH_ERR_btree_insert_btree_node_full:
...@@ -905,7 +906,7 @@ int bch2_trans_commit_error(struct btree_trans *trans, unsigned flags, ...@@ -905,7 +906,7 @@ int bch2_trans_commit_error(struct btree_trans *trans, unsigned flags,
* flag * flag
*/ */
if ((flags & BCH_TRANS_COMMIT_journal_reclaim) && if ((flags & BCH_TRANS_COMMIT_journal_reclaim) &&
(flags & BCH_WATERMARK_MASK) != BCH_WATERMARK_reclaim) { watermark < BCH_WATERMARK_reclaim) {
ret = -BCH_ERR_journal_reclaim_would_deadlock; ret = -BCH_ERR_journal_reclaim_would_deadlock;
break; break;
} }
......
...@@ -687,7 +687,7 @@ static void btree_update_nodes_written(struct btree_update *as) ...@@ -687,7 +687,7 @@ static void btree_update_nodes_written(struct btree_update *as)
* which may require allocations as well. * which may require allocations as well.
*/ */
ret = commit_do(trans, &as->disk_res, &journal_seq, ret = commit_do(trans, &as->disk_res, &journal_seq,
BCH_WATERMARK_reclaim| BCH_WATERMARK_interior_updates|
BCH_TRANS_COMMIT_no_enospc| BCH_TRANS_COMMIT_no_enospc|
BCH_TRANS_COMMIT_no_check_rw| BCH_TRANS_COMMIT_no_check_rw|
BCH_TRANS_COMMIT_journal_reclaim, BCH_TRANS_COMMIT_journal_reclaim,
...@@ -1121,7 +1121,7 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path, ...@@ -1121,7 +1121,7 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path,
unsigned journal_flags = watermark|JOURNAL_RES_GET_CHECK; unsigned journal_flags = watermark|JOURNAL_RES_GET_CHECK;
if ((flags & BCH_TRANS_COMMIT_journal_reclaim) && if ((flags & BCH_TRANS_COMMIT_journal_reclaim) &&
watermark != BCH_WATERMARK_reclaim) watermark < BCH_WATERMARK_reclaim)
journal_flags |= JOURNAL_RES_GET_NONBLOCK; journal_flags |= JOURNAL_RES_GET_NONBLOCK;
ret = drop_locks_do(trans, ret = drop_locks_do(trans,
...@@ -1217,7 +1217,7 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path, ...@@ -1217,7 +1217,7 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path,
*/ */
if (bch2_err_matches(ret, ENOSPC) && if (bch2_err_matches(ret, ENOSPC) &&
(flags & BCH_TRANS_COMMIT_journal_reclaim) && (flags & BCH_TRANS_COMMIT_journal_reclaim) &&
watermark != BCH_WATERMARK_reclaim) { watermark < BCH_WATERMARK_reclaim) {
ret = -BCH_ERR_journal_reclaim_would_deadlock; ret = -BCH_ERR_journal_reclaim_would_deadlock;
goto err; goto err;
} }
......
...@@ -226,6 +226,7 @@ static inline u64 bch2_dev_buckets_reserved(struct bch_dev *ca, enum bch_waterma ...@@ -226,6 +226,7 @@ static inline u64 bch2_dev_buckets_reserved(struct bch_dev *ca, enum bch_waterma
fallthrough; fallthrough;
case BCH_WATERMARK_btree_copygc: case BCH_WATERMARK_btree_copygc:
case BCH_WATERMARK_reclaim: case BCH_WATERMARK_reclaim:
case BCH_WATERMARK_interior_updates:
break; break;
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment