Commit 1a21bf98 authored by Kent Overstreet, committed by Kent Overstreet

bcachefs: Add a single slot percpu buf for btree iters

Allocating our array of btree iters is a big enough allocation that it
hits the buddy allocator, and we're seeing lots of lock contention.
Sticking a single element buffer in front of it should help.
Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
parent 00276f9f
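
The pattern the commit relies on is a per-CPU, single-slot cache layered over a mempool: allocation first tries to take the buffer parked in this CPU's slot with this_cpu_xchg(), falling back to the pool only when the slot is empty; freeing parks the buffer back in the slot and releases whatever was parked there before. A minimal sketch of the idea follows. The commit itself hangs the slot off struct bch_fs via alloc_percpu(); the static DEFINE_PER_CPU and the names iter_buf_slot, iter_buf_alloc, and iter_buf_free below are illustrative only, not the bcachefs API:

#include <linux/percpu.h>
#include <linux/mempool.h>

/* One parked allocation per CPU (illustrative, not the bcachefs type). */
struct iter_buf_cache {
	void	*buf;
};

static DEFINE_PER_CPU(struct iter_buf_cache, iter_buf_slot);

/* Fast path: take this CPU's parked buffer; otherwise hit the mempool. */
static void *iter_buf_alloc(mempool_t *pool)
{
	return this_cpu_xchg(iter_buf_slot.buf, NULL) ?:
		mempool_alloc(pool, GFP_NOFS);
}

/* Park the buffer in this CPU's slot; free only the previous occupant. */
static void iter_buf_free(mempool_t *pool, void *p)
{
	p = this_cpu_xchg(iter_buf_slot.buf, p);
	if (p)
		mempool_free(p, pool);
}

Because this_cpu_xchg() is a single per-CPU atomic operation, the common alloc/free round trip never reaches a shared lock, which is what relieves the buddy-allocator contention described above.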
--- a/fs/bcachefs/bcachefs.h
+++ b/fs/bcachefs/bcachefs.h
@@ -541,6 +541,10 @@ struct journal_keys {
 	u64			journal_seq_base;
 };
 
+struct btree_iter_buf {
+	struct btree_iter	*iter;
+};
+
 struct bch_fs {
 	struct closure		cl;
 
@@ -636,6 +640,7 @@ struct bch_fs {
 	struct mutex		btree_trans_lock;
 	struct list_head	btree_trans_list;
 	mempool_t		btree_iters_pool;
+	struct btree_iter_buf  __percpu	*btree_iters_bufs;
 
 	struct btree_key_cache	btree_key_cache;
--- a/fs/bcachefs/btree_iter.c
+++ b/fs/bcachefs/btree_iter.c
@@ -1991,6 +1991,7 @@ int bch2_trans_iter_free(struct btree_trans *trans,
 	return bch2_trans_iter_put(trans, iter);
 }
 
+#if 0
 static int bch2_trans_realloc_iters(struct btree_trans *trans,
 				    unsigned new_size)
 {
@@ -2053,6 +2054,7 @@ static int bch2_trans_realloc_iters(struct btree_trans *trans,
 
 	return 0;
 }
+#endif
 
 static struct btree_iter *btree_trans_iter_alloc(struct btree_trans *trans)
 {
@@ -2062,28 +2064,27 @@ static struct btree_iter *btree_trans_iter_alloc(struct btree_trans *trans)
 		goto got_slot;
 
 	if (trans->nr_iters == trans->size) {
-		int ret;
+		struct btree_iter *iter;
 
-		if (trans->nr_iters >= BTREE_ITER_MAX) {
-			struct btree_iter *iter;
+		BUG_ON(trans->size < BTREE_ITER_MAX);
 
-			trans_for_each_iter(trans, iter) {
-				pr_err("iter: btree %s pos %llu:%llu%s%s%s %ps",
-				       bch2_btree_ids[iter->btree_id],
-				       iter->pos.inode,
-				       iter->pos.offset,
-				       (trans->iters_live & (1ULL << iter->idx)) ? " live" : "",
-				       (trans->iters_touched & (1ULL << iter->idx)) ? " touched" : "",
-				       iter->flags & BTREE_ITER_KEEP_UNTIL_COMMIT ? " keep" : "",
-				       (void *) iter->ip_allocated);
-			}
+		trans_for_each_iter(trans, iter) {
+			pr_err("iter: btree %s pos %llu:%llu%s%s%s %ps",
+			       bch2_btree_ids[iter->btree_id],
+			       iter->pos.inode,
+			       iter->pos.offset,
+			       (trans->iters_live & (1ULL << iter->idx)) ? " live" : "",
+			       (trans->iters_touched & (1ULL << iter->idx)) ? " touched" : "",
+			       iter->flags & BTREE_ITER_KEEP_UNTIL_COMMIT ? " keep" : "",
+			       (void *) iter->ip_allocated);
+		}
 
-			panic("trans iter oveflow\n");
-		}
+		panic("trans iter oveflow\n");
+#if 0
 		ret = bch2_trans_realloc_iters(trans, trans->size * 2);
 		if (ret)
 			return ERR_PTR(ret);
+#endif
 	}
 
 	idx = trans->nr_iters++;
@@ -2326,22 +2327,37 @@ void bch2_trans_reset(struct btree_trans *trans, unsigned flags)
 		bch2_btree_iter_traverse_all(trans);
 }
 
+static void bch2_trans_alloc_iters(struct btree_trans *trans, struct bch_fs *c)
+{
+	unsigned new_size = BTREE_ITER_MAX;
+	size_t iters_bytes	= sizeof(struct btree_iter) * new_size;
+	size_t updates_bytes	= sizeof(struct btree_insert_entry) * new_size;
+	void *p;
+
+	BUG_ON(trans->used_mempool);
+
+	p = this_cpu_xchg(c->btree_iters_bufs->iter, NULL) ?:
+		mempool_alloc(&trans->c->btree_iters_pool, GFP_NOFS);
+
+	trans->iters		= p; p += iters_bytes;
+	trans->updates		= p; p += updates_bytes;
+	trans->updates2		= p; p += updates_bytes;
+	trans->size		= new_size;
+}
+
 void bch2_trans_init(struct btree_trans *trans, struct bch_fs *c,
 		     unsigned expected_nr_iters,
 		     size_t expected_mem_bytes)
 {
-	/*
-	 * reallocating iterators currently completely breaks
-	 * bch2_trans_iter_put():
-	 */
-	expected_nr_iters = BTREE_ITER_MAX;
-
 	memset(trans, 0, sizeof(*trans));
 	trans->c	= c;
 	trans->ip	= _RET_IP_;
 
-	if (expected_nr_iters > trans->size)
-		bch2_trans_realloc_iters(trans, expected_nr_iters);
+	/*
+	 * reallocating iterators currently completely breaks
+	 * bch2_trans_iter_put(), we always allocate the max:
+	 */
+	bch2_trans_alloc_iters(trans, c);
 
 	if (expected_mem_bytes)
 		bch2_trans_preload_mem(trans, expected_mem_bytes);
@@ -2356,6 +2372,8 @@ void bch2_trans_init(struct btree_trans *trans, struct bch_fs *c,
 
 int bch2_trans_exit(struct btree_trans *trans)
 {
+	struct bch_fs *c = trans->c;
+
 	bch2_trans_unlock(trans);
 
 #ifdef CONFIG_BCACHEFS_DEBUG
@@ -2368,10 +2386,11 @@ int bch2_trans_exit(struct btree_trans *trans)
 	kfree(trans->fs_usage_deltas);
 	kfree(trans->mem);
 
-	if (trans->used_mempool)
+	trans->iters = this_cpu_xchg(c->btree_iters_bufs->iter, trans->iters);
+	if (trans->iters)
 		mempool_free(trans->iters, &trans->c->btree_iters_pool);
-	else
-		kfree(trans->iters);
+
 	trans->mem = (void *) 0x1;
 	trans->iters = (void *) 0x1;
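
Note how bch2_trans_alloc_iters() above carves the iters, updates, and updates2 arrays out of a single pool element with pointer bumps, so the element handed out by btree_iters_pool has to be sized for all three. The pool-init call is outside this diff, but it would need to look roughly like the following sketch (an assumption about its shape, not a line from this commit):

	/*
	 * Assumed sizing of btree_iters_pool: one element must hold the
	 * iterator array plus both update arrays, each BTREE_ITER_MAX long.
	 */
	mempool_init_kmalloc_pool(&c->btree_iters_pool, 1,
			sizeof(struct btree_iter) * BTREE_ITER_MAX +
			sizeof(struct btree_insert_entry) * BTREE_ITER_MAX +
			sizeof(struct btree_insert_entry) * BTREE_ITER_MAX);

Fixed sizing is also what lets the per-CPU slot cache a whole transaction's worth of iterators behind one pointer: every buffer that cycles through the slot has the same layout.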
--- a/fs/bcachefs/super.c
+++ b/fs/bcachefs/super.c
@@ -458,6 +458,7 @@ int bch2_fs_read_write_early(struct bch_fs *c)
 static void __bch2_fs_free(struct bch_fs *c)
 {
 	unsigned i;
+	int cpu;
 
 	for (i = 0; i < BCH_TIME_STAT_NR; i++)
 		bch2_time_stats_exit(&c->times[i]);
@@ -483,6 +484,12 @@ static void __bch2_fs_free(struct bch_fs *c)
 	free_percpu(c->usage[1]);
 	free_percpu(c->usage[0]);
 	kfree(c->usage_base);
+
+	if (c->btree_iters_bufs)
+		for_each_possible_cpu(cpu)
+			kfree(per_cpu_ptr(c->btree_iters_bufs, cpu)->iter);
+
+	free_percpu(c->btree_iters_bufs);
 	free_percpu(c->pcpu);
 	mempool_exit(&c->large_bkey_pool);
 	mempool_exit(&c->btree_bounce_pool);
@@ -762,6 +769,7 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
 			BIOSET_NEED_BVECS) ||
 	    !(c->pcpu = alloc_percpu(struct bch_fs_pcpu)) ||
 	    !(c->online_reserved = alloc_percpu(u64)) ||
+	    !(c->btree_iters_bufs = alloc_percpu(struct btree_iter_buf)) ||
 	    mempool_init_kvpmalloc_pool(&c->btree_bounce_pool, 1,
 					btree_bytes(c)) ||
 	    mempool_init_kmalloc_pool(&c->large_bkey_pool, 1, 2048) ||