Commit 45e4dcba authored by Kent Overstreet, committed by Kent Overstreet

bcachefs: Inode create optimization

On workloads that do a lot of multithreaded creates all at once, lock
contention on the inodes btree turns out to still be an issue.

This patch adds a small buffer of inode numbers that are known to be
free, so that we can avoid touching the btree on every create. Also,
this changes inode creates to update via the btree key cache for the
initial create.
Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
parent b16fa0ba
...@@ -802,6 +802,10 @@ struct bch_fs { ...@@ -802,6 +802,10 @@ struct bch_fs {
struct mutex verify_lock; struct mutex verify_lock;
#endif #endif
struct mutex inode_create_lock;
unsigned unused_inodes_nr;
u64 unused_inodes[64];
u32 unused_inodes_gens[64];
u64 unused_inode_hint; u64 unused_inode_hint;
/* /*
......
...@@ -34,9 +34,7 @@ int bch2_create_trans(struct btree_trans *trans, u64 dir_inum, ...@@ -34,9 +34,7 @@ int bch2_create_trans(struct btree_trans *trans, u64 dir_inum,
if (!name) if (!name)
new_inode->bi_flags |= BCH_INODE_UNLINKED; new_inode->bi_flags |= BCH_INODE_UNLINKED;
ret = bch2_inode_create(trans, new_inode, ret = bch2_inode_create(trans, new_inode);
BLOCKDEV_INODE_MAX, 0,
&c->unused_inode_hint);
if (ret) if (ret)
goto err; goto err;
......
...@@ -361,71 +361,120 @@ static inline u32 bkey_generation(struct bkey_s_c k) ...@@ -361,71 +361,120 @@ static inline u32 bkey_generation(struct bkey_s_c k)
} }
} }
int bch2_inode_create(struct btree_trans *trans, static int scan_free_inums(struct btree_trans *trans)
struct bch_inode_unpacked *inode_u,
u64 min, u64 max, u64 *hint)
{ {
struct bkey_inode_buf *inode_p; struct bch_fs *c = trans->c;
struct btree_iter *iter = NULL; struct btree_iter *iter = NULL;
struct bkey_s_c k; struct bkey_s_c k;
u64 start; u64 min = BLOCKDEV_INODE_MAX;
int ret; u64 max = c->opts.inodes_32bit
? S32_MAX : S64_MAX;
if (!max) u64 start = max(min, READ_ONCE(c->unused_inode_hint));
max = ULLONG_MAX; int ret = 0;
if (trans->c->opts.inodes_32bit) iter = bch2_trans_get_iter(trans, BTREE_ID_INODES, POS(0, start),
max = min_t(u64, max, U32_MAX); BTREE_ITER_SLOTS);
if (IS_ERR(iter))
return PTR_ERR(iter);
again:
for_each_btree_key_continue(iter, BTREE_ITER_SLOTS, k, ret) {
if (bkey_cmp(iter->pos, POS(0, max)) > 0)
break;
start = READ_ONCE(*hint); /*
* This doesn't check the btree key cache, but we don't care:
* we have to recheck with an intent lock held on the slot we're
* inserting to anyways:
*/
if (k.k->type != KEY_TYPE_inode) {
if (c->unused_inodes_nr < ARRAY_SIZE(c->unused_inodes)) {
c->unused_inodes[c->unused_inodes_nr] = k.k->p.offset;
c->unused_inodes_gens[c->unused_inodes_nr] = bkey_generation(k);
c->unused_inodes_nr++;
}
if (c->unused_inodes_nr == ARRAY_SIZE(c->unused_inodes))
goto out;
}
}
if (start >= max || start < min) if (!ret && start != min) {
max = start;
start = min; start = min;
bch2_btree_iter_set_pos(iter, POS(0, start));
goto again;
}
out:
c->unused_inode_hint = iter->pos.offset;
bch2_trans_iter_put(trans, iter);
return ret;
}
int bch2_inode_create(struct btree_trans *trans,
struct bch_inode_unpacked *inode_u)
{
struct bch_fs *c = trans->c;
struct bkey_inode_buf *inode_p;
struct btree_iter *iter = NULL;
struct bkey_s_c k;
u64 inum;
u32 generation;
int ret = 0;
inode_p = bch2_trans_kmalloc(trans, sizeof(*inode_p)); inode_p = bch2_trans_kmalloc(trans, sizeof(*inode_p));
if (IS_ERR(inode_p)) if (IS_ERR(inode_p))
return PTR_ERR(inode_p); return PTR_ERR(inode_p);
again:
for_each_btree_key(trans, iter, BTREE_ID_INODES, POS(0, start),
BTREE_ITER_SLOTS|BTREE_ITER_INTENT, k, ret) {
if (bkey_cmp(iter->pos, POS(0, max)) > 0)
break;
/* iter = bch2_trans_get_iter(trans, BTREE_ID_INODES, POS_MIN,
* There's a potential cache coherency issue with the btree key BTREE_ITER_CACHED|
* cache code here - we're iterating over the btree, skipping BTREE_ITER_INTENT);
* that cache. We should never see an empty slot that isn't if (IS_ERR(iter))
* actually empty due to a pending update in the key cache return PTR_ERR(iter);
* because the update that creates the inode isn't done with a retry:
* cached iterator, but - better safe than sorry, check the if (!mutex_trylock(&c->inode_create_lock)) {
* cache before using a slot: bch2_trans_unlock(trans);
*/ mutex_lock(&c->inode_create_lock);
if (k.k->type != KEY_TYPE_inode && if (!bch2_trans_relock(trans)) {
!bch2_btree_key_cache_find(trans->c, BTREE_ID_INODES, iter->pos)) mutex_unlock(&c->inode_create_lock);
goto found_slot; ret = -EINTR;
goto err;
}
} }
bch2_trans_iter_put(trans, iter); if (!c->unused_inodes_nr)
ret = scan_free_inums(trans);
if (!ret && !c->unused_inodes_nr)
ret = -ENOSPC;
if (!ret) {
--c->unused_inodes_nr;
inum = c->unused_inodes[c->unused_inodes_nr];
generation = c->unused_inodes_gens[c->unused_inodes_nr];
}
mutex_unlock(&c->inode_create_lock);
if (ret) if (ret)
return ret; goto err;
if (start != min) { bch2_btree_iter_set_pos(iter, POS(0, inum));
/* Retry from start */
start = min; /* Recheck that the slot is free with an intent lock held: */
goto again; k = bch2_btree_iter_peek_cached(iter);
} ret = bkey_err(k);
if (ret)
goto err;
if (k.k->type == KEY_TYPE_inode)
goto retry;
return -ENOSPC; inode_u->bi_inum = inum;
found_slot: inode_u->bi_generation = generation;
*hint = k.k->p.offset;
inode_u->bi_inum = k.k->p.offset;
inode_u->bi_generation = bkey_generation(k);
bch2_inode_pack(inode_p, inode_u); bch2_inode_pack(inode_p, inode_u);
bch2_trans_update(trans, iter, &inode_p->inode.k_i, 0); bch2_trans_update(trans, iter, &inode_p->inode.k_i, 0);
err:
bch2_trans_iter_put(trans, iter); bch2_trans_iter_put(trans, iter);
return 0; return ret;
} }
int bch2_inode_rm(struct bch_fs *c, u64 inode_nr) int bch2_inode_rm(struct bch_fs *c, u64 inode_nr)
......
...@@ -60,9 +60,7 @@ void bch2_inode_init(struct bch_fs *, struct bch_inode_unpacked *, ...@@ -60,9 +60,7 @@ void bch2_inode_init(struct bch_fs *, struct bch_inode_unpacked *,
uid_t, gid_t, umode_t, dev_t, uid_t, gid_t, umode_t, dev_t,
struct bch_inode_unpacked *); struct bch_inode_unpacked *);
int bch2_inode_create(struct btree_trans *, int bch2_inode_create(struct btree_trans *, struct bch_inode_unpacked *);
struct bch_inode_unpacked *,
u64, u64, u64 *);
int bch2_inode_rm(struct bch_fs *, u64); int bch2_inode_rm(struct bch_fs *, u64);
......
...@@ -703,6 +703,8 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts) ...@@ -703,6 +703,8 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
seqcount_init(&c->usage_lock); seqcount_init(&c->usage_lock);
mutex_init(&c->inode_create_lock);
c->copy_gc_enabled = 1; c->copy_gc_enabled = 1;
c->rebalance.enabled = 1; c->rebalance.enabled = 1;
c->promote_whole_extents = true; c->promote_whole_extents = true;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment