Commit 36e9d698 authored by Kent Overstreet, committed by Kent Overstreet

bcachefs: Do updates in order they were queued up in

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
parent f9c55193
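Before this change, bch2_trans_update() insertion-sorted each new entry into trans->updates, so the update list was in btree order rather than queue order, and code that ran triggers mid-walk needed the marked/saw_non_marked machinery removed below to find entries appended during the walk. This commit keeps trans->updates in the order updates were queued and adds a side array of u8 indices, trans->updates_sorted, sorted by btree position at commit time; everything that needs btree order (write locking, per-leaf accounting, merging) walks the index array instead. A minimal standalone sketch of that pattern, with simplified types and illustrative names rather than bcachefs API:

```c
/*
 * Updates stay in the order they were queued; a side array of indices,
 * sorted by position, drives any iteration that needs sorted order.
 */
#include <stdio.h>
#include <string.h>

struct update { int pos; };             /* stand-in for btree_insert_entry */

static struct update updates[8];
static unsigned char updates_sorted[8]; /* u8 in the kernel */
static unsigned nr_updates;

/* append in queue order: cheap, and safe while the list is being walked */
static void queue_update(int pos)
{
        updates[nr_updates++] = (struct update) { .pos = pos };
}

/* insertion sort of indices, as in btree_trans_sort_updates() */
static void sort_updates(void)
{
        unsigned i, pos;

        for (i = 0; i < nr_updates; i++) {
                for (pos = 0; pos < i; pos++)
                        if (updates[i].pos <= updates[updates_sorted[pos]].pos)
                                break;

                memmove(&updates_sorted[pos + 1], &updates_sorted[pos],
                        (i - pos) * sizeof(updates_sorted[0]));
                updates_sorted[pos] = i;
        }
}

int main(void)
{
        queue_update(30); queue_update(10); queue_update(20);
        sort_updates();

        for (unsigned i = 0; i < nr_updates; i++)   /* prints 10 20 30 */
                printf("%d\n", updates[updates_sorted[i]].pos);
        return 0;
}
```

Sorting u8 indices instead of the entries themselves keeps the queue order intact and makes the sort's memmove()s cheap.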
@@ -1671,7 +1671,10 @@ int bch2_trans_iter_free_on_commit(struct btree_trans *trans,
 static int bch2_trans_realloc_iters(struct btree_trans *trans,
                                     unsigned new_size)
 {
-        void *new_iters, *new_updates;
+        void *new_iters, *new_updates, *new_sorted;
+        size_t iters_bytes;
+        size_t updates_bytes;
+        size_t sorted_bytes;
 
         new_size = roundup_pow_of_two(new_size);
@@ -1684,9 +1687,13 @@ static int bch2_trans_realloc_iters(struct btree_trans *trans,
         bch2_trans_unlock(trans);
 
-        new_iters = kmalloc(sizeof(struct btree_iter) * new_size +
-                            sizeof(struct btree_insert_entry) * (new_size + 4),
-                            GFP_NOFS);
+        iters_bytes   = sizeof(struct btree_iter) * new_size;
+        updates_bytes = sizeof(struct btree_insert_entry) * (new_size + 4);
+        sorted_bytes  = sizeof(u8) * (new_size + 4);
+
+        new_iters = kmalloc(iters_bytes +
+                            updates_bytes +
+                            sorted_bytes, GFP_NOFS);
         if (new_iters)
                 goto success;
@@ -1695,7 +1702,8 @@ static int bch2_trans_realloc_iters(struct btree_trans *trans,
         trans->used_mempool = true;
 success:
-        new_updates = new_iters + sizeof(struct btree_iter) * new_size;
+        new_updates = new_iters + iters_bytes;
+        new_sorted  = new_updates + updates_bytes;
 
         memcpy(new_iters, trans->iters,
                sizeof(struct btree_iter) * trans->nr_iters);
......@@ -1710,9 +1718,10 @@ static int bch2_trans_realloc_iters(struct btree_trans *trans,
if (trans->iters != trans->iters_onstack)
kfree(trans->iters);
trans->iters = new_iters;
trans->updates = new_updates;
trans->size = new_size;
trans->iters = new_iters;
trans->updates = new_updates;
trans->updates_sorted = new_sorted;
trans->size = new_size;
if (trans->iters_live) {
trace_trans_restart_iters_realloced(trans->ip, trans->size);
@@ -1958,6 +1967,7 @@ void bch2_trans_init(struct btree_trans *trans, struct bch_fs *c,
         trans->size            = ARRAY_SIZE(trans->iters_onstack);
         trans->iters           = trans->iters_onstack;
         trans->updates         = trans->updates_onstack;
+        trans->updates_sorted  = trans->updates_sorted_onstack;
         trans->fs_usage_deltas = NULL;
 
         if (expected_nr_iters > trans->size)
@@ -1982,3 +1992,18 @@ int bch2_trans_exit(struct btree_trans *trans)
 
         return trans->error ? -EIO : 0;
 }
+
+void bch2_fs_btree_iter_exit(struct bch_fs *c)
+{
+        mempool_exit(&c->btree_iters_pool);
+}
+
+int bch2_fs_btree_iter_init(struct bch_fs *c)
+{
+        unsigned nr = BTREE_ITER_MAX;
+
+        return mempool_init_kmalloc_pool(&c->btree_iters_pool, 1,
+                        sizeof(struct btree_iter) * nr +
+                        sizeof(struct btree_insert_entry) * (nr + 4) +
+                        sizeof(u8) * (nr + 4));
+}
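The iterators, updates, and sorted-index arrays share one allocation, both in bch2_trans_realloc_iters() above and in the mempool element size here. A userspace sketch of the carve-up, with malloc() standing in for kmalloc()/mempool and stub struct types (it assumes, as the kernel layout does, that placing the byte array last keeps every sub-array suitably aligned):

```c
#include <stdlib.h>

/* stand-ins for struct btree_iter / struct btree_insert_entry */
struct iter  { int dummy; };
struct entry { int dummy; };

struct trans_arrays {
        struct iter   *iters;
        struct entry  *updates;
        unsigned char *updates_sorted;  /* u8 in the kernel */
};

/*
 * Carve one malloc'd block into three consecutive arrays; note the
 * kernel sizes updates and updates_sorted at nr + 4 to leave slack
 * for trigger-generated updates.
 */
static int alloc_trans_arrays(struct trans_arrays *t, unsigned nr)
{
        size_t iters_bytes   = sizeof(struct iter) * nr;
        size_t updates_bytes = sizeof(struct entry) * (nr + 4);
        size_t sorted_bytes  = sizeof(unsigned char) * (nr + 4);
        char *p = malloc(iters_bytes + updates_bytes + sorted_bytes);

        if (!p)
                return -1;

        /* most strictly aligned array first, byte array last: */
        t->iters          = (struct iter *) p;
        t->updates        = (struct entry *) (p + iters_bytes);
        t->updates_sorted = (unsigned char *) (p + iters_bytes + updates_bytes);
        return 0;
}

int main(void)
{
        struct trans_arrays t;

        if (alloc_trans_arrays(&t, 64))
                return 1;
        free(t.iters);  /* iters points at the block start, like kfree(trans->iters) */
        return 0;
}
```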
@@ -303,4 +303,7 @@ void *bch2_trans_kmalloc(struct btree_trans *, size_t);
 void bch2_trans_init(struct btree_trans *, struct bch_fs *, unsigned, size_t);
 int bch2_trans_exit(struct btree_trans *);
 
+void bch2_fs_btree_iter_exit(struct bch_fs *);
+int bch2_fs_btree_iter_init(struct bch_fs *);
+
 #endif /* _BCACHEFS_BTREE_ITER_H */
@@ -264,8 +264,6 @@ struct btree_insert_entry {
         };
 
         bool                    deferred;
         bool                    triggered;
-        bool                    marked;
-
 };
 
 #define BTREE_ITER_MAX 64
@@ -294,6 +292,7 @@ struct btree_trans {
         struct btree_iter       *iters;
         struct btree_insert_entry *updates;
+        u8                      *updates_sorted;
 
         /* update path: */
         struct journal_res      journal_res;
@@ -305,6 +304,7 @@
         struct btree_iter       iters_onstack[2];
         struct btree_insert_entry updates_onstack[6];
+        u8                      updates_sorted_onstack[6];
 
         struct replicas_delta_list *fs_usage_deltas;
 };
@@ -119,8 +119,13 @@ int bch2_trans_commit(struct btree_trans *,
                       struct disk_reservation *,
                       u64 *, unsigned);
 
-struct btree_insert_entry *bch2_trans_update(struct btree_trans *,
-                                             struct btree_insert_entry);
+static inline void bch2_trans_update(struct btree_trans *trans,
+                                     struct btree_insert_entry entry)
+{
+        EBUG_ON(trans->nr_updates >= trans->nr_iters + 4);
+
+        trans->updates[trans->nr_updates++] = entry;
+}
 
 #define bch2_trans_do(_c, _journal_seq, _flags, _do)                    \
 ({                                                                      \
@@ -140,18 +145,6 @@ struct btree_insert_entry *bch2_trans_update(struct btree_trans *,
         _ret;                                                           \
 })
 
-/*
- * We sort transaction entries so that if multiple iterators point to the same
- * leaf node they'll be adjacent:
- */
-static inline bool same_leaf_as_prev(struct btree_trans *trans,
-                                     struct btree_insert_entry *i)
-{
-        return i != trans->updates &&
-                !i->deferred &&
-                i[0].iter->l[0].b == i[-1].iter->l[0].b;
-}
-
 #define __trans_next_update(_trans, _i, _filter)                        \
 ({                                                                      \
         while ((_i) < (_trans)->updates + (_trans->nr_updates) && !(_filter))\
@@ -171,8 +164,4 @@ static inline bool same_leaf_as_prev(struct btree_trans *trans,
 #define trans_for_each_update_iter(trans, i)                            \
         __trans_for_each_update(trans, i, !(i)->deferred)
 
-#define trans_for_each_update_leaf(trans, i)                            \
-        __trans_for_each_update(trans, i, !(i)->deferred &&             \
-                                !same_leaf_as_prev(trans, i))
-
 #endif /* _BCACHEFS_BTREE_UPDATE_H */
@@ -19,6 +19,26 @@
 #include <linux/sort.h>
 
+static inline bool same_leaf_as_prev(struct btree_trans *trans,
+                                     unsigned sorted_idx)
+{
+        struct btree_insert_entry *i = trans->updates +
+                trans->updates_sorted[sorted_idx];
+        struct btree_insert_entry *prev = sorted_idx
+                ? trans->updates + trans->updates_sorted[sorted_idx - 1]
+                : NULL;
+
+        return !i->deferred &&
+                prev &&
+                i->iter->l[0].b == prev->iter->l[0].b;
+}
+
+#define trans_for_each_update_sorted(_trans, _i, _iter)                 \
+        for (_iter = 0;                                                 \
+             _iter < _trans->nr_updates &&                              \
+             (_i = _trans->updates + _trans->updates_sorted[_iter], 1); \
+             _iter++)
+
 inline void bch2_btree_node_lock_for_insert(struct bch_fs *c, struct btree *b,
                                             struct btree_iter *iter)
 {
@@ -36,20 +56,21 @@ inline void bch2_btree_node_lock_for_insert(struct bch_fs *c, struct btree *b,
         bch2_btree_init_next(c, b, iter);
 }
 
-static void btree_trans_lock_write(struct bch_fs *c, struct btree_trans *trans)
+static void btree_trans_lock_write(struct btree_trans *trans, bool lock)
 {
+        struct bch_fs *c = trans->c;
         struct btree_insert_entry *i;
+        unsigned iter;
 
-        trans_for_each_update_leaf(trans, i)
-                bch2_btree_node_lock_for_insert(c, i->iter->l[0].b, i->iter);
-}
-
-static void btree_trans_unlock_write(struct btree_trans *trans)
-{
-        struct btree_insert_entry *i;
-
-        trans_for_each_update_leaf(trans, i)
-                bch2_btree_node_unlock_write(i->iter->l[0].b, i->iter);
+        trans_for_each_update_sorted(trans, i, iter) {
+                if (same_leaf_as_prev(trans, iter))
+                        continue;
+
+                if (lock)
+                        bch2_btree_node_lock_for_insert(c, i->iter->l[0].b, i->iter);
+                else
+                        bch2_btree_node_unlock_write(i->iter->l[0].b, i->iter);
+        }
 }
 
 static inline int btree_trans_cmp(struct btree_insert_entry l,
@@ -59,6 +80,30 @@ static inline int btree_trans_cmp(struct btree_insert_entry l,
                 btree_iter_cmp(l.iter, r.iter);
 }
 
+static inline void btree_trans_sort_updates(struct btree_trans *trans)
+{
+        struct btree_insert_entry *l, *r;
+        unsigned nr = 0, pos;
+
+        trans_for_each_update(trans, l) {
+                for (pos = 0; pos < nr; pos++) {
+                        r = trans->updates + trans->updates_sorted[pos];
+
+                        if (btree_trans_cmp(*l, *r) <= 0)
+                                break;
+                }
+
+                memmove(&trans->updates_sorted[pos + 1],
+                        &trans->updates_sorted[pos],
+                        (nr - pos) * sizeof(trans->updates_sorted[0]));
+
+                trans->updates_sorted[pos] = l - trans->updates;
+                nr++;
+        }
+
+        BUG_ON(nr != trans->nr_updates);
+}
+
 /* Inserting into a given leaf node (last stage of insert): */
 
 /* Handle overwrites and do insert, for non extents: */
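btree_trans_sort_updates() is an O(n²) insertion sort over at most nr_iters + 4 entries (bounded by BTREE_ITER_MAX + 4), so the quadratic cost is negligible. Its consumers, btree_trans_lock_write(), btree_trans_check_can_insert(), and the merge loop in __bch2_trans_commit(), all walk trans_for_each_update_sorted() and use same_leaf_as_prev() to act once per distinct leaf. A toy model of that once-per-leaf walk, with hypothetical integer leaf ids standing in for btree node pointers:

```c
#include <stdio.h>

int main(void)
{
        /* hypothetical leaf id per update, already in sorted (key) order */
        int leaf[] = { 7, 7, 3, 3, 9 };
        unsigned nr = sizeof(leaf) / sizeof(leaf[0]), i;

        for (i = 0; i < nr; i++) {
                if (i && leaf[i] == leaf[i - 1])
                        continue;               /* same leaf as prev: skip */
                printf("lock leaf %d\n", leaf[i]);      /* prints 7, 3, 9 */
        }
        return 0;
}
```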
@@ -488,12 +533,12 @@ static int btree_trans_check_can_insert(struct btree_trans *trans,
                                         struct btree_insert_entry **stopped_at)
 {
         struct btree_insert_entry *i;
-        unsigned u64s = 0;
+        unsigned iter, u64s = 0;
         int ret;
 
-        trans_for_each_update_iter(trans, i) {
+        trans_for_each_update_sorted(trans, i, iter) {
                 /* Multiple inserts might go to same leaf: */
-                if (!same_leaf_as_prev(trans, i))
+                if (!same_leaf_as_prev(trans, iter))
                         u64s = 0;
 
                 u64s += i->k->k.u64s;
@@ -542,7 +587,6 @@ static inline int do_btree_insert_at(struct btree_trans *trans,
         struct bch_fs *c = trans->c;
         struct bch_fs_usage_online *fs_usage = NULL;
         struct btree_insert_entry *i;
-        bool saw_non_marked;
         unsigned mark_flags = trans->flags & BTREE_INSERT_BUCKET_INVALIDATE
                 ? BCH_BUCKET_MARK_BUCKET_INVALIDATE
                 : 0;
@@ -551,35 +595,31 @@ static inline int do_btree_insert_at(struct btree_trans *trans,
         trans_for_each_update_iter(trans, i)
                 BUG_ON(i->iter->uptodate >= BTREE_ITER_NEED_RELOCK);
 
-        trans_for_each_update_iter(trans, i)
-                i->marked = false;
-
-        do {
-                saw_non_marked = false;
-
-                trans_for_each_update_iter(trans, i) {
-                        if (i->marked)
-                                continue;
-
-                        saw_non_marked = true;
-                        i->marked = true;
-
-                        if (update_has_triggers(trans, i) &&
-                            update_triggers_transactional(trans, i)) {
-                                ret = bch2_trans_mark_update(trans, i->iter, i->k);
-                                if (ret == -EINTR)
-                                        trace_trans_restart_mark(trans->ip);
-                                if (ret)
-                                        goto out_clear_replicas;
-                        }
-                }
-        } while (saw_non_marked);
+        /*
+         * note: running triggers will append more updates to the list of
+         * updates as we're walking it:
+         */
+        trans_for_each_update_iter(trans, i) {
+                if (update_has_triggers(trans, i) &&
+                    update_triggers_transactional(trans, i)) {
+                        ret = bch2_trans_mark_update(trans, i->iter, i->k);
+                        if (ret == -EINTR)
+                                trace_trans_restart_mark(trans->ip);
+                        if (ret)
+                                goto out_clear_replicas;
+                }
+        }
 
         trans_for_each_update(trans, i)
                 btree_insert_entry_checks(trans, i);
         bch2_btree_trans_verify_locks(trans);
 
-        btree_trans_lock_write(c, trans);
+        /*
+         * No more updates can be added - sort updates so we can take write
+         * locks in the correct order:
+         */
+        btree_trans_sort_updates(trans);
+
+        btree_trans_lock_write(trans, true);
 
         if (race_fault()) {
                 ret = -EINTR;
@@ -597,8 +637,7 @@ static inline int do_btree_insert_at(struct btree_trans *trans,
                 goto out;
 
         trans_for_each_update_iter(trans, i) {
-                if (i->deferred ||
-                    !btree_node_type_needs_gc(i->iter->btree_id))
+                if (!btree_node_type_needs_gc(i->iter->btree_id))
                         continue;
 
                 if (!fs_usage) {
@@ -664,7 +703,7 @@ static inline int do_btree_insert_at(struct btree_trans *trans,
                (trans->flags & BTREE_INSERT_JOURNAL_RESERVED) &&
                trans->journal_res.ref);
 
-        btree_trans_unlock_write(trans);
+        btree_trans_lock_write(trans, false);
 
         if (fs_usage) {
                 bch2_fs_usage_scratch_put(c, fs_usage);
@@ -689,19 +728,6 @@ int bch2_trans_commit_error(struct btree_trans *trans,
 {
         struct bch_fs *c = trans->c;
         unsigned flags = trans->flags;
-        struct btree_insert_entry *src, *dst;
-
-        src = dst = trans->updates;
-
-        while (src < trans->updates + trans->nr_updates) {
-                if (!src->triggered) {
-                        *dst = *src;
-                        dst++;
-                }
-                src++;
-        }
-
-        trans->nr_updates = dst - trans->updates;
 
         /*
          * BTREE_INSERT_NOUNLOCK means don't unlock _after_ successful btree
@@ -816,6 +842,7 @@ static int __bch2_trans_commit(struct btree_trans *trans,
 {
         struct bch_fs *c = trans->c;
         struct btree_insert_entry *i;
+        unsigned iter;
         int ret;
 
         trans_for_each_update_iter(trans, i) {
@@ -837,8 +864,10 @@ static int __bch2_trans_commit(struct btree_trans *trans,
         if (trans->flags & BTREE_INSERT_NOUNLOCK)
                 trans->nounlock = true;
 
-        trans_for_each_update_leaf(trans, i)
-                bch2_foreground_maybe_merge(c, i->iter, 0, trans->flags);
+        trans_for_each_update_sorted(trans, i, iter)
+                if (!same_leaf_as_prev(trans, iter))
+                        bch2_foreground_maybe_merge(c, i->iter,
+                                                    0, trans->flags);
 
         trans->nounlock = false;
@@ -858,7 +887,8 @@ int bch2_trans_commit(struct btree_trans *trans,
 {
         struct bch_fs *c = trans->c;
         struct btree_insert_entry *i = NULL;
-        unsigned orig_mem_top = trans->mem_top;
+        unsigned orig_nr_updates = trans->nr_updates;
+        unsigned orig_mem_top    = trans->mem_top;
         int ret = 0;
 
         if (!trans->nr_updates)
@@ -931,39 +961,20 @@ int bch2_trans_commit(struct btree_trans *trans,
 err:
         ret = bch2_trans_commit_error(trans, i, ret);
 
+        /* free updates and memory used by triggers, they'll be reexecuted: */
+        trans->nr_updates = orig_nr_updates;
+        trans->mem_top    = orig_mem_top;
+
         /* can't loop if it was passed in and we changed it: */
         if (unlikely(trans->flags & BTREE_INSERT_NO_CLEAR_REPLICAS) && !ret)
                 ret = -EINTR;
 
-        if (!ret) {
-                /* free memory used by triggers, they'll be reexecuted: */
-                trans->mem_top = orig_mem_top;
+        if (!ret)
                 goto retry;
-        }
 
         goto out;
 }
 
-struct btree_insert_entry *bch2_trans_update(struct btree_trans *trans,
-                                             struct btree_insert_entry entry)
-{
-        struct btree_insert_entry *i;
-
-        BUG_ON(trans->nr_updates >= trans->nr_iters + 4);
-
-        for (i = trans->updates;
-             i < trans->updates + trans->nr_updates;
-             i++)
-                if (btree_trans_cmp(entry, *i) < 0)
-                        break;
-
-        memmove(&i[1], &i[0],
-                (void *) &trans->updates[trans->nr_updates] - (void *) i);
-        trans->nr_updates++;
-        *i = entry;
-
-        return i;
-}
-
 /**
  * bch2_btree_insert - insert keys into the extent btree
  * @c: pointer to struct bch_fs
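With updates no longer kept sorted, and trigger-generated updates no longer tagged with .triggered and filtered out in bch2_trans_commit_error(), the error path instead rewinds wholesale: bch2_trans_commit() saves nr_updates and mem_top on entry and restores both before retrying, so transactional triggers reexecute and regenerate their updates and scratch allocations. A standalone model of that save/rewind/retry shape, with illustrative names and rand() standing in for a transient lock failure:

```c
#include <errno.h>
#include <stdlib.h>

struct trans {
        unsigned nr_updates;    /* queued updates */
        unsigned mem_top;       /* bump-allocator high-water mark */
};

/* one commit attempt: triggers append updates and allocate scratch memory */
static int try_commit(struct trans *t)
{
        t->nr_updates += 2;
        t->mem_top    += 64;
        return rand() % 4 ? -EINTR : 0; /* stand-in for a racy failure */
}

static int commit(struct trans *t)
{
        unsigned orig_nr_updates = t->nr_updates;
        unsigned orig_mem_top    = t->mem_top;
        int ret;

        do {
                ret = try_commit(t);
                if (ret) {
                        /* rewind updates and memory used by triggers,
                         * they'll be reexecuted on the next attempt: */
                        t->nr_updates = orig_nr_updates;
                        t->mem_top    = orig_mem_top;
                }
        } while (ret == -EINTR);        /* retry only transient failures */

        return ret;
}

int main(void)
{
        struct trans t = { 0, 0 };

        return commit(&t) ? 1 : 0;
}
```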
@@ -1358,11 +1358,8 @@ static int trans_get_key(struct btree_trans *trans,
         struct btree_insert_entry *i;
         int ret;
 
-        for (i = trans->updates;
-             i < trans->updates + trans->nr_updates;
-             i++)
-                if (!i->deferred &&
-                    i->iter->btree_id == btree_id &&
+        trans_for_each_update_iter(trans, i)
+                if (i->iter->btree_id == btree_id &&
                     (btree_node_type_is_extents(btree_id)
                      ? bkey_cmp(pos, bkey_start_pos(&i->k->k)) >= 0 &&
                        bkey_cmp(pos, i->k->k.p) < 0
@@ -1390,8 +1387,8 @@ static void *trans_update_key(struct btree_trans *trans,
                               struct btree_iter *iter,
                               unsigned u64s)
 {
+        struct btree_insert_entry *i;
         struct bkey_i *new_k;
-        unsigned i;
 
         new_k = bch2_trans_kmalloc(trans, u64s * sizeof(u64));
         if (IS_ERR(new_k))
@@ -1400,19 +1397,13 @@ static void *trans_update_key(struct btree_trans *trans,
         bkey_init(&new_k->k);
         new_k->k.p = iter->pos;
 
-        for (i = 0; i < trans->nr_updates; i++)
-                if (!trans->updates[i].deferred &&
-                    trans->updates[i].iter == iter) {
-                        trans->updates[i].k = new_k;
+        trans_for_each_update_iter(trans, i)
+                if (i->iter == iter) {
+                        i->k = new_k;
                         return new_k;
                 }
 
-        bch2_trans_update(trans, ((struct btree_insert_entry) {
-                .iter = iter,
-                .k = new_k,
-                .triggered = true,
-        }));
-
+        bch2_trans_update(trans, BTREE_INSERT_ENTRY(iter, new_k));
         return new_k;
 }
@@ -462,6 +462,7 @@ static void bch2_fs_free(struct bch_fs *c)
         bch2_fs_ec_exit(c);
         bch2_fs_encryption_exit(c);
         bch2_fs_io_exit(c);
+        bch2_fs_btree_iter_exit(c);
         bch2_fs_btree_cache_exit(c);
         bch2_fs_journal_exit(&c->journal);
         bch2_io_clock_exit(&c->io_clock[WRITE]);
@@ -474,7 +475,6 @@ static void bch2_fs_free(struct bch_fs *c)
         free_percpu(c->usage[0]);
         kfree(c->usage_base);
         free_percpu(c->pcpu);
-        mempool_exit(&c->btree_iters_pool);
         mempool_exit(&c->btree_bounce_pool);
         bioset_exit(&c->btree_bio);
         mempool_exit(&c->btree_interior_update_pool);
@@ -729,15 +729,12 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
             !(c->online_reserved = alloc_percpu(u64)) ||
             mempool_init_kvpmalloc_pool(&c->btree_bounce_pool, 1,
                                         btree_bytes(c)) ||
-            mempool_init_kmalloc_pool(&c->btree_iters_pool, 1,
-                        sizeof(struct btree_iter) * BTREE_ITER_MAX +
-                        sizeof(struct btree_insert_entry) *
-                        (BTREE_ITER_MAX + 4)) ||
             bch2_io_clock_init(&c->io_clock[READ]) ||
             bch2_io_clock_init(&c->io_clock[WRITE]) ||
             bch2_fs_journal_init(&c->journal) ||
             bch2_fs_replicas_init(c) ||
             bch2_fs_btree_cache_init(c) ||
+            bch2_fs_btree_iter_init(c) ||
             bch2_fs_io_init(c) ||
             bch2_fs_encryption_init(c) ||
             bch2_fs_compress_init(c) ||