Commit f7b6ca23 authored by Kent Overstreet, committed by Kent Overstreet

bcachefs: BTREE_ITER_WITH_KEY_CACHE

This is the start of cache coherency with the btree key cache - this
adds a btree iterator flag that causes lookups to also check the key
cache when we're iterating over the btree (not iterating over the key
cache).

Note that we could still race with another thread creating an item in
the key cache and updating it, since we aren't holding the key cache
locked if it wasn't found. The next patch for the update path will
address this by causing the transaction to restart if the key cache is
found to be dirty.
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
parent 45e4cd9e
...@@ -1964,13 +1964,13 @@ inline struct bkey_s_c bch2_btree_path_peek_slot(struct btree_path *path, struct ...@@ -1964,13 +1964,13 @@ inline struct bkey_s_c bch2_btree_path_peek_slot(struct btree_path *path, struct
struct bkey_s_c k; struct bkey_s_c k;
BUG_ON(path->uptodate != BTREE_ITER_UPTODATE);
if (!path->cached) { if (!path->cached) {
struct btree_path_level *l = path_l(path); struct btree_path_level *l = path_l(path);
struct bkey_packed *_k = struct bkey_packed *_k;
bch2_btree_node_iter_peek_all(&l->iter, l->b);
EBUG_ON(path->uptodate != BTREE_ITER_UPTODATE);
_k = bch2_btree_node_iter_peek_all(&l->iter, l->b);
k = _k ? bkey_disassemble(l->b, _k, u) : bkey_s_c_null; k = _k ? bkey_disassemble(l->b, _k, u) : bkey_s_c_null;
EBUG_ON(k.k && bkey_deleted(k.k) && bpos_cmp(k.k->p, path->pos) == 0); EBUG_ON(k.k && bkey_deleted(k.k) && bpos_cmp(k.k->p, path->pos) == 0);
...@@ -1980,12 +1980,15 @@ inline struct bkey_s_c bch2_btree_path_peek_slot(struct btree_path *path, struct ...@@ -1980,12 +1980,15 @@ inline struct bkey_s_c bch2_btree_path_peek_slot(struct btree_path *path, struct
} else { } else {
struct bkey_cached *ck = (void *) path->l[0].b; struct bkey_cached *ck = (void *) path->l[0].b;
EBUG_ON(path->btree_id != ck->key.btree_id || EBUG_ON(ck &&
bkey_cmp(path->pos, ck->key.pos)); (path->btree_id != ck->key.btree_id ||
bkey_cmp(path->pos, ck->key.pos)));
/* BTREE_ITER_CACHED_NOFILL? */ /* BTREE_ITER_CACHED_NOFILL|BTREE_ITER_CACHED_NOCREATE? */
if (unlikely(!ck->valid)) if (unlikely(!ck || !ck->valid))
goto hole; return bkey_s_c_null;
EBUG_ON(path->uptodate != BTREE_ITER_UPTODATE);
*u = ck->k->k; *u = ck->k->k;
k = bkey_i_to_s_c(ck->k); k = bkey_i_to_s_c(ck->k);
...@@ -2233,11 +2236,43 @@ struct bkey_s_c btree_trans_peek_journal(struct btree_trans *trans, ...@@ -2233,11 +2236,43 @@ struct bkey_s_c btree_trans_peek_journal(struct btree_trans *trans,
return k; return k;
} }
/*
 * Checks the btree key cache for a key at @pos in iter->btree_id and returns
 * it if present, or bkey_s_c_null.
 *
 * @iter: iterator walking the btree; its key_cache_path is created here on
 *        first use and kept on the iterator for later calls
 * @pos:  position to look up (callers in this file pass either iter->pos or
 *        the position of the key just found in the btree)
 *
 * Returns the key read from the key cache path's slot, bkey_s_c_null when
 * bch2_btree_key_cache_find() finds nothing for @pos, or bkey_s_c_err(ret)
 * when traversing the key cache path fails. Callers must therefore check
 * bkey_err() on any result whose .k is non-NULL.
 *
 * NOTE(review): when the lookup misses, this function returns without
 * acquiring any path on the key cache, so it does not by itself prevent
 * another thread from creating an entry afterwards - confirm how the update
 * path deals with that.
 */
static noinline
struct bkey_s_c btree_trans_peek_key_cache(struct btree_iter *iter, struct bpos pos)
{
struct btree_trans *trans = iter->trans;
struct bch_fs *c = trans->c;
/* Scratch bkey handed to bch2_btree_path_peek_slot() below */
struct bkey u;
int ret;
/* Existence check first: return before getting or moving any path on a miss */
if (!bch2_btree_key_cache_find(c, iter->btree_id, pos))
return bkey_s_c_null;
/*
 * Get the key cache path on first use only; it is requested with the
 * iterator's own flags plus BTREE_ITER_CACHED, and with the iterator's
 * intent setting.
 */
if (!iter->key_cache_path)
iter->key_cache_path = bch2_path_get(trans, iter->btree_id, pos,
iter->flags & BTREE_ITER_INTENT, 0,
iter->flags|BTREE_ITER_CACHED);
/*
 * Point the (possibly reused) path at pos. The return value is stored
 * back because set_pos hands back the path to use from here on.
 */
iter->key_cache_path = bch2_btree_path_set_pos(trans, iter->key_cache_path, pos,
iter->flags & BTREE_ITER_INTENT);
/* Traverse the path; a failure is reported to the caller as an error key */
ret = bch2_btree_path_traverse(trans, iter->key_cache_path, iter->flags|BTREE_ITER_CACHED);
if (unlikely(ret))
return bkey_s_c_err(ret);
/*
 * NOTE(review): presumably marks that the caller relies on this path
 * staying locked while it uses the returned key - confirm against the
 * relock/verify code.
 */
iter->key_cache_path->should_be_locked = true;
/*
 * NOTE(review): u goes out of scope on return; the cached branch of
 * peek_slot shown in this diff appears to return a key pointing at the
 * cache entry's own key rather than at u - confirm before relying on the
 * returned key's lifetime.
 */
return bch2_btree_path_peek_slot(iter->key_cache_path, &u);
}
static struct bkey_s_c __bch2_btree_iter_peek(struct btree_iter *iter, struct bpos search_key) static struct bkey_s_c __bch2_btree_iter_peek(struct btree_iter *iter, struct bpos search_key)
{ {
struct btree_trans *trans = iter->trans; struct btree_trans *trans = iter->trans;
struct bkey_i *next_update; struct bkey_i *next_update;
struct bkey_s_c k; struct bkey_s_c k, k2;
int ret; int ret;
EBUG_ON(iter->path->cached || iter->path->level); EBUG_ON(iter->path->cached || iter->path->level);
...@@ -2255,8 +2290,24 @@ static struct bkey_s_c __bch2_btree_iter_peek(struct btree_iter *iter, struct bp ...@@ -2255,8 +2290,24 @@ static struct bkey_s_c __bch2_btree_iter_peek(struct btree_iter *iter, struct bp
goto out; goto out;
} }
iter->path->should_be_locked = true;
k = btree_path_level_peek_all(trans->c, &iter->path->l[0], &iter->k); k = btree_path_level_peek_all(trans->c, &iter->path->l[0], &iter->k);
if (unlikely(iter->flags & BTREE_ITER_WITH_KEY_CACHE) &&
k.k &&
(k2 = btree_trans_peek_key_cache(iter, k.k->p)).k) {
ret = bkey_err(k2);
if (ret) {
k = k2;
bch2_btree_iter_set_pos(iter, iter->pos);
goto out;
}
k = k2;
iter->k = *k.k;
}
if (unlikely(iter->flags & BTREE_ITER_WITH_JOURNAL)) if (unlikely(iter->flags & BTREE_ITER_WITH_JOURNAL))
k = btree_trans_peek_journal(trans, iter, k); k = btree_trans_peek_journal(trans, iter, k);
...@@ -2603,6 +2654,13 @@ struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *iter) ...@@ -2603,6 +2654,13 @@ struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *iter)
(k = btree_trans_peek_slot_journal(trans, iter)).k) (k = btree_trans_peek_slot_journal(trans, iter)).k)
goto out; goto out;
if (unlikely(iter->flags & BTREE_ITER_WITH_KEY_CACHE) &&
(k = btree_trans_peek_key_cache(iter, iter->pos)).k) {
if (!bkey_err(k))
iter->k = *k.k;
goto out;
}
k = bch2_btree_path_peek_slot(iter->path, &iter->k); k = bch2_btree_path_peek_slot(iter->path, &iter->k);
} else { } else {
struct bpos next; struct bpos next;
...@@ -2806,8 +2864,12 @@ void bch2_trans_iter_exit(struct btree_trans *trans, struct btree_iter *iter) ...@@ -2806,8 +2864,12 @@ void bch2_trans_iter_exit(struct btree_trans *trans, struct btree_iter *iter)
if (iter->update_path) if (iter->update_path)
bch2_path_put(trans, iter->update_path, bch2_path_put(trans, iter->update_path,
iter->flags & BTREE_ITER_INTENT); iter->flags & BTREE_ITER_INTENT);
if (iter->key_cache_path)
bch2_path_put(trans, iter->key_cache_path,
iter->flags & BTREE_ITER_INTENT);
iter->path = NULL; iter->path = NULL;
iter->update_path = NULL; iter->update_path = NULL;
iter->key_cache_path = NULL;
} }
static void __bch2_trans_iter_init(struct btree_trans *trans, static void __bch2_trans_iter_init(struct btree_trans *trans,
...@@ -2834,12 +2896,16 @@ static void __bch2_trans_iter_init(struct btree_trans *trans, ...@@ -2834,12 +2896,16 @@ static void __bch2_trans_iter_init(struct btree_trans *trans,
if (trans->journal_replay_not_finished) if (trans->journal_replay_not_finished)
flags |= BTREE_ITER_WITH_JOURNAL; flags |= BTREE_ITER_WITH_JOURNAL;
if (!btree_id_cached(trans->c, btree_id)) if (!btree_id_cached(trans->c, btree_id)) {
flags &= ~BTREE_ITER_CACHED; flags &= ~BTREE_ITER_CACHED;
flags &= ~BTREE_ITER_WITH_KEY_CACHE;
} else if (!(flags & BTREE_ITER_CACHED))
flags |= BTREE_ITER_WITH_KEY_CACHE;
iter->trans = trans; iter->trans = trans;
iter->path = NULL; iter->path = NULL;
iter->update_path = NULL; iter->update_path = NULL;
iter->key_cache_path = NULL;
iter->btree_id = btree_id; iter->btree_id = btree_id;
iter->min_depth = depth; iter->min_depth = depth;
iter->flags = flags; iter->flags = flags;
...@@ -2887,6 +2953,7 @@ void bch2_trans_copy_iter(struct btree_iter *dst, struct btree_iter *src) ...@@ -2887,6 +2953,7 @@ void bch2_trans_copy_iter(struct btree_iter *dst, struct btree_iter *src)
__btree_path_get(src->path, src->flags & BTREE_ITER_INTENT); __btree_path_get(src->path, src->flags & BTREE_ITER_INTENT);
if (src->update_path) if (src->update_path)
__btree_path_get(src->update_path, src->flags & BTREE_ITER_INTENT); __btree_path_get(src->update_path, src->flags & BTREE_ITER_INTENT);
dst->key_cache_path = NULL;
} }
void *bch2_trans_kmalloc(struct btree_trans *trans, size_t size) void *bch2_trans_kmalloc(struct btree_trans *trans, size_t size)
......
...@@ -50,11 +50,6 @@ static inline struct btree *btree_node_parent(struct btree_path *path, ...@@ -50,11 +50,6 @@ static inline struct btree *btree_node_parent(struct btree_path *path,
return btree_path_node(path, b->c.level + 1); return btree_path_node(path, b->c.level + 1);
} }
/*
 * Reports the iterator's error state: returns -EIO when BTREE_ITER_ERROR is
 * set in iter->flags, and 0 otherwise.
 *
 * NOTE(review): this helper appears only on the removed side of this diff,
 * and the same change drops BTREE_ITER_ERROR from the flag definitions -
 * confirm there are no remaining callers.
 */
static inline int btree_iter_err(const struct btree_iter *iter)
{
return iter->flags & BTREE_ITER_ERROR ? -EIO : 0;
}
/* Iterate over paths within a transaction: */ /* Iterate over paths within a transaction: */
void __bch2_btree_trans_sort_paths(struct btree_trans *); void __bch2_btree_trans_sort_paths(struct btree_trans *);
......
...@@ -209,19 +209,20 @@ static int btree_key_cache_fill(struct btree_trans *trans, ...@@ -209,19 +209,20 @@ static int btree_key_cache_fill(struct btree_trans *trans,
struct btree_path *ck_path, struct btree_path *ck_path,
struct bkey_cached *ck) struct bkey_cached *ck)
{ {
struct btree_iter iter; struct btree_path *path;
struct bkey_s_c k; struct bkey_s_c k;
unsigned new_u64s = 0; unsigned new_u64s = 0;
struct bkey_i *new_k = NULL; struct bkey_i *new_k = NULL;
struct bkey u;
int ret; int ret;
bch2_trans_iter_init(trans, &iter, ck->key.btree_id, path = bch2_path_get(trans, ck->key.btree_id, ck->key.pos, 0, 0, 0);
ck->key.pos, BTREE_ITER_SLOTS); ret = bch2_btree_path_traverse(trans, path, 0);
k = bch2_btree_iter_peek_slot(&iter);
ret = bkey_err(k);
if (ret) if (ret)
goto err; goto err;
k = bch2_btree_path_peek_slot(path, &u);
if (!bch2_btree_node_relock(trans, ck_path, 0)) { if (!bch2_btree_node_relock(trans, ck_path, 0)) {
trace_trans_restart_relock_key_cache_fill(trans->fn, trace_trans_restart_relock_key_cache_fill(trans->fn,
_THIS_IP_, ck_path->btree_id, &ck_path->pos); _THIS_IP_, ck_path->btree_id, &ck_path->pos);
...@@ -262,9 +263,9 @@ static int btree_key_cache_fill(struct btree_trans *trans, ...@@ -262,9 +263,9 @@ static int btree_key_cache_fill(struct btree_trans *trans,
bch2_btree_node_unlock_write(trans, ck_path, ck_path->l[0].b); bch2_btree_node_unlock_write(trans, ck_path, ck_path->l[0].b);
/* We're not likely to need this iterator again: */ /* We're not likely to need this iterator again: */
set_btree_iter_dontneed(&iter); path->preserve = false;
err: err:
bch2_trans_iter_exit(trans, &iter); bch2_path_put(trans, path, 0);
return ret; return ret;
} }
...@@ -385,6 +386,8 @@ static int btree_key_cache_flush_pos(struct btree_trans *trans, ...@@ -385,6 +386,8 @@ static int btree_key_cache_flush_pos(struct btree_trans *trans,
BTREE_ITER_CACHED_NOFILL| BTREE_ITER_CACHED_NOFILL|
BTREE_ITER_CACHED_NOCREATE| BTREE_ITER_CACHED_NOCREATE|
BTREE_ITER_INTENT); BTREE_ITER_INTENT);
b_iter.flags &= ~BTREE_ITER_WITH_KEY_CACHE;
ret = bch2_btree_iter_traverse(&c_iter); ret = bch2_btree_iter_traverse(&c_iter);
if (ret) if (ret)
goto out; goto out;
......
...@@ -202,10 +202,10 @@ struct btree_node_iter { ...@@ -202,10 +202,10 @@ struct btree_node_iter {
*/ */
#define BTREE_ITER_IS_EXTENTS (1 << 4) #define BTREE_ITER_IS_EXTENTS (1 << 4)
#define BTREE_ITER_NOT_EXTENTS (1 << 5) #define BTREE_ITER_NOT_EXTENTS (1 << 5)
#define BTREE_ITER_ERROR (1 << 6) #define BTREE_ITER_CACHED (1 << 6)
#define BTREE_ITER_CACHED (1 << 7) #define BTREE_ITER_CACHED_NOFILL (1 << 7)
#define BTREE_ITER_CACHED_NOFILL (1 << 8) #define BTREE_ITER_CACHED_NOCREATE (1 << 8)
#define BTREE_ITER_CACHED_NOCREATE (1 << 9) #define BTREE_ITER_WITH_KEY_CACHE (1 << 9)
#define BTREE_ITER_WITH_UPDATES (1 << 10) #define BTREE_ITER_WITH_UPDATES (1 << 10)
#define BTREE_ITER_WITH_JOURNAL (1 << 11) #define BTREE_ITER_WITH_JOURNAL (1 << 11)
#define __BTREE_ITER_ALL_SNAPSHOTS (1 << 12) #define __BTREE_ITER_ALL_SNAPSHOTS (1 << 12)
...@@ -277,6 +277,7 @@ struct btree_iter { ...@@ -277,6 +277,7 @@ struct btree_iter {
struct btree_trans *trans; struct btree_trans *trans;
struct btree_path *path; struct btree_path *path;
struct btree_path *update_path; struct btree_path *update_path;
struct btree_path *key_cache_path;
enum btree_id btree_id:4; enum btree_id btree_id:4;
unsigned min_depth:4; unsigned min_depth:4;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment