Commit 57943511 authored by Kent Overstreet's avatar Kent Overstreet

bcache: Refactor btree io

The most significant change is that btree reads are now done
synchronously, instead of asynchronously and doing the post read stuff
from a workqueue.

This was originally done because we can't block on IO under
generic_make_request(). But - we already have a mechanism to punt cache
lookups to workqueue if needed, so if we just use that we don't have to
deal with the complexity of doing things asynchronously.

The main benefit is this makes the locking situation saner; we can hold
our write lock on the btree node until we're finished reading it, and we
don't need that btree_node_read_done() flag anymore.

Also, for writes, btree_write() was broken out into btree_node_write()
and btree_leaf_dirty() - the old code with the boolean argument was dumb
and confusing.

The prio_blocked mechanism was improved a bit too, now the only counter
is in struct btree_write, we don't mess with transfering a count from
struct btree anymore.

This required changing garbage collection to block prios at the start
and unblock when it finishes, which is cleaner than what it was doing
anyways (the old code had mostly the same effect, but was doing it in a
convoluted way)

And the btree iter btree_node_read_done() uses was converted to a real
mempool.
Signed-off-by: default avatarKent Overstreet <koverstreet@google.com>
parent 119ba0f8
...@@ -819,10 +819,9 @@ struct cache_set { ...@@ -819,10 +819,9 @@ struct cache_set {
/* /*
* A btree node on disk could have too many bsets for an iterator to fit * A btree node on disk could have too many bsets for an iterator to fit
* on the stack - this is a single element mempool for btree_read_work() * on the stack - have to dynamically allocate them
*/ */
struct mutex fill_lock; mempool_t *fill_iter;
struct btree_iter *fill_iter;
/* /*
* btree_sort() is a merge sort and requires temporary space - single * btree_sort() is a merge sort and requires temporary space - single
......
This diff is collapsed.
...@@ -102,7 +102,6 @@ ...@@ -102,7 +102,6 @@
#include "debug.h" #include "debug.h"
struct btree_write { struct btree_write {
struct closure *owner;
atomic_t *journal; atomic_t *journal;
/* If btree_split() frees a btree node, it writes a new pointer to that /* If btree_split() frees a btree node, it writes a new pointer to that
...@@ -142,16 +141,12 @@ struct btree { ...@@ -142,16 +141,12 @@ struct btree {
*/ */
struct bset_tree sets[MAX_BSETS]; struct bset_tree sets[MAX_BSETS];
/* Used to refcount bio splits, also protects b->bio */ /* For outstanding btree writes, used as a lock - protects write_idx */
struct closure_with_waitlist io; struct closure_with_waitlist io;
/* Gets transferred to w->prio_blocked - see the comment there */
int prio_blocked;
struct list_head list; struct list_head list;
struct delayed_work work; struct delayed_work work;
uint64_t io_start_time;
struct btree_write writes[2]; struct btree_write writes[2];
struct bio *bio; struct bio *bio;
}; };
...@@ -164,13 +159,11 @@ static inline void set_btree_node_ ## flag(struct btree *b) \ ...@@ -164,13 +159,11 @@ static inline void set_btree_node_ ## flag(struct btree *b) \
{ set_bit(BTREE_NODE_ ## flag, &b->flags); } \ { set_bit(BTREE_NODE_ ## flag, &b->flags); } \
enum btree_flags { enum btree_flags {
BTREE_NODE_read_done,
BTREE_NODE_io_error, BTREE_NODE_io_error,
BTREE_NODE_dirty, BTREE_NODE_dirty,
BTREE_NODE_write_idx, BTREE_NODE_write_idx,
}; };
BTREE_FLAG(read_done);
BTREE_FLAG(io_error); BTREE_FLAG(io_error);
BTREE_FLAG(dirty); BTREE_FLAG(dirty);
BTREE_FLAG(write_idx); BTREE_FLAG(write_idx);
...@@ -293,9 +286,7 @@ static inline void rw_unlock(bool w, struct btree *b) ...@@ -293,9 +286,7 @@ static inline void rw_unlock(bool w, struct btree *b)
#ifdef CONFIG_BCACHE_EDEBUG #ifdef CONFIG_BCACHE_EDEBUG
unsigned i; unsigned i;
if (w && if (w && b->key.ptr[0])
b->key.ptr[0] &&
btree_node_read_done(b))
for (i = 0; i <= b->nsets; i++) for (i = 0; i <= b->nsets; i++)
bch_check_key_order(b, b->sets[i].data); bch_check_key_order(b, b->sets[i].data);
#endif #endif
...@@ -370,9 +361,9 @@ static inline bool should_split(struct btree *b) ...@@ -370,9 +361,9 @@ static inline bool should_split(struct btree *b)
> btree_blocks(b)); > btree_blocks(b));
} }
void bch_btree_read_done(struct closure *); void bch_btree_node_read(struct btree *);
void bch_btree_read(struct btree *); void bch_btree_node_read_done(struct btree *);
void bch_btree_write(struct btree *b, bool now, struct btree_op *op); void bch_btree_node_write(struct btree *, struct closure *);
void bch_cannibalize_unlock(struct cache_set *, struct closure *); void bch_cannibalize_unlock(struct cache_set *, struct closure *);
void bch_btree_set_root(struct btree *); void bch_btree_set_root(struct btree *);
......
...@@ -144,7 +144,7 @@ void bch_btree_verify(struct btree *b, struct bset *new) ...@@ -144,7 +144,7 @@ void bch_btree_verify(struct btree *b, struct bset *new)
v->written = 0; v->written = 0;
v->level = b->level; v->level = b->level;
bch_btree_read(v); bch_btree_node_read(v);
closure_wait_event(&v->io.wait, &cl, closure_wait_event(&v->io.wait, &cl,
atomic_read(&b->io.cl.remaining) == -1); atomic_read(&b->io.cl.remaining) == -1);
...@@ -512,7 +512,7 @@ static ssize_t btree_fuzz(struct kobject *k, struct kobj_attribute *a, ...@@ -512,7 +512,7 @@ static ssize_t btree_fuzz(struct kobject *k, struct kobj_attribute *a,
bch_btree_sort(b); bch_btree_sort(b);
fill->written = 0; fill->written = 0;
bch_btree_read_done(&fill->io.cl); bch_btree_node_read_done(fill);
if (b->sets[0].data->keys != fill->sets[0].data->keys || if (b->sets[0].data->keys != fill->sets[0].data->keys ||
memcmp(b->sets[0].data->start, memcmp(b->sets[0].data->start,
......
...@@ -384,7 +384,7 @@ static void btree_flush_write(struct cache_set *c) ...@@ -384,7 +384,7 @@ static void btree_flush_write(struct cache_set *c)
return; return;
found: found:
if (btree_node_dirty(best)) if (btree_node_dirty(best))
bch_btree_write(best, true, NULL); bch_btree_node_write(best, NULL);
rw_unlock(true, best); rw_unlock(true, best);
} }
......
...@@ -1255,9 +1255,10 @@ static void cache_set_free(struct closure *cl) ...@@ -1255,9 +1255,10 @@ static void cache_set_free(struct closure *cl)
free_pages((unsigned long) c->uuids, ilog2(bucket_pages(c))); free_pages((unsigned long) c->uuids, ilog2(bucket_pages(c)));
free_pages((unsigned long) c->sort, ilog2(bucket_pages(c))); free_pages((unsigned long) c->sort, ilog2(bucket_pages(c)));
kfree(c->fill_iter);
if (c->bio_split) if (c->bio_split)
bioset_free(c->bio_split); bioset_free(c->bio_split);
if (c->fill_iter)
mempool_destroy(c->fill_iter);
if (c->bio_meta) if (c->bio_meta)
mempool_destroy(c->bio_meta); mempool_destroy(c->bio_meta);
if (c->search) if (c->search)
...@@ -1295,7 +1296,7 @@ static void cache_set_flush(struct closure *cl) ...@@ -1295,7 +1296,7 @@ static void cache_set_flush(struct closure *cl)
/* Should skip this if we're unregistering because of an error */ /* Should skip this if we're unregistering because of an error */
list_for_each_entry(b, &c->btree_cache, list) list_for_each_entry(b, &c->btree_cache, list)
if (btree_node_dirty(b)) if (btree_node_dirty(b))
bch_btree_write(b, true, NULL); bch_btree_node_write(b, NULL);
closure_return(cl); closure_return(cl);
} }
...@@ -1374,7 +1375,6 @@ struct cache_set *bch_cache_set_alloc(struct cache_sb *sb) ...@@ -1374,7 +1375,6 @@ struct cache_set *bch_cache_set_alloc(struct cache_sb *sb)
BTREE_MAX_PAGES); BTREE_MAX_PAGES);
mutex_init(&c->bucket_lock); mutex_init(&c->bucket_lock);
mutex_init(&c->fill_lock);
mutex_init(&c->sort_lock); mutex_init(&c->sort_lock);
spin_lock_init(&c->sort_time_lock); spin_lock_init(&c->sort_time_lock);
closure_init_unlocked(&c->sb_write); closure_init_unlocked(&c->sb_write);
...@@ -1400,8 +1400,8 @@ struct cache_set *bch_cache_set_alloc(struct cache_sb *sb) ...@@ -1400,8 +1400,8 @@ struct cache_set *bch_cache_set_alloc(struct cache_sb *sb)
!(c->bio_meta = mempool_create_kmalloc_pool(2, !(c->bio_meta = mempool_create_kmalloc_pool(2,
sizeof(struct bbio) + sizeof(struct bio_vec) * sizeof(struct bbio) + sizeof(struct bio_vec) *
bucket_pages(c))) || bucket_pages(c))) ||
!(c->fill_iter = mempool_create_kmalloc_pool(1, iter_size)) ||
!(c->bio_split = bioset_create(4, offsetof(struct bbio, bio))) || !(c->bio_split = bioset_create(4, offsetof(struct bbio, bio))) ||
!(c->fill_iter = kmalloc(iter_size, GFP_KERNEL)) ||
!(c->sort = alloc_bucket_pages(GFP_KERNEL, c)) || !(c->sort = alloc_bucket_pages(GFP_KERNEL, c)) ||
!(c->uuids = alloc_bucket_pages(GFP_KERNEL, c)) || !(c->uuids = alloc_bucket_pages(GFP_KERNEL, c)) ||
bch_journal_alloc(c) || bch_journal_alloc(c) ||
...@@ -1409,8 +1409,6 @@ struct cache_set *bch_cache_set_alloc(struct cache_sb *sb) ...@@ -1409,8 +1409,6 @@ struct cache_set *bch_cache_set_alloc(struct cache_sb *sb)
bch_open_buckets_alloc(c)) bch_open_buckets_alloc(c))
goto err; goto err;
c->fill_iter->size = sb->bucket_size / sb->block_size;
c->congested_read_threshold_us = 2000; c->congested_read_threshold_us = 2000;
c->congested_write_threshold_us = 20000; c->congested_write_threshold_us = 20000;
c->error_limit = 8 << IO_ERROR_SHIFT; c->error_limit = 8 << IO_ERROR_SHIFT;
...@@ -1551,7 +1549,7 @@ static void run_cache_set(struct cache_set *c) ...@@ -1551,7 +1549,7 @@ static void run_cache_set(struct cache_set *c)
goto err_unlock_gc; goto err_unlock_gc;
bkey_copy_key(&c->root->key, &MAX_KEY); bkey_copy_key(&c->root->key, &MAX_KEY);
bch_btree_write(c->root, true, &op); bch_btree_node_write(c->root, &op.cl);
bch_btree_set_root(c->root); bch_btree_set_root(c->root);
rw_unlock(true, c->root); rw_unlock(true, c->root);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment