Commit ee811287 authored by Kent Overstreet's avatar Kent Overstreet

bcache: Rename/shuffle various code around

More work to disentangle bset.c from the rest of the code:
Signed-off-by: default avatarKent Overstreet <kmo@daterainc.com>
parent 67539e85
......@@ -717,14 +717,6 @@ struct bbio {
#define bucket_bytes(c) ((c)->sb.bucket_size << 9)
#define block_bytes(c) ((c)->sb.block_size << 9)
#define __set_bytes(i, k) (sizeof(*(i)) + (k) * sizeof(uint64_t))
#define set_bytes(i) __set_bytes(i, i->keys)
#define __set_blocks(i, k, c) DIV_ROUND_UP(__set_bytes(i, k), block_bytes(c))
#define set_blocks(i, c) __set_blocks(i, (i)->keys, c)
#define btree_data_space(b) (PAGE_SIZE << (b)->page_order)
#define prios_per_bucket(c) \
((bucket_bytes(c) - sizeof(struct prio_set)) / \
sizeof(struct bucket_disk))
......
......@@ -302,6 +302,115 @@ bool bch_bkey_try_merge(struct btree *b, struct bkey *l, struct bkey *r)
return true;
}
/* Auxiliary search trees */
/* 32 bits total: */
#define BKEY_MID_BITS 3
#define BKEY_EXPONENT_BITS 7
#define BKEY_MANTISSA_BITS (32 - BKEY_MID_BITS - BKEY_EXPONENT_BITS)
#define BKEY_MANTISSA_MASK ((1 << BKEY_MANTISSA_BITS) - 1)
struct bkey_float {
unsigned exponent:BKEY_EXPONENT_BITS;
unsigned m:BKEY_MID_BITS;
unsigned mantissa:BKEY_MANTISSA_BITS;
} __packed;
/*
* BSET_CACHELINE was originally intended to match the hardware cacheline size -
* it used to be 64, but I realized the lookup code would touch slightly less
* memory if it was 128.
*
* It definites the number of bytes (in struct bset) per struct bkey_float in
* the auxiliar search tree - when we're done searching the bset_float tree we
* have this many bytes left that we do a linear search over.
*
* Since (after level 5) every level of the bset_tree is on a new cacheline,
* we're touching one fewer cacheline in the bset tree in exchange for one more
* cacheline in the linear search - but the linear search might stop before it
* gets to the second cacheline.
*/
#define BSET_CACHELINE 128
/* Space required for the btree node keys */
static inline size_t btree_keys_bytes(struct btree *b)
{
return PAGE_SIZE << b->page_order;
}
static inline size_t btree_keys_cachelines(struct btree *b)
{
return btree_keys_bytes(b) / BSET_CACHELINE;
}
/* Space required for the auxiliary search trees */
static inline size_t bset_tree_bytes(struct btree *b)
{
return btree_keys_cachelines(b) * sizeof(struct bkey_float);
}
/* Space required for the prev pointers */
static inline size_t bset_prev_bytes(struct btree *b)
{
return btree_keys_cachelines(b) * sizeof(uint8_t);
}
/* Memory allocation */
void bch_btree_keys_free(struct btree *b)
{
struct bset_tree *t = b->sets;
if (bset_prev_bytes(b) < PAGE_SIZE)
kfree(t->prev);
else
free_pages((unsigned long) t->prev,
get_order(bset_prev_bytes(b)));
if (bset_tree_bytes(b) < PAGE_SIZE)
kfree(t->tree);
else
free_pages((unsigned long) t->tree,
get_order(bset_tree_bytes(b)));
free_pages((unsigned long) t->data, b->page_order);
t->prev = NULL;
t->tree = NULL;
t->data = NULL;
}
int bch_btree_keys_alloc(struct btree *b, unsigned page_order, gfp_t gfp)
{
struct bset_tree *t = b->sets;
BUG_ON(t->data);
b->page_order = page_order;
t->data = (void *) __get_free_pages(gfp, b->page_order);
if (!t->data)
goto err;
t->tree = bset_tree_bytes(b) < PAGE_SIZE
? kmalloc(bset_tree_bytes(b), gfp)
: (void *) __get_free_pages(gfp, get_order(bset_tree_bytes(b)));
if (!t->tree)
goto err;
t->prev = bset_prev_bytes(b) < PAGE_SIZE
? kmalloc(bset_prev_bytes(b), gfp)
: (void *) __get_free_pages(gfp, get_order(bset_prev_bytes(b)));
if (!t->prev)
goto err;
return 0;
err:
bch_btree_keys_free(b);
return -ENOMEM;
}
/* Binary tree stuff for auxiliary search trees */
static unsigned inorder_next(unsigned j, unsigned size)
......@@ -538,21 +647,36 @@ static void bset_alloc_tree(struct btree *b, struct bset_tree *t)
t++->size = 0;
}
static void bset_build_unwritten_tree(struct btree *b)
static void bch_bset_build_unwritten_tree(struct btree *b)
{
struct bset_tree *t = b->sets + b->nsets;
struct bset_tree *t = bset_tree_last(b);
bset_alloc_tree(b, t);
if (t->tree != b->sets->tree + bset_tree_space(b)) {
if (t->tree != b->sets->tree + btree_keys_cachelines(b)) {
t->prev[0] = bkey_to_cacheline_offset(t->data->start);
t->size = 1;
}
}
void bch_bset_init_next(struct btree *b, struct bset *i, uint64_t magic)
{
if (i != b->sets->data) {
b->sets[++b->nsets].data = i;
i->seq = b->sets->data->seq;
} else
get_random_bytes(&i->seq, sizeof(uint64_t));
i->magic = magic;
i->version = 0;
i->keys = 0;
bch_bset_build_unwritten_tree(b);
}
static void bset_build_written_tree(struct btree *b)
{
struct bset_tree *t = b->sets + b->nsets;
struct bset_tree *t = bset_tree_last(b);
struct bkey *k = t->data->start;
unsigned j, cacheline = 1;
......@@ -560,7 +684,7 @@ static void bset_build_written_tree(struct btree *b)
t->size = min_t(unsigned,
bkey_to_cacheline(t, bset_bkey_last(t->data)),
b->sets->tree + bset_tree_space(b) - t->tree);
b->sets->tree + btree_keys_cachelines(b) - t->tree);
if (t->size < 2) {
t->size = 0;
......@@ -599,7 +723,7 @@ void bch_bset_fix_invalidated_key(struct btree *b, struct bkey *k)
struct bset_tree *t;
unsigned inorder, j = 1;
for (t = b->sets; t <= &b->sets[b->nsets]; t++)
for (t = b->sets; t <= bset_tree_last(b); t++)
if (k < bset_bkey_last(t->data))
goto found_set;
......@@ -639,9 +763,10 @@ fix_right: do {
} while (j < t->size);
}
void bch_bset_fix_lookup_table(struct btree *b, struct bkey *k)
static void bch_bset_fix_lookup_table(struct btree *b,
struct bset_tree *t,
struct bkey *k)
{
struct bset_tree *t = &b->sets[b->nsets];
unsigned shift = bkey_u64s(k);
unsigned j = bkey_to_cacheline(t, k);
......@@ -673,7 +798,7 @@ void bch_bset_fix_lookup_table(struct btree *b, struct bkey *k)
}
}
if (t->size == b->sets->tree + bset_tree_space(b) - t->tree)
if (t->size == b->sets->tree + btree_keys_cachelines(b) - t->tree)
return;
/* Possibly add a new entry to the end of the lookup table */
......@@ -687,21 +812,23 @@ void bch_bset_fix_lookup_table(struct btree *b, struct bkey *k)
}
}
void bch_bset_init_next(struct btree *b)
void bch_bset_insert(struct btree *b, struct bkey *where,
struct bkey *insert)
{
struct bset *i = write_block(b);
struct bset_tree *t = bset_tree_last(b);
if (i != b->sets[0].data) {
b->sets[++b->nsets].data = i;
i->seq = b->sets[0].data->seq;
} else
get_random_bytes(&i->seq, sizeof(uint64_t));
BUG_ON(t->data != write_block(b));
BUG_ON(bset_byte_offset(b, t->data) +
__set_bytes(t->data, t->data->keys + bkey_u64s(insert)) >
PAGE_SIZE << b->page_order);
i->magic = bset_magic(&b->c->sb);
i->version = 0;
i->keys = 0;
memmove((uint64_t *) where + bkey_u64s(insert),
where,
(void *) bset_bkey_last(t->data) - (void *) where);
bset_build_unwritten_tree(b);
t->data->keys += bkey_u64s(insert);
bkey_copy(where, insert);
bch_bset_fix_lookup_table(b, t, where);
}
struct bset_search_iter {
......@@ -1154,9 +1281,8 @@ void bch_btree_sort_partial(struct btree *b, unsigned start,
__bch_btree_iter_init(b, &iter, NULL, &b->sets[start]);
BUG_ON(b->sets[b->nsets].data == write_block(b) &&
(b->sets[b->nsets].size || b->nsets));
BUG_ON(!bset_written(b, bset_tree_last(b)) &&
(bset_tree_last(b)->size || b->nsets));
if (start) {
unsigned i;
......
......@@ -144,22 +144,11 @@
* first key in that range of bytes again.
*/
struct cache_set;
/* Btree key comparison/iteration */
struct btree;
struct bkey_float;
#define MAX_BSETS 4U
struct btree_iter {
size_t size, used;
#ifdef CONFIG_BCACHE_DEBUG
struct btree *b;
#endif
struct btree_iter_set {
struct bkey *k, *end;
} data[MAX_BSETS];
};
struct bset_tree {
/*
* We construct a binary tree in an array as if the array
......@@ -169,14 +158,14 @@ struct bset_tree {
*/
/* size of the binary tree and prev array */
unsigned size;
unsigned size;
/* function of size - precalculated for to_inorder() */
unsigned extra;
unsigned extra;
/* copy of the last key in the set */
struct bkey end;
struct bkey_float *tree;
struct bkey end;
struct bkey_float *tree;
/*
* The nodes in the bset tree point to specific keys - this
......@@ -186,12 +175,61 @@ struct bset_tree {
* to keep bkey_float to 4 bytes and prev isn't used in the fast
* path.
*/
uint8_t *prev;
uint8_t *prev;
/* The actual btree node, with pointers to each sorted set */
struct bset *data;
struct bset *data;
};
#define __set_bytes(i, k) (sizeof(*(i)) + (k) * sizeof(uint64_t))
#define set_bytes(i) __set_bytes(i, i->keys)
#define __set_blocks(i, k, block_bytes) \
DIV_ROUND_UP(__set_bytes(i, k), block_bytes)
#define set_blocks(i, block_bytes) \
__set_blocks(i, (i)->keys, block_bytes)
void bch_btree_keys_free(struct btree *);
int bch_btree_keys_alloc(struct btree *, unsigned, gfp_t);
void bch_bset_fix_invalidated_key(struct btree *, struct bkey *);
void bch_bset_init_next(struct btree *, struct bset *, uint64_t);
void bch_bset_insert(struct btree *, struct bkey *, struct bkey *);
/* Btree key iteration */
struct btree_iter {
size_t size, used;
#ifdef CONFIG_BCACHE_DEBUG
struct btree *b;
#endif
struct btree_iter_set {
struct bkey *k, *end;
} data[MAX_BSETS];
};
typedef bool (*ptr_filter_fn)(struct btree *, const struct bkey *);
struct bkey *bch_btree_iter_next(struct btree_iter *);
struct bkey *bch_btree_iter_next_filter(struct btree_iter *,
struct btree *, ptr_filter_fn);
void bch_btree_iter_push(struct btree_iter *, struct bkey *, struct bkey *);
struct bkey *bch_btree_iter_init(struct btree *, struct btree_iter *,
struct bkey *);
struct bkey *__bch_bset_search(struct btree *, struct bset_tree *,
const struct bkey *);
/*
* Returns the first key that is strictly greater than search
*/
static inline struct bkey *bch_bset_search(struct btree *b, struct bset_tree *t,
const struct bkey *search)
{
return search ? __bch_bset_search(b, t, search) : t->data->start;
}
/* Sorting */
struct bset_sort_state {
......@@ -219,6 +257,60 @@ static inline void bch_btree_sort(struct btree *b,
bch_btree_sort_partial(b, 0, state);
}
/* Bkey utility code */
#define bset_bkey_last(i) bkey_idx((struct bkey *) (i)->d, (i)->keys)
static inline struct bkey *bset_bkey_idx(struct bset *i, unsigned idx)
{
return bkey_idx(i->start, idx);
}
static inline void bkey_init(struct bkey *k)
{
*k = ZERO_KEY;
}
static __always_inline int64_t bkey_cmp(const struct bkey *l,
const struct bkey *r)
{
return unlikely(KEY_INODE(l) != KEY_INODE(r))
? (int64_t) KEY_INODE(l) - (int64_t) KEY_INODE(r)
: (int64_t) KEY_OFFSET(l) - (int64_t) KEY_OFFSET(r);
}
void bch_bkey_copy_single_ptr(struct bkey *, const struct bkey *,
unsigned);
bool __bch_cut_front(const struct bkey *, struct bkey *);
bool __bch_cut_back(const struct bkey *, struct bkey *);
static inline bool bch_cut_front(const struct bkey *where, struct bkey *k)
{
BUG_ON(bkey_cmp(where, k) > 0);
return __bch_cut_front(where, k);
}
static inline bool bch_cut_back(const struct bkey *where, struct bkey *k)
{
BUG_ON(bkey_cmp(where, &START_KEY(k)) < 0);
return __bch_cut_back(where, k);
}
#define PRECEDING_KEY(_k) \
({ \
struct bkey *_ret = NULL; \
\
if (KEY_INODE(_k) || KEY_OFFSET(_k)) { \
_ret = &KEY(KEY_INODE(_k), KEY_OFFSET(_k), 0); \
\
if (!_ret->low) \
_ret->high--; \
_ret->low--; \
} \
\
_ret; \
})
/* Keylists */
struct keylist {
......@@ -282,126 +374,15 @@ struct bkey *bch_keylist_pop(struct keylist *);
void bch_keylist_pop_front(struct keylist *);
int __bch_keylist_realloc(struct keylist *, unsigned);
/* Bkey utility code */
#define bset_bkey_last(i) bkey_idx((struct bkey *) (i)->d, (i)->keys)
static inline struct bkey *bset_bkey_idx(struct bset *i, unsigned idx)
{
return bkey_idx(i->start, idx);
}
static inline void bkey_init(struct bkey *k)
{
*k = ZERO_KEY;
}
static __always_inline int64_t bkey_cmp(const struct bkey *l,
const struct bkey *r)
{
return unlikely(KEY_INODE(l) != KEY_INODE(r))
? (int64_t) KEY_INODE(l) - (int64_t) KEY_INODE(r)
: (int64_t) KEY_OFFSET(l) - (int64_t) KEY_OFFSET(r);
}
void bch_bkey_copy_single_ptr(struct bkey *, const struct bkey *,
unsigned);
bool __bch_cut_front(const struct bkey *, struct bkey *);
bool __bch_cut_back(const struct bkey *, struct bkey *);
static inline bool bch_cut_front(const struct bkey *where, struct bkey *k)
{
BUG_ON(bkey_cmp(where, k) > 0);
return __bch_cut_front(where, k);
}
static inline bool bch_cut_back(const struct bkey *where, struct bkey *k)
{
BUG_ON(bkey_cmp(where, &START_KEY(k)) < 0);
return __bch_cut_back(where, k);
}
struct cache_set;
const char *bch_ptr_status(struct cache_set *, const struct bkey *);
bool bch_btree_ptr_invalid(struct cache_set *, const struct bkey *);
bool bch_extent_ptr_invalid(struct cache_set *, const struct bkey *);
bool bch_btree_ptr_bad(struct btree *, const struct bkey *);
bool bch_extent_ptr_bad(struct btree *, const struct bkey *);
bool bch_ptr_bad(struct btree *, const struct bkey *);
typedef bool (*ptr_filter_fn)(struct btree *, const struct bkey *);
struct bkey *bch_btree_iter_next(struct btree_iter *);
struct bkey *bch_btree_iter_next_filter(struct btree_iter *,
struct btree *, ptr_filter_fn);
void bch_btree_iter_push(struct btree_iter *, struct bkey *, struct bkey *);
struct bkey *bch_btree_iter_init(struct btree *, struct btree_iter *,
struct bkey *);
/* 32 bits total: */
#define BKEY_MID_BITS 3
#define BKEY_EXPONENT_BITS 7
#define BKEY_MANTISSA_BITS 22
#define BKEY_MANTISSA_MASK ((1 << BKEY_MANTISSA_BITS) - 1)
struct bkey_float {
unsigned exponent:BKEY_EXPONENT_BITS;
unsigned m:BKEY_MID_BITS;
unsigned mantissa:BKEY_MANTISSA_BITS;
} __packed;
/*
* BSET_CACHELINE was originally intended to match the hardware cacheline size -
* it used to be 64, but I realized the lookup code would touch slightly less
* memory if it was 128.
*
* It definites the number of bytes (in struct bset) per struct bkey_float in
* the auxiliar search tree - when we're done searching the bset_float tree we
* have this many bytes left that we do a linear search over.
*
* Since (after level 5) every level of the bset_tree is on a new cacheline,
* we're touching one fewer cacheline in the bset tree in exchange for one more
* cacheline in the linear search - but the linear search might stop before it
* gets to the second cacheline.
*/
#define BSET_CACHELINE 128
#define bset_tree_space(b) (btree_data_space(b) / BSET_CACHELINE)
#define bset_tree_bytes(b) (bset_tree_space(b) * sizeof(struct bkey_float))
#define bset_prev_bytes(b) (bset_tree_space(b) * sizeof(uint8_t))
void bch_bset_init_next(struct btree *);
void bch_bset_fix_invalidated_key(struct btree *, struct bkey *);
void bch_bset_fix_lookup_table(struct btree *, struct bkey *);
struct bkey *__bch_bset_search(struct btree *, struct bset_tree *,
const struct bkey *);
/*
* Returns the first key that is strictly greater than search
*/
static inline struct bkey *bch_bset_search(struct btree *b, struct bset_tree *t,
const struct bkey *search)
{
return search ? __bch_bset_search(b, t, search) : t->data->start;
}
#define PRECEDING_KEY(_k) \
({ \
struct bkey *_ret = NULL; \
\
if (KEY_INODE(_k) || KEY_OFFSET(_k)) { \
_ret = &KEY(KEY_INODE(_k), KEY_OFFSET(_k), 0); \
\
if (!_ret->low) \
_ret->high--; \
_ret->low--; \
} \
\
_ret; \
})
bool bch_bkey_try_merge(struct btree *, struct bkey *, struct bkey *);
int bch_bset_print_stats(struct cache_set *, char *);
......
This diff is collapsed.
......@@ -180,9 +180,9 @@ static inline struct btree_write *btree_prev_write(struct btree *b)
return b->writes + (btree_node_write_idx(b) ^ 1);
}
static inline unsigned bset_offset(struct btree *b, struct bset *i)
static inline struct bset_tree *bset_tree_last(struct btree *b)
{
return (((size_t) i) - ((size_t) b->sets->data)) >> 9;
return b->sets + b->nsets;
}
static inline struct bset *btree_bset_first(struct btree *b)
......@@ -190,6 +190,11 @@ static inline struct bset *btree_bset_first(struct btree *b)
return b->sets->data;
}
static inline struct bset *btree_bset_last(struct btree *b)
{
return bset_tree_last(b)->data;
}
static inline unsigned bset_byte_offset(struct btree *b, struct bset *i)
{
return ((size_t) i) - ((size_t) b->sets->data);
......
......@@ -123,7 +123,8 @@ static void bch_dump_bucket(struct btree *b)
for (i = (start); \
(void *) i < (void *) (start) + (KEY_SIZE(&b->key) << 9) &&\
i->seq == (start)->seq; \
i = (void *) i + set_blocks(i, b->c) * block_bytes(b->c))
i = (void *) i + set_blocks(i, block_bytes(b->c)) * \
block_bytes(b->c))
void bch_btree_verify(struct btree *b)
{
......
......@@ -95,7 +95,7 @@ reread: left = ca->sb.bucket_size - offset;
return ret;
}
blocks = set_blocks(j, ca->set);
blocks = set_blocks(j, block_bytes(ca->set));
while (!list_empty(list)) {
i = list_first_entry(list,
......@@ -579,7 +579,8 @@ static void journal_write_unlocked(struct closure *cl)
struct cache *ca;
struct journal_write *w = c->journal.cur;
struct bkey *k = &c->journal.key;
unsigned i, sectors = set_blocks(w->data, c) * c->sb.block_size;
unsigned i, sectors = set_blocks(w->data, block_bytes(c)) *
c->sb.block_size;
struct bio *bio;
struct bio_list list;
......@@ -595,7 +596,7 @@ static void journal_write_unlocked(struct closure *cl)
continue_at(cl, journal_write, system_wq);
}
c->journal.blocks_free -= set_blocks(w->data, c);
c->journal.blocks_free -= set_blocks(w->data, block_bytes(c));
w->data->btree_level = c->root->level;
......@@ -685,7 +686,7 @@ static struct journal_write *journal_wait_for_write(struct cache_set *c,
struct journal_write *w = c->journal.cur;
sectors = __set_blocks(w->data, w->data->keys + nkeys,
c) * c->sb.block_size;
block_bytes(c)) * c->sb.block_size;
if (sectors <= min_t(size_t,
c->journal.blocks_free * c->sb.block_size,
......
......@@ -1477,7 +1477,7 @@ struct cache_set *bch_cache_set_alloc(struct cache_sb *sb)
c->block_bits = ilog2(sb->block_size);
c->nr_uuids = bucket_bytes(c) / sizeof(struct uuid_entry);
c->btree_pages = c->sb.bucket_size / PAGE_SECTORS;
c->btree_pages = bucket_pages(c);
if (c->btree_pages > BTREE_MAX_PAGES)
c->btree_pages = max_t(int, c->btree_pages / 4,
BTREE_MAX_PAGES);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment