Commit 5735608c, authored and committed by Kent Overstreet

bcachefs: Kill main in-memory bucket array

All code using the in-memory bucket array, excluding GC, has now been
converted to use the alloc btree directly - so we can finally delete it.
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
parent 5f43f99c
......@@ -400,14 +400,13 @@ void bch2_alloc_to_text(struct printbuf *out, struct bch_fs *c, struct bkey_s_c
pr_buf(out, " write_time %llu", a.io_time[WRITE]);
}
int bch2_alloc_read(struct bch_fs *c, bool gc, bool metadata_only)
int bch2_alloc_read(struct bch_fs *c)
{
struct btree_trans trans;
struct btree_iter iter;
struct bkey_s_c k;
struct bch_alloc_v4 a;
struct bch_dev *ca;
struct bucket *g;
int ret;
bch2_trans_init(&trans, c, 0, 0);
......@@ -415,30 +414,9 @@ int bch2_alloc_read(struct bch_fs *c, bool gc, bool metadata_only)
for_each_btree_key(&trans, iter, BTREE_ID_alloc, POS_MIN,
BTREE_ITER_PREFETCH, k, ret) {
ca = bch_dev_bkey_exists(c, k.k->p.inode);
g = __bucket(ca, k.k->p.offset, gc);
bch2_alloc_to_v4(k, &a);
if (!gc)
*bucket_gen(ca, k.k->p.offset) = a.gen;
g->_mark.gen = a.gen;
g->io_time[READ] = a.io_time[READ];
g->io_time[WRITE] = a.io_time[WRITE];
g->gen_valid = 1;
if (!gc ||
(metadata_only &&
(a.data_type == BCH_DATA_user ||
a.data_type == BCH_DATA_cached ||
a.data_type == BCH_DATA_parity))) {
g->_mark.data_type = a.data_type;
g->_mark.dirty_sectors = a.dirty_sectors;
g->_mark.cached_sectors = a.cached_sectors;
g->_mark.stripe = a.stripe != 0;
g->stripe = a.stripe;
g->stripe_redundancy = a.stripe_redundancy;
}
*bucket_gen(ca, k.k->p.offset) = a.gen;
}
bch2_trans_iter_exit(&trans, &iter);
......
......@@ -109,7 +109,7 @@ static inline bool bkey_is_alloc(const struct bkey *k)
k->type == KEY_TYPE_alloc_v3;
}
int bch2_alloc_read(struct bch_fs *, bool, bool);
int bch2_alloc_read(struct bch_fs *);
int bch2_trans_mark_alloc(struct btree_trans *, struct bkey_s_c,
struct bkey_i *, unsigned);
......
......@@ -450,7 +450,7 @@ struct bch_dev {
* gc_lock, for device resize - holding any is sufficient for access:
* Or rcu_read_lock(), but only for ptr_stale():
*/
struct bucket_array __rcu *buckets[2];
struct bucket_array __rcu *buckets_gc;
struct bucket_gens __rcu *bucket_gens;
u8 *oldest_gen;
unsigned long *buckets_nouse;
......
......@@ -1160,10 +1160,10 @@ static void bch2_gc_free(struct bch_fs *c)
genradix_free(&c->gc_stripes);
for_each_member_device(ca, c, i) {
kvpfree(rcu_dereference_protected(ca->buckets[1], 1),
kvpfree(rcu_dereference_protected(ca->buckets_gc, 1),
sizeof(struct bucket_array) +
ca->mi.nbuckets * sizeof(struct bucket));
ca->buckets[1] = NULL;
ca->buckets_gc = NULL;
free_percpu(ca->usage_gc);
ca->usage_gc = NULL;
......@@ -1292,7 +1292,7 @@ static int bch2_gc_start(struct bch_fs *c,
}
for_each_member_device(ca, c, i) {
BUG_ON(ca->buckets[1]);
BUG_ON(ca->buckets_gc);
BUG_ON(ca->usage_gc);
ca->usage_gc = alloc_percpu(struct bch_dev_usage);
......@@ -1346,8 +1346,6 @@ static int bch2_alloc_write_key(struct btree_trans *trans,
.data_type = g->mark.data_type,
.dirty_sectors = g->mark.dirty_sectors,
.cached_sectors = g->mark.cached_sectors,
.io_time[READ] = g->io_time[READ],
.io_time[WRITE] = g->io_time[WRITE],
.stripe = g->stripe,
.stripe_redundancy = g->stripe_redundancy,
};
......@@ -1437,7 +1435,13 @@ static int bch2_gc_alloc_done(struct bch_fs *c, bool metadata_only)
static int bch2_gc_alloc_start(struct bch_fs *c, bool metadata_only)
{
struct bch_dev *ca;
struct btree_trans trans;
struct btree_iter iter;
struct bkey_s_c k;
struct bucket *g;
struct bch_alloc_v4 a;
unsigned i;
int ret;
for_each_member_device(ca, c, i) {
struct bucket_array *buckets = kvpmalloc(sizeof(struct bucket_array) +
......@@ -1445,17 +1449,47 @@ static int bch2_gc_alloc_start(struct bch_fs *c, bool metadata_only)
GFP_KERNEL|__GFP_ZERO);
if (!buckets) {
percpu_ref_put(&ca->ref);
percpu_up_write(&c->mark_lock);
bch_err(c, "error allocating ca->buckets[gc]");
return -ENOMEM;
}
buckets->first_bucket = ca->mi.first_bucket;
buckets->nbuckets = ca->mi.nbuckets;
rcu_assign_pointer(ca->buckets[1], buckets);
rcu_assign_pointer(ca->buckets_gc, buckets);
};
return bch2_alloc_read(c, true, metadata_only);
bch2_trans_init(&trans, c, 0, 0);
for_each_btree_key(&trans, iter, BTREE_ID_alloc, POS_MIN,
BTREE_ITER_PREFETCH, k, ret) {
ca = bch_dev_bkey_exists(c, k.k->p.inode);
g = gc_bucket(ca, k.k->p.offset);
bch2_alloc_to_v4(k, &a);
g->_mark.gen = a.gen;
g->gen_valid = 1;
if (metadata_only &&
(a.data_type == BCH_DATA_user ||
a.data_type == BCH_DATA_cached ||
a.data_type == BCH_DATA_parity)) {
g->_mark.data_type = a.data_type;
g->_mark.dirty_sectors = a.dirty_sectors;
g->_mark.cached_sectors = a.cached_sectors;
g->_mark.stripe = a.stripe != 0;
g->stripe = a.stripe;
g->stripe_redundancy = a.stripe_redundancy;
}
}
bch2_trans_iter_exit(&trans, &iter);
bch2_trans_exit(&trans);
if (ret)
bch_err(c, "error reading alloc info at gc start: %i", ret);
return ret;
}
static void bch2_gc_alloc_reset(struct bch_fs *c, bool metadata_only)
......@@ -1464,7 +1498,7 @@ static void bch2_gc_alloc_reset(struct bch_fs *c, bool metadata_only)
unsigned i;
for_each_member_device(ca, c, i) {
struct bucket_array *buckets = __bucket_array(ca, true);
struct bucket_array *buckets = gc_bucket_array(ca);
struct bucket *g;
for_each_bucket(g, buckets) {
......
......@@ -512,8 +512,6 @@ int bch2_mark_alloc(struct btree_trans *trans,
struct bch_fs *c = trans->c;
struct bch_alloc_v4 old_a, new_a;
struct bch_dev *ca = bch_dev_bkey_exists(c, new.k->p.inode);
struct bucket *g;
struct bucket_mark old_m, m;
int ret = 0;
if (bch2_trans_inconsistent_on(new.k->p.offset < ca->mi.first_bucket ||
......@@ -587,21 +585,22 @@ int bch2_mark_alloc(struct btree_trans *trans,
bch2_dev_usage_update(c, ca, old_a, new_a, journal_seq, gc);
g = __bucket(ca, new.k->p.offset, gc);
old_m = bucket_cmpxchg(g, m, ({
m.gen = new_a.gen;
m.data_type = new_a.data_type;
m.dirty_sectors = new_a.dirty_sectors;
m.cached_sectors = new_a.cached_sectors;
m.stripe = new_a.stripe != 0;
}));
g->io_time[READ] = new_a.io_time[READ];
g->io_time[WRITE] = new_a.io_time[WRITE];
g->gen_valid = 1;
g->stripe = new_a.stripe;
g->stripe_redundancy = new_a.stripe_redundancy;
if (gc) {
struct bucket_mark old_m, m;
struct bucket *g = gc_bucket(ca, new.k->p.offset);
old_m = bucket_cmpxchg(g, m, ({
m.gen = new_a.gen;
m.data_type = new_a.data_type;
m.dirty_sectors = new_a.dirty_sectors;
m.cached_sectors = new_a.cached_sectors;
m.stripe = new_a.stripe != 0;
}));
g->gen_valid = 1;
g->stripe = new_a.stripe;
g->stripe_redundancy = new_a.stripe_redundancy;
}
percpu_up_read(&c->mark_lock);
/*
......@@ -610,9 +609,9 @@ int bch2_mark_alloc(struct btree_trans *trans,
*/
if ((flags & BTREE_TRIGGER_BUCKET_INVALIDATE) &&
old_m.cached_sectors) {
old_a.cached_sectors) {
ret = update_cached_sectors(c, new, ca->dev_idx,
-old_m.cached_sectors,
-old_a.cached_sectors,
journal_seq, gc);
if (ret) {
bch2_fs_fatal_error(c, "bch2_mark_alloc(): no replicas entry while updating cached sectors");
......@@ -620,7 +619,7 @@ int bch2_mark_alloc(struct btree_trans *trans,
}
trace_invalidate(ca, bucket_to_sector(ca, new.k->p.offset),
old_m.cached_sectors);
old_a.cached_sectors);
}
return 0;
......@@ -2039,16 +2038,6 @@ int __bch2_disk_reservation_add(struct bch_fs *c, struct disk_reservation *res,
/* Startup/shutdown: */
static void buckets_free_rcu(struct rcu_head *rcu)
{
struct bucket_array *buckets =
container_of(rcu, struct bucket_array, rcu);
kvpfree(buckets,
sizeof(*buckets) +
buckets->nbuckets * sizeof(struct bucket));
}
static void bucket_gens_free_rcu(struct rcu_head *rcu)
{
struct bucket_gens *buckets =
......@@ -2059,16 +2048,12 @@ static void bucket_gens_free_rcu(struct rcu_head *rcu)
int bch2_dev_buckets_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets)
{
struct bucket_array *buckets = NULL, *old_buckets = NULL;
struct bucket_gens *bucket_gens = NULL, *old_bucket_gens = NULL;
unsigned long *buckets_nouse = NULL;
bool resize = ca->buckets[0] != NULL;
bool resize = ca->bucket_gens != NULL;
int ret = -ENOMEM;
if (!(buckets = kvpmalloc(sizeof(struct bucket_array) +
nbuckets * sizeof(struct bucket),
GFP_KERNEL|__GFP_ZERO)) ||
!(bucket_gens = kvpmalloc(sizeof(struct bucket_gens) + nbuckets,
if (!(bucket_gens = kvpmalloc(sizeof(struct bucket_gens) + nbuckets,
GFP_KERNEL|__GFP_ZERO)) ||
(c->opts.buckets_nouse &&
!(buckets_nouse = kvpmalloc(BITS_TO_LONGS(nbuckets) *
......@@ -2076,8 +2061,6 @@ int bch2_dev_buckets_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets)
GFP_KERNEL|__GFP_ZERO))))
goto err;
buckets->first_bucket = ca->mi.first_bucket;
buckets->nbuckets = nbuckets;
bucket_gens->first_bucket = ca->mi.first_bucket;
bucket_gens->nbuckets = nbuckets;
......@@ -2089,15 +2072,11 @@ int bch2_dev_buckets_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets)
percpu_down_write(&c->mark_lock);
}
old_buckets = bucket_array(ca);
old_bucket_gens = rcu_dereference_protected(ca->bucket_gens, 1);
if (resize) {
size_t n = min(buckets->nbuckets, old_buckets->nbuckets);
size_t n = min(bucket_gens->nbuckets, old_bucket_gens->nbuckets);
memcpy(buckets->b,
old_buckets->b,
n * sizeof(struct bucket));
memcpy(bucket_gens->b,
old_bucket_gens->b,
n);
......@@ -2107,31 +2086,25 @@ int bch2_dev_buckets_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets)
BITS_TO_LONGS(n) * sizeof(unsigned long));
}
rcu_assign_pointer(ca->buckets[0], buckets);
rcu_assign_pointer(ca->bucket_gens, bucket_gens);
buckets = old_buckets;
bucket_gens = old_bucket_gens;
swap(ca->buckets_nouse, buckets_nouse);
nbuckets = ca->mi.nbuckets;
if (resize) {
percpu_up_write(&c->mark_lock);
up_write(&ca->bucket_lock);
up_write(&c->gc_lock);
}
nbuckets = ca->mi.nbuckets;
if (resize)
up_write(&ca->bucket_lock);
ret = 0;
err:
kvpfree(buckets_nouse,
BITS_TO_LONGS(nbuckets) * sizeof(unsigned long));
if (bucket_gens)
call_rcu(&bucket_gens->rcu, bucket_gens_free_rcu);
if (buckets)
call_rcu(&buckets->rcu, buckets_free_rcu);
return ret;
}
......@@ -2144,9 +2117,6 @@ void bch2_dev_buckets_free(struct bch_dev *ca)
BITS_TO_LONGS(ca->mi.nbuckets) * sizeof(unsigned long));
kvpfree(rcu_dereference_protected(ca->bucket_gens, 1),
sizeof(struct bucket_gens) + ca->mi.nbuckets);
kvpfree(rcu_dereference_protected(ca->buckets[0], 1),
sizeof(struct bucket_array) +
ca->mi.nbuckets * sizeof(struct bucket));
for (i = 0; i < ARRAY_SIZE(ca->usage); i++)
free_percpu(ca->usage[i]);
......
......@@ -30,34 +30,23 @@
_old; \
})
static inline struct bucket_array *__bucket_array(struct bch_dev *ca,
bool gc)
static inline struct bucket_array *gc_bucket_array(struct bch_dev *ca)
{
return rcu_dereference_check(ca->buckets[gc],
return rcu_dereference_check(ca->buckets_gc,
!ca->fs ||
percpu_rwsem_is_held(&ca->fs->mark_lock) ||
lockdep_is_held(&ca->fs->gc_lock) ||
lockdep_is_held(&ca->bucket_lock));
}
static inline struct bucket_array *bucket_array(struct bch_dev *ca)
{
return __bucket_array(ca, false);
}
static inline struct bucket *__bucket(struct bch_dev *ca, size_t b, bool gc)
static inline struct bucket *gc_bucket(struct bch_dev *ca, size_t b)
{
struct bucket_array *buckets = __bucket_array(ca, gc);
struct bucket_array *buckets = gc_bucket_array(ca);
BUG_ON(b < buckets->first_bucket || b >= buckets->nbuckets);
return buckets->b + b;
}
static inline struct bucket *gc_bucket(struct bch_dev *ca, size_t b)
{
return __bucket(ca, b, true);
}
static inline struct bucket_gens *bucket_gens(struct bch_dev *ca)
{
return rcu_dereference_check(ca->bucket_gens,
......@@ -65,7 +54,6 @@ static inline struct bucket_gens *bucket_gens(struct bch_dev *ca)
percpu_rwsem_is_held(&ca->fs->mark_lock) ||
lockdep_is_held(&ca->fs->gc_lock) ||
lockdep_is_held(&ca->bucket_lock));
}
static inline u8 *bucket_gen(struct bch_dev *ca, size_t b)
......
......@@ -27,7 +27,6 @@ struct bucket {
const struct bucket_mark mark;
};
u64 io_time[2];
unsigned gen_valid:1;
u8 stripe_redundancy;
u32 stripe;
......
......@@ -1139,7 +1139,7 @@ int bch2_fs_recovery(struct bch_fs *c)
err = "error reading allocation information";
down_read(&c->gc_lock);
ret = bch2_alloc_read(c, false, false);
ret = bch2_alloc_read(c);
up_read(&c->gc_lock);
if (ret)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment