Commit 6c7585b0, authored and committed by Kent Overstreet

bcachefs: Rework allocating buckets for stripes

Allocating buckets for existing stripes was busted, in part because the
data structures were too contorted. This reworks new stripes so that we
have an array of open buckets that matches blocks in the stripe, and
it's sparse if we're reusing an existing stripe.
Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
parent f9ef45ad
...@@ -489,16 +489,20 @@ bucket_alloc_from_stripe(struct bch_fs *c, ...@@ -489,16 +489,20 @@ bucket_alloc_from_stripe(struct bch_fs *c,
devs_sorted = bch2_dev_alloc_list(c, &wp->stripe, devs_may_alloc); devs_sorted = bch2_dev_alloc_list(c, &wp->stripe, devs_may_alloc);
for (i = 0; i < devs_sorted.nr; i++) for (i = 0; i < devs_sorted.nr; i++)
open_bucket_for_each(c, &h->s->blocks, ob, ec_idx) for (ec_idx = 0; ec_idx < h->s->nr_data; ec_idx++) {
if (!h->s->blocks[ec_idx])
continue;
ob = c->open_buckets + h->s->blocks[ec_idx];
if (ob->ptr.dev == devs_sorted.devs[i] && if (ob->ptr.dev == devs_sorted.devs[i] &&
!test_and_set_bit(h->s->data_block_idx[ec_idx], !test_and_set_bit(ec_idx, h->s->blocks_allocated))
h->s->blocks_allocated))
goto got_bucket; goto got_bucket;
}
goto out_put_head; goto out_put_head;
got_bucket: got_bucket:
ca = bch_dev_bkey_exists(c, ob->ptr.dev); ca = bch_dev_bkey_exists(c, ob->ptr.dev);
ob->ec_idx = h->s->data_block_idx[ec_idx]; ob->ec_idx = ec_idx;
ob->ec = h->s; ob->ec = h->s;
add_new_bucket(c, ptrs, devs_may_alloc, add_new_bucket(c, ptrs, devs_may_alloc,
...@@ -636,10 +640,13 @@ void bch2_open_buckets_stop_dev(struct bch_fs *c, struct bch_dev *ca, ...@@ -636,10 +640,13 @@ void bch2_open_buckets_stop_dev(struct bch_fs *c, struct bch_dev *ca,
if (!drop && ob->ec) { if (!drop && ob->ec) {
mutex_lock(&ob->ec->lock); mutex_lock(&ob->ec->lock);
open_bucket_for_each(c, &ob->ec->blocks, ob2, j) for (j = 0; j < ob->ec->new_stripe.key.v.nr_blocks; j++) {
drop |= ob2->ptr.dev == ca->dev_idx; if (!ob->ec->blocks[j])
open_bucket_for_each(c, &ob->ec->parity, ob2, j) continue;
ob2 = c->open_buckets + ob->ec->blocks[j];
drop |= ob2->ptr.dev == ca->dev_idx; drop |= ob2->ptr.dev == ca->dev_idx;
}
mutex_unlock(&ob->ec->lock); mutex_unlock(&ob->ec->lock);
} }
......
...@@ -907,9 +907,6 @@ static void ec_stripe_create(struct ec_stripe_new *s) ...@@ -907,9 +907,6 @@ static void ec_stripe_create(struct ec_stripe_new *s)
if (!percpu_ref_tryget(&c->writes)) if (!percpu_ref_tryget(&c->writes))
goto err; goto err;
BUG_ON(bitmap_weight(s->blocks_allocated,
s->blocks.nr) != s->blocks.nr);
ec_generate_ec(&s->new_stripe); ec_generate_ec(&s->new_stripe);
ec_generate_checksums(&s->new_stripe); ec_generate_checksums(&s->new_stripe);
...@@ -952,12 +949,17 @@ static void ec_stripe_create(struct ec_stripe_new *s) ...@@ -952,12 +949,17 @@ static void ec_stripe_create(struct ec_stripe_new *s)
err: err:
bch2_disk_reservation_put(c, &s->res); bch2_disk_reservation_put(c, &s->res);
open_bucket_for_each(c, &s->blocks, ob, i) { for (i = 0; i < v->nr_blocks; i++)
if (s->blocks[i]) {
ob = c->open_buckets + s->blocks[i];
if (i < nr_data) {
ob->ec = NULL; ob->ec = NULL;
__bch2_open_bucket_put(c, ob); __bch2_open_bucket_put(c, ob);
} else {
bch2_open_bucket_put(c, ob);
}
} }
bch2_open_buckets_put(c, &s->parity);
bch2_keylist_free(&s->keys, s->inline_keys); bch2_keylist_free(&s->keys, s->inline_keys);
...@@ -1216,7 +1218,7 @@ void bch2_ec_stripe_head_put(struct bch_fs *c, struct ec_stripe_head *h) ...@@ -1216,7 +1218,7 @@ void bch2_ec_stripe_head_put(struct bch_fs *c, struct ec_stripe_head *h)
if (h->s && if (h->s &&
h->s->allocated && h->s->allocated &&
bitmap_weight(h->s->blocks_allocated, bitmap_weight(h->s->blocks_allocated,
h->s->blocks.nr) == h->s->blocks.nr) h->s->nr_data) == h->s->nr_data)
ec_stripe_set_pending(c, h); ec_stripe_set_pending(c, h);
mutex_unlock(&h->lock); mutex_unlock(&h->lock);
...@@ -1253,64 +1255,82 @@ static enum bucket_alloc_ret ...@@ -1253,64 +1255,82 @@ static enum bucket_alloc_ret
new_stripe_alloc_buckets(struct bch_fs *c, struct ec_stripe_head *h, new_stripe_alloc_buckets(struct bch_fs *c, struct ec_stripe_head *h,
struct closure *cl) struct closure *cl)
{ {
struct bch_devs_mask devs; struct bch_devs_mask devs = h->devs;
struct open_bucket *ob; struct open_bucket *ob;
unsigned i, nr_have, nr_data = struct open_buckets buckets;
min_t(unsigned, h->nr_active_devs, unsigned i, j, nr_have_parity = 0, nr_have_data = 0;
BCH_BKEY_PTRS_MAX) - h->redundancy;
bool have_cache = true; bool have_cache = true;
enum bucket_alloc_ret ret = ALLOC_SUCCESS; enum bucket_alloc_ret ret = ALLOC_SUCCESS;
devs = h->devs; for (i = 0; i < h->s->new_stripe.key.v.nr_blocks; i++) {
if (test_bit(i, h->s->blocks_gotten)) {
for_each_set_bit(i, h->s->blocks_allocated, BCH_BKEY_PTRS_MAX) {
__clear_bit(h->s->new_stripe.key.v.ptrs[i].dev, devs.d); __clear_bit(h->s->new_stripe.key.v.ptrs[i].dev, devs.d);
--nr_data; if (i < h->s->nr_data)
nr_have_data++;
else
nr_have_parity++;
}
} }
BUG_ON(h->s->blocks.nr > nr_data); BUG_ON(nr_have_data > h->s->nr_data);
BUG_ON(h->s->parity.nr > h->redundancy); BUG_ON(nr_have_parity > h->s->nr_parity);
open_bucket_for_each(c, &h->s->parity, ob, i)
__clear_bit(ob->ptr.dev, devs.d);
open_bucket_for_each(c, &h->s->blocks, ob, i)
__clear_bit(ob->ptr.dev, devs.d);
percpu_down_read(&c->mark_lock); percpu_down_read(&c->mark_lock);
rcu_read_lock(); rcu_read_lock();
if (h->s->parity.nr < h->redundancy) { buckets.nr = 0;
nr_have = h->s->parity.nr; if (nr_have_parity < h->s->nr_parity) {
ret = bch2_bucket_alloc_set(c, &buckets,
ret = bch2_bucket_alloc_set(c, &h->s->parity,
&h->parity_stripe, &h->parity_stripe,
&devs, &devs,
h->redundancy, h->s->nr_parity,
&nr_have, &nr_have_parity,
&have_cache, &have_cache,
h->copygc h->copygc
? RESERVE_MOVINGGC ? RESERVE_MOVINGGC
: RESERVE_NONE, : RESERVE_NONE,
0, 0,
cl); cl);
open_bucket_for_each(c, &buckets, ob, i) {
j = find_next_zero_bit(h->s->blocks_gotten,
h->s->nr_data + h->s->nr_parity,
h->s->nr_data);
BUG_ON(j >= h->s->nr_data + h->s->nr_parity);
h->s->blocks[j] = buckets.v[i];
h->s->new_stripe.key.v.ptrs[j] = ob->ptr;
__set_bit(j, h->s->blocks_gotten);
}
if (ret) if (ret)
goto err; goto err;
} }
if (h->s->blocks.nr < nr_data) { buckets.nr = 0;
nr_have = h->s->blocks.nr; if (nr_have_data < h->s->nr_data) {
ret = bch2_bucket_alloc_set(c, &buckets,
ret = bch2_bucket_alloc_set(c, &h->s->blocks,
&h->block_stripe, &h->block_stripe,
&devs, &devs,
nr_data, h->s->nr_data,
&nr_have, &nr_have_data,
&have_cache, &have_cache,
h->copygc h->copygc
? RESERVE_MOVINGGC ? RESERVE_MOVINGGC
: RESERVE_NONE, : RESERVE_NONE,
0, 0,
cl); cl);
open_bucket_for_each(c, &buckets, ob, i) {
j = find_next_zero_bit(h->s->blocks_gotten,
h->s->nr_data, 0);
BUG_ON(j >= h->s->nr_data);
h->s->blocks[j] = buckets.v[i];
h->s->new_stripe.key.v.ptrs[j] = ob->ptr;
__set_bit(j, h->s->blocks_gotten);
}
if (ret) if (ret)
goto err; goto err;
} }
...@@ -1362,8 +1382,7 @@ struct ec_stripe_head *bch2_ec_stripe_head_get(struct bch_fs *c, ...@@ -1362,8 +1382,7 @@ struct ec_stripe_head *bch2_ec_stripe_head_get(struct bch_fs *c,
struct closure *cl) struct closure *cl)
{ {
struct ec_stripe_head *h; struct ec_stripe_head *h;
struct open_bucket *ob; unsigned i;
unsigned i, data_idx = 0;
s64 idx; s64 idx;
int ret; int ret;
...@@ -1398,9 +1417,14 @@ struct ec_stripe_head *bch2_ec_stripe_head_get(struct bch_fs *c, ...@@ -1398,9 +1417,14 @@ struct ec_stripe_head *bch2_ec_stripe_head_get(struct bch_fs *c,
BUG(); BUG();
} }
BUG_ON(h->s->existing_stripe.size != h->blocksize);
BUG_ON(h->s->existing_stripe.size != h->s->existing_stripe.key.v.sectors);
for (i = 0; i < h->s->existing_stripe.key.v.nr_blocks; i++) { for (i = 0; i < h->s->existing_stripe.key.v.nr_blocks; i++) {
if (stripe_blockcount_get(&h->s->existing_stripe.key.v, i)) if (stripe_blockcount_get(&h->s->existing_stripe.key.v, i)) {
__set_bit(i, h->s->blocks_gotten);
__set_bit(i, h->s->blocks_allocated); __set_bit(i, h->s->blocks_allocated);
}
ec_block_io(c, &h->s->existing_stripe, READ, i, &h->s->iodone); ec_block_io(c, &h->s->existing_stripe, READ, i, &h->s->iodone);
} }
...@@ -1438,20 +1462,6 @@ struct ec_stripe_head *bch2_ec_stripe_head_get(struct bch_fs *c, ...@@ -1438,20 +1462,6 @@ struct ec_stripe_head *bch2_ec_stripe_head_get(struct bch_fs *c,
goto out; goto out;
} }
open_bucket_for_each(c, &h->s->blocks, ob, i) {
data_idx = find_next_zero_bit(h->s->blocks_allocated,
h->s->nr_data, data_idx);
BUG_ON(data_idx >= h->s->nr_data);
h->s->new_stripe.key.v.ptrs[data_idx] = ob->ptr;
h->s->data_block_idx[i] = data_idx;
data_idx++;
}
open_bucket_for_each(c, &h->s->parity, ob, i)
h->s->new_stripe.key.v.ptrs[h->s->nr_data + i] = ob->ptr;
//pr_info("new stripe, blocks_allocated %lx", h->s->blocks_allocated[0]);
h->s->allocated = true; h->s->allocated = true;
} }
out: out:
...@@ -1471,12 +1481,14 @@ void bch2_ec_stop_dev(struct bch_fs *c, struct bch_dev *ca) ...@@ -1471,12 +1481,14 @@ void bch2_ec_stop_dev(struct bch_fs *c, struct bch_dev *ca)
if (!h->s) if (!h->s)
goto unlock; goto unlock;
open_bucket_for_each(c, &h->s->blocks, ob, i) for (i = 0; i < h->s->new_stripe.key.v.nr_blocks; i++) {
if (ob->ptr.dev == ca->dev_idx) if (!h->s->blocks[i])
goto found; continue;
open_bucket_for_each(c, &h->s->parity, ob, i)
ob = c->open_buckets + h->s->blocks[i];
if (ob->ptr.dev == ca->dev_idx) if (ob->ptr.dev == ca->dev_idx)
goto found; goto found;
}
goto unlock; goto unlock;
found: found:
h->s->err = -EROFS; h->s->err = -EROFS;
...@@ -1662,19 +1674,17 @@ void bch2_new_stripes_to_text(struct printbuf *out, struct bch_fs *c) ...@@ -1662,19 +1674,17 @@ void bch2_new_stripes_to_text(struct printbuf *out, struct bch_fs *c)
h->target, h->algo, h->redundancy); h->target, h->algo, h->redundancy);
if (h->s) if (h->s)
pr_buf(out, "\tpending: blocks %u allocated %u\n", pr_buf(out, "\tpending: blocks %u+%u allocated %u\n",
h->s->blocks.nr, h->s->nr_data, h->s->nr_parity,
bitmap_weight(h->s->blocks_allocated, bitmap_weight(h->s->blocks_allocated,
h->s->blocks.nr)); h->s->nr_data));
} }
mutex_unlock(&c->ec_stripe_head_lock); mutex_unlock(&c->ec_stripe_head_lock);
mutex_lock(&c->ec_stripe_new_lock); mutex_lock(&c->ec_stripe_new_lock);
list_for_each_entry(s, &c->ec_stripe_new_list, list) { list_for_each_entry(s, &c->ec_stripe_new_list, list) {
pr_buf(out, "\tin flight: blocks %u allocated %u pin %u\n", pr_buf(out, "\tin flight: blocks %u+%u pin %u\n",
s->blocks.nr, s->nr_data, s->nr_parity,
bitmap_weight(s->blocks_allocated,
s->blocks.nr),
atomic_read(&s->pin)); atomic_read(&s->pin));
} }
mutex_unlock(&c->ec_stripe_new_lock); mutex_unlock(&c->ec_stripe_new_lock);
......
...@@ -143,11 +143,9 @@ struct ec_stripe_new { ...@@ -143,11 +143,9 @@ struct ec_stripe_new {
bool pending; bool pending;
bool have_existing_stripe; bool have_existing_stripe;
unsigned long blocks_gotten[BITS_TO_LONGS(BCH_BKEY_PTRS_MAX)];
unsigned long blocks_allocated[BITS_TO_LONGS(BCH_BKEY_PTRS_MAX)]; unsigned long blocks_allocated[BITS_TO_LONGS(BCH_BKEY_PTRS_MAX)];
open_bucket_idx_t blocks[BCH_BKEY_PTRS_MAX];
struct open_buckets blocks;
u8 data_block_idx[BCH_BKEY_PTRS_MAX];
struct open_buckets parity;
struct disk_reservation res; struct disk_reservation res;
struct keylist keys; struct keylist keys;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment