Commit f6b94a3b authored by Kent Overstreet's avatar Kent Overstreet Committed by Kent Overstreet

bcachefs: Refactor stripe creation

Prep work for the patch to update existing stripes with new data blocks.
This moves allocating new stripes into ec.c, and also sets up the data
structures so that we can handle allocating only some of the blocks in a
stripe.
Signed-off-by: default avatarKent Overstreet <kent.overstreet@gmail.com>
Signed-off-by: default avatarKent Overstreet <kent.overstreet@linux.dev>
parent 703e2a43
...@@ -344,10 +344,9 @@ struct dev_alloc_list bch2_dev_alloc_list(struct bch_fs *c, ...@@ -344,10 +344,9 @@ struct dev_alloc_list bch2_dev_alloc_list(struct bch_fs *c,
struct bch_devs_mask *devs) struct bch_devs_mask *devs)
{ {
struct dev_alloc_list ret = { .nr = 0 }; struct dev_alloc_list ret = { .nr = 0 };
struct bch_dev *ca;
unsigned i; unsigned i;
for_each_member_device_rcu(ca, c, i, devs) for_each_set_bit(i, devs->d, BCH_SB_MEMBERS_MAX)
ret.devs[ret.nr++] = i; ret.devs[ret.nr++] = i;
bubble_sort(ret.devs, ret.nr, dev_stripe_cmp); bubble_sort(ret.devs, ret.nr, dev_stripe_cmp);
...@@ -396,7 +395,7 @@ static void add_new_bucket(struct bch_fs *c, ...@@ -396,7 +395,7 @@ static void add_new_bucket(struct bch_fs *c,
ob_push(c, ptrs, ob); ob_push(c, ptrs, ob);
} }
static int bch2_bucket_alloc_set(struct bch_fs *c, int bch2_bucket_alloc_set(struct bch_fs *c,
struct open_buckets *ptrs, struct open_buckets *ptrs,
struct dev_stripe_state *stripe, struct dev_stripe_state *stripe,
struct bch_devs_mask *devs_may_alloc, struct bch_devs_mask *devs_may_alloc,
...@@ -455,74 +454,6 @@ static int bch2_bucket_alloc_set(struct bch_fs *c, ...@@ -455,74 +454,6 @@ static int bch2_bucket_alloc_set(struct bch_fs *c,
/* Allocate from stripes: */ /* Allocate from stripes: */
/*
* XXX: use a higher watermark for allocating open buckets here:
*/
/*
 * Allocate the open buckets backing a new stripe on head h: up to nr_data
 * data buckets and h->redundancy parity buckets, each on a distinct device
 * from h->devs.  On success hands the filled head to
 * bch2_ec_stripe_new_alloc(); returns -1 if bucket allocation failed.
 */
static int ec_stripe_alloc(struct bch_fs *c, struct ec_stripe_head *h)
{
struct bch_devs_mask devs;
struct open_bucket *ob;
unsigned i, nr_have = 0, nr_data =
min_t(unsigned, h->nr_active_devs,
EC_STRIPE_MAX) - h->redundancy;
bool have_cache = true;
int ret = 0;
BUG_ON(h->blocks.nr > nr_data);
BUG_ON(h->parity.nr > h->redundancy);
/* Local copy of the device mask; prune devices already holding a bucket
 * for this stripe so every block lands on a distinct device: */
devs = h->devs;
open_bucket_for_each(c, &h->parity, ob, i)
__clear_bit(ob->ptr.dev, devs.d);
open_bucket_for_each(c, &h->blocks, ob, i)
__clear_bit(ob->ptr.dev, devs.d);
/* Lock ordering: mark_lock (read) then RCU; released in reverse below. */
percpu_down_read(&c->mark_lock);
rcu_read_lock();
/* Top up parity buckets first: */
if (h->parity.nr < h->redundancy) {
nr_have = h->parity.nr;
ret = bch2_bucket_alloc_set(c, &h->parity,
&h->parity_stripe,
&devs,
h->redundancy,
&nr_have,
&have_cache,
RESERVE_NONE,
0,
NULL);
if (ret)
goto err;
}
/* Then the data buckets: */
if (h->blocks.nr < nr_data) {
nr_have = h->blocks.nr;
ret = bch2_bucket_alloc_set(c, &h->blocks,
&h->block_stripe,
&devs,
nr_data,
&nr_have,
&have_cache,
RESERVE_NONE,
0,
NULL);
if (ret)
goto err;
}
rcu_read_unlock();
percpu_up_read(&c->mark_lock);
return bch2_ec_stripe_new_alloc(c, h);
err:
/* Drop locks in reverse acquisition order before reporting failure. */
rcu_read_unlock();
percpu_up_read(&c->mark_lock);
return -1;
}
/* /*
* if we can't allocate a new stripe because there are already too many * if we can't allocate a new stripe because there are already too many
* partially filled stripes, force allocating from an existing stripe even when * partially filled stripes, force allocating from an existing stripe even when
...@@ -555,27 +486,23 @@ static void bucket_alloc_from_stripe(struct bch_fs *c, ...@@ -555,27 +486,23 @@ static void bucket_alloc_from_stripe(struct bch_fs *c,
if (ec_open_bucket(c, ptrs)) if (ec_open_bucket(c, ptrs))
return; return;
h = bch2_ec_stripe_head_get(c, target, erasure_code, nr_replicas - 1); h = bch2_ec_stripe_head_get(c, target, 0, nr_replicas - 1);
if (!h) if (!h)
return; return;
if (!h->s && ec_stripe_alloc(c, h))
goto out_put_head;
rcu_read_lock();
devs_sorted = bch2_dev_alloc_list(c, &wp->stripe, devs_may_alloc); devs_sorted = bch2_dev_alloc_list(c, &wp->stripe, devs_may_alloc);
rcu_read_unlock();
for (i = 0; i < devs_sorted.nr; i++) for (i = 0; i < devs_sorted.nr; i++)
open_bucket_for_each(c, &h->s->blocks, ob, ec_idx) open_bucket_for_each(c, &h->s->blocks, ob, ec_idx)
if (ob->ptr.dev == devs_sorted.devs[i] && if (ob->ptr.dev == devs_sorted.devs[i] &&
!test_and_set_bit(ec_idx, h->s->blocks_allocated)) !test_and_set_bit(h->s->data_block_idx[ec_idx],
h->s->blocks_allocated))
goto got_bucket; goto got_bucket;
goto out_put_head; goto out_put_head;
got_bucket: got_bucket:
ca = bch_dev_bkey_exists(c, ob->ptr.dev); ca = bch_dev_bkey_exists(c, ob->ptr.dev);
ob->ec_idx = ec_idx; ob->ec_idx = h->s->data_block_idx[ec_idx];
ob->ec = h->s; ob->ec = h->s;
add_new_bucket(c, ptrs, devs_may_alloc, add_new_bucket(c, ptrs, devs_may_alloc,
......
...@@ -92,6 +92,11 @@ static inline void bch2_open_bucket_get(struct bch_fs *c, ...@@ -92,6 +92,11 @@ static inline void bch2_open_bucket_get(struct bch_fs *c,
} }
} }
int bch2_bucket_alloc_set(struct bch_fs *, struct open_buckets *,
struct dev_stripe_state *, struct bch_devs_mask *,
unsigned, unsigned *, bool *, enum alloc_reserve,
unsigned, struct closure *);
struct write_point *bch2_alloc_sectors_start(struct bch_fs *, struct write_point *bch2_alloc_sectors_start(struct bch_fs *,
unsigned, unsigned, unsigned, unsigned,
struct write_point_specifier, struct write_point_specifier,
......
...@@ -200,40 +200,6 @@ static bool extent_has_stripe_ptr(struct bkey_s_c k, u64 idx) ...@@ -200,40 +200,6 @@ static bool extent_has_stripe_ptr(struct bkey_s_c k, u64 idx)
return false; return false;
} }
/*
 * Initialize the bkey for a new stripe from already-allocated open buckets:
 * data block pointers first, then parity pointers, plus checksum parameters.
 * Widens the checksum granularity until the key value fits in
 * BKEY_VAL_U64s_MAX.
 */
static void ec_stripe_key_init(struct bch_fs *c,
struct bkey_i_stripe *s,
struct open_buckets *blocks,
struct open_buckets *parity,
unsigned stripe_size)
{
struct open_bucket *ob;
unsigned i, u64s;
bkey_stripe_init(&s->k_i);
s->v.sectors = cpu_to_le16(stripe_size);
s->v.algorithm = 0;
s->v.nr_blocks = parity->nr + blocks->nr;
s->v.nr_redundant = parity->nr;
s->v.csum_granularity_bits = ilog2(c->sb.encoded_extent_max);
s->v.csum_type = BCH_CSUM_CRC32C;
s->v.pad = 0;
/* Pointer layout: data blocks at [0, blocks->nr), parity after them. */
open_bucket_for_each(c, blocks, ob, i)
s->v.ptrs[i] = ob->ptr;
open_bucket_for_each(c, parity, ob, i)
s->v.ptrs[blocks->nr + i] = ob->ptr;
/* Each checksum granule costs key space; coarsen until the value fits.
 * Granularity must stay below the stripe size and not overflow the u8: */
while ((u64s = stripe_val_u64s(&s->v)) > BKEY_VAL_U64s_MAX) {
BUG_ON(1 << s->v.csum_granularity_bits >=
le16_to_cpu(s->v.sectors) ||
s->v.csum_granularity_bits == U8_MAX);
s->v.csum_granularity_bits++;
}
set_bkey_val_u64s(&s->k, u64s);
}
/* Checksumming: */ /* Checksumming: */
static void ec_generate_checksums(struct ec_stripe_buf *buf) static void ec_generate_checksums(struct ec_stripe_buf *buf)
...@@ -866,6 +832,8 @@ static void ec_stripe_create(struct ec_stripe_new *s) ...@@ -866,6 +832,8 @@ static void ec_stripe_create(struct ec_stripe_new *s)
goto err; goto err;
} }
BUG_ON(!s->allocated);
if (!percpu_ref_tryget(&c->writes)) if (!percpu_ref_tryget(&c->writes))
goto err; goto err;
...@@ -953,6 +921,8 @@ static void ec_stripe_set_pending(struct bch_fs *c, struct ec_stripe_head *h) ...@@ -953,6 +921,8 @@ static void ec_stripe_set_pending(struct bch_fs *c, struct ec_stripe_head *h)
{ {
struct ec_stripe_new *s = h->s; struct ec_stripe_new *s = h->s;
BUG_ON(!s->allocated && !s->err);
h->s = NULL; h->s = NULL;
s->pending = true; s->pending = true;
...@@ -1063,14 +1033,38 @@ static unsigned pick_blocksize(struct bch_fs *c, ...@@ -1063,14 +1033,38 @@ static unsigned pick_blocksize(struct bch_fs *c,
return best.size; return best.size;
} }
int bch2_ec_stripe_new_alloc(struct bch_fs *c, struct ec_stripe_head *h) static void ec_stripe_key_init(struct bch_fs *c,
struct bkey_i_stripe *s,
unsigned nr_data,
unsigned nr_parity,
unsigned stripe_size)
{
unsigned u64s;
bkey_stripe_init(&s->k_i);
s->v.sectors = cpu_to_le16(stripe_size);
s->v.algorithm = 0;
s->v.nr_blocks = nr_data + nr_parity;
s->v.nr_redundant = nr_parity;
s->v.csum_granularity_bits = ilog2(c->sb.encoded_extent_max);
s->v.csum_type = BCH_CSUM_CRC32C;
s->v.pad = 0;
while ((u64s = stripe_val_u64s(&s->v)) > BKEY_VAL_U64s_MAX) {
BUG_ON(1 << s->v.csum_granularity_bits >=
le16_to_cpu(s->v.sectors) ||
s->v.csum_granularity_bits == U8_MAX);
s->v.csum_granularity_bits++;
}
set_bkey_val_u64s(&s->k, u64s);
}
static int ec_new_stripe_alloc(struct bch_fs *c, struct ec_stripe_head *h)
{ {
struct ec_stripe_new *s; struct ec_stripe_new *s;
unsigned i; unsigned i;
BUG_ON(h->parity.nr != h->redundancy);
BUG_ON(!h->blocks.nr);
BUG_ON(h->parity.nr + h->blocks.nr > EC_STRIPE_MAX);
lockdep_assert_held(&h->lock); lockdep_assert_held(&h->lock);
s = kzalloc(sizeof(*s), GFP_KERNEL); s = kzalloc(sizeof(*s), GFP_KERNEL);
...@@ -1081,11 +1075,9 @@ int bch2_ec_stripe_new_alloc(struct bch_fs *c, struct ec_stripe_head *h) ...@@ -1081,11 +1075,9 @@ int bch2_ec_stripe_new_alloc(struct bch_fs *c, struct ec_stripe_head *h)
atomic_set(&s->pin, 1); atomic_set(&s->pin, 1);
s->c = c; s->c = c;
s->h = h; s->h = h;
s->blocks = h->blocks; s->nr_data = min_t(unsigned, h->nr_active_devs,
s->parity = h->parity; EC_STRIPE_MAX) - h->redundancy;
s->nr_parity = h->redundancy;
memset(&h->blocks, 0, sizeof(h->blocks));
memset(&h->parity, 0, sizeof(h->parity));
bch2_keylist_init(&s->keys, s->inline_keys); bch2_keylist_init(&s->keys, s->inline_keys);
...@@ -1093,9 +1085,8 @@ int bch2_ec_stripe_new_alloc(struct bch_fs *c, struct ec_stripe_head *h) ...@@ -1093,9 +1085,8 @@ int bch2_ec_stripe_new_alloc(struct bch_fs *c, struct ec_stripe_head *h)
s->stripe.size = h->blocksize; s->stripe.size = h->blocksize;
memset(s->stripe.valid, 0xFF, sizeof(s->stripe.valid)); memset(s->stripe.valid, 0xFF, sizeof(s->stripe.valid));
ec_stripe_key_init(c, &s->stripe.key, ec_stripe_key_init(c, &s->stripe.key, s->nr_data,
&s->blocks, &s->parity, s->nr_parity, h->blocksize);
h->blocksize);
for (i = 0; i < s->stripe.key.v.nr_blocks; i++) { for (i = 0; i < s->stripe.key.v.nr_blocks; i++) {
s->stripe.data[i] = kvpmalloc(s->stripe.size << 9, GFP_KERNEL); s->stripe.data[i] = kvpmalloc(s->stripe.size << 9, GFP_KERNEL);
...@@ -1153,6 +1144,7 @@ ec_new_stripe_head_alloc(struct bch_fs *c, unsigned target, ...@@ -1153,6 +1144,7 @@ ec_new_stripe_head_alloc(struct bch_fs *c, unsigned target,
void bch2_ec_stripe_head_put(struct bch_fs *c, struct ec_stripe_head *h) void bch2_ec_stripe_head_put(struct bch_fs *c, struct ec_stripe_head *h)
{ {
if (h->s && if (h->s &&
h->s->allocated &&
bitmap_weight(h->s->blocks_allocated, bitmap_weight(h->s->blocks_allocated,
h->s->blocks.nr) == h->s->blocks.nr) h->s->blocks.nr) == h->s->blocks.nr)
ec_stripe_set_pending(c, h); ec_stripe_set_pending(c, h);
...@@ -1160,7 +1152,7 @@ void bch2_ec_stripe_head_put(struct bch_fs *c, struct ec_stripe_head *h) ...@@ -1160,7 +1152,7 @@ void bch2_ec_stripe_head_put(struct bch_fs *c, struct ec_stripe_head *h)
mutex_unlock(&h->lock); mutex_unlock(&h->lock);
} }
struct ec_stripe_head *bch2_ec_stripe_head_get(struct bch_fs *c, struct ec_stripe_head *__bch2_ec_stripe_head_get(struct bch_fs *c,
unsigned target, unsigned target,
unsigned algo, unsigned algo,
unsigned redundancy) unsigned redundancy)
...@@ -1185,6 +1177,122 @@ struct ec_stripe_head *bch2_ec_stripe_head_get(struct bch_fs *c, ...@@ -1185,6 +1177,122 @@ struct ec_stripe_head *bch2_ec_stripe_head_get(struct bch_fs *c,
return h; return h;
} }
/*
* XXX: use a higher watermark for allocating open buckets here:
*/
/*
 * Allocate open buckets for the not-yet-allocated blocks of h->s: up to
 * nr_data data buckets and h->redundancy parity buckets, each on a distinct
 * device from h->devs.  Devices already backing an allocated block (per
 * h->s->blocks_allocated) or a held open bucket are excluded.
 * Returns 0 on success, or the error from bch2_bucket_alloc_set().
 */
static int new_stripe_alloc_buckets(struct bch_fs *c, struct ec_stripe_head *h)
{
struct bch_devs_mask devs;
struct open_bucket *ob;
unsigned i, nr_have, nr_data =
min_t(unsigned, h->nr_active_devs,
EC_STRIPE_MAX) - h->redundancy;
bool have_cache = true;
int ret = 0;
/* Local copy of the device mask; prune devices already in use: */
devs = h->devs;
/* Blocks already allocated keep their device and reduce how many data
 * buckets we still need: */
for_each_set_bit(i, h->s->blocks_allocated, EC_STRIPE_MAX) {
__clear_bit(h->s->stripe.key.v.ptrs[i].dev, devs.d);
--nr_data;
}
BUG_ON(h->s->blocks.nr > nr_data);
BUG_ON(h->s->parity.nr > h->redundancy);
/* Open buckets we already hold also pin their devices: */
open_bucket_for_each(c, &h->s->parity, ob, i)
__clear_bit(ob->ptr.dev, devs.d);
open_bucket_for_each(c, &h->s->blocks, ob, i)
__clear_bit(ob->ptr.dev, devs.d);
/* Lock ordering: mark_lock (read) then RCU; released in reverse at err. */
percpu_down_read(&c->mark_lock);
rcu_read_lock();
/* Top up parity buckets first: */
if (h->s->parity.nr < h->redundancy) {
nr_have = h->s->parity.nr;
ret = bch2_bucket_alloc_set(c, &h->s->parity,
&h->parity_stripe,
&devs,
h->redundancy,
&nr_have,
&have_cache,
RESERVE_NONE,
0,
NULL);
if (ret)
goto err;
}
/* Then the data buckets: */
if (h->s->blocks.nr < nr_data) {
nr_have = h->s->blocks.nr;
ret = bch2_bucket_alloc_set(c, &h->s->blocks,
&h->block_stripe,
&devs,
nr_data,
&nr_have,
&have_cache,
RESERVE_NONE,
0,
NULL);
if (ret)
goto err;
}
err:
/* Success falls through here too; ret is 0 in that case. */
rcu_read_unlock();
percpu_up_read(&c->mark_lock);
return ret;
}
/*
 * Get the stripe head for (target, algo, redundancy) with a fully-allocated
 * new stripe attached: allocates the ec_stripe_new on first use, allocates
 * its buckets, and records each open bucket's pointer (and data block index)
 * in the stripe key.  Returns NULL, dropping the head ref, on failure.
 */
struct ec_stripe_head *bch2_ec_stripe_head_get(struct bch_fs *c,
unsigned target,
unsigned algo,
unsigned redundancy)
{
/* NOTE(review): cl is only initialized and synced in this function —
 * new_stripe_alloc_buckets() passes a NULL closure to the allocator —
 * presumably prep for a later patch that waits on allocation; confirm. */
struct closure cl;
struct ec_stripe_head *h;
struct open_bucket *ob;
unsigned i, data_idx = 0;
closure_init_stack(&cl);
h = __bch2_ec_stripe_head_get(c, target, algo, redundancy);
if (!h)
return NULL;
/* First user of this head: attach a new in-flight stripe. */
if (!h->s && ec_new_stripe_alloc(c, h)) {
bch2_ec_stripe_head_put(c, h);
return NULL;
}
if (!h->s->allocated) {
if (new_stripe_alloc_buckets(c, h)) {
bch2_ec_stripe_head_put(c, h);
h = NULL;
goto out;
}
/* Assign each newly allocated data bucket to the next free slot in
 * blocks_allocated, recording the mapping in data_block_idx: */
open_bucket_for_each(c, &h->s->blocks, ob, i) {
data_idx = find_next_zero_bit(h->s->blocks_allocated,
h->s->nr_data, data_idx);
BUG_ON(data_idx >= h->s->nr_data);
h->s->stripe.key.v.ptrs[data_idx] = ob->ptr;
h->s->data_block_idx[i] = data_idx;
data_idx++;
}
/* Parity pointers live after the nr_data data pointers in the key. */
open_bucket_for_each(c, &h->s->parity, ob, i)
h->s->stripe.key.v.ptrs[h->s->nr_data + i] = ob->ptr;
h->s->allocated = true;
}
out:
closure_sync(&cl);
return h;
}
void bch2_ec_stop_dev(struct bch_fs *c, struct bch_dev *ca) void bch2_ec_stop_dev(struct bch_fs *c, struct bch_dev *ca)
{ {
struct ec_stripe_head *h; struct ec_stripe_head *h;
...@@ -1195,9 +1303,6 @@ void bch2_ec_stop_dev(struct bch_fs *c, struct bch_dev *ca) ...@@ -1195,9 +1303,6 @@ void bch2_ec_stop_dev(struct bch_fs *c, struct bch_dev *ca)
list_for_each_entry(h, &c->ec_stripe_head_list, list) { list_for_each_entry(h, &c->ec_stripe_head_list, list) {
mutex_lock(&h->lock); mutex_lock(&h->lock);
bch2_open_buckets_stop_dev(c, ca, &h->blocks);
bch2_open_buckets_stop_dev(c, ca, &h->parity);
if (!h->s) if (!h->s)
goto unlock; goto unlock;
......
...@@ -92,11 +92,15 @@ struct ec_stripe_new { ...@@ -92,11 +92,15 @@ struct ec_stripe_new {
atomic_t pin; atomic_t pin;
int err; int err;
bool pending;
u8 nr_data;
u8 nr_parity;
bool allocated;
bool pending;
unsigned long blocks_allocated[BITS_TO_LONGS(EC_STRIPE_MAX)]; unsigned long blocks_allocated[BITS_TO_LONGS(EC_STRIPE_MAX)];
struct open_buckets blocks; struct open_buckets blocks;
u8 data_block_idx[EC_STRIPE_MAX];
struct open_buckets parity; struct open_buckets parity;
struct keylist keys; struct keylist keys;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment