Commit 81d8599e authored by Kent Overstreet, committed by Kent Overstreet

bcachefs: Don't read existing stripes synchronously in write path

Previously, in the stripe creation path, when reusing an existing stripe
we'd read the existing stripe synchronously - ouch.

Now, we allocate two stripe bufs if we're using an existing stripe, so
that we can do the read asynchronously - and, we read the full stripe so
that we can run recovery, if necessary.
Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
parent 35a067b4
...@@ -871,7 +871,7 @@ struct bch_stripe { ...@@ -871,7 +871,7 @@ struct bch_stripe {
__u8 csum_type; __u8 csum_type;
__u8 pad; __u8 pad;
struct bch_extent_ptr ptrs[0]; struct bch_extent_ptr ptrs[];
} __attribute__((packed, aligned(8))); } __attribute__((packed, aligned(8)));
/* Reflink: */ /* Reflink: */
......
...@@ -200,6 +200,36 @@ static bool extent_has_stripe_ptr(struct bkey_s_c k, u64 idx) ...@@ -200,6 +200,36 @@ static bool extent_has_stripe_ptr(struct bkey_s_c k, u64 idx)
return false; return false;
} }
/* Stripe bufs: */
/*
 * Release the per-block data buffers of @stripe.
 *
 * Walks the first stripe->key.v.nr_blocks entries of stripe->data[], hands
 * each one to kvpfree() with a length of stripe->size << 9, and resets the
 * slot to NULL afterwards.
 *
 * NOTE(review): size looks like a count of 512-byte sectors (hence << 9) -
 * confirm. Slots that were never allocated are passed to kvpfree() as-is, so
 * this presumably relies on kvpfree() tolerating NULL - confirm.
 */
static void ec_stripe_buf_free(struct ec_stripe_buf *stripe)
{
	unsigned blk = 0;

	while (blk < stripe->key.v.nr_blocks) {
		kvpfree(stripe->data[blk], stripe->size << 9);
		/* Clear the slot so a stale pointer is never left behind. */
		stripe->data[blk] = NULL;
		blk++;
	}
}
/*
 * Allocate one data buffer per block for @stripe.
 *
 * Every bit of stripe->valid is set first, then each of the
 * stripe->key.v.nr_blocks slots in stripe->data[] gets a buffer of
 * stripe->size << 9 bytes from kvpmalloc(GFP_KERNEL).
 *
 * Returns 0 on success. If any allocation fails, ec_stripe_buf_free() is
 * called on @stripe and -ENOMEM is returned.
 *
 * NOTE(review): on failure the free helper visits all nr_blocks slots,
 * including ones not reached yet, so data[] presumably must start out
 * zeroed - confirm against callers.
 */
static int ec_stripe_buf_alloc(struct ec_stripe_buf *stripe)
{
	unsigned blk;

	memset(stripe->valid, 0xFF, sizeof(stripe->valid));

	for (blk = 0; blk < stripe->key.v.nr_blocks; blk++) {
		stripe->data[blk] = kvpmalloc(stripe->size << 9, GFP_KERNEL);
		if (stripe->data[blk])
			continue;

		/* Allocation failed: undo whatever has been set up so far. */
		ec_stripe_buf_free(stripe);
		return -ENOMEM;
	}

	return 0;
}
/* Checksumming: */ /* Checksumming: */
static void ec_generate_checksums(struct ec_stripe_buf *buf) static void ec_generate_checksums(struct ec_stripe_buf *buf)
...@@ -287,14 +317,10 @@ static void ec_generate_ec(struct ec_stripe_buf *buf) ...@@ -287,14 +317,10 @@ static void ec_generate_ec(struct ec_stripe_buf *buf)
raid_gen(nr_data, v->nr_redundant, bytes, buf->data); raid_gen(nr_data, v->nr_redundant, bytes, buf->data);
} }
/*
 * Count how many of the first @nr blocks of @buf are not marked valid:
 * @nr minus the number of bits set in buf->valid within that range.
 */
static unsigned __ec_nr_failed(struct ec_stripe_buf *buf, unsigned nr)
{
	unsigned nr_valid = bitmap_weight(buf->valid, nr);

	return nr - nr_valid;
}
static unsigned ec_nr_failed(struct ec_stripe_buf *buf) static unsigned ec_nr_failed(struct ec_stripe_buf *buf)
{ {
return __ec_nr_failed(buf, buf->key.v.nr_blocks); return buf->key.v.nr_blocks -
bitmap_weight(buf->valid, buf->key.v.nr_blocks);
} }
static int ec_do_recov(struct bch_fs *c, struct ec_stripe_buf *buf) static int ec_do_recov(struct bch_fs *c, struct ec_stripe_buf *buf)
...@@ -822,14 +848,13 @@ static void ec_stripe_create(struct ec_stripe_new *s) ...@@ -822,14 +848,13 @@ static void ec_stripe_create(struct ec_stripe_new *s)
struct open_bucket *ob; struct open_bucket *ob;
struct bkey_i *k; struct bkey_i *k;
struct stripe *m; struct stripe *m;
struct bch_stripe *v = &s->stripe.key.v; struct bch_stripe *v = &s->new_stripe.key.v;
unsigned i, nr_data = v->nr_blocks - v->nr_redundant; unsigned i, nr_data = v->nr_blocks - v->nr_redundant;
struct closure cl;
int ret; int ret;
BUG_ON(s->h->s == s); BUG_ON(s->h->s == s);
closure_init_stack(&cl); closure_sync(&s->iodone);
if (s->err) { if (s->err) {
if (s->err != -EROFS) if (s->err != -EROFS)
...@@ -837,6 +862,22 @@ static void ec_stripe_create(struct ec_stripe_new *s) ...@@ -837,6 +862,22 @@ static void ec_stripe_create(struct ec_stripe_new *s)
goto err; goto err;
} }
if (s->have_existing_stripe) {
ec_validate_checksums(c, &s->existing_stripe);
if (ec_do_recov(c, &s->existing_stripe)) {
bch_err(c, "error creating stripe: error reading existing stripe");
goto err;
}
for (i = 0; i < nr_data; i++)
if (stripe_blockcount_get(&s->existing_stripe.key.v, i))
swap(s->new_stripe.data[i],
s->existing_stripe.data[i]);
ec_stripe_buf_free(&s->existing_stripe);
}
BUG_ON(!s->allocated); BUG_ON(!s->allocated);
if (!percpu_ref_tryget(&c->writes)) if (!percpu_ref_tryget(&c->writes))
...@@ -845,33 +886,31 @@ static void ec_stripe_create(struct ec_stripe_new *s) ...@@ -845,33 +886,31 @@ static void ec_stripe_create(struct ec_stripe_new *s)
BUG_ON(bitmap_weight(s->blocks_allocated, BUG_ON(bitmap_weight(s->blocks_allocated,
s->blocks.nr) != s->blocks.nr); s->blocks.nr) != s->blocks.nr);
ec_generate_ec(&s->stripe); ec_generate_ec(&s->new_stripe);
ec_generate_checksums(&s->stripe); ec_generate_checksums(&s->new_stripe);
/* write p/q: */ /* write p/q: */
for (i = nr_data; i < v->nr_blocks; i++) for (i = nr_data; i < v->nr_blocks; i++)
ec_block_io(c, &s->stripe, REQ_OP_WRITE, i, &cl); ec_block_io(c, &s->new_stripe, REQ_OP_WRITE, i, &s->iodone);
closure_sync(&s->iodone);
closure_sync(&cl);
for (i = nr_data; i < v->nr_blocks; i++) if (ec_nr_failed(&s->new_stripe)) {
if (!test_bit(i, s->stripe.valid)) {
bch_err(c, "error creating stripe: error writing redundancy buckets"); bch_err(c, "error creating stripe: error writing redundancy buckets");
goto err_put_writes; goto err_put_writes;
} }
ret = s->existing_stripe ret = s->have_existing_stripe
? bch2_btree_insert(c, BTREE_ID_EC, &s->stripe.key.k_i, ? bch2_btree_insert(c, BTREE_ID_EC, &s->new_stripe.key.k_i,
&s->res, NULL, BTREE_INSERT_NOFAIL) &s->res, NULL, BTREE_INSERT_NOFAIL)
: ec_stripe_bkey_insert(c, s, &s->stripe.key); : ec_stripe_bkey_insert(c, s, &s->new_stripe.key);
if (ret) { if (ret) {
bch_err(c, "error creating stripe: error creating stripe key"); bch_err(c, "error creating stripe: error creating stripe key");
goto err_put_writes; goto err_put_writes;
} }
for_each_keylist_key(&s->keys, k) { for_each_keylist_key(&s->keys, k) {
ret = ec_stripe_update_ptrs(c, &s->stripe, &k->k); ret = ec_stripe_update_ptrs(c, &s->new_stripe, &k->k);
if (ret) { if (ret) {
bch_err(c, "error creating stripe: error %i updating pointers", ret); bch_err(c, "error creating stripe: error %i updating pointers", ret);
break; break;
...@@ -879,14 +918,14 @@ static void ec_stripe_create(struct ec_stripe_new *s) ...@@ -879,14 +918,14 @@ static void ec_stripe_create(struct ec_stripe_new *s)
} }
spin_lock(&c->ec_stripes_heap_lock); spin_lock(&c->ec_stripes_heap_lock);
m = genradix_ptr(&c->stripes[0], s->stripe.key.k.p.offset); m = genradix_ptr(&c->stripes[0], s->new_stripe.key.k.p.offset);
#if 0 #if 0
pr_info("created a %s stripe %llu", pr_info("created a %s stripe %llu",
s->existing_stripe ? "existing" : "new", s->have_existing_stripe ? "existing" : "new",
s->stripe.key.k.p.offset); s->stripe.key.k.p.offset);
#endif #endif
BUG_ON(m->on_heap); BUG_ON(m->on_heap);
bch2_stripes_heap_insert(c, m, s->stripe.key.k.p.offset); bch2_stripes_heap_insert(c, m, s->new_stripe.key.k.p.offset);
spin_unlock(&c->ec_stripes_heap_lock); spin_unlock(&c->ec_stripes_heap_lock);
err_put_writes: err_put_writes:
percpu_ref_put(&c->writes); percpu_ref_put(&c->writes);
...@@ -902,8 +941,9 @@ static void ec_stripe_create(struct ec_stripe_new *s) ...@@ -902,8 +941,9 @@ static void ec_stripe_create(struct ec_stripe_new *s)
bch2_keylist_free(&s->keys, s->inline_keys); bch2_keylist_free(&s->keys, s->inline_keys);
for (i = 0; i < s->stripe.key.v.nr_blocks; i++) ec_stripe_buf_free(&s->existing_stripe);
kvpfree(s->stripe.data[i], s->stripe.size << 9); ec_stripe_buf_free(&s->new_stripe);
closure_debug_destroy(&s->iodone);
kfree(s); kfree(s);
} }
...@@ -980,7 +1020,7 @@ void *bch2_writepoint_ec_buf(struct bch_fs *c, struct write_point *wp) ...@@ -980,7 +1020,7 @@ void *bch2_writepoint_ec_buf(struct bch_fs *c, struct write_point *wp)
ca = bch_dev_bkey_exists(c, ob->ptr.dev); ca = bch_dev_bkey_exists(c, ob->ptr.dev);
offset = ca->mi.bucket_size - ob->sectors_free; offset = ca->mi.bucket_size - ob->sectors_free;
return ob->ec->stripe.data[ob->ec_idx] + (offset << 9); return ob->ec->new_stripe.data[ob->ec_idx] + (offset << 9);
} }
void bch2_ec_add_backpointer(struct bch_fs *c, struct write_point *wp, void bch2_ec_add_backpointer(struct bch_fs *c, struct write_point *wp,
...@@ -1087,7 +1127,6 @@ static void ec_stripe_key_init(struct bch_fs *c, ...@@ -1087,7 +1127,6 @@ static void ec_stripe_key_init(struct bch_fs *c,
static int ec_new_stripe_alloc(struct bch_fs *c, struct ec_stripe_head *h) static int ec_new_stripe_alloc(struct bch_fs *c, struct ec_stripe_head *h)
{ {
struct ec_stripe_new *s; struct ec_stripe_new *s;
unsigned i;
lockdep_assert_held(&h->lock); lockdep_assert_held(&h->lock);
...@@ -1096,6 +1135,7 @@ static int ec_new_stripe_alloc(struct bch_fs *c, struct ec_stripe_head *h) ...@@ -1096,6 +1135,7 @@ static int ec_new_stripe_alloc(struct bch_fs *c, struct ec_stripe_head *h)
return -ENOMEM; return -ENOMEM;
mutex_init(&s->lock); mutex_init(&s->lock);
closure_init(&s->iodone, NULL);
atomic_set(&s->pin, 1); atomic_set(&s->pin, 1);
s->c = c; s->c = c;
s->h = h; s->h = h;
...@@ -1105,27 +1145,14 @@ static int ec_new_stripe_alloc(struct bch_fs *c, struct ec_stripe_head *h) ...@@ -1105,27 +1145,14 @@ static int ec_new_stripe_alloc(struct bch_fs *c, struct ec_stripe_head *h)
bch2_keylist_init(&s->keys, s->inline_keys); bch2_keylist_init(&s->keys, s->inline_keys);
s->stripe.offset = 0; s->new_stripe.offset = 0;
s->stripe.size = h->blocksize; s->new_stripe.size = h->blocksize;
memset(s->stripe.valid, 0xFF, sizeof(s->stripe.valid));
ec_stripe_key_init(c, &s->stripe.key, s->nr_data, ec_stripe_key_init(c, &s->new_stripe.key, s->nr_data,
s->nr_parity, h->blocksize); s->nr_parity, h->blocksize);
for (i = 0; i < s->stripe.key.v.nr_blocks; i++) {
s->stripe.data[i] = kvpmalloc(s->stripe.size << 9, GFP_KERNEL);
if (!s->stripe.data[i])
goto err;
}
h->s = s; h->s = s;
return 0; return 0;
err:
for (i = 0; i < s->stripe.key.v.nr_blocks; i++)
kvpfree(s->stripe.data[i], s->stripe.size << 9);
kfree(s);
return -ENOMEM;
} }
static struct ec_stripe_head * static struct ec_stripe_head *
...@@ -1217,7 +1244,7 @@ static int new_stripe_alloc_buckets(struct bch_fs *c, struct ec_stripe_head *h) ...@@ -1217,7 +1244,7 @@ static int new_stripe_alloc_buckets(struct bch_fs *c, struct ec_stripe_head *h)
devs = h->devs; devs = h->devs;
for_each_set_bit(i, h->s->blocks_allocated, BCH_BKEY_PTRS_MAX) { for_each_set_bit(i, h->s->blocks_allocated, BCH_BKEY_PTRS_MAX) {
__clear_bit(h->s->stripe.key.v.ptrs[i].dev, devs.d); __clear_bit(h->s->new_stripe.key.v.ptrs[i].dev, devs.d);
--nr_data; --nr_data;
} }
...@@ -1327,51 +1354,70 @@ struct ec_stripe_head *bch2_ec_stripe_head_get(struct bch_fs *c, ...@@ -1327,51 +1354,70 @@ struct ec_stripe_head *bch2_ec_stripe_head_get(struct bch_fs *c,
unsigned algo, unsigned algo,
unsigned redundancy) unsigned redundancy)
{ {
struct closure cl;
struct ec_stripe_head *h; struct ec_stripe_head *h;
struct open_bucket *ob; struct open_bucket *ob;
unsigned i, data_idx = 0; unsigned i, data_idx = 0;
s64 idx; s64 idx;
int ret; int ret;
closure_init_stack(&cl);
h = __bch2_ec_stripe_head_get(c, target, algo, redundancy); h = __bch2_ec_stripe_head_get(c, target, algo, redundancy);
if (!h) if (!h) {
bch_err(c, "no stripe head");
return NULL; return NULL;
}
if (!h->s) { if (!h->s) {
if (ec_new_stripe_alloc(c, h)) { if (ec_new_stripe_alloc(c, h)) {
bch2_ec_stripe_head_put(c, h); bch2_ec_stripe_head_put(c, h);
bch_err(c, "failed to allocate new stripe");
return NULL; return NULL;
} }
idx = get_existing_stripe(c, target, algo, redundancy); idx = get_existing_stripe(c, target, algo, redundancy);
if (idx >= 0) { if (idx >= 0) {
h->s->existing_stripe = true; h->s->have_existing_stripe = true;
h->s->existing_stripe_idx = idx; ret = get_stripe_key(c, idx, &h->s->existing_stripe);
if (get_stripe_key(c, idx, &h->s->stripe)) { if (ret) {
/* btree error */ bch2_fs_fatal_error(c, "error reading stripe key: %i", ret);
bch2_ec_stripe_head_put(c, h);
return NULL;
}
if (ec_stripe_buf_alloc(&h->s->existing_stripe)) {
/*
* this is a problem: we have deleted from the
* stripes heap already
*/
BUG(); BUG();
} }
for (i = 0; i < h->s->stripe.key.v.nr_blocks; i++) for (i = 0; i < h->s->existing_stripe.key.v.nr_blocks; i++) {
if (stripe_blockcount_get(&h->s->stripe.key.v, i)) { if (stripe_blockcount_get(&h->s->existing_stripe.key.v, i))
__set_bit(i, h->s->blocks_allocated); __set_bit(i, h->s->blocks_allocated);
ec_block_io(c, &h->s->stripe, READ, i, &cl);
ec_block_io(c, &h->s->existing_stripe, READ, i, &h->s->iodone);
} }
bkey_copy(&h->s->new_stripe.key.k_i,
&h->s->existing_stripe.key.k_i);
}
if (ec_stripe_buf_alloc(&h->s->new_stripe)) {
BUG();
} }
} }
if (!h->s->allocated) { if (!h->s->allocated) {
if (!h->s->existing_stripe && if (!h->s->have_existing_stripe &&
!h->s->res.sectors) { !h->s->res.sectors) {
ret = bch2_disk_reservation_get(c, &h->s->res, ret = bch2_disk_reservation_get(c, &h->s->res,
h->blocksize, h->blocksize,
h->s->nr_parity, 0); h->s->nr_parity, 0);
if (ret) { if (ret) {
/* What should we do here? */ /*
bch_err(c, "unable to create new stripe: %i", ret); * This means we need to wait for copygc to
* empty out buckets from existing stripes:
*/
bch2_ec_stripe_head_put(c, h); bch2_ec_stripe_head_put(c, h);
h = NULL; h = NULL;
goto out; goto out;
...@@ -1391,19 +1437,18 @@ struct ec_stripe_head *bch2_ec_stripe_head_get(struct bch_fs *c, ...@@ -1391,19 +1437,18 @@ struct ec_stripe_head *bch2_ec_stripe_head_get(struct bch_fs *c,
h->s->nr_data, data_idx); h->s->nr_data, data_idx);
BUG_ON(data_idx >= h->s->nr_data); BUG_ON(data_idx >= h->s->nr_data);
h->s->stripe.key.v.ptrs[data_idx] = ob->ptr; h->s->new_stripe.key.v.ptrs[data_idx] = ob->ptr;
h->s->data_block_idx[i] = data_idx; h->s->data_block_idx[i] = data_idx;
data_idx++; data_idx++;
} }
open_bucket_for_each(c, &h->s->parity, ob, i) open_bucket_for_each(c, &h->s->parity, ob, i)
h->s->stripe.key.v.ptrs[h->s->nr_data + i] = ob->ptr; h->s->new_stripe.key.v.ptrs[h->s->nr_data + i] = ob->ptr;
//pr_info("new stripe, blocks_allocated %lx", h->s->blocks_allocated[0]); //pr_info("new stripe, blocks_allocated %lx", h->s->blocks_allocated[0]);
h->s->allocated = true; h->s->allocated = true;
} }
out: out:
closure_sync(&cl);
return h; return h;
} }
......
...@@ -88,6 +88,7 @@ struct ec_stripe_new { ...@@ -88,6 +88,7 @@ struct ec_stripe_new {
struct ec_stripe_head *h; struct ec_stripe_head *h;
struct mutex lock; struct mutex lock;
struct list_head list; struct list_head list;
struct closure iodone;
/* counts in flight writes, stripe is created when pin == 0 */ /* counts in flight writes, stripe is created when pin == 0 */
atomic_t pin; atomic_t pin;
...@@ -98,8 +99,7 @@ struct ec_stripe_new { ...@@ -98,8 +99,7 @@ struct ec_stripe_new {
u8 nr_parity; u8 nr_parity;
bool allocated; bool allocated;
bool pending; bool pending;
bool existing_stripe; bool have_existing_stripe;
u64 existing_stripe_idx;
unsigned long blocks_allocated[BITS_TO_LONGS(BCH_BKEY_PTRS_MAX)]; unsigned long blocks_allocated[BITS_TO_LONGS(BCH_BKEY_PTRS_MAX)];
...@@ -111,7 +111,8 @@ struct ec_stripe_new { ...@@ -111,7 +111,8 @@ struct ec_stripe_new {
struct keylist keys; struct keylist keys;
u64 inline_keys[BKEY_U64s * 8]; u64 inline_keys[BKEY_U64s * 8];
struct ec_stripe_buf stripe; struct ec_stripe_buf new_stripe;
struct ec_stripe_buf existing_stripe;
}; };
struct ec_stripe_head { struct ec_stripe_head {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment