Commit ba6dd1dd authored by Kent Overstreet's avatar Kent Overstreet Committed by Kent Overstreet

bcachefs: Improve stripe triggers/heap code

Soon we'll be able to modify existing stripes - replacing empty blocks
with new blocks and new p/q blocks. This patch updates the trigger code
to handle pointers changing in an existing stripe; also, it
significantly improves how the stripes heap works, which means we can
get rid of the stripe creation/deletion lock.
Signed-off-by: default avatarKent Overstreet <kent.overstreet@gmail.com>
Signed-off-by: default avatarKent Overstreet <kent.overstreet@linux.dev>
parent e63534a2
...@@ -755,7 +755,6 @@ struct bch_fs { ...@@ -755,7 +755,6 @@ struct bch_fs {
/* STRIPES: */ /* STRIPES: */
GENRADIX(struct stripe) stripes[2]; GENRADIX(struct stripe) stripes[2];
struct mutex ec_stripe_create_lock;
ec_stripes_heap ec_stripes_heap; ec_stripes_heap ec_stripes_heap;
spinlock_t ec_stripes_heap_lock; spinlock_t ec_stripes_heap_lock;
......
...@@ -619,8 +619,11 @@ static int bch2_gc_done(struct bch_fs *c, ...@@ -619,8 +619,11 @@ static int bch2_gc_done(struct bch_fs *c,
copy_stripe_field(block_sectors[i], copy_stripe_field(block_sectors[i],
"block_sectors[%u]", i); "block_sectors[%u]", i);
if (dst->alive) if (dst->alive) {
spin_lock(&c->ec_stripes_heap_lock);
bch2_stripes_heap_insert(c, dst, dst_iter.pos); bch2_stripes_heap_insert(c, dst, dst_iter.pos);
spin_unlock(&c->ec_stripes_heap_lock);
}
genradix_iter_advance(&dst_iter, &c->stripes[0]); genradix_iter_advance(&dst_iter, &c->stripes[0]);
genradix_iter_advance(&src_iter, &c->stripes[1]); genradix_iter_advance(&src_iter, &c->stripes[1]);
......
...@@ -884,51 +884,46 @@ static s64 ptr_disk_sectors_delta(struct extent_ptr_decoded p, ...@@ -884,51 +884,46 @@ static s64 ptr_disk_sectors_delta(struct extent_ptr_decoded p,
} }
static void bucket_set_stripe(struct bch_fs *c, static void bucket_set_stripe(struct bch_fs *c,
const struct bch_stripe *v, const struct bch_extent_ptr *ptr,
struct bch_fs_usage *fs_usage, struct bch_fs_usage *fs_usage,
u64 journal_seq, u64 journal_seq,
unsigned flags, unsigned flags,
bool enabled) bool enabled)
{ {
bool gc = flags & BTREE_TRIGGER_GC; bool gc = flags & BTREE_TRIGGER_GC;
unsigned i; struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev);
struct bucket *g = PTR_BUCKET(ca, ptr, gc);
struct bucket_mark new, old;
for (i = 0; i < v->nr_blocks; i++) { old = bucket_cmpxchg(g, new, ({
const struct bch_extent_ptr *ptr = v->ptrs + i; new.stripe = enabled;
struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev); if (journal_seq) {
struct bucket *g = PTR_BUCKET(ca, ptr, gc); new.journal_seq_valid = 1;
struct bucket_mark new, old; new.journal_seq = journal_seq;
}
old = bucket_cmpxchg(g, new, ({ }));
new.stripe = enabled;
if (journal_seq) {
new.journal_seq_valid = 1;
new.journal_seq = journal_seq;
}
}));
bch2_dev_usage_update(c, ca, fs_usage, old, new, gc); bch2_dev_usage_update(c, ca, fs_usage, old, new, gc);
/* /*
* XXX write repair code for these, flag stripe as possibly bad * XXX write repair code for these, flag stripe as possibly bad
*/ */
if (old.gen != ptr->gen) if (old.gen != ptr->gen)
bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK, bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK,
"stripe with stale pointer"); "stripe with stale pointer");
#if 0 #if 0
/* /*
* We'd like to check for these, but these checks don't work * We'd like to check for these, but these checks don't work
* yet: * yet:
*/ */
if (old.stripe && enabled) if (old.stripe && enabled)
bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK, bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK,
"multiple stripes using same bucket"); "multiple stripes using same bucket");
if (!old.stripe && !enabled) if (!old.stripe && !enabled)
bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK, bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK,
"deleting stripe but bucket not marked as stripe bucket"); "deleting stripe but bucket not marked as stripe bucket");
#endif #endif
}
} }
static int __mark_pointer(struct bch_fs *c, struct bkey_s_c k, static int __mark_pointer(struct bch_fs *c, struct bkey_s_c k,
...@@ -1070,8 +1065,7 @@ static int bch2_mark_stripe_ptr(struct bch_fs *c, ...@@ -1070,8 +1065,7 @@ static int bch2_mark_stripe_ptr(struct bch_fs *c,
{ {
bool gc = flags & BTREE_TRIGGER_GC; bool gc = flags & BTREE_TRIGGER_GC;
struct stripe *m; struct stripe *m;
unsigned old, new; unsigned i, blocks_nonempty = 0;
int blocks_nonempty_delta;
m = genradix_ptr(&c->stripes[gc], p.idx); m = genradix_ptr(&c->stripes[gc], p.idx);
...@@ -1090,20 +1084,17 @@ static int bch2_mark_stripe_ptr(struct bch_fs *c, ...@@ -1090,20 +1084,17 @@ static int bch2_mark_stripe_ptr(struct bch_fs *c,
*nr_parity = m->nr_redundant; *nr_parity = m->nr_redundant;
*r = m->r; *r = m->r;
old = m->block_sectors[p.block];
m->block_sectors[p.block] += sectors; m->block_sectors[p.block] += sectors;
new = m->block_sectors[p.block];
blocks_nonempty_delta = (int) !!new - (int) !!old; for (i = 0; i < m->nr_blocks; i++)
if (blocks_nonempty_delta) { blocks_nonempty += m->block_sectors[i] != 0;
m->blocks_nonempty += blocks_nonempty_delta;
if (m->blocks_nonempty != blocks_nonempty) {
m->blocks_nonempty = blocks_nonempty;
if (!gc) if (!gc)
bch2_stripes_heap_update(c, m, p.idx); bch2_stripes_heap_update(c, m, p.idx);
} }
m->dirty = true;
spin_unlock(&c->ec_stripes_heap_lock); spin_unlock(&c->ec_stripes_heap_lock);
return 0; return 0;
...@@ -1207,10 +1198,11 @@ static int bch2_mark_stripe(struct bch_fs *c, ...@@ -1207,10 +1198,11 @@ static int bch2_mark_stripe(struct bch_fs *c,
if (!new_s) { if (!new_s) {
/* Deleting: */ /* Deleting: */
bucket_set_stripe(c, old_s, fs_usage, for (i = 0; i < old_s->nr_blocks; i++)
journal_seq, flags, false); bucket_set_stripe(c, old_s->ptrs + i, fs_usage,
journal_seq, flags, false);
if (!gc) { if (!gc && m->on_heap) {
spin_lock(&c->ec_stripes_heap_lock); spin_lock(&c->ec_stripes_heap_lock);
bch2_stripes_heap_del(c, m, idx); bch2_stripes_heap_del(c, m, idx);
spin_unlock(&c->ec_stripes_heap_lock); spin_unlock(&c->ec_stripes_heap_lock);
...@@ -1221,10 +1213,21 @@ static int bch2_mark_stripe(struct bch_fs *c, ...@@ -1221,10 +1213,21 @@ static int bch2_mark_stripe(struct bch_fs *c,
BUG_ON(old_s && new_s->nr_blocks != old_s->nr_blocks); BUG_ON(old_s && new_s->nr_blocks != old_s->nr_blocks);
BUG_ON(old_s && new_s->nr_redundant != old_s->nr_redundant); BUG_ON(old_s && new_s->nr_redundant != old_s->nr_redundant);
if (!old_s) for (i = 0; i < new_s->nr_blocks; i++) {
bucket_set_stripe(c, new_s, fs_usage, if (!old_s ||
journal_seq, flags, true); memcmp(new_s->ptrs + i,
old_s->ptrs + i,
sizeof(struct bch_extent_ptr))) {
if (old_s)
bucket_set_stripe(c, old_s->ptrs + i, fs_usage,
journal_seq, flags, false);
bucket_set_stripe(c, new_s->ptrs + i, fs_usage,
journal_seq, flags, true);
}
}
m->alive = true;
m->sectors = le16_to_cpu(new_s->sectors); m->sectors = le16_to_cpu(new_s->sectors);
m->algorithm = new_s->algorithm; m->algorithm = new_s->algorithm;
m->nr_blocks = new_s->nr_blocks; m->nr_blocks = new_s->nr_blocks;
...@@ -1248,8 +1251,6 @@ static int bch2_mark_stripe(struct bch_fs *c, ...@@ -1248,8 +1251,6 @@ static int bch2_mark_stripe(struct bch_fs *c,
bch2_stripes_heap_update(c, m, idx); bch2_stripes_heap_update(c, m, idx);
spin_unlock(&c->ec_stripes_heap_lock); spin_unlock(&c->ec_stripes_heap_lock);
} }
m->alive = true;
} }
return 0; return 0;
......
...@@ -607,39 +607,16 @@ static void heap_verify_backpointer(struct bch_fs *c, size_t idx) ...@@ -607,39 +607,16 @@ static void heap_verify_backpointer(struct bch_fs *c, size_t idx)
BUG_ON(h->data[m->heap_idx].idx != idx); BUG_ON(h->data[m->heap_idx].idx != idx);
} }
void bch2_stripes_heap_update(struct bch_fs *c,
struct stripe *m, size_t idx)
{
ec_stripes_heap *h = &c->ec_stripes_heap;
size_t i;
if (m->alive) {
heap_verify_backpointer(c, idx);
h->data[m->heap_idx].blocks_nonempty = m->blocks_nonempty;
i = m->heap_idx;
heap_sift_up(h, i, ec_stripes_heap_cmp,
ec_stripes_heap_set_backpointer);
heap_sift_down(h, i, ec_stripes_heap_cmp,
ec_stripes_heap_set_backpointer);
heap_verify_backpointer(c, idx);
} else {
bch2_stripes_heap_insert(c, m, idx);
}
if (stripe_idx_to_delete(c) >= 0 &&
!percpu_ref_is_dying(&c->writes))
schedule_work(&c->ec_stripe_delete_work);
}
void bch2_stripes_heap_del(struct bch_fs *c, void bch2_stripes_heap_del(struct bch_fs *c,
struct stripe *m, size_t idx) struct stripe *m, size_t idx)
{ {
if (!m->on_heap)
return;
m->on_heap = false;
heap_verify_backpointer(c, idx); heap_verify_backpointer(c, idx);
m->alive = false;
heap_del(&c->ec_stripes_heap, m->heap_idx, heap_del(&c->ec_stripes_heap, m->heap_idx,
ec_stripes_heap_cmp, ec_stripes_heap_cmp,
ec_stripes_heap_set_backpointer); ec_stripes_heap_set_backpointer);
...@@ -648,19 +625,49 @@ void bch2_stripes_heap_del(struct bch_fs *c, ...@@ -648,19 +625,49 @@ void bch2_stripes_heap_del(struct bch_fs *c,
void bch2_stripes_heap_insert(struct bch_fs *c, void bch2_stripes_heap_insert(struct bch_fs *c,
struct stripe *m, size_t idx) struct stripe *m, size_t idx)
{ {
if (m->on_heap)
return;
BUG_ON(heap_full(&c->ec_stripes_heap)); BUG_ON(heap_full(&c->ec_stripes_heap));
m->on_heap = true;
heap_add(&c->ec_stripes_heap, ((struct ec_stripe_heap_entry) { heap_add(&c->ec_stripes_heap, ((struct ec_stripe_heap_entry) {
.idx = idx, .idx = idx,
.blocks_nonempty = m->blocks_nonempty, .blocks_nonempty = m->blocks_nonempty,
}), }),
ec_stripes_heap_cmp, ec_stripes_heap_cmp,
ec_stripes_heap_set_backpointer); ec_stripes_heap_set_backpointer);
m->alive = true;
heap_verify_backpointer(c, idx); heap_verify_backpointer(c, idx);
} }
void bch2_stripes_heap_update(struct bch_fs *c,
struct stripe *m, size_t idx)
{
ec_stripes_heap *h = &c->ec_stripes_heap;
size_t i;
if (!m->on_heap)
return;
heap_verify_backpointer(c, idx);
h->data[m->heap_idx].blocks_nonempty = m->blocks_nonempty;
i = m->heap_idx;
heap_sift_up(h, i, ec_stripes_heap_cmp,
ec_stripes_heap_set_backpointer);
heap_sift_down(h, i, ec_stripes_heap_cmp,
ec_stripes_heap_set_backpointer);
heap_verify_backpointer(c, idx);
if (stripe_idx_to_delete(c) >= 0 &&
!percpu_ref_is_dying(&c->writes))
schedule_work(&c->ec_stripe_delete_work);
}
/* stripe deletion */ /* stripe deletion */
static int ec_stripe_delete(struct bch_fs *c, size_t idx) static int ec_stripe_delete(struct bch_fs *c, size_t idx)
...@@ -677,23 +684,20 @@ static void ec_stripe_delete_work(struct work_struct *work) ...@@ -677,23 +684,20 @@ static void ec_stripe_delete_work(struct work_struct *work)
container_of(work, struct bch_fs, ec_stripe_delete_work); container_of(work, struct bch_fs, ec_stripe_delete_work);
ssize_t idx; ssize_t idx;
down_read(&c->gc_lock);
mutex_lock(&c->ec_stripe_create_lock);
while (1) { while (1) {
spin_lock(&c->ec_stripes_heap_lock); spin_lock(&c->ec_stripes_heap_lock);
idx = stripe_idx_to_delete(c); idx = stripe_idx_to_delete(c);
spin_unlock(&c->ec_stripes_heap_lock); if (idx < 0) {
spin_unlock(&c->ec_stripes_heap_lock);
if (idx < 0)
break; break;
}
bch2_stripes_heap_del(c, genradix_ptr(&c->stripes[0], idx), idx);
spin_unlock(&c->ec_stripes_heap_lock);
if (ec_stripe_delete(c, idx)) if (ec_stripe_delete(c, idx))
break; break;
} }
mutex_unlock(&c->ec_stripe_create_lock);
up_read(&c->gc_lock);
} }
/* stripe creation: */ /* stripe creation: */
...@@ -846,6 +850,7 @@ static void ec_stripe_create(struct ec_stripe_new *s) ...@@ -846,6 +850,7 @@ static void ec_stripe_create(struct ec_stripe_new *s)
struct bch_fs *c = s->c; struct bch_fs *c = s->c;
struct open_bucket *ob; struct open_bucket *ob;
struct bkey_i *k; struct bkey_i *k;
struct stripe *m;
struct bch_stripe *v = &s->stripe.key.v; struct bch_stripe *v = &s->stripe.key.v;
unsigned i, nr_data = v->nr_blocks - v->nr_redundant; unsigned i, nr_data = v->nr_blocks - v->nr_redundant;
struct closure cl; struct closure cl;
...@@ -882,12 +887,10 @@ static void ec_stripe_create(struct ec_stripe_new *s) ...@@ -882,12 +887,10 @@ static void ec_stripe_create(struct ec_stripe_new *s)
goto err_put_writes; goto err_put_writes;
} }
mutex_lock(&c->ec_stripe_create_lock);
ret = ec_stripe_bkey_insert(c, &s->stripe.key); ret = ec_stripe_bkey_insert(c, &s->stripe.key);
if (ret) { if (ret) {
bch_err(c, "error creating stripe: error creating stripe key"); bch_err(c, "error creating stripe: error creating stripe key");
goto err_unlock; goto err_put_writes;
} }
for_each_keylist_key(&s->keys, k) { for_each_keylist_key(&s->keys, k) {
...@@ -896,8 +899,11 @@ static void ec_stripe_create(struct ec_stripe_new *s) ...@@ -896,8 +899,11 @@ static void ec_stripe_create(struct ec_stripe_new *s)
break; break;
} }
err_unlock: spin_lock(&c->ec_stripes_heap_lock);
mutex_unlock(&c->ec_stripe_create_lock); m = genradix_ptr(&c->stripes[0], s->stripe.key.k.p.offset);
BUG_ON(m->on_heap);
bch2_stripes_heap_insert(c, m, s->stripe.key.k.p.offset);
spin_unlock(&c->ec_stripes_heap_lock);
err_put_writes: err_put_writes:
percpu_ref_put(&c->writes); percpu_ref_put(&c->writes);
err: err:
...@@ -1280,11 +1286,21 @@ static int bch2_stripes_read_fn(struct bch_fs *c, enum btree_id id, ...@@ -1280,11 +1286,21 @@ static int bch2_stripes_read_fn(struct bch_fs *c, enum btree_id id,
{ {
int ret = 0; int ret = 0;
if (k.k->type == KEY_TYPE_stripe) if (k.k->type == KEY_TYPE_stripe) {
struct stripe *m;
ret = __ec_stripe_mem_alloc(c, k.k->p.offset, GFP_KERNEL) ?: ret = __ec_stripe_mem_alloc(c, k.k->p.offset, GFP_KERNEL) ?:
bch2_mark_key(c, k, 0, 0, NULL, 0, bch2_mark_key(c, k, 0, 0, NULL, 0,
BTREE_TRIGGER_ALLOC_READ| BTREE_TRIGGER_ALLOC_READ|
BTREE_TRIGGER_NOATOMIC); BTREE_TRIGGER_NOATOMIC);
if (ret)
return ret;
spin_lock(&c->ec_stripes_heap_lock);
m = genradix_ptr(&c->stripes[0], k.k->p.offset);
bch2_stripes_heap_insert(c, m, k.k->p.offset);
spin_unlock(&c->ec_stripes_heap_lock);
}
return ret; return ret;
} }
...@@ -1335,6 +1351,24 @@ int bch2_ec_mem_alloc(struct bch_fs *c, bool gc) ...@@ -1335,6 +1351,24 @@ int bch2_ec_mem_alloc(struct bch_fs *c, bool gc)
return 0; return 0;
} }
void bch2_stripes_heap_to_text(struct printbuf *out, struct bch_fs *c)
{
ec_stripes_heap *h = &c->ec_stripes_heap;
struct stripe *m;
size_t i;
spin_lock(&c->ec_stripes_heap_lock);
for (i = 0; i < min(h->used, 20UL); i++) {
m = genradix_ptr(&c->stripes[0], h->data[i].idx);
pr_buf(out, "%zu %u/%u+%u\n", h->data[i].idx,
h->data[i].blocks_nonempty,
m->nr_blocks - m->nr_redundant,
m->nr_redundant);
}
spin_unlock(&c->ec_stripes_heap_lock);
}
void bch2_fs_ec_exit(struct bch_fs *c) void bch2_fs_ec_exit(struct bch_fs *c)
{ {
struct ec_stripe_head *h; struct ec_stripe_head *h;
......
...@@ -157,6 +157,8 @@ int bch2_stripes_write(struct bch_fs *, unsigned, bool *); ...@@ -157,6 +157,8 @@ int bch2_stripes_write(struct bch_fs *, unsigned, bool *);
int bch2_ec_mem_alloc(struct bch_fs *, bool); int bch2_ec_mem_alloc(struct bch_fs *, bool);
void bch2_stripes_heap_to_text(struct printbuf *, struct bch_fs *);
void bch2_fs_ec_exit(struct bch_fs *); void bch2_fs_ec_exit(struct bch_fs *);
int bch2_fs_ec_init(struct bch_fs *); int bch2_fs_ec_init(struct bch_fs *);
......
...@@ -22,6 +22,7 @@ struct stripe { ...@@ -22,6 +22,7 @@ struct stripe {
unsigned alive:1; unsigned alive:1;
unsigned dirty:1; unsigned dirty:1;
unsigned on_heap:1;
u8 blocks_nonempty; u8 blocks_nonempty;
u16 block_sectors[EC_STRIPE_MAX]; u16 block_sectors[EC_STRIPE_MAX];
......
...@@ -678,7 +678,6 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts) ...@@ -678,7 +678,6 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
INIT_LIST_HEAD(&c->ec_new_stripe_list); INIT_LIST_HEAD(&c->ec_new_stripe_list);
mutex_init(&c->ec_new_stripe_lock); mutex_init(&c->ec_new_stripe_lock);
mutex_init(&c->ec_stripe_create_lock);
spin_lock_init(&c->ec_stripes_heap_lock); spin_lock_init(&c->ec_stripes_heap_lock);
seqcount_init(&c->gc_pos_lock); seqcount_init(&c->gc_pos_lock);
......
...@@ -168,6 +168,7 @@ read_attribute(btree_updates); ...@@ -168,6 +168,7 @@ read_attribute(btree_updates);
read_attribute(dirty_btree_nodes); read_attribute(dirty_btree_nodes);
read_attribute(btree_key_cache); read_attribute(btree_key_cache);
read_attribute(btree_transactions); read_attribute(btree_transactions);
read_attribute(stripes_heap);
read_attribute(internal_uuid); read_attribute(internal_uuid);
...@@ -418,6 +419,13 @@ SHOW(bch2_fs) ...@@ -418,6 +419,13 @@ SHOW(bch2_fs)
return out.pos - buf; return out.pos - buf;
} }
if (attr == &sysfs_stripes_heap) {
struct printbuf out = _PBUF(buf, PAGE_SIZE);
bch2_stripes_heap_to_text(&out, c);
return out.pos - buf;
}
if (attr == &sysfs_compression_stats) if (attr == &sysfs_compression_stats)
return bch2_compression_stats(c, buf); return bch2_compression_stats(c, buf);
...@@ -583,6 +591,7 @@ struct attribute *bch2_fs_internal_files[] = { ...@@ -583,6 +591,7 @@ struct attribute *bch2_fs_internal_files[] = {
&sysfs_dirty_btree_nodes, &sysfs_dirty_btree_nodes,
&sysfs_btree_key_cache, &sysfs_btree_key_cache,
&sysfs_btree_transactions, &sysfs_btree_transactions,
&sysfs_stripes_heap,
&sysfs_read_realloc_races, &sysfs_read_realloc_races,
&sysfs_extent_migrate_done, &sysfs_extent_migrate_done,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment