Commit ec061b21 authored and committed by Kent Overstreet

bcachefs: btree_gc no longer uses main in-memory bucket array

This changes the btree_gc code to only use the second bucket array, the
one dedicated to GC. On completion, it compares what's in its in-memory
bucket array to the allocation information in the btree and writes it
directly, instead of updating the main in-memory bucket array and
writing that.

Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
parent 63a2edce
......@@ -39,15 +39,6 @@ static const unsigned BCH_ALLOC_V1_FIELD_BYTES[] = {
#undef x
};
struct bkey_alloc_buf {
struct bkey_i k;
struct bch_alloc_v3 v;
#define x(_name, _bits) + _bits / 8
u8 _pad[0 + BCH_ALLOC_FIELDS_V2()];
#undef x
} __attribute__((packed, aligned(8)));
/* Persistent alloc info: */
static inline u64 alloc_field_v1_get(const struct bch_alloc *a,
......@@ -254,24 +245,31 @@ struct bkey_alloc_unpacked bch2_alloc_unpack(struct bkey_s_c k)
return ret;
}
static void bch2_alloc_pack(struct bch_fs *c,
struct bkey_alloc_buf *dst,
const struct bkey_alloc_unpacked src)
struct bkey_alloc_buf *bch2_alloc_pack(struct btree_trans *trans,
const struct bkey_alloc_unpacked src)
{
bch2_alloc_pack_v3(dst, src);
struct bkey_alloc_buf *dst;
dst = bch2_trans_kmalloc(trans, sizeof(struct bkey_alloc_buf));
if (!IS_ERR(dst))
bch2_alloc_pack_v3(dst, src);
return dst;
}
int bch2_alloc_write(struct btree_trans *trans, struct btree_iter *iter,
struct bkey_alloc_unpacked *u, unsigned trigger_flags)
{
struct bkey_alloc_buf *a;
a = bch2_trans_kmalloc(trans, sizeof(struct bkey_alloc_buf));
if (IS_ERR(a))
return PTR_ERR(a);
struct bkey_alloc_buf *a = bch2_alloc_pack(trans, *u);
bch2_alloc_pack(trans->c, a, *u);
return bch2_trans_update(trans, iter, &a->k, trigger_flags|
/*
* Without BTREE_UPDATE_NO_KEY_CACHE_COHERENCY, we may end up updating
* the btree instead of the key cache - this can cause the allocator to
* self-deadlock, since updating the btree may require allocating new
* btree nodes:
*/
return PTR_ERR_OR_ZERO(a) ?:
bch2_trans_update(trans, iter, &a->k, trigger_flags|
BTREE_UPDATE_NO_KEY_CACHE_COHERENCY);
}
......@@ -342,7 +340,7 @@ void bch2_alloc_to_text(struct printbuf *out, struct bch_fs *c,
#undef x
}
int bch2_alloc_read(struct bch_fs *c)
int bch2_alloc_read(struct bch_fs *c, bool gc, bool metadata_only)
{
struct btree_trans trans;
struct btree_iter iter;
......@@ -353,108 +351,43 @@ int bch2_alloc_read(struct bch_fs *c)
int ret;
bch2_trans_init(&trans, c, 0, 0);
down_read(&c->gc_lock);
for_each_btree_key(&trans, iter, BTREE_ID_alloc, POS_MIN,
BTREE_ITER_PREFETCH, k, ret) {
if (!bkey_is_alloc(k.k))
continue;
ca = bch_dev_bkey_exists(c, k.k->p.inode);
g = bucket(ca, k.k->p.offset);
g = __bucket(ca, k.k->p.offset, gc);
u = bch2_alloc_unpack(k);
*bucket_gen(ca, k.k->p.offset) = u.gen;
if (!gc)
*bucket_gen(ca, k.k->p.offset) = u.gen;
g->_mark.gen = u.gen;
g->_mark.data_type = u.data_type;
g->_mark.dirty_sectors = u.dirty_sectors;
g->_mark.cached_sectors = u.cached_sectors;
g->_mark.stripe = u.stripe != 0;
g->stripe = u.stripe;
g->stripe_redundancy = u.stripe_redundancy;
g->io_time[READ] = u.read_time;
g->io_time[WRITE] = u.write_time;
g->oldest_gen = u.oldest_gen;
g->oldest_gen = !gc ? u.oldest_gen : u.gen;
g->gen_valid = 1;
}
bch2_trans_iter_exit(&trans, &iter);
up_read(&c->gc_lock);
bch2_trans_exit(&trans);
if (!gc ||
(metadata_only &&
(u.data_type == BCH_DATA_user ||
u.data_type == BCH_DATA_cached ||
u.data_type == BCH_DATA_parity))) {
g->_mark.data_type = u.data_type;
g->_mark.dirty_sectors = u.dirty_sectors;
g->_mark.cached_sectors = u.cached_sectors;
g->_mark.stripe = u.stripe != 0;
g->stripe = u.stripe;
g->stripe_redundancy = u.stripe_redundancy;
}
if (ret) {
bch_err(c, "error reading alloc info: %i", ret);
return ret;
}
bch2_trans_iter_exit(&trans, &iter);
return 0;
}
static int bch2_alloc_write_key(struct btree_trans *trans,
struct btree_iter *iter,
unsigned flags)
{
struct bch_fs *c = trans->c;
struct bkey_s_c k;
struct bkey_alloc_unpacked old_u, new_u;
int ret;
retry:
bch2_trans_begin(trans);
ret = bch2_btree_key_cache_flush(trans,
BTREE_ID_alloc, iter->pos);
if (ret)
goto err;
bch2_trans_exit(&trans);
k = bch2_btree_iter_peek_slot(iter);
ret = bkey_err(k);
if (ret)
goto err;
old_u = bch2_alloc_unpack(k);
new_u = alloc_mem_to_key(c, iter);
if (!bkey_alloc_unpacked_cmp(old_u, new_u))
return 0;
ret = bch2_alloc_write(trans, iter, &new_u,
BTREE_TRIGGER_NORUN) ?:
bch2_trans_commit(trans, NULL, NULL,
BTREE_INSERT_NOFAIL|flags);
err:
if (ret == -EINTR)
goto retry;
return ret;
}
int bch2_alloc_write_all(struct bch_fs *c, unsigned flags)
{
struct btree_trans trans;
struct btree_iter iter;
struct bch_dev *ca;
unsigned i;
int ret = 0;
bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0);
bch2_trans_iter_init(&trans, &iter, BTREE_ID_alloc, POS_MIN,
BTREE_ITER_SLOTS|BTREE_ITER_INTENT);
for_each_member_device(ca, c, i) {
bch2_btree_iter_set_pos(&iter,
POS(ca->dev_idx, ca->mi.first_bucket));
bch_err(c, "error reading alloc info: %i", ret);
while (iter.pos.offset < ca->mi.nbuckets) {
ret = bch2_alloc_write_key(&trans, &iter, flags);
if (ret) {
percpu_ref_put(&ca->ref);
goto err;
}
bch2_btree_iter_advance(&iter);
}
}
err:
bch2_trans_iter_exit(&trans, &iter);
bch2_trans_exit(&trans);
return ret;
}
......
......@@ -38,40 +38,23 @@ static inline bool bkey_alloc_unpacked_cmp(struct bkey_alloc_unpacked l,
;
}
struct bkey_alloc_buf {
struct bkey_i k;
struct bch_alloc_v3 v;
#define x(_name, _bits) + _bits / 8
u8 _pad[0 + BCH_ALLOC_FIELDS_V2()];
#undef x
} __attribute__((packed, aligned(8)));
struct bkey_alloc_unpacked bch2_alloc_unpack(struct bkey_s_c);
struct bkey_alloc_buf *bch2_alloc_pack(struct btree_trans *,
const struct bkey_alloc_unpacked);
int bch2_alloc_write(struct btree_trans *, struct btree_iter *,
struct bkey_alloc_unpacked *, unsigned);
int bch2_bucket_io_time_reset(struct btree_trans *, unsigned, size_t, int);
static inline struct bkey_alloc_unpacked
alloc_mem_to_key(struct bch_fs *c, struct btree_iter *iter)
{
struct bch_dev *ca;
struct bucket *g;
struct bkey_alloc_unpacked ret;
percpu_down_read(&c->mark_lock);
ca = bch_dev_bkey_exists(c, iter->pos.inode);
g = bucket(ca, iter->pos.offset);
ret = (struct bkey_alloc_unpacked) {
.dev = iter->pos.inode,
.bucket = iter->pos.offset,
.gen = g->mark.gen,
.oldest_gen = g->oldest_gen,
.data_type = g->mark.data_type,
.dirty_sectors = g->mark.dirty_sectors,
.cached_sectors = g->mark.cached_sectors,
.read_time = g->io_time[READ],
.write_time = g->io_time[WRITE],
.stripe = g->stripe,
.stripe_redundancy = g->stripe_redundancy,
};
percpu_up_read(&c->mark_lock);
return ret;
}
#define ALLOC_SCAN_BATCH(ca) max_t(size_t, 1, (ca)->mi.nbuckets >> 9)
const char *bch2_alloc_v1_invalid(const struct bch_fs *, struct bkey_s_c);
......@@ -101,7 +84,7 @@ static inline bool bkey_is_alloc(const struct bkey *k)
k->type == KEY_TYPE_alloc_v3;
}
int bch2_alloc_read(struct bch_fs *);
int bch2_alloc_read(struct bch_fs *, bool, bool);
static inline void bch2_wake_allocator(struct bch_dev *ca)
{
......@@ -139,7 +122,6 @@ void bch2_dev_allocator_quiesce(struct bch_fs *, struct bch_dev *);
void bch2_dev_allocator_stop(struct bch_dev *);
int bch2_dev_allocator_start(struct bch_dev *);
int bch2_alloc_write_all(struct bch_fs *, unsigned);
void bch2_fs_allocator_background_init(struct bch_fs *);
#endif /* _BCACHEFS_ALLOC_BACKGROUND_H */
......@@ -536,7 +536,6 @@ enum {
/* misc: */
BCH_FS_NEED_ANOTHER_GC,
BCH_FS_DELETED_NODES,
BCH_FS_NEED_ALLOC_WRITE,
BCH_FS_REBUILD_REPLICAS,
BCH_FS_HOLD_BTREE_WRITES,
};
......
This diff is collapsed.
......@@ -1113,7 +1113,11 @@ int bch2_fs_recovery(struct bch_fs *c)
bch_verbose(c, "starting alloc read");
err = "error reading allocation information";
ret = bch2_alloc_read(c);
down_read(&c->gc_lock);
ret = bch2_alloc_read(c, false, false);
up_read(&c->gc_lock);
if (ret)
goto err;
bch_verbose(c, "alloc read done");
......@@ -1171,23 +1175,6 @@ int bch2_fs_recovery(struct bch_fs *c)
if (c->opts.verbose || !c->sb.clean)
bch_info(c, "journal replay done");
if (test_bit(BCH_FS_NEED_ALLOC_WRITE, &c->flags) &&
!c->opts.nochanges) {
/*
* note that even when filesystem was clean there might be work
* to do here, if we ran gc (because of fsck) which recalculated
* oldest_gen:
*/
bch_verbose(c, "writing allocation info");
err = "error writing out alloc info";
ret = bch2_alloc_write_all(c, BTREE_INSERT_LAZY_RW);
if (ret) {
bch_err(c, "error writing alloc info");
goto err;
}
bch_verbose(c, "alloc write done");
}
if (c->sb.version < bcachefs_metadata_version_snapshot_2) {
bch2_fs_lazy_rw(c);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment