Commit a1d58243 authored by Kent Overstreet, committed by Kent Overstreet

bcachefs: add ability to run gc on metadata only
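
Add a metadata_only mode to bch2_gc(): when set, gc walks each btree only down to depth 1 (btree node pointers are still visited and marked, leaf keys holding user data are never read) and skips journal keys. bch2_gc_start() carries the existing marks of buckets holding user or cached data over into the gc copy (clearing only owned_by_allocator), and bch2_gc_done() compares only what was actually recomputed, skipping the user/cached/reserved counters, the stripes, and replicas entries for user or cached data. To make that split workable, btree sectors get their own counter in struct bch_fs_usage instead of being folded into data, and per-device usage is now simply regenerated from the bucket marks by bch2_dev_usage_from_buckets(), which iterates all member devices itself.

The entry point becomes (see the btree_gc.h hunk below):

    int bch2_gc(struct bch_fs *c, struct list_head *journal,
                bool initial, bool metadata_only);

All existing callers (recovery, the gc thread, the sysfs trigger) pass metadata_only = false, so nothing requests a metadata-only run yet.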

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
parent cccf4e6d
@@ -292,8 +292,7 @@ int bch2_alloc_read(struct bch_fs *c, struct list_head *journal_replay_list)
}
percpu_down_write(&c->mark_lock);
for_each_member_device(ca, c, i)
bch2_dev_usage_from_buckets(c, ca);
bch2_dev_usage_from_buckets(c);
percpu_up_write(&c->mark_lock);
mutex_lock(&c->bucket_clock[READ].lock);
......
@@ -204,7 +204,7 @@ static int btree_gc_mark_node(struct bch_fs *c, struct btree *b,
}
static int bch2_gc_btree(struct bch_fs *c, enum btree_id btree_id,
bool initial)
bool initial, bool metadata_only)
{
struct btree_trans trans;
struct btree_iter *iter;
@@ -224,7 +224,9 @@ static int bch2_gc_btree(struct bch_fs *c, enum btree_id btree_id,
* and on startup, we have to read every btree node (XXX: only if it was
* an unclean shutdown)
*/
if (initial || expensive_debug_checks(c))
if (metadata_only)
depth = 1;
else if (initial || expensive_debug_checks(c))
depth = 0;
btree_node_range_checks_init(&r, depth);
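
For context on the branch above: depth is the lowest btree level the walk descends to. At depth 0 gc reads leaf nodes and marks the keys inside them; at depth 1 it stops one level above the leaves, which still visits every btree node pointer (those live in the interior nodes) but never touches leaf contents, i.e. user data. An annotated restatement of the new selection, a sketch only:

    if (metadata_only)
            depth = 1;      /* mark btree nodes/pointers only; leaf keys are skipped */
    else if (initial || expensive_debug_checks(c))
            depth = 0;      /* startup after unclean shutdown, or debug: read every node */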
@@ -280,7 +282,7 @@ static inline int btree_id_gc_phase_cmp(enum btree_id l, enum btree_id r)
}
static int bch2_gc_btrees(struct bch_fs *c, struct list_head *journal,
bool initial)
bool initial, bool metadata_only)
{
enum btree_id ids[BTREE_ID_NR];
u8 max_stale;
@@ -294,11 +296,12 @@ static int bch2_gc_btrees(struct bch_fs *c, struct list_head *journal,
enum btree_id id = ids[i];
enum btree_node_type type = __btree_node_type(0, id);
int ret = bch2_gc_btree(c, id, initial);
int ret = bch2_gc_btree(c, id, initial, metadata_only);
if (ret)
return ret;
if (journal && btree_node_type_needs_gc(type)) {
if (journal && !metadata_only &&
btree_node_type_needs_gc(type)) {
struct bkey_i *k, *n;
struct jset_entry *j;
struct journal_replay *r;
@@ -476,11 +479,13 @@ static void bch2_gc_free(struct bch_fs *c)
c->usage[1] = NULL;
}
static int bch2_gc_done(struct bch_fs *c, bool initial)
static int bch2_gc_done(struct bch_fs *c,
bool initial, bool metadata_only)
{
struct bch_dev *ca;
bool verify = !initial ||
(c->sb.compat & (1ULL << BCH_COMPAT_FEAT_ALLOC_INFO));
bool verify = !metadata_only &&
(!initial ||
(c->sb.compat & (1ULL << BCH_COMPAT_FEAT_ALLOC_INFO)));
unsigned i;
int ret = 0;
@@ -515,7 +520,7 @@ static int bch2_gc_done(struct bch_fs *c, bool initial)
#define copy_fs_field(_f, _msg, ...) \
copy_field(_f, "fs has wrong " _msg, ##__VA_ARGS__)
{
if (!metadata_only) {
struct genradix_iter dst_iter = genradix_iter_init(&c->stripes[0], 0);
struct genradix_iter src_iter = genradix_iter_init(&c->stripes[1], 0);
struct stripe *dst, *src;
@@ -567,26 +572,7 @@ static int bch2_gc_done(struct bch_fs *c, bool initial)
}
};
for_each_member_device(ca, c, i) {
unsigned nr = sizeof(struct bch_dev_usage) / sizeof(u64);
struct bch_dev_usage *dst = (void *)
bch2_acc_percpu_u64s((void *) ca->usage[0], nr);
struct bch_dev_usage *src = (void *)
bch2_acc_percpu_u64s((void *) ca->usage[1], nr);
unsigned b;
for (b = 0; b < BCH_DATA_NR; b++)
copy_dev_field(buckets[b], "buckets[%s]",
bch2_data_types[b]);
copy_dev_field(buckets_alloc, "buckets_alloc");
copy_dev_field(buckets_ec, "buckets_ec");
copy_dev_field(buckets_unavailable, "buckets_unavailable");
for (b = 0; b < BCH_DATA_NR; b++)
copy_dev_field(sectors[b], "sectors[%s]",
bch2_data_types[b]);
copy_dev_field(sectors_fragmented, "sectors_fragmented");
}
bch2_dev_usage_from_buckets(c);
{
unsigned nr = fs_usage_u64s(c);
@@ -596,20 +582,29 @@ static int bch2_gc_done(struct bch_fs *c, bool initial)
bch2_acc_percpu_u64s((void *) c->usage[1], nr);
copy_fs_field(hidden, "hidden");
copy_fs_field(data, "data");
copy_fs_field(cached, "cached");
copy_fs_field(reserved, "reserved");
copy_fs_field(nr_inodes, "nr_inodes");
copy_fs_field(btree, "btree");
for (i = 0; i < BCH_REPLICAS_MAX; i++)
copy_fs_field(persistent_reserved[i],
"persistent_reserved[%i]", i);
if (!metadata_only) {
copy_fs_field(data, "data");
copy_fs_field(cached, "cached");
copy_fs_field(reserved, "reserved");
copy_fs_field(nr_inodes,"nr_inodes");
for (i = 0; i < BCH_REPLICAS_MAX; i++)
copy_fs_field(persistent_reserved[i],
"persistent_reserved[%i]", i);
}
for (i = 0; i < c->replicas.nr; i++) {
struct bch_replicas_entry *e =
cpu_replicas_entry(&c->replicas, i);
char buf[80];
if (metadata_only &&
(e->data_type == BCH_DATA_USER ||
e->data_type == BCH_DATA_CACHED))
continue;
bch2_replicas_entry_to_text(&PBUF(buf), e);
copy_fs_field(replicas[i], "%s", buf);
@@ -625,7 +620,8 @@ static int bch2_gc_done(struct bch_fs *c, bool initial)
return ret;
}
static int bch2_gc_start(struct bch_fs *c)
static int bch2_gc_start(struct bch_fs *c,
bool metadata_only)
{
struct bch_dev *ca;
unsigned i;
@@ -671,10 +667,18 @@ static int bch2_gc_start(struct bch_fs *c)
dst->nbuckets = src->nbuckets;
for (b = 0; b < src->nbuckets; b++) {
dst->b[b]._mark.gen =
dst->b[b].oldest_gen =
src->b[b].mark.gen;
dst->b[b].gen_valid = src->b[b].gen_valid;
struct bucket *d = &dst->b[b];
struct bucket *s = &src->b[b];
d->_mark.gen = dst->b[b].oldest_gen = s->mark.gen;
d->gen_valid = s->gen_valid;
if (metadata_only &&
(s->mark.data_type == BCH_DATA_USER ||
s->mark.data_type == BCH_DATA_CACHED)) {
d->_mark = s->mark;
d->_mark.owned_by_allocator = 0;
}
}
};
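
The loop above seeds the gc copy of the bucket array. Normally only the generation numbers are copied and everything else is recomputed from the keys gc walks; with metadata_only set, buckets currently holding user or cached data keep their whole mark (minus owned_by_allocator), since the keys that would regenerate those marks are never visited. A toy, self-contained model of that seeding rule; names and the enum here are illustrative, not the kernel's:

#include <stdbool.h>

enum toy_data_type { TOY_DATA_NONE, TOY_DATA_BTREE, TOY_DATA_USER, TOY_DATA_CACHED };

struct toy_mark {
	unsigned		gen;
	enum toy_data_type	data_type;
	bool			owned_by_allocator;
};

/* What a bucket's gc copy starts out as before gc runs: */
static struct toy_mark gc_seed_mark(struct toy_mark cur, bool metadata_only)
{
	struct toy_mark d = { .gen = cur.gen };		/* default: carry over gen only */

	if (metadata_only &&
	    (cur.data_type == TOY_DATA_USER ||
	     cur.data_type == TOY_DATA_CACHED)) {
		d = cur;				/* keep the existing mark... */
		d.owned_by_allocator = false;		/* ...but never allocator ownership */
	}

	return d;
}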
@@ -699,7 +703,8 @@ static int bch2_gc_start(struct bch_fs *c)
* move around - if references move backwards in the ordering GC
* uses, GC could skip past them
*/
int bch2_gc(struct bch_fs *c, struct list_head *journal, bool initial)
int bch2_gc(struct bch_fs *c, struct list_head *journal,
bool initial, bool metadata_only)
{
struct bch_dev *ca;
u64 start_time = local_clock();
@@ -711,7 +716,7 @@ int bch2_gc(struct bch_fs *c, struct list_head *journal, bool initial)
down_write(&c->gc_lock);
again:
percpu_down_write(&c->mark_lock);
ret = bch2_gc_start(c);
ret = bch2_gc_start(c, metadata_only);
percpu_up_write(&c->mark_lock);
if (ret)
@@ -719,7 +724,7 @@ int bch2_gc(struct bch_fs *c, struct list_head *journal, bool initial)
bch2_mark_superblocks(c);
ret = bch2_gc_btrees(c, journal, initial);
ret = bch2_gc_btrees(c, journal, initial, metadata_only);
if (ret)
goto out;
@@ -753,7 +758,7 @@ int bch2_gc(struct bch_fs *c, struct list_head *journal, bool initial)
percpu_down_write(&c->mark_lock);
if (!ret)
ret = bch2_gc_done(c, initial);
ret = bch2_gc_done(c, initial, metadata_only);
/* Indicates that gc is no longer in progress: */
__gc_pos_set(c, gc_phase(GC_PHASE_NOT_RUNNING));
@@ -1155,7 +1160,7 @@ static int bch2_gc_thread(void *arg)
last = atomic_long_read(&clock->now);
last_kick = atomic_read(&c->kick_gc);
ret = bch2_gc(c, NULL, false);
ret = bch2_gc(c, NULL, false, false);
if (ret)
bch_err(c, "btree gc failed: %i", ret);
......
@@ -5,7 +5,7 @@
#include "btree_types.h"
void bch2_coalesce(struct bch_fs *);
int bch2_gc(struct bch_fs *, struct list_head *, bool);
int bch2_gc(struct bch_fs *, struct list_head *, bool, bool);
void bch2_gc_thread_stop(struct bch_fs *);
int bch2_gc_thread_start(struct bch_fs *);
void bch2_mark_dev_superblock(struct bch_fs *, struct bch_dev *, unsigned);
......
@@ -132,6 +132,8 @@ void bch2_fs_usage_initialize(struct bch_fs *c)
switch (e->data_type) {
case BCH_DATA_BTREE:
usage->btree += usage->replicas[i];
break;
case BCH_DATA_USER:
usage->data += usage->replicas[i];
break;
@@ -226,6 +228,7 @@ static u64 avail_factor(u64 r)
u64 bch2_fs_sectors_used(struct bch_fs *c, struct bch_fs_usage *fs_usage)
{
return min(fs_usage->hidden +
fs_usage->btree +
fs_usage->data +
reserve_factor(fs_usage->reserved +
fs_usage->online_reserved),
@@ -241,7 +244,8 @@ __bch2_fs_usage_read_short(struct bch_fs *c)
ret.capacity = c->capacity -
percpu_u64_get(&c->usage[0]->hidden);
data = percpu_u64_get(&c->usage[0]->data);
data = percpu_u64_get(&c->usage[0]->data) +
percpu_u64_get(&c->usage[0]->btree);
reserved = percpu_u64_get(&c->usage[0]->reserved) +
percpu_u64_get(&c->usage[0]->online_reserved);
@@ -386,12 +390,17 @@ static void bch2_dev_usage_update(struct bch_fs *c, struct bch_dev *ca,
bch2_wake_allocator(ca);
}
void bch2_dev_usage_from_buckets(struct bch_fs *c, struct bch_dev *ca)
void bch2_dev_usage_from_buckets(struct bch_fs *c)
{
struct bch_dev *ca;
struct bucket_mark old = { .v.counter = 0 };
struct bch_fs_usage *fs_usage;
struct bucket_array *buckets;
struct bucket *g;
unsigned i;
int cpu;
percpu_u64_set(&c->usage[0]->hidden, 0);
/*
* This is only called during startup, before there's any multithreaded
@@ -401,11 +410,17 @@ void bch2_dev_usage_from_buckets(struct bch_fs *c, struct bch_dev *ca)
fs_usage = this_cpu_ptr(c->usage[0]);
preempt_enable();
buckets = bucket_array(ca);
for_each_member_device(ca, c, i) {
for_each_possible_cpu(cpu)
memset(per_cpu_ptr(ca->usage[0], cpu), 0,
sizeof(*ca->usage[0]));
buckets = bucket_array(ca);
for_each_bucket(g, buckets)
if (g->mark.data_type)
bch2_dev_usage_update(c, ca, fs_usage, old, g->mark, false);
for_each_bucket(g, buckets)
bch2_dev_usage_update(c, ca, fs_usage,
old, g->mark, false);
}
}
#define bucket_data_cmpxchg(c, ca, fs_usage, g, new, expr) \
@@ -426,10 +441,17 @@ static inline void update_replicas(struct bch_fs *c,
BUG_ON(idx < 0);
BUG_ON(!sectors);
if (r->data_type == BCH_DATA_CACHED)
fs_usage->cached += sectors;
else
switch (r->data_type) {
case BCH_DATA_BTREE:
fs_usage->btree += sectors;
break;
case BCH_DATA_USER:
fs_usage->data += sectors;
break;
case BCH_DATA_CACHED:
fs_usage->cached += sectors;
break;
}
fs_usage->replicas[idx] += sectors;
}
......
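
The buckets.c changes above split btree sectors out of the data counter: update_replicas() now credits BCH_DATA_BTREE sectors to the new fs_usage->btree field, and bch2_fs_sectors_used() / __bch2_fs_usage_read_short() add btree back in, so the totals come out the same as before. A small self-contained model of that accounting (reserve_factor and the capacity clamp are omitted; names mirror the patch, but this is an illustration, not the kernel code):

#include <stdint.h>
#include <stdio.h>

enum data_type { DATA_BTREE, DATA_USER, DATA_CACHED };

struct fs_usage {
	uint64_t hidden, btree, data, cached, reserved;
};

/* Mirrors the new switch in update_replicas(): each data type has its own counter. */
static void account(struct fs_usage *u, enum data_type t, uint64_t sectors)
{
	switch (t) {
	case DATA_BTREE:	u->btree  += sectors; break;	/* previously lumped into data */
	case DATA_USER:		u->data   += sectors; break;
	case DATA_CACHED:	u->cached += sectors; break;
	}
}

/* Mirrors bch2_fs_sectors_used(): btree now has to be added back in. */
static uint64_t sectors_used(const struct fs_usage *u)
{
	return u->hidden + u->btree + u->data + u->reserved;
}

int main(void)
{
	struct fs_usage u = { .hidden = 16 };

	account(&u, DATA_BTREE, 128);	/* btree node writes */
	account(&u, DATA_USER, 4096);	/* user extents */
	account(&u, DATA_CACHED, 512);	/* cached copies don't count as used */

	printf("used: %llu sectors\n", (unsigned long long) sectors_used(&u));
	return 0;
}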
@@ -174,7 +174,7 @@ static inline bool bucket_needs_journal_commit(struct bucket_mark m,
struct bch_dev_usage bch2_dev_usage_read(struct bch_fs *, struct bch_dev *);
void bch2_dev_usage_from_buckets(struct bch_fs *, struct bch_dev *);
void bch2_dev_usage_from_buckets(struct bch_fs *);
static inline u64 __dev_buckets_available(struct bch_dev *ca,
struct bch_dev_usage stats)
......
@@ -70,6 +70,7 @@ struct bch_fs_usage {
u64 gc_start[0];
u64 hidden;
u64 btree;
u64 data;
u64 cached;
u64 reserved;
......
@@ -361,7 +361,7 @@ int bch2_fs_recovery(struct bch_fs *c)
test_bit(BCH_FS_REBUILD_REPLICAS, &c->flags)) {
bch_verbose(c, "starting mark and sweep:");
err = "error in recovery";
ret = bch2_gc(c, &journal, true);
ret = bch2_gc(c, &journal, true, false);
if (ret)
goto err;
bch_verbose(c, "mark and sweep done");
......
@@ -497,7 +497,7 @@ STORE(__bch2_fs)
bch2_coalesce(c);
if (attr == &sysfs_trigger_gc)
bch2_gc(c, NULL, false);
bch2_gc(c, NULL, false, false);
if (attr == &sysfs_trigger_alloc_write) {
bool wrote;
......
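
Every call site touched here passes metadata_only = false, so behaviour is unchanged: recovery runs bch2_gc(c, &journal, true, false), while the gc thread and the sysfs gc trigger run bch2_gc(c, NULL, false, false). A metadata-only pass would presumably be invoked like the sketch below; nothing in this patch does so yet, so treat it as hypothetical:

    /* Hypothetical caller, not part of this patch: check/repair
     * allocation info for metadata only. */
    ret = bch2_gc(c, NULL, false, true);
    if (ret)
            bch_err(c, "metadata-only gc failed: %i", ret);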