Commit f299d573 authored by Kent Overstreet, committed by Kent Overstreet

bcachefs: Refactor filesystem usage accounting

Various filesystem usage counters are kept in percpu counters, with one
set per in-flight journal buffer. Right now all the code that deals with
them assumes that there are only two buffers/sets of counters, but the
number of journal bufs is getting increased to 4 in the next patch - so
refactor that code to not hard-code the number of buffers.
Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
parent 7bfbbd88
...@@ -676,7 +676,7 @@ struct bch_fs { ...@@ -676,7 +676,7 @@ struct bch_fs {
seqcount_t usage_lock; seqcount_t usage_lock;
struct bch_fs_usage *usage_base; struct bch_fs_usage *usage_base;
struct bch_fs_usage __percpu *usage[2]; struct bch_fs_usage __percpu *usage[JOURNAL_BUF_NR];
struct bch_fs_usage __percpu *usage_gc; struct bch_fs_usage __percpu *usage_gc;
u64 __percpu *online_reserved; u64 __percpu *online_reserved;
......
...@@ -603,7 +603,6 @@ static int bch2_gc_done(struct bch_fs *c, ...@@ -603,7 +603,6 @@ static int bch2_gc_done(struct bch_fs *c,
struct genradix_iter dst_iter = genradix_iter_init(&c->stripes[0], 0); struct genradix_iter dst_iter = genradix_iter_init(&c->stripes[0], 0);
struct genradix_iter src_iter = genradix_iter_init(&c->stripes[1], 0); struct genradix_iter src_iter = genradix_iter_init(&c->stripes[1], 0);
struct stripe *dst, *src; struct stripe *dst, *src;
unsigned i;
c->ec_stripes_heap.used = 0; c->ec_stripes_heap.used = 0;
......
...@@ -207,13 +207,13 @@ static inline struct bch_fs_usage *fs_usage_ptr(struct bch_fs *c, ...@@ -207,13 +207,13 @@ static inline struct bch_fs_usage *fs_usage_ptr(struct bch_fs *c,
{ {
return this_cpu_ptr(gc return this_cpu_ptr(gc
? c->usage_gc ? c->usage_gc
: c->usage[journal_seq & 1]); : c->usage[journal_seq & JOURNAL_BUF_MASK]);
} }
u64 bch2_fs_usage_read_one(struct bch_fs *c, u64 *v) u64 bch2_fs_usage_read_one(struct bch_fs *c, u64 *v)
{ {
ssize_t offset = v - (u64 *) c->usage_base; ssize_t offset = v - (u64 *) c->usage_base;
unsigned seq; unsigned i, seq;
u64 ret; u64 ret;
BUG_ON(offset < 0 || offset >= fs_usage_u64s(c)); BUG_ON(offset < 0 || offset >= fs_usage_u64s(c));
...@@ -221,9 +221,10 @@ u64 bch2_fs_usage_read_one(struct bch_fs *c, u64 *v) ...@@ -221,9 +221,10 @@ u64 bch2_fs_usage_read_one(struct bch_fs *c, u64 *v)
do { do {
seq = read_seqcount_begin(&c->usage_lock); seq = read_seqcount_begin(&c->usage_lock);
ret = *v + ret = *v;
percpu_u64_get((u64 __percpu *) c->usage[0] + offset) +
percpu_u64_get((u64 __percpu *) c->usage[1] + offset); for (i = 0; i < ARRAY_SIZE(c->usage); i++)
ret += percpu_u64_get((u64 __percpu *) c->usage[i] + offset);
} while (read_seqcount_retry(&c->usage_lock, seq)); } while (read_seqcount_retry(&c->usage_lock, seq));
return ret; return ret;
...@@ -232,15 +233,20 @@ u64 bch2_fs_usage_read_one(struct bch_fs *c, u64 *v) ...@@ -232,15 +233,20 @@ u64 bch2_fs_usage_read_one(struct bch_fs *c, u64 *v)
struct bch_fs_usage_online *bch2_fs_usage_read(struct bch_fs *c) struct bch_fs_usage_online *bch2_fs_usage_read(struct bch_fs *c)
{ {
struct bch_fs_usage_online *ret; struct bch_fs_usage_online *ret;
unsigned seq, i, u64s; unsigned seq, i, v, u64s = fs_usage_u64s(c);
retry:
ret = kmalloc(u64s * sizeof(u64), GFP_NOFS);
if (unlikely(!ret))
return NULL;
percpu_down_read(&c->mark_lock); percpu_down_read(&c->mark_lock);
ret = kmalloc(sizeof(struct bch_fs_usage_online) + v = fs_usage_u64s(c);
sizeof(u64) + c->replicas.nr, GFP_NOFS); if (unlikely(u64s != v)) {
if (unlikely(!ret)) { u64s = v;
percpu_up_read(&c->mark_lock); percpu_up_read(&c->mark_lock);
return NULL; kfree(ret);
goto retry;
} }
ret->online_reserved = percpu_u64_get(c->online_reserved); ret->online_reserved = percpu_u64_get(c->online_reserved);
...@@ -248,7 +254,7 @@ struct bch_fs_usage_online *bch2_fs_usage_read(struct bch_fs *c) ...@@ -248,7 +254,7 @@ struct bch_fs_usage_online *bch2_fs_usage_read(struct bch_fs *c)
u64s = fs_usage_u64s(c); u64s = fs_usage_u64s(c);
do { do {
seq = read_seqcount_begin(&c->usage_lock); seq = read_seqcount_begin(&c->usage_lock);
memcpy(&ret->u, c->usage_base, u64s * sizeof(u64)); memcpy(ret, c->usage_base, u64s * sizeof(u64));
for (i = 0; i < ARRAY_SIZE(c->usage); i++) for (i = 0; i < ARRAY_SIZE(c->usage); i++)
acc_u64s_percpu((u64 *) &ret->u, (u64 __percpu *) c->usage[i], u64s); acc_u64s_percpu((u64 *) &ret->u, (u64 __percpu *) c->usage[i], u64s);
} while (read_seqcount_retry(&c->usage_lock, seq)); } while (read_seqcount_retry(&c->usage_lock, seq));
......
...@@ -11,6 +11,10 @@ ...@@ -11,6 +11,10 @@
struct journal_res; struct journal_res;
#define JOURNAL_BUF_BITS 1
#define JOURNAL_BUF_NR (1U << JOURNAL_BUF_BITS)
#define JOURNAL_BUF_MASK (JOURNAL_BUF_NR - 1)
/* /*
* We put two of these in struct journal; we used them for writes to the * We put two of these in struct journal; we used them for writes to the
* journal that are being staged or in flight. * journal that are being staged or in flight.
......
...@@ -275,7 +275,7 @@ static void __replicas_table_update_pcpu(struct bch_fs_usage __percpu *dst_p, ...@@ -275,7 +275,7 @@ static void __replicas_table_update_pcpu(struct bch_fs_usage __percpu *dst_p,
static int replicas_table_update(struct bch_fs *c, static int replicas_table_update(struct bch_fs *c,
struct bch_replicas_cpu *new_r) struct bch_replicas_cpu *new_r)
{ {
struct bch_fs_usage __percpu *new_usage[2]; struct bch_fs_usage __percpu *new_usage[JOURNAL_BUF_NR];
struct bch_fs_usage_online *new_scratch = NULL; struct bch_fs_usage_online *new_scratch = NULL;
struct bch_fs_usage __percpu *new_gc = NULL; struct bch_fs_usage __percpu *new_gc = NULL;
struct bch_fs_usage *new_base = NULL; struct bch_fs_usage *new_base = NULL;
...@@ -283,7 +283,14 @@ static int replicas_table_update(struct bch_fs *c, ...@@ -283,7 +283,14 @@ static int replicas_table_update(struct bch_fs *c,
sizeof(u64) * new_r->nr; sizeof(u64) * new_r->nr;
unsigned scratch_bytes = sizeof(struct bch_fs_usage_online) + unsigned scratch_bytes = sizeof(struct bch_fs_usage_online) +
sizeof(u64) * new_r->nr; sizeof(u64) * new_r->nr;
int ret = -ENOMEM; int ret = 0;
memset(new_usage, 0, sizeof(new_usage));
for (i = 0; i < ARRAY_SIZE(new_usage); i++)
if (!(new_usage[i] = __alloc_percpu_gfp(bytes,
sizeof(u64), GFP_NOIO)))
goto err;
memset(new_usage, 0, sizeof(new_usage)); memset(new_usage, 0, sizeof(new_usage));
...@@ -295,10 +302,8 @@ static int replicas_table_update(struct bch_fs *c, ...@@ -295,10 +302,8 @@ static int replicas_table_update(struct bch_fs *c,
if (!(new_base = kzalloc(bytes, GFP_NOIO)) || if (!(new_base = kzalloc(bytes, GFP_NOIO)) ||
!(new_scratch = kmalloc(scratch_bytes, GFP_NOIO)) || !(new_scratch = kmalloc(scratch_bytes, GFP_NOIO)) ||
(c->usage_gc && (c->usage_gc &&
!(new_gc = __alloc_percpu_gfp(bytes, sizeof(u64), GFP_NOIO)))) { !(new_gc = __alloc_percpu_gfp(bytes, sizeof(u64), GFP_NOIO))))
bch_err(c, "error updating replicas table: memory allocation failure");
goto err; goto err;
}
for (i = 0; i < ARRAY_SIZE(new_usage); i++) for (i = 0; i < ARRAY_SIZE(new_usage); i++)
if (c->usage[i]) if (c->usage[i])
...@@ -317,14 +322,17 @@ static int replicas_table_update(struct bch_fs *c, ...@@ -317,14 +322,17 @@ static int replicas_table_update(struct bch_fs *c,
swap(c->usage_scratch, new_scratch); swap(c->usage_scratch, new_scratch);
swap(c->usage_gc, new_gc); swap(c->usage_gc, new_gc);
swap(c->replicas, *new_r); swap(c->replicas, *new_r);
ret = 0; out:
err:
free_percpu(new_gc); free_percpu(new_gc);
kfree(new_scratch); kfree(new_scratch);
free_percpu(new_usage[1]); free_percpu(new_usage[1]);
free_percpu(new_usage[0]); free_percpu(new_usage[0]);
kfree(new_base); kfree(new_base);
return ret; return ret;
err:
bch_err(c, "error updating replicas table: memory allocation failure");
ret = -ENOMEM;
goto out;
} }
static unsigned reserve_journal_replicas(struct bch_fs *c, static unsigned reserve_journal_replicas(struct bch_fs *c,
...@@ -499,9 +507,7 @@ int bch2_replicas_gc_end(struct bch_fs *c, int ret) ...@@ -499,9 +507,7 @@ int bch2_replicas_gc_end(struct bch_fs *c, int ret)
struct bch_replicas_cpu n; struct bch_replicas_cpu n;
if (!__replicas_has_entry(&c->replicas_gc, e) && if (!__replicas_has_entry(&c->replicas_gc, e) &&
(c->usage_base->replicas[i] || bch2_fs_usage_read_one(c, &c->usage_base->replicas[i])) {
percpu_u64_get(&c->usage[0]->replicas[i]) ||
percpu_u64_get(&c->usage[1]->replicas[i]))) {
n = cpu_replicas_add_entry(&c->replicas_gc, e); n = cpu_replicas_add_entry(&c->replicas_gc, e);
if (!n.entries) { if (!n.entries) {
ret = -ENOSPC; ret = -ENOSPC;
...@@ -606,9 +612,7 @@ int bch2_replicas_gc2(struct bch_fs *c) ...@@ -606,9 +612,7 @@ int bch2_replicas_gc2(struct bch_fs *c)
cpu_replicas_entry(&c->replicas, i); cpu_replicas_entry(&c->replicas, i);
if (e->data_type == BCH_DATA_journal || if (e->data_type == BCH_DATA_journal ||
c->usage_base->replicas[i] || bch2_fs_usage_read_one(c, &c->usage_base->replicas[i]))
percpu_u64_get(&c->usage[0]->replicas[i]) ||
percpu_u64_get(&c->usage[1]->replicas[i]))
memcpy(cpu_replicas_entry(&new, new.nr++), memcpy(cpu_replicas_entry(&new, new.nr++),
e, new.entry_size); e, new.entry_size);
} }
......
...@@ -998,7 +998,7 @@ bch2_journal_super_entries_add_common(struct bch_fs *c, ...@@ -998,7 +998,7 @@ bch2_journal_super_entries_add_common(struct bch_fs *c,
for (i = 0; i < ARRAY_SIZE(c->usage); i++) for (i = 0; i < ARRAY_SIZE(c->usage); i++)
bch2_fs_usage_acc_to_base(c, i); bch2_fs_usage_acc_to_base(c, i);
} else { } else {
bch2_fs_usage_acc_to_base(c, journal_seq & 1); bch2_fs_usage_acc_to_base(c, journal_seq & JOURNAL_BUF_MASK);
} }
{ {
......
...@@ -483,8 +483,8 @@ static void __bch2_fs_free(struct bch_fs *c) ...@@ -483,8 +483,8 @@ static void __bch2_fs_free(struct bch_fs *c)
percpu_free_rwsem(&c->mark_lock); percpu_free_rwsem(&c->mark_lock);
free_percpu(c->online_reserved); free_percpu(c->online_reserved);
kfree(c->usage_scratch); kfree(c->usage_scratch);
free_percpu(c->usage[1]); for (i = 0; i < ARRAY_SIZE(c->usage); i++)
free_percpu(c->usage[0]); free_percpu(c->usage[i]);
kfree(c->usage_base); kfree(c->usage_base);
if (c->btree_iters_bufs) if (c->btree_iters_bufs)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment