Commit 5b650fd1, authored and committed by Kent Overstreet

bcachefs: Account for internal fragmentation better

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
parent 09f3297a
...@@ -493,7 +493,8 @@ static void bch2_gc_start(struct bch_fs *c) ...@@ -493,7 +493,8 @@ static void bch2_gc_start(struct bch_fs *c)
struct bch_fs_usage *p = struct bch_fs_usage *p =
per_cpu_ptr(c->usage_percpu, cpu); per_cpu_ptr(c->usage_percpu, cpu);
memset(p->s, 0, sizeof(p->s)); memset(p->replicas, 0, sizeof(p->replicas));
memset(p->buckets, 0, sizeof(p->buckets));
} }
percpu_up_write(&c->usage_lock); percpu_up_write(&c->usage_lock);
......
...@@ -184,7 +184,8 @@ static void bch2_btree_node_free_index(struct btree_update *as, struct btree *b, ...@@ -184,7 +184,8 @@ static void bch2_btree_node_free_index(struct btree_update *as, struct btree *b,
*/ */
replicas = bch2_extent_nr_dirty_ptrs(k); replicas = bch2_extent_nr_dirty_ptrs(k);
if (replicas) if (replicas)
stats->s[replicas - 1].data[BCH_DATA_BTREE] -= c->opts.btree_node_size; stats->replicas[replicas - 1].data[BCH_DATA_BTREE] -=
c->opts.btree_node_size;
/* /*
* We're dropping @k from the btree, but it's still live until the * We're dropping @k from the btree, but it's still live until the
......
...@@ -73,6 +73,8 @@ ...@@ -73,6 +73,8 @@
#include <linux/preempt.h> #include <linux/preempt.h>
static inline u64 __bch2_fs_sectors_used(struct bch_fs *, struct bch_fs_usage);
#ifdef DEBUG_BUCKETS #ifdef DEBUG_BUCKETS
#define lg_local_lock lg_global_lock #define lg_local_lock lg_global_lock
...@@ -84,18 +86,24 @@ static void bch2_fs_stats_verify(struct bch_fs *c) ...@@ -84,18 +86,24 @@ static void bch2_fs_stats_verify(struct bch_fs *c)
__bch2_fs_usage_read(c); __bch2_fs_usage_read(c);
unsigned i, j; unsigned i, j;
for (i = 0; i < ARRAY_SIZE(stats.s); i++) { for (i = 0; i < ARRAY_SIZE(stats.replicas); i++) {
for (j = 0; j < ARRAY_SIZE(stats.s[i].data); j++) for (j = 0; j < ARRAY_SIZE(stats.replicas[i].data); j++)
if ((s64) stats.s[i].data[j] < 0) if ((s64) stats.replicas[i].data[j] < 0)
panic("replicas %u %s underflow: %lli\n", panic("replicas %u %s sectors underflow: %lli\n",
i + 1, bch_data_types[j], i + 1, bch_data_types[j],
stats.s[i].data[j]); stats.replicas[i].data[j]);
if ((s64) stats.s[i].persistent_reserved < 0) if ((s64) stats.replicas[i].persistent_reserved < 0)
panic("replicas %u reserved underflow: %lli\n", panic("replicas %u reserved underflow: %lli\n",
i + 1, stats.s[i].persistent_reserved); i + 1, stats.replicas[i].persistent_reserved);
} }
for (j = 0; j < ARRAY_SIZE(stats.buckets); j++)
if ((s64) stats.replicas[i].data_buckets[j] < 0)
panic("%s buckets underflow: %lli\n",
bch_data_types[j],
stats.buckets[j]);
if ((s64) stats.online_reserved < 0) if ((s64) stats.online_reserved < 0)
panic("sectors_online_reserved underflow: %lli\n", panic("sectors_online_reserved underflow: %lli\n",
stats.online_reserved); stats.online_reserved);
...@@ -238,6 +246,7 @@ bch2_fs_usage_read(struct bch_fs *c) ...@@ -238,6 +246,7 @@ bch2_fs_usage_read(struct bch_fs *c)
} }
struct fs_usage_sum { struct fs_usage_sum {
u64 hidden;
u64 data; u64 data;
u64 reserved; u64 reserved;
}; };
...@@ -247,14 +256,21 @@ static inline struct fs_usage_sum __fs_usage_sum(struct bch_fs_usage stats) ...@@ -247,14 +256,21 @@ static inline struct fs_usage_sum __fs_usage_sum(struct bch_fs_usage stats)
struct fs_usage_sum sum = { 0 }; struct fs_usage_sum sum = { 0 };
unsigned i, j; unsigned i, j;
for (i = 0; i < ARRAY_SIZE(stats.s); i++) { /*
u64 a = 0; * For superblock and journal we count bucket usage, not sector usage,
* because any internal fragmentation should _not_ be counted as
* free space:
*/
for (j = 1; j < BCH_DATA_BTREE; j++)
sum.hidden += stats.buckets[j];
for (j = 0; j < ARRAY_SIZE(stats.s[i].data); j++) for (i = 0; i < ARRAY_SIZE(stats.replicas); i++) {
a += stats.s[i].data[j]; for (j = BCH_DATA_BTREE;
j < ARRAY_SIZE(stats.replicas[i].data);
j++)
sum.data += stats.replicas[i].data[j] * (i + 1);
sum.data += a * (i + 1); sum.reserved += stats.replicas[i].persistent_reserved * (i + 1);
sum.reserved += stats.s[i].persistent_reserved * (i + 1);
} }
sum.reserved += stats.online_reserved; sum.reserved += stats.online_reserved;
...@@ -270,14 +286,14 @@ static u64 reserve_factor(u64 r) ...@@ -270,14 +286,14 @@ static u64 reserve_factor(u64 r)
static u64 avail_factor(u64 r) static u64 avail_factor(u64 r)
{ {
return (r << RESERVE_FACTOR) / (1 << RESERVE_FACTOR) + 1; return (r << RESERVE_FACTOR) / ((1 << RESERVE_FACTOR) + 1);
} }
u64 __bch2_fs_sectors_used(struct bch_fs *c, struct bch_fs_usage stats) static inline u64 __bch2_fs_sectors_used(struct bch_fs *c, struct bch_fs_usage stats)
{ {
struct fs_usage_sum sum = __fs_usage_sum(stats); struct fs_usage_sum sum = __fs_usage_sum(stats);
return sum.data + reserve_factor(sum.reserved); return sum.hidden + sum.data + reserve_factor(sum.reserved);
} }
u64 bch2_fs_sectors_used(struct bch_fs *c, struct bch_fs_usage stats) u64 bch2_fs_sectors_used(struct bch_fs *c, struct bch_fs_usage stats)
...@@ -285,9 +301,9 @@ u64 bch2_fs_sectors_used(struct bch_fs *c, struct bch_fs_usage stats) ...@@ -285,9 +301,9 @@ u64 bch2_fs_sectors_used(struct bch_fs *c, struct bch_fs_usage stats)
return min(c->capacity, __bch2_fs_sectors_used(c, stats)); return min(c->capacity, __bch2_fs_sectors_used(c, stats));
} }
u64 bch2_fs_sectors_free(struct bch_fs *c, struct bch_fs_usage stats) static u64 bch2_fs_sectors_free(struct bch_fs *c, struct bch_fs_usage stats)
{ {
return avail_factor(c->capacity - bch2_fs_sectors_used(c, stats)); return c->capacity - bch2_fs_sectors_used(c, stats);
} }
static inline int is_unavailable_bucket(struct bucket_mark m) static inline int is_unavailable_bucket(struct bucket_mark m)
...@@ -323,9 +339,9 @@ static bool bucket_became_unavailable(struct bch_fs *c, ...@@ -323,9 +339,9 @@ static bool bucket_became_unavailable(struct bch_fs *c,
} }
void bch2_fs_usage_apply(struct bch_fs *c, void bch2_fs_usage_apply(struct bch_fs *c,
struct bch_fs_usage *stats, struct bch_fs_usage *stats,
struct disk_reservation *disk_res, struct disk_reservation *disk_res,
struct gc_pos gc_pos) struct gc_pos gc_pos)
{ {
struct fs_usage_sum sum = __fs_usage_sum(*stats); struct fs_usage_sum sum = __fs_usage_sum(*stats);
s64 added = sum.data + sum.reserved; s64 added = sum.data + sum.reserved;
...@@ -358,6 +374,7 @@ void bch2_fs_usage_apply(struct bch_fs *c, ...@@ -358,6 +374,7 @@ void bch2_fs_usage_apply(struct bch_fs *c,
} }
static void bch2_dev_usage_update(struct bch_fs *c, struct bch_dev *ca, static void bch2_dev_usage_update(struct bch_fs *c, struct bch_dev *ca,
struct bch_fs_usage *stats,
struct bucket_mark old, struct bucket_mark new) struct bucket_mark old, struct bucket_mark new)
{ {
struct bch_dev_usage *dev_usage; struct bch_dev_usage *dev_usage;
...@@ -374,6 +391,9 @@ static void bch2_dev_usage_update(struct bch_fs *c, struct bch_dev *ca, ...@@ -374,6 +391,9 @@ static void bch2_dev_usage_update(struct bch_fs *c, struct bch_dev *ca,
bch2_data_types[new.data_type]); bch2_data_types[new.data_type]);
} }
stats->buckets[bucket_type(old)] -= ca->mi.bucket_size;
stats->buckets[bucket_type(new)] += ca->mi.bucket_size;
preempt_disable(); preempt_disable();
dev_usage = this_cpu_ptr(ca->usage_percpu); dev_usage = this_cpu_ptr(ca->usage_percpu);
...@@ -399,17 +419,18 @@ static void bch2_dev_usage_update(struct bch_fs *c, struct bch_dev *ca, ...@@ -399,17 +419,18 @@ static void bch2_dev_usage_update(struct bch_fs *c, struct bch_dev *ca,
bch2_dev_stats_verify(ca); bch2_dev_stats_verify(ca);
} }
#define bucket_data_cmpxchg(c, ca, g, new, expr) \ #define bucket_data_cmpxchg(c, ca, stats, g, new, expr) \
({ \ ({ \
struct bucket_mark _old = bucket_cmpxchg(g, new, expr); \ struct bucket_mark _old = bucket_cmpxchg(g, new, expr); \
\ \
bch2_dev_usage_update(c, ca, _old, new); \ bch2_dev_usage_update(c, ca, stats, _old, new); \
_old; \ _old; \
}) })
void bch2_invalidate_bucket(struct bch_fs *c, struct bch_dev *ca, void bch2_invalidate_bucket(struct bch_fs *c, struct bch_dev *ca,
size_t b, struct bucket_mark *old) size_t b, struct bucket_mark *old)
{ {
struct bch_fs_usage *stats = this_cpu_ptr(c->usage_percpu);
struct bucket *g; struct bucket *g;
struct bucket_mark new; struct bucket_mark new;
...@@ -417,7 +438,7 @@ void bch2_invalidate_bucket(struct bch_fs *c, struct bch_dev *ca, ...@@ -417,7 +438,7 @@ void bch2_invalidate_bucket(struct bch_fs *c, struct bch_dev *ca,
g = bucket(ca, b); g = bucket(ca, b);
*old = bucket_data_cmpxchg(c, ca, g, new, ({ *old = bucket_data_cmpxchg(c, ca, stats, g, new, ({
BUG_ON(!is_available_bucket(new)); BUG_ON(!is_available_bucket(new));
new.owned_by_allocator = 1; new.owned_by_allocator = 1;
...@@ -436,6 +457,7 @@ void bch2_mark_alloc_bucket(struct bch_fs *c, struct bch_dev *ca, ...@@ -436,6 +457,7 @@ void bch2_mark_alloc_bucket(struct bch_fs *c, struct bch_dev *ca,
size_t b, bool owned_by_allocator, size_t b, bool owned_by_allocator,
struct gc_pos pos, unsigned flags) struct gc_pos pos, unsigned flags)
{ {
struct bch_fs_usage *stats = this_cpu_ptr(c->usage_percpu);
struct bucket *g; struct bucket *g;
struct bucket_mark old, new; struct bucket_mark old, new;
...@@ -446,7 +468,7 @@ void bch2_mark_alloc_bucket(struct bch_fs *c, struct bch_dev *ca, ...@@ -446,7 +468,7 @@ void bch2_mark_alloc_bucket(struct bch_fs *c, struct bch_dev *ca,
gc_will_visit(c, pos)) gc_will_visit(c, pos))
return; return;
old = bucket_data_cmpxchg(c, ca, g, new, ({ old = bucket_data_cmpxchg(c, ca, stats, g, new, ({
new.owned_by_allocator = owned_by_allocator; new.owned_by_allocator = owned_by_allocator;
})); }));
...@@ -466,10 +488,12 @@ void bch2_mark_metadata_bucket(struct bch_fs *c, struct bch_dev *ca, ...@@ -466,10 +488,12 @@ void bch2_mark_metadata_bucket(struct bch_fs *c, struct bch_dev *ca,
unsigned sectors, struct gc_pos pos, unsigned sectors, struct gc_pos pos,
unsigned flags) unsigned flags)
{ {
struct bch_fs_usage *stats;
struct bucket *g; struct bucket *g;
struct bucket_mark old, new; struct bucket_mark old, new;
BUG_ON(!type); BUG_ON(type != BCH_DATA_SB &&
type != BCH_DATA_JOURNAL);
if (likely(c)) { if (likely(c)) {
percpu_rwsem_assert_held(&c->usage_lock); percpu_rwsem_assert_held(&c->usage_lock);
...@@ -479,16 +503,17 @@ void bch2_mark_metadata_bucket(struct bch_fs *c, struct bch_dev *ca, ...@@ -479,16 +503,17 @@ void bch2_mark_metadata_bucket(struct bch_fs *c, struct bch_dev *ca,
return; return;
} }
rcu_read_lock(); preempt_disable();
stats = this_cpu_ptr(c->usage_percpu);
g = bucket(ca, b); g = bucket(ca, b);
old = bucket_data_cmpxchg(c, ca, g, new, ({ old = bucket_data_cmpxchg(c, ca, stats, g, new, ({
new.data_type = type; new.data_type = type;
checked_add(new.dirty_sectors, sectors); checked_add(new.dirty_sectors, sectors);
new.dirty_sectors += sectors;
})); }));
rcu_read_unlock(); stats->replicas[0].data[type] += sectors;
preempt_enable();
BUG_ON(!(flags & BCH_BUCKET_MARK_MAY_MAKE_UNAVAILABLE) && BUG_ON(!(flags & BCH_BUCKET_MARK_MAY_MAKE_UNAVAILABLE) &&
bucket_became_unavailable(c, old, new)); bucket_became_unavailable(c, old, new));
...@@ -589,7 +614,7 @@ static void bch2_mark_pointer(struct bch_fs *c, ...@@ -589,7 +614,7 @@ static void bch2_mark_pointer(struct bch_fs *c,
old.v.counter, old.v.counter,
new.v.counter)) != old.v.counter); new.v.counter)) != old.v.counter);
bch2_dev_usage_update(c, ca, old, new); bch2_dev_usage_update(c, ca, stats, old, new);
BUG_ON(!(flags & BCH_BUCKET_MARK_MAY_MAKE_UNAVAILABLE) && BUG_ON(!(flags & BCH_BUCKET_MARK_MAY_MAKE_UNAVAILABLE) &&
bucket_became_unavailable(c, old, new)); bucket_became_unavailable(c, old, new));
...@@ -601,6 +626,10 @@ void bch2_mark_key(struct bch_fs *c, struct bkey_s_c k, ...@@ -601,6 +626,10 @@ void bch2_mark_key(struct bch_fs *c, struct bkey_s_c k,
struct bch_fs_usage *stats, struct bch_fs_usage *stats,
u64 journal_seq, unsigned flags) u64 journal_seq, unsigned flags)
{ {
unsigned replicas = bch2_extent_nr_dirty_ptrs(k);
BUG_ON(replicas && replicas - 1 > ARRAY_SIZE(stats->replicas));
/* /*
* synchronization w.r.t. GC: * synchronization w.r.t. GC:
* *
...@@ -643,32 +672,22 @@ void bch2_mark_key(struct bch_fs *c, struct bkey_s_c k, ...@@ -643,32 +672,22 @@ void bch2_mark_key(struct bch_fs *c, struct bkey_s_c k,
struct bkey_s_c_extent e = bkey_s_c_to_extent(k); struct bkey_s_c_extent e = bkey_s_c_to_extent(k);
const struct bch_extent_ptr *ptr; const struct bch_extent_ptr *ptr;
struct bch_extent_crc_unpacked crc; struct bch_extent_crc_unpacked crc;
unsigned replicas = 0;
BUG_ON(!sectors); BUG_ON(!sectors);
extent_for_each_ptr_crc(e, ptr, crc) { extent_for_each_ptr_crc(e, ptr, crc)
bch2_mark_pointer(c, e, ptr, crc, sectors, data_type, bch2_mark_pointer(c, e, ptr, crc, sectors, data_type,
stats, journal_seq, flags); stats, journal_seq, flags);
replicas += !ptr->cached;
}
if (replicas) { if (replicas)
BUG_ON(replicas - 1 > ARRAY_SIZE(stats->s)); stats->replicas[replicas - 1].data[data_type] += sectors;
stats->s[replicas - 1].data[data_type] += sectors;
}
break; break;
} }
case BCH_RESERVATION: { case BCH_RESERVATION:
struct bkey_s_c_reservation r = bkey_s_c_to_reservation(k); if (replicas)
stats->replicas[replicas - 1].persistent_reserved += sectors;
if (r.v->nr_replicas) {
BUG_ON(r.v->nr_replicas - 1 > ARRAY_SIZE(stats->s));
stats->s[r.v->nr_replicas - 1].persistent_reserved += sectors;
}
break; break;
} }
}
percpu_up_read(&c->usage_lock); percpu_up_read(&c->usage_lock);
} }
...@@ -681,7 +700,7 @@ static u64 __recalc_sectors_available(struct bch_fs *c) ...@@ -681,7 +700,7 @@ static u64 __recalc_sectors_available(struct bch_fs *c)
for_each_possible_cpu(cpu) for_each_possible_cpu(cpu)
per_cpu_ptr(c->usage_percpu, cpu)->available_cache = 0; per_cpu_ptr(c->usage_percpu, cpu)->available_cache = 0;
return bch2_fs_sectors_free(c, bch2_fs_usage_read(c)); return avail_factor(bch2_fs_sectors_free(c, bch2_fs_usage_read(c)));
} }
/* Used by gc when it's starting: */ /* Used by gc when it's starting: */
......
...@@ -173,9 +173,7 @@ struct bch_fs_usage bch2_fs_usage_read(struct bch_fs *); ...@@ -173,9 +173,7 @@ struct bch_fs_usage bch2_fs_usage_read(struct bch_fs *);
void bch2_fs_usage_apply(struct bch_fs *, struct bch_fs_usage *, void bch2_fs_usage_apply(struct bch_fs *, struct bch_fs_usage *,
struct disk_reservation *, struct gc_pos); struct disk_reservation *, struct gc_pos);
u64 __bch2_fs_sectors_used(struct bch_fs *, struct bch_fs_usage);
u64 bch2_fs_sectors_used(struct bch_fs *, struct bch_fs_usage); u64 bch2_fs_sectors_used(struct bch_fs *, struct bch_fs_usage);
u64 bch2_fs_sectors_free(struct bch_fs *, struct bch_fs_usage);
static inline bool is_available_bucket(struct bucket_mark mark) static inline bool is_available_bucket(struct bucket_mark mark)
{ {
......
...@@ -69,7 +69,9 @@ struct bch_fs_usage { ...@@ -69,7 +69,9 @@ struct bch_fs_usage {
struct { struct {
u64 data[BCH_DATA_NR]; u64 data[BCH_DATA_NR];
u64 persistent_reserved; u64 persistent_reserved;
} s[BCH_REPLICAS_MAX]; } replicas[BCH_REPLICAS_MAX];
u64 buckets[BCH_DATA_NR];
}; };
/* /*
......
...@@ -404,10 +404,10 @@ static long bch2_ioctl_usage(struct bch_fs *c, ...@@ -404,10 +404,10 @@ static long bch2_ioctl_usage(struct bch_fs *c,
for (i = 0; i < BCH_REPLICAS_MAX; i++) { for (i = 0; i < BCH_REPLICAS_MAX; i++) {
dst.persistent_reserved[i] = dst.persistent_reserved[i] =
src.s[i].persistent_reserved; src.replicas[i].persistent_reserved;
for (j = 0; j < BCH_DATA_NR; j++) for (j = 0; j < BCH_DATA_NR; j++)
dst.sectors[j][i] = src.s[i].data[j]; dst.sectors[j][i] = src.replicas[i].data[j];
} }
ret = copy_to_user(&user_arg->fs, &dst, sizeof(dst)); ret = copy_to_user(&user_arg->fs, &dst, sizeof(dst));
......
...@@ -1428,13 +1428,16 @@ static int bch2_statfs(struct dentry *dentry, struct kstatfs *buf) ...@@ -1428,13 +1428,16 @@ static int bch2_statfs(struct dentry *dentry, struct kstatfs *buf)
{ {
struct super_block *sb = dentry->d_sb; struct super_block *sb = dentry->d_sb;
struct bch_fs *c = sb->s_fs_info; struct bch_fs *c = sb->s_fs_info;
struct bch_fs_usage usage = bch2_fs_usage_read(c);
u64 hidden_metadata = usage.buckets[BCH_DATA_SB] +
usage.buckets[BCH_DATA_JOURNAL];
unsigned shift = sb->s_blocksize_bits - 9;
u64 fsid; u64 fsid;
buf->f_type = BCACHEFS_STATFS_MAGIC; buf->f_type = BCACHEFS_STATFS_MAGIC;
buf->f_bsize = sb->s_blocksize; buf->f_bsize = sb->s_blocksize;
buf->f_blocks = c->capacity >> PAGE_SECTOR_SHIFT; buf->f_blocks = (c->capacity - hidden_metadata) >> shift;
buf->f_bfree = bch2_fs_sectors_free(c, bch2_fs_usage_read(c)) >> buf->f_bfree = (c->capacity - bch2_fs_sectors_used(c, usage)) >> shift;
PAGE_SECTOR_SHIFT;
buf->f_bavail = buf->f_bfree; buf->f_bavail = buf->f_bfree;
buf->f_files = atomic_long_read(&c->nr_inodes); buf->f_files = atomic_long_read(&c->nr_inodes);
buf->f_ffree = U64_MAX; buf->f_ffree = U64_MAX;
......
...@@ -238,7 +238,7 @@ static ssize_t show_fs_alloc_debug(struct bch_fs *c, char *buf) ...@@ -238,7 +238,7 @@ static ssize_t show_fs_alloc_debug(struct bch_fs *c, char *buf)
"capacity:\t\t%llu\n", "capacity:\t\t%llu\n",
c->capacity); c->capacity);
for (replicas = 0; replicas < ARRAY_SIZE(stats.s); replicas++) { for (replicas = 0; replicas < ARRAY_SIZE(stats.replicas); replicas++) {
out += scnprintf(out, end - out, out += scnprintf(out, end - out,
"%u replicas:\n", "%u replicas:\n",
replicas + 1); replicas + 1);
...@@ -247,12 +247,20 @@ static ssize_t show_fs_alloc_debug(struct bch_fs *c, char *buf) ...@@ -247,12 +247,20 @@ static ssize_t show_fs_alloc_debug(struct bch_fs *c, char *buf)
out += scnprintf(out, end - out, out += scnprintf(out, end - out,
"\t%s:\t\t%llu\n", "\t%s:\t\t%llu\n",
bch2_data_types[type], bch2_data_types[type],
stats.s[replicas].data[type]); stats.replicas[replicas].data[type]);
out += scnprintf(out, end - out, out += scnprintf(out, end - out,
"\treserved:\t%llu\n", "\treserved:\t%llu\n",
stats.s[replicas].persistent_reserved); stats.replicas[replicas].persistent_reserved);
} }
out += scnprintf(out, end - out, "bucket usage\n");
for (type = BCH_DATA_SB; type < BCH_DATA_NR; type++)
out += scnprintf(out, end - out,
"\t%s:\t\t%llu\n",
bch2_data_types[type],
stats.buckets[type]);
out += scnprintf(out, end - out, out += scnprintf(out, end - out,
"online reserved:\t%llu\n", "online reserved:\t%llu\n",
stats.online_reserved); stats.online_reserved);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment