Commit 91065976 authored by Kent Overstreet

bcachefs: Mark stripe buckets with correct data type

Currently, we don't use bucket data type for tracking whether buckets
are part of a stripe; parity buckets are BCH_DATA_parity, but data
buckets in a stripe are BCH_DATA_user. There's a separate counter,
buckets_ec, outside the BCH_DATA_TYPES system for tracking number of
buckets on a device that are part of a stripe.

The trouble with this approach is that it's too coarse grained, and we
need better information on fragmentation for debugging copygc.

With this patch, data buckets in a stripe are now tracked as
BCH_DATA_stripe buckets.

This doesn't yet differentiate between erasure coded and non-erasure
coded data in a stripe bucket, nor do we yet track empty data buckets in
stripes.

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
parent 3329cf1b
...@@ -44,10 +44,10 @@ static inline enum bch_data_type __alloc_data_type(u32 dirty_sectors, ...@@ -44,10 +44,10 @@ static inline enum bch_data_type __alloc_data_type(u32 dirty_sectors,
struct bch_alloc_v4 a, struct bch_alloc_v4 a,
enum bch_data_type data_type) enum bch_data_type data_type)
{ {
if (stripe)
return data_type == BCH_DATA_parity ? data_type : BCH_DATA_stripe;
if (dirty_sectors) if (dirty_sectors)
return data_type; return data_type;
if (stripe)
return BCH_DATA_stripe;
if (cached_sectors) if (cached_sectors)
return BCH_DATA_cached; return BCH_DATA_cached;
if (BCH_ALLOC_V4_NEED_DISCARD(&a)) if (BCH_ALLOC_V4_NEED_DISCARD(&a))
...@@ -64,19 +64,31 @@ static inline enum bch_data_type alloc_data_type(struct bch_alloc_v4 a, ...@@ -64,19 +64,31 @@ static inline enum bch_data_type alloc_data_type(struct bch_alloc_v4 a,
a.stripe, a, data_type); a.stripe, a, data_type);
} }
/*
 * Collapse BCH_DATA_stripe to BCH_DATA_user; every other data type is
 * returned unchanged.
 */
static inline enum bch_data_type bucket_data_type(enum bch_data_type data_type)
{
	if (data_type == BCH_DATA_stripe)
		return BCH_DATA_user;

	return data_type;
}
static inline u64 alloc_lru_idx_read(struct bch_alloc_v4 a) static inline u64 alloc_lru_idx_read(struct bch_alloc_v4 a)
{ {
return a.data_type == BCH_DATA_cached ? a.io_time[READ] : 0; return a.data_type == BCH_DATA_cached ? a.io_time[READ] : 0;
} }
/*
 * Bitmask with one bit per enum bch_data_type value (bit N set means data
 * type N is in the set): btree, user and stripe.
 */
#define DATA_TYPES_MOVABLE			\
	((1U << BCH_DATA_btree)  |		\
	 (1U << BCH_DATA_user)   |		\
	 (1U << BCH_DATA_stripe))
/*
 * Return true when @type has its bit set in DATA_TYPES_MOVABLE, false
 * otherwise.
 */
static inline bool data_type_movable(enum bch_data_type type)
{
	return (DATA_TYPES_MOVABLE & (1U << type)) != 0;
}
static inline u64 alloc_lru_idx_fragmentation(struct bch_alloc_v4 a, static inline u64 alloc_lru_idx_fragmentation(struct bch_alloc_v4 a,
struct bch_dev *ca) struct bch_dev *ca)
{ {
if (a.data_type != BCH_DATA_btree && if (!data_type_movable(a.data_type) ||
a.data_type != BCH_DATA_user) a.dirty_sectors >= ca->mi.bucket_size)
return 0;
if (a.dirty_sectors >= ca->mi.bucket_size)
return 0; return 0;
return div_u64((u64) a.dirty_sectors * (1ULL << 31), ca->mi.bucket_size); return div_u64((u64) a.dirty_sectors * (1ULL << 31), ca->mi.bucket_size);
......
...@@ -96,12 +96,20 @@ static inline int bch2_bucket_backpointer_mod(struct btree_trans *trans, ...@@ -96,12 +96,20 @@ static inline int bch2_bucket_backpointer_mod(struct btree_trans *trans,
return bch2_trans_update_buffered(trans, BTREE_ID_backpointers, &bp_k->k_i); return bch2_trans_update_buffered(trans, BTREE_ID_backpointers, &bp_k->k_i);
} }
/*
 * Pick the data type for a decoded extent pointer:
 *   - a non-zero @level yields BCH_DATA_btree;
 *   - otherwise a pointer with p.has_ec set yields BCH_DATA_stripe;
 *   - anything else yields BCH_DATA_user.
 *
 * @btree_id and @k are accepted but not consulted by this body.
 */
static inline enum bch_data_type bkey_ptr_data_type(enum btree_id btree_id, unsigned level,
						   struct bkey_s_c k, struct extent_ptr_decoded p)
{
	if (level)
		return BCH_DATA_btree;

	if (p.has_ec)
		return BCH_DATA_stripe;

	return BCH_DATA_user;
}
static inline void bch2_extent_ptr_to_bp(struct bch_fs *c, static inline void bch2_extent_ptr_to_bp(struct bch_fs *c,
enum btree_id btree_id, unsigned level, enum btree_id btree_id, unsigned level,
struct bkey_s_c k, struct extent_ptr_decoded p, struct bkey_s_c k, struct extent_ptr_decoded p,
struct bpos *bucket_pos, struct bch_backpointer *bp) struct bpos *bucket_pos, struct bch_backpointer *bp)
{ {
enum bch_data_type data_type = level ? BCH_DATA_btree : BCH_DATA_user; enum bch_data_type data_type = bkey_ptr_data_type(btree_id, level, k, p);
s64 sectors = level ? btree_sectors(c) : k.k->size; s64 sectors = level ? btree_sectors(c) : k.k->size;
u32 bucket_offset; u32 bucket_offset;
......
...@@ -633,8 +633,8 @@ static int bch2_check_fix_ptrs(struct btree_trans *trans, enum btree_id btree_id ...@@ -633,8 +633,8 @@ static int bch2_check_fix_ptrs(struct btree_trans *trans, enum btree_id btree_id
if (data_type != BCH_DATA_btree && p.ptr.gen != g->gen) if (data_type != BCH_DATA_btree && p.ptr.gen != g->gen)
continue; continue;
if (fsck_err_on(g->data_type && if (fsck_err_on(bucket_data_type(g->data_type) &&
g->data_type != data_type, c, bucket_data_type(g->data_type) != data_type, c,
"bucket %u:%zu different types of data in same bucket: %s, %s\n" "bucket %u:%zu different types of data in same bucket: %s, %s\n"
"while marking %s", "while marking %s",
p.ptr.dev, PTR_BUCKET_NR(ca, &p.ptr), p.ptr.dev, PTR_BUCKET_NR(ca, &p.ptr),
...@@ -1397,6 +1397,16 @@ static int bch2_alloc_write_key(struct btree_trans *trans, ...@@ -1397,6 +1397,16 @@ static int bch2_alloc_write_key(struct btree_trans *trans,
if (gen_after(old->gen, gc.gen)) if (gen_after(old->gen, gc.gen))
return 0; return 0;
if (c->opts.reconstruct_alloc ||
fsck_err_on(new.data_type != gc.data_type, c,
"bucket %llu:%llu gen %u has wrong data_type"
": got %s, should be %s",
iter->pos.inode, iter->pos.offset,
gc.gen,
bch2_data_types[new.data_type],
bch2_data_types[gc.data_type]))
new.data_type = gc.data_type;
#define copy_bucket_field(_f) \ #define copy_bucket_field(_f) \
if (c->opts.reconstruct_alloc || \ if (c->opts.reconstruct_alloc || \
fsck_err_on(new._f != gc._f, c, \ fsck_err_on(new._f != gc._f, c, \
...@@ -1409,7 +1419,6 @@ static int bch2_alloc_write_key(struct btree_trans *trans, ...@@ -1409,7 +1419,6 @@ static int bch2_alloc_write_key(struct btree_trans *trans,
new._f = gc._f; \ new._f = gc._f; \
copy_bucket_field(gen); copy_bucket_field(gen);
copy_bucket_field(data_type);
copy_bucket_field(dirty_sectors); copy_bucket_field(dirty_sectors);
copy_bucket_field(cached_sectors); copy_bucket_field(cached_sectors);
copy_bucket_field(stripe_redundancy); copy_bucket_field(stripe_redundancy);
......
...@@ -776,7 +776,7 @@ static int mark_stripe_bucket(struct btree_trans *trans, ...@@ -776,7 +776,7 @@ static int mark_stripe_bucket(struct btree_trans *trans,
const struct bch_stripe *s = bkey_s_c_to_stripe(k).v; const struct bch_stripe *s = bkey_s_c_to_stripe(k).v;
unsigned nr_data = s->nr_blocks - s->nr_redundant; unsigned nr_data = s->nr_blocks - s->nr_redundant;
bool parity = ptr_idx >= nr_data; bool parity = ptr_idx >= nr_data;
enum bch_data_type data_type = parity ? BCH_DATA_parity : 0; enum bch_data_type data_type = parity ? BCH_DATA_parity : BCH_DATA_stripe;
s64 sectors = parity ? le16_to_cpu(s->sectors) : 0; s64 sectors = parity ? le16_to_cpu(s->sectors) : 0;
const struct bch_extent_ptr *ptr = s->ptrs + ptr_idx; const struct bch_extent_ptr *ptr = s->ptrs + ptr_idx;
struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev); struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev);
...@@ -811,7 +811,6 @@ static int mark_stripe_bucket(struct btree_trans *trans, ...@@ -811,7 +811,6 @@ static int mark_stripe_bucket(struct btree_trans *trans,
if (ret) if (ret)
goto err; goto err;
if (data_type)
g->data_type = data_type; g->data_type = data_type;
g->dirty_sectors += sectors; g->dirty_sectors += sectors;
...@@ -851,15 +850,17 @@ static int __mark_pointer(struct btree_trans *trans, ...@@ -851,15 +850,17 @@ static int __mark_pointer(struct btree_trans *trans,
} }
static int bch2_mark_pointer(struct btree_trans *trans, static int bch2_mark_pointer(struct btree_trans *trans,
enum btree_id btree_id, unsigned level,
struct bkey_s_c k, struct bkey_s_c k,
struct extent_ptr_decoded p, struct extent_ptr_decoded p,
s64 sectors, enum bch_data_type data_type, s64 sectors,
unsigned flags) unsigned flags)
{ {
u64 journal_seq = trans->journal_res.seq; u64 journal_seq = trans->journal_res.seq;
struct bch_fs *c = trans->c; struct bch_fs *c = trans->c;
struct bch_dev *ca = bch_dev_bkey_exists(c, p.ptr.dev); struct bch_dev *ca = bch_dev_bkey_exists(c, p.ptr.dev);
struct bucket old, new, *g; struct bucket old, new, *g;
enum bch_data_type data_type = bkey_ptr_data_type(btree_id, level, k, p);
u8 bucket_data_type; u8 bucket_data_type;
int ret = 0; int ret = 0;
...@@ -963,8 +964,7 @@ int bch2_mark_extent(struct btree_trans *trans, ...@@ -963,8 +964,7 @@ int bch2_mark_extent(struct btree_trans *trans,
if (flags & BTREE_TRIGGER_OVERWRITE) if (flags & BTREE_TRIGGER_OVERWRITE)
disk_sectors = -disk_sectors; disk_sectors = -disk_sectors;
ret = bch2_mark_pointer(trans, k, p, disk_sectors, ret = bch2_mark_pointer(trans, btree_id, level, k, p, disk_sectors, flags);
data_type, flags);
if (ret < 0) if (ret < 0)
return ret; return ret;
...@@ -1596,6 +1596,7 @@ static int bch2_trans_mark_stripe_bucket(struct btree_trans *trans, ...@@ -1596,6 +1596,7 @@ static int bch2_trans_mark_stripe_bucket(struct btree_trans *trans,
a->v.stripe = s.k->p.offset; a->v.stripe = s.k->p.offset;
a->v.stripe_redundancy = s.v->nr_redundant; a->v.stripe_redundancy = s.v->nr_redundant;
a->v.data_type = BCH_DATA_stripe;
} else { } else {
if (bch2_trans_inconsistent_on(a->v.stripe != s.k->p.offset || if (bch2_trans_inconsistent_on(a->v.stripe != s.k->p.offset ||
a->v.stripe_redundancy != s.v->nr_redundant, trans, a->v.stripe_redundancy != s.v->nr_redundant, trans,
...@@ -1608,6 +1609,7 @@ static int bch2_trans_mark_stripe_bucket(struct btree_trans *trans, ...@@ -1608,6 +1609,7 @@ static int bch2_trans_mark_stripe_bucket(struct btree_trans *trans,
a->v.stripe = 0; a->v.stripe = 0;
a->v.stripe_redundancy = 0; a->v.stripe_redundancy = 0;
a->v.data_type = alloc_data_type(a->v, BCH_DATA_user);
} }
a->v.dirty_sectors += sectors; a->v.dirty_sectors += sectors;
......
...@@ -55,8 +55,7 @@ static int bch2_bucket_is_movable(struct btree_trans *trans, ...@@ -55,8 +55,7 @@ static int bch2_bucket_is_movable(struct btree_trans *trans,
a = bch2_alloc_to_v4(k, &_a); a = bch2_alloc_to_v4(k, &_a);
*gen = a->gen; *gen = a->gen;
ret = (a->data_type == BCH_DATA_btree || ret = data_type_movable(a->data_type) &&
a->data_type == BCH_DATA_user) &&
a->fragmentation_lru && a->fragmentation_lru &&
a->fragmentation_lru <= time; a->fragmentation_lru <= time;
...@@ -158,13 +157,18 @@ unsigned long bch2_copygc_wait_amount(struct bch_fs *c) ...@@ -158,13 +157,18 @@ unsigned long bch2_copygc_wait_amount(struct bch_fs *c)
struct bch_dev *ca; struct bch_dev *ca;
unsigned dev_idx; unsigned dev_idx;
s64 wait = S64_MAX, fragmented_allowed, fragmented; s64 wait = S64_MAX, fragmented_allowed, fragmented;
unsigned i;
for_each_rw_member(ca, c, dev_idx) { for_each_rw_member(ca, c, dev_idx) {
struct bch_dev_usage usage = bch2_dev_usage_read(ca); struct bch_dev_usage usage = bch2_dev_usage_read(ca);
fragmented_allowed = ((__dev_buckets_available(ca, usage, RESERVE_none) * fragmented_allowed = ((__dev_buckets_available(ca, usage, RESERVE_none) *
ca->mi.bucket_size) >> 1); ca->mi.bucket_size) >> 1);
fragmented = usage.d[BCH_DATA_user].fragmented; fragmented = 0;
for (i = 0; i < BCH_DATA_NR; i++)
if (data_type_movable(i))
fragmented += usage.d[i].fragmented;
wait = min(wait, max(0LL, fragmented_allowed - fragmented)); wait = min(wait, max(0LL, fragmented_allowed - fragmented));
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment