Commit 271a3d3a authored by Kent Overstreet, committed by Kent Overstreet

bcachefs: lift ordering restriction on 0 size extents

This lifts the restriction that 0-size extents must not overlap with
other extents, which means we can now sort extents and non-extents the
same way, and will let us simplify a bunch of other code as well.

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
parent 0fdf1804
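
The ordering rule this commit switches to is the one implemented by the new __btree_node_iter_cmp() in the bset.h hunk below: keys sort by position, and when positions compare equal, deleted (0-size) keys sort first in both extent and non-extent nodes. A rough standalone sketch of that comparison, using toy types rather than the real bcachefs bkey structures and assuming only what the diff itself shows:

#include <stdbool.h>

/* Toy stand-ins for bkey_packed/bpos -- illustration only, not the bcachefs types. */
struct toy_key {
	unsigned long long	pos;		/* key position (end of the extent) */
	bool			deleted;	/* whiteout / 0-size extent */
};

/*
 * One comparison rule for extent and non-extent nodes alike:
 * order by position; on a tie, deleted keys come first; fall back to
 * address order so the sort is total.
 */
static int toy_iter_cmp(const struct toy_key *l, const struct toy_key *r)
{
	if (l->pos != r->pos)
		return l->pos < r->pos ? -1 : 1;
	if (l->deleted != r->deleted)
		return l->deleted ? -1 : 1;
	return (l > r) - (l < r);
}

Previously extent nodes sorted deleted keys last (bkey_deleted() standing in for k->size == 0), which is what forced extents and non-extents to be sorted differently; the comment removed from bset.h below spells this out.
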
@@ -18,6 +18,9 @@
 #include <linux/random.h>
 #include <linux/prefetch.h>
 
+static inline void __bch2_btree_node_iter_advance(struct btree_node_iter *,
+						   struct btree *);
+
 struct bset_tree *bch2_bkey_to_bset(struct btree *b, struct bkey_packed *k)
 {
 	unsigned offset = __btree_node_key_to_offset(b, k);
@@ -63,8 +66,8 @@ void bch2_dump_bset(struct btree *b, struct bset *i, unsigned set)
 		_n = bkey_next(_k);
 
 		bch2_bkey_to_text(buf, sizeof(buf), &k);
-		printk(KERN_ERR "block %u key %zi/%u: %s\n", set,
-		       _k->_data - i->_data, i->u64s, buf);
+		printk(KERN_ERR "block %u key %5u: %s\n", set,
+		       __btree_node_key_to_offset(b, _k), buf);
 
 		if (_n == vstruct_last(i))
 			continue;
@@ -120,20 +123,6 @@ void bch2_dump_btree_node_iter(struct btree *b,
 
 #ifdef CONFIG_BCACHEFS_DEBUG
 
-static bool keys_out_of_order(struct btree *b,
-			      const struct bkey_packed *prev,
-			      const struct bkey_packed *next,
-			      bool is_extents)
-{
-	struct bkey nextu = bkey_unpack_key(b, next);
-
-	return bkey_cmp_left_packed_byval(b, prev, bkey_start_pos(&nextu)) > 0 ||
-		((is_extents
-		  ? !bkey_deleted(next)
-		  : !bkey_deleted(prev)) &&
-		 !bkey_cmp_packed(b, prev, next));
-}
-
 void __bch2_verify_btree_nr_keys(struct btree *b)
 {
 	struct bset_tree *t;
@@ -150,16 +139,21 @@ void __bch2_verify_btree_nr_keys(struct btree *b)
 	BUG_ON(memcmp(&nr, &b->nr, sizeof(nr)));
 }
 
-static void bch2_btree_node_iter_next_check(struct btree_node_iter *iter,
-					    struct btree *b,
-					    struct bkey_packed *k)
+static void bch2_btree_node_iter_next_check(struct btree_node_iter *_iter,
+					    struct btree *b)
 {
-	const struct bkey_packed *n = bch2_btree_node_iter_peek_all(iter, b);
+	struct btree_node_iter iter = *_iter;
+	const struct bkey_packed *k, *n;
+
+	k = bch2_btree_node_iter_peek_all(&iter, b);
+	__bch2_btree_node_iter_advance(&iter, b);
+	n = bch2_btree_node_iter_peek_all(&iter, b);
 
 	bkey_unpack_key(b, k);
 
 	if (n &&
-	    keys_out_of_order(b, k, n, iter->is_extents)) {
+	    __btree_node_iter_cmp(b, k, n) > 0) {
+		struct btree_node_iter_set *set;
 		struct bkey ku = bkey_unpack_key(b, k);
 		struct bkey nu = bkey_unpack_key(b, n);
 		char buf1[80], buf2[80];
@@ -167,7 +161,17 @@ static void bch2_btree_node_iter_next_check(struct btree_node_iter *iter,
 		bch2_dump_btree_node(b);
 		bch2_bkey_to_text(buf1, sizeof(buf1), &ku);
 		bch2_bkey_to_text(buf2, sizeof(buf2), &nu);
-		panic("out of order/overlapping:\n%s\n%s\n", buf1, buf2);
+		printk(KERN_ERR "out of order/overlapping:\n%s\n%s\n",
+		       buf1, buf2);
+		printk(KERN_ERR "iter was:");
+
+		btree_node_iter_for_each(_iter, set) {
+			struct bkey_packed *k = __btree_node_offset_to_key(b, set->k);
+			struct bset_tree *t = bch2_bkey_to_bset(b, k);
+
+			printk(" [%zi %zi]", t - b->set,
+			       k->_data - bset(b, t)->_data);
+		}
+		panic("\n");
 	}
 }
@@ -196,72 +200,72 @@ void bch2_btree_node_iter_verify(struct btree_node_iter *iter,
 	/* Verify iterator is sorted: */
 	btree_node_iter_for_each(iter, set)
 		BUG_ON(set != iter->data &&
-		       btree_node_iter_cmp(iter, b, set[-1], set[0]) > 0);
+		       btree_node_iter_cmp(b, set[-1], set[0]) > 0);
 }
 
-void bch2_verify_key_order(struct btree *b,
-			   struct btree_node_iter *iter,
-			   struct bkey_packed *where)
+void bch2_verify_insert_pos(struct btree *b, struct bkey_packed *where,
+			    struct bkey_packed *insert, unsigned clobber_u64s)
 {
 	struct bset_tree *t = bch2_bkey_to_bset(b, where);
-	struct bkey_packed *k, *prev;
-	struct bkey uk, uw = bkey_unpack_key(b, where);
-
-	k = bch2_bkey_prev_all(b, t, where);
-	if (k &&
-	    keys_out_of_order(b, k, where, iter->is_extents)) {
-		char buf1[100], buf2[100];
+	struct bkey_packed *prev = bch2_bkey_prev_all(b, t, where);
+	struct bkey_packed *next = (void *) (where->_data + clobber_u64s);
+#if 0
+	BUG_ON(prev &&
+	       __btree_node_iter_cmp(b, prev, insert) > 0);
+#else
+	if (prev &&
+	    __btree_node_iter_cmp(b, prev, insert) > 0) {
+		struct bkey k1 = bkey_unpack_key(b, prev);
+		struct bkey k2 = bkey_unpack_key(b, insert);
+		char buf1[100];
+		char buf2[100];
 
 		bch2_dump_btree_node(b);
-		uk = bkey_unpack_key(b, k);
-		bch2_bkey_to_text(buf1, sizeof(buf1), &uk);
-		bch2_bkey_to_text(buf2, sizeof(buf2), &uw);
-		panic("out of order with prev:\n%s\n%s\n",
-		      buf1, buf2);
+		bch2_bkey_to_text(buf1, sizeof(buf1), &k1);
+		bch2_bkey_to_text(buf2, sizeof(buf2), &k2);
+		panic("prev > insert:\n"
+		      "prev key %5u %s\n"
+		      "insert key %5u %s\n",
+		      __btree_node_key_to_offset(b, prev), buf1,
+		      __btree_node_key_to_offset(b, insert), buf2);
 	}
+#endif
 
-	k = bkey_next(where);
-	BUG_ON(k != btree_bkey_last(b, t) &&
-	       keys_out_of_order(b, where, k, iter->is_extents));
-
-	for_each_bset(b, t) {
-		if (where >= btree_bkey_first(b, t) ||
-		    where < btree_bkey_last(b, t))
-			continue;
-
-		k = bch2_btree_node_iter_bset_pos(iter, b, t);
-
-		if (k == btree_bkey_last(b, t))
-			k = bch2_bkey_prev_all(b, t, k);
-
-		while (bkey_cmp_left_packed_byval(b, k, bkey_start_pos(&uw)) > 0 &&
-		       (prev = bch2_bkey_prev_all(b, t, k)))
-			k = prev;
-
-		for (;
-		     k != btree_bkey_last(b, t);
-		     k = bkey_next(k)) {
-			uk = bkey_unpack_key(b, k);
-
-			if (iter->is_extents) {
-				BUG_ON(!(bkey_cmp(uw.p, bkey_start_pos(&uk)) <= 0 ||
-					 bkey_cmp(uk.p, bkey_start_pos(&uw)) <= 0));
-			} else {
-				BUG_ON(!bkey_cmp(uw.p, uk.p) &&
-				       !bkey_deleted(&uk));
-			}
-
-			if (bkey_cmp(uw.p, bkey_start_pos(&uk)) <= 0)
-				break;
-		}
-	}
+#if 0
+	BUG_ON(next != btree_bkey_last(b, t) &&
+	       __btree_node_iter_cmp(b, insert, next) > 0);
+#else
+	if (next != btree_bkey_last(b, t) &&
+	    __btree_node_iter_cmp(b, insert, next) > 0) {
+		struct bkey k1 = bkey_unpack_key(b, insert);
+		struct bkey k2 = bkey_unpack_key(b, next);
+		char buf1[100];
+		char buf2[100];
+
+		bch2_dump_btree_node(b);
+		bch2_bkey_to_text(buf1, sizeof(buf1), &k1);
+		bch2_bkey_to_text(buf2, sizeof(buf2), &k2);
+		panic("insert > next:\n"
+		      "insert key %5u %s\n"
+		      "next key %5u %s\n",
+		      __btree_node_key_to_offset(b, insert), buf1,
+		      __btree_node_key_to_offset(b, next), buf2);
+	}
+#endif
+}
+
+void bch2_verify_key_order(struct btree *b,
+			   struct btree_node_iter *_iter,
+			   struct bkey_packed *where)
+{
+	bch2_verify_insert_pos(b, where, where, where->u64s);
 }
 #else
 static inline void bch2_btree_node_iter_next_check(struct btree_node_iter *iter,
-						   struct btree *b,
-						   struct bkey_packed *k) {}
+						   struct btree *b) {}
 #endif
@@ -1229,6 +1233,7 @@ void bch2_bset_insert(struct btree *b,
struct bkey_packed packed, *src = bkey_to_packed(insert); struct bkey_packed packed, *src = bkey_to_packed(insert);
bch2_bset_verify_rw_aux_tree(b, t); bch2_bset_verify_rw_aux_tree(b, t);
bch2_verify_insert_pos(b, where, bkey_to_packed(insert), clobber_u64s);
if (bch2_bkey_pack_key(&packed, &insert->k, f)) if (bch2_bkey_pack_key(&packed, &insert->k, f))
src = &packed; src = &packed;
@@ -1255,7 +1260,6 @@ void bch2_bset_insert(struct btree *b,
bch2_bset_fix_lookup_table(b, t, where, clobber_u64s, src->u64s); bch2_bset_fix_lookup_table(b, t, where, clobber_u64s, src->u64s);
bch2_verify_key_order(b, iter, where);
bch2_verify_btree_nr_keys(b); bch2_verify_btree_nr_keys(b);
} }
@@ -1461,7 +1465,7 @@ void bch2_btree_node_iter_push(struct btree_node_iter *iter,
noinline __flatten __attribute__((cold)) noinline __flatten __attribute__((cold))
static void btree_node_iter_init_pack_failed(struct btree_node_iter *iter, static void btree_node_iter_init_pack_failed(struct btree_node_iter *iter,
struct btree *b, struct bpos search, struct btree *b, struct bpos search,
bool strictly_greater, bool is_extents) bool strictly_greater)
{ {
struct bset_tree *t; struct bset_tree *t;
@@ -1518,7 +1522,7 @@ static void btree_node_iter_init_pack_failed(struct btree_node_iter *iter,
*/ */
void bch2_btree_node_iter_init(struct btree_node_iter *iter, void bch2_btree_node_iter_init(struct btree_node_iter *iter,
struct btree *b, struct bpos search, struct btree *b, struct bpos search,
bool strictly_greater, bool is_extents) bool strictly_greater)
{ {
struct bset_tree *t; struct bset_tree *t;
struct bkey_packed p, *packed_search = NULL; struct bkey_packed p, *packed_search = NULL;
@@ -1526,7 +1530,7 @@ void bch2_btree_node_iter_init(struct btree_node_iter *iter,
EBUG_ON(bkey_cmp(search, b->data->min_key) < 0); EBUG_ON(bkey_cmp(search, b->data->min_key) < 0);
bset_aux_tree_verify(b); bset_aux_tree_verify(b);
__bch2_btree_node_iter_init(iter, is_extents); memset(iter, 0, sizeof(*iter));
switch (bch2_bkey_pack_pos_lossy(&p, search, b)) { switch (bch2_bkey_pack_pos_lossy(&p, search, b)) {
case BKEY_PACK_POS_EXACT: case BKEY_PACK_POS_EXACT:
@@ -1537,7 +1541,7 @@ void bch2_btree_node_iter_init(struct btree_node_iter *iter,
break; break;
case BKEY_PACK_POS_FAIL: case BKEY_PACK_POS_FAIL:
btree_node_iter_init_pack_failed(iter, b, search, btree_node_iter_init_pack_failed(iter, b, search,
strictly_greater, is_extents); strictly_greater);
return; return;
} }
@@ -1552,12 +1556,11 @@ void bch2_btree_node_iter_init(struct btree_node_iter *iter,
} }
void bch2_btree_node_iter_init_from_start(struct btree_node_iter *iter, void bch2_btree_node_iter_init_from_start(struct btree_node_iter *iter,
struct btree *b, struct btree *b)
bool is_extents)
{ {
struct bset_tree *t; struct bset_tree *t;
__bch2_btree_node_iter_init(iter, is_extents); memset(iter, 0, sizeof(*iter));
for_each_bset(b, t) for_each_bset(b, t)
__bch2_btree_node_iter_push(iter, b, __bch2_btree_node_iter_push(iter, b,
@@ -1585,7 +1588,7 @@ static inline bool btree_node_iter_sort_two(struct btree_node_iter *iter,
{ {
bool ret; bool ret;
if ((ret = (btree_node_iter_cmp(iter, b, if ((ret = (btree_node_iter_cmp(b,
iter->data[first], iter->data[first],
iter->data[first + 1]) > 0))) iter->data[first + 1]) > 0)))
swap(iter->data[first], iter->data[first + 1]); swap(iter->data[first], iter->data[first + 1]);
@@ -1640,23 +1643,14 @@ static inline void __bch2_btree_node_iter_advance(struct btree_node_iter *iter,
 		btree_node_iter_sort_two(iter, b, 1);
 }
 
-/**
- * bch_btree_node_iter_advance - advance @iter by one key
- *
- * Doesn't do debugchecks - for cases where (insert_fixup_extent()) a bset might
- * momentarily have out of order extents.
- */
 void bch2_btree_node_iter_advance(struct btree_node_iter *iter,
 				  struct btree *b)
 {
 #ifdef CONFIG_BCACHEFS_DEBUG
-	struct bkey_packed *k = bch2_btree_node_iter_peek_all(iter, b);
-
-	__bch2_btree_node_iter_advance(iter, b);
-	bch2_btree_node_iter_next_check(iter, b, k);
-#else
-	__bch2_btree_node_iter_advance(iter, b);
+	bch2_btree_node_iter_verify(iter, b);
+	bch2_btree_node_iter_next_check(iter, b);
 #endif
+	__bch2_btree_node_iter_advance(iter, b);
 }
 
 static inline unsigned __btree_node_iter_used(struct btree_node_iter *iter)
@@ -1689,8 +1683,7 @@ struct bkey_packed *bch2_btree_node_iter_prev_filter(struct btree_node_iter *ite
bch2_btree_node_iter_bset_pos(iter, b, t), bch2_btree_node_iter_bset_pos(iter, b, t),
min_key_type); min_key_type);
if (k && if (k &&
(!prev || __btree_node_iter_cmp(iter->is_extents, b, (!prev || __btree_node_iter_cmp(b, k, prev) > 0)) {
k, prev) > 0)) {
prev = k; prev = k;
end = t->end_offset; end = t->end_offset;
} }
@@ -1723,11 +1716,11 @@ struct bkey_packed *bch2_btree_node_iter_prev_filter(struct btree_node_iter *ite
struct btree_node_iter iter2 = *iter; struct btree_node_iter iter2 = *iter;
if (prev) if (prev)
bch2_btree_node_iter_advance(&iter2, b); __bch2_btree_node_iter_advance(&iter2, b);
while ((k = bch2_btree_node_iter_peek_all(&iter2, b)) != orig_pos) { while ((k = bch2_btree_node_iter_peek_all(&iter2, b)) != orig_pos) {
BUG_ON(k->type >= min_key_type); BUG_ON(k->type >= min_key_type);
bch2_btree_node_iter_advance(&iter2, b); __bch2_btree_node_iter_advance(&iter2, b);
} }
} }
......
@@ -369,6 +369,17 @@ static inline int bkey_cmp_p_or_unp(const struct btree *b,
return __bch2_bkey_cmp_left_packed_format_checked(b, l, r); return __bch2_bkey_cmp_left_packed_format_checked(b, l, r);
} }
/* Returns true if @k is after iterator position @pos */
static inline bool btree_iter_pos_cmp(struct btree_iter *iter,
const struct bkey *k)
{
int cmp = bkey_cmp(k->p, iter->pos);
return cmp > 0 ||
(cmp == 0 &&
!(iter->flags & BTREE_ITER_IS_EXTENTS) && !bkey_deleted(k));
}
/* Returns true if @k is after iterator position @pos */ /* Returns true if @k is after iterator position @pos */
static inline bool btree_iter_pos_cmp_packed(const struct btree *b, static inline bool btree_iter_pos_cmp_packed(const struct btree *b,
struct bpos *pos, struct bpos *pos,
@@ -430,20 +441,13 @@ static inline enum bch_extent_overlap bch2_extent_overlap(const struct bkey *k,
/* Btree key iteration */ /* Btree key iteration */
static inline void __bch2_btree_node_iter_init(struct btree_node_iter *iter,
bool is_extents)
{
iter->is_extents = is_extents;
memset(iter->data, 0, sizeof(iter->data));
}
void bch2_btree_node_iter_push(struct btree_node_iter *, struct btree *, void bch2_btree_node_iter_push(struct btree_node_iter *, struct btree *,
const struct bkey_packed *, const struct bkey_packed *,
const struct bkey_packed *); const struct bkey_packed *);
void bch2_btree_node_iter_init(struct btree_node_iter *, struct btree *, void bch2_btree_node_iter_init(struct btree_node_iter *, struct btree *,
struct bpos, bool, bool); struct bpos, bool);
void bch2_btree_node_iter_init_from_start(struct btree_node_iter *, void bch2_btree_node_iter_init_from_start(struct btree_node_iter *,
struct btree *, bool); struct btree *);
struct bkey_packed *bch2_btree_node_iter_bset_pos(struct btree_node_iter *, struct bkey_packed *bch2_btree_node_iter_bset_pos(struct btree_node_iter *,
struct btree *, struct btree *,
struct bset_tree *); struct bset_tree *);
@@ -470,32 +474,21 @@ static inline bool bch2_btree_node_iter_end(struct btree_node_iter *iter)
 	return __btree_node_iter_set_end(iter, 0);
 }
 
-static inline int __btree_node_iter_cmp(bool is_extents,
-					struct btree *b,
-					struct bkey_packed *l,
-					struct bkey_packed *r)
+static inline int __btree_node_iter_cmp(struct btree *b,
+					const struct bkey_packed *l,
+					const struct bkey_packed *r)
 {
-	/*
-	 * For non extents, when keys compare equal the deleted keys have to
-	 * come first - so that bch2_btree_node_iter_next_check() can detect
-	 * duplicate nondeleted keys (and possibly other reasons?)
-	 *
-	 * For extents, bkey_deleted() is used as a proxy for k->size == 0, so
-	 * deleted keys have to sort last.
-	 */
+	/* When keys compare equal deleted keys come first */
 	return bkey_cmp_packed(b, l, r)
-		?: (is_extents
-		    ? (int) bkey_deleted(l) - (int) bkey_deleted(r)
-		    : (int) bkey_deleted(r) - (int) bkey_deleted(l))
+		?: (int) bkey_deleted(r) - (int) bkey_deleted(l)
 		?: (l > r) - (l < r);
 }
 
-static inline int btree_node_iter_cmp(struct btree_node_iter *iter,
-				      struct btree *b,
+static inline int btree_node_iter_cmp(struct btree *b,
 				      struct btree_node_iter_set l,
 				      struct btree_node_iter_set r)
 {
-	return __btree_node_iter_cmp(iter->is_extents, b,
+	return __btree_node_iter_cmp(b,
 				     __btree_node_offset_to_key(b, l.k),
 				     __btree_node_offset_to_key(b, r.k));
 }
@@ -582,21 +575,12 @@ bch2_btree_node_iter_prev(struct btree_node_iter *iter, struct btree *b)
return bch2_btree_node_iter_prev_filter(iter, b, KEY_TYPE_DISCARD + 1); return bch2_btree_node_iter_prev_filter(iter, b, KEY_TYPE_DISCARD + 1);
} }
/*
* Iterates over all _live_ keys - skipping deleted (and potentially
* overlapping) keys
*/
#define for_each_btree_node_key(b, k, iter, _is_extents) \
for (bch2_btree_node_iter_init_from_start((iter), (b), (_is_extents));\
((k) = bch2_btree_node_iter_peek(iter, b)); \
bch2_btree_node_iter_advance(iter, b))
struct bkey_s_c bch2_btree_node_iter_peek_unpack(struct btree_node_iter *, struct bkey_s_c bch2_btree_node_iter_peek_unpack(struct btree_node_iter *,
struct btree *, struct btree *,
struct bkey *); struct bkey *);
#define for_each_btree_node_key_unpack(b, k, iter, _is_extents, unpacked)\ #define for_each_btree_node_key_unpack(b, k, iter, unpacked) \
for (bch2_btree_node_iter_init_from_start((iter), (b), (_is_extents));\ for (bch2_btree_node_iter_init_from_start((iter), (b)); \
(k = bch2_btree_node_iter_peek_unpack((iter), (b), (unpacked))).k;\ (k = bch2_btree_node_iter_peek_unpack((iter), (b), (unpacked))).k;\
bch2_btree_node_iter_advance(iter, b)) bch2_btree_node_iter_advance(iter, b))
@@ -646,6 +630,8 @@ void bch2_dump_btree_node_iter(struct btree *, struct btree_node_iter *);
void __bch2_verify_btree_nr_keys(struct btree *); void __bch2_verify_btree_nr_keys(struct btree *);
void bch2_btree_node_iter_verify(struct btree_node_iter *, struct btree *); void bch2_btree_node_iter_verify(struct btree_node_iter *, struct btree *);
void bch2_verify_insert_pos(struct btree *, struct bkey_packed *,
struct bkey_packed *, unsigned);
void bch2_verify_key_order(struct btree *, struct btree_node_iter *, void bch2_verify_key_order(struct btree *, struct btree_node_iter *,
struct bkey_packed *); struct bkey_packed *);
@@ -654,6 +640,10 @@ void bch2_verify_key_order(struct btree *, struct btree_node_iter *,
static inline void __bch2_verify_btree_nr_keys(struct btree *b) {} static inline void __bch2_verify_btree_nr_keys(struct btree *b) {}
static inline void bch2_btree_node_iter_verify(struct btree_node_iter *iter, static inline void bch2_btree_node_iter_verify(struct btree_node_iter *iter,
struct btree *b) {} struct btree *b) {}
static inline void bch2_verify_insert_pos(struct btree *b,
struct bkey_packed *where,
struct bkey_packed *insert,
unsigned clobber_u64s) {}
static inline void bch2_verify_key_order(struct btree *b, static inline void bch2_verify_key_order(struct btree *b,
struct btree_node_iter *iter, struct btree_node_iter *iter,
struct bkey_packed *where) {} struct bkey_packed *where) {}
......
@@ -217,7 +217,6 @@ static unsigned btree_gc_mark_node(struct bch_fs *c, struct btree *b)
if (btree_node_has_ptrs(b)) if (btree_node_has_ptrs(b))
for_each_btree_node_key_unpack(b, k, &iter, for_each_btree_node_key_unpack(b, k, &iter,
btree_node_is_extents(b),
&unpacked) { &unpacked) {
bch2_bkey_debugcheck(c, b, k); bch2_bkey_debugcheck(c, b, k);
stale = max(stale, bch2_gc_mark_key(c, type, k, 0)); stale = max(stale, bch2_gc_mark_key(c, type, k, 0));
@@ -1044,7 +1043,6 @@ static int bch2_initial_gc_btree(struct bch_fs *c, enum btree_id id)
struct bkey_s_c k; struct bkey_s_c k;
for_each_btree_node_key_unpack(b, k, &node_iter, for_each_btree_node_key_unpack(b, k, &node_iter,
btree_node_is_extents(b),
&unpacked) { &unpacked) {
ret = bch2_btree_mark_key_initial(c, ret = bch2_btree_mark_key_initial(c,
btree_node_type(b), k); btree_node_type(b), k);
......
@@ -22,7 +22,7 @@
/* btree_node_iter_large: */ /* btree_node_iter_large: */
#define btree_node_iter_cmp_heap(h, _l, _r) \ #define btree_node_iter_cmp_heap(h, _l, _r) \
__btree_node_iter_cmp((iter)->is_extents, b, \ __btree_node_iter_cmp(b, \
__btree_node_offset_to_key(b, (_l).k), \ __btree_node_offset_to_key(b, (_l).k), \
__btree_node_offset_to_key(b, (_r).k)) __btree_node_offset_to_key(b, (_r).k))
@@ -248,6 +248,9 @@ static unsigned sort_extent_whiteouts(struct bkey_packed *dst,
sort_iter_sort(iter, sort_extent_whiteouts_cmp); sort_iter_sort(iter, sort_extent_whiteouts_cmp);
while ((in = sort_iter_next(iter, sort_extent_whiteouts_cmp))) { while ((in = sort_iter_next(iter, sort_extent_whiteouts_cmp))) {
if (bkey_deleted(in))
continue;
EBUG_ON(bkeyp_val_u64s(f, in)); EBUG_ON(bkeyp_val_u64s(f, in));
EBUG_ON(in->type != KEY_TYPE_DISCARD); EBUG_ON(in->type != KEY_TYPE_DISCARD);
@@ -785,8 +788,7 @@ void bch2_btree_sort_into(struct bch_fs *c,
bch2_bset_set_no_aux_tree(dst, dst->set); bch2_bset_set_no_aux_tree(dst, dst->set);
bch2_btree_node_iter_init_from_start(&src_iter, src, bch2_btree_node_iter_init_from_start(&src_iter, src);
btree_node_is_extents(src));
if (btree_node_ops(src)->key_normalize || if (btree_node_ops(src)->key_normalize ||
btree_node_ops(src)->key_merge) btree_node_ops(src)->key_merge)
@@ -1171,7 +1173,7 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct btree *b, bool have_retry
int ret, retry_read = 0, write = READ; int ret, retry_read = 0, write = READ;
iter = mempool_alloc(&c->fill_iter, GFP_NOIO); iter = mempool_alloc(&c->fill_iter, GFP_NOIO);
__bch2_btree_node_iter_large_init(iter, btree_node_is_extents(b)); iter->used = 0;
if (bch2_meta_read_fault("btree")) if (bch2_meta_read_fault("btree"))
btree_err(BTREE_ERR_MUST_RETRY, c, b, NULL, btree_err(BTREE_ERR_MUST_RETRY, c, b, NULL,
......
@@ -146,20 +146,11 @@ ssize_t bch2_dirty_btree_nodes_print(struct bch_fs *, char *);
/* Sorting */ /* Sorting */
struct btree_node_iter_large { struct btree_node_iter_large {
u8 is_extents;
u16 used; u16 used;
struct btree_node_iter_set data[MAX_BSETS]; struct btree_node_iter_set data[MAX_BSETS];
}; };
static inline void
__bch2_btree_node_iter_large_init(struct btree_node_iter_large *iter,
bool is_extents)
{
iter->used = 0;
iter->is_extents = is_extents;
}
void bch2_btree_node_iter_large_advance(struct btree_node_iter_large *, void bch2_btree_node_iter_large_advance(struct btree_node_iter_large *,
struct btree *); struct btree *);
......
@@ -375,14 +375,20 @@ static void __bch2_btree_iter_verify(struct btree_iter *iter,
struct btree_node_iter tmp = l->iter; struct btree_node_iter tmp = l->iter;
struct bkey_packed *k; struct bkey_packed *k;
if (iter->uptodate > BTREE_ITER_NEED_PEEK)
return;
bch2_btree_node_iter_verify(&l->iter, b); bch2_btree_node_iter_verify(&l->iter, b);
/* /*
* For interior nodes, the iterator will have skipped past * For interior nodes, the iterator will have skipped past
* deleted keys: * deleted keys:
*
* For extents, the iterator may have skipped past deleted keys (but not
* whiteouts)
*/ */
k = b->level k = b->level || iter->flags & BTREE_ITER_IS_EXTENTS
? bch2_btree_node_iter_prev(&tmp, b) ? bch2_btree_node_iter_prev_filter(&tmp, b, KEY_TYPE_DISCARD)
: bch2_btree_node_iter_prev_all(&tmp, b); : bch2_btree_node_iter_prev_all(&tmp, b);
if (k && btree_iter_pos_cmp_packed(b, &iter->pos, k, if (k && btree_iter_pos_cmp_packed(b, &iter->pos, k,
iter->flags & BTREE_ITER_IS_EXTENTS)) { iter->flags & BTREE_ITER_IS_EXTENTS)) {
@@ -390,7 +396,7 @@ static void __bch2_btree_iter_verify(struct btree_iter *iter,
struct bkey uk = bkey_unpack_key(b, k); struct bkey uk = bkey_unpack_key(b, k);
bch2_bkey_to_text(buf, sizeof(buf), &uk); bch2_bkey_to_text(buf, sizeof(buf), &uk);
panic("prev key should be before after pos:\n%s\n%llu:%llu\n", panic("prev key should be before iter pos:\n%s\n%llu:%llu\n",
buf, iter->pos.inode, iter->pos.offset); buf, iter->pos.inode, iter->pos.offset);
} }
@@ -401,15 +407,16 @@ static void __bch2_btree_iter_verify(struct btree_iter *iter,
struct bkey uk = bkey_unpack_key(b, k); struct bkey uk = bkey_unpack_key(b, k);
bch2_bkey_to_text(buf, sizeof(buf), &uk); bch2_bkey_to_text(buf, sizeof(buf), &uk);
panic("next key should be before iter pos:\n%llu:%llu\n%s\n", panic("iter should be after current key:\n"
"iter pos %llu:%llu\n"
"cur key %s\n",
iter->pos.inode, iter->pos.offset, buf); iter->pos.inode, iter->pos.offset, buf);
} }
if (iter->uptodate == BTREE_ITER_UPTODATE && BUG_ON(iter->uptodate == BTREE_ITER_UPTODATE &&
(iter->flags & BTREE_ITER_TYPE) != BTREE_ITER_NODES) { (iter->flags & BTREE_ITER_TYPE) == BTREE_ITER_KEYS &&
BUG_ON(!bkey_whiteout(&iter->k) && !bkey_whiteout(&iter->k) &&
bch2_btree_node_iter_end(&l->iter)); bch2_btree_node_iter_end(&l->iter));
}
} }
void bch2_btree_iter_verify(struct btree_iter *iter, struct btree *b) void bch2_btree_iter_verify(struct btree_iter *iter, struct btree *b)
@@ -420,6 +427,11 @@ void bch2_btree_iter_verify(struct btree_iter *iter, struct btree *b)
__bch2_btree_iter_verify(linked, b); __bch2_btree_iter_verify(linked, b);
} }
#else
static inline void __bch2_btree_iter_verify(struct btree_iter *iter,
struct btree *b) {}
#endif #endif
static void __bch2_btree_node_iter_fix(struct btree_iter *iter, static void __bch2_btree_node_iter_fix(struct btree_iter *iter,
@@ -434,7 +446,7 @@ static void __bch2_btree_node_iter_fix(struct btree_iter *iter,
struct btree_node_iter_set *set; struct btree_node_iter_set *set;
unsigned offset = __btree_node_key_to_offset(b, where); unsigned offset = __btree_node_key_to_offset(b, where);
int shift = new_u64s - clobber_u64s; int shift = new_u64s - clobber_u64s;
unsigned old_end = (int) __btree_node_key_to_offset(b, end) - shift; unsigned old_end = t->end_offset - shift;
btree_node_iter_for_each(node_iter, set) btree_node_iter_for_each(node_iter, set)
if (set->end == old_end) if (set->end == old_end)
@@ -456,7 +468,7 @@ static void __bch2_btree_node_iter_fix(struct btree_iter *iter,
} }
return; return;
found: found:
set->end = (int) set->end + shift; set->end = t->end_offset;
/* Iterator hasn't gotten to the key that changed yet: */ /* Iterator hasn't gotten to the key that changed yet: */
if (set->k < offset) if (set->k < offset)
@@ -517,8 +529,7 @@ static void __bch2_btree_node_iter_fix(struct btree_iter *iter,
k = bch2_bkey_prev_all(b, t, k = bch2_bkey_prev_all(b, t,
bch2_btree_node_iter_bset_pos(node_iter, b, t)); bch2_btree_node_iter_bset_pos(node_iter, b, t));
if (k && if (k &&
__btree_node_iter_cmp(node_iter, b, __btree_node_iter_cmp(b, k, where) > 0) {
k, where) > 0) {
struct btree_node_iter_set *set; struct btree_node_iter_set *set;
unsigned offset = unsigned offset =
__btree_node_key_to_offset(b, bkey_next(k)); __btree_node_key_to_offset(b, bkey_next(k));
@@ -557,10 +568,6 @@ void bch2_btree_node_iter_fix(struct btree_iter *iter,
__bch2_btree_node_iter_fix(linked, b, __bch2_btree_node_iter_fix(linked, b,
&linked->l[b->level].iter, t, &linked->l[b->level].iter, t,
where, clobber_u64s, new_u64s); where, clobber_u64s, new_u64s);
/* interior node iterators are... special... */
if (!b->level)
bch2_btree_iter_verify(iter, b);
} }
static inline struct bkey_s_c __btree_iter_unpack(struct btree_iter *iter, static inline struct bkey_s_c __btree_iter_unpack(struct btree_iter *iter,
@@ -647,17 +654,6 @@ static void btree_iter_verify_new_node(struct btree_iter *iter, struct btree *b)
btree_node_unlock(iter, b->level + 1); btree_node_unlock(iter, b->level + 1);
} }
/* Returns true if @k is after iterator position @pos */
static inline bool btree_iter_pos_cmp(struct btree_iter *iter,
const struct bkey *k)
{
int cmp = bkey_cmp(k->p, iter->pos);
return cmp > 0 ||
(cmp == 0 &&
!(iter->flags & BTREE_ITER_IS_EXTENTS) && !bkey_deleted(k));
}
static inline bool btree_iter_pos_after_node(struct btree_iter *iter, static inline bool btree_iter_pos_after_node(struct btree_iter *iter,
struct btree *b) struct btree *b)
{ {
@@ -679,8 +675,7 @@ static inline void __btree_iter_init(struct btree_iter *iter,
struct btree_iter_level *l = &iter->l[b->level]; struct btree_iter_level *l = &iter->l[b->level];
bch2_btree_node_iter_init(&l->iter, b, iter->pos, bch2_btree_node_iter_init(&l->iter, b, iter->pos,
iter->flags & BTREE_ITER_IS_EXTENTS, iter->flags & BTREE_ITER_IS_EXTENTS);
btree_node_is_extents(b));
/* Skip to first non whiteout: */ /* Skip to first non whiteout: */
if (b->level) if (b->level)
@@ -1022,7 +1017,9 @@ int __must_check __bch2_btree_iter_traverse(struct btree_iter *iter)
} }
iter->uptodate = BTREE_ITER_NEED_PEEK; iter->uptodate = BTREE_ITER_NEED_PEEK;
bch2_btree_iter_verify_locks(iter); bch2_btree_iter_verify_locks(iter);
__bch2_btree_iter_verify(iter, iter->l[iter->level].b);
return 0; return 0;
} }
@@ -1363,9 +1360,10 @@ struct bkey_s_c bch2_btree_iter_prev(struct btree_iter *iter)
} }
static inline struct bkey_s_c static inline struct bkey_s_c
__bch2_btree_iter_peek_slot(struct btree_iter *iter) __bch2_btree_iter_peek_slot_extents(struct btree_iter *iter)
{ {
struct btree_iter_level *l = &iter->l[0]; struct btree_iter_level *l = &iter->l[0];
struct btree_node_iter node_iter;
struct bkey_s_c k; struct bkey_s_c k;
struct bkey n; struct bkey n;
int ret; int ret;
@@ -1376,6 +1374,17 @@ __bch2_btree_iter_peek_slot(struct btree_iter *iter)
bkey_cmp(bkey_start_pos(k.k), iter->pos) == 0) bkey_cmp(bkey_start_pos(k.k), iter->pos) == 0)
__btree_iter_advance(l); __btree_iter_advance(l);
/*
* iterator is now at the correct position for inserting at iter->pos,
* but we need to keep iterating until we find the first non whiteout so
* we know how big a hole we have, if any:
*/
node_iter = l->iter;
if (k.k && bkey_whiteout(k.k))
k = __btree_iter_unpack(iter, l, &iter->k,
bch2_btree_node_iter_peek(&node_iter, l->b));
/* /*
* If we got to the end of the node, check if we need to traverse to the * If we got to the end of the node, check if we need to traverse to the
* next node: * next node:
@@ -1392,6 +1401,13 @@ __bch2_btree_iter_peek_slot(struct btree_iter *iter)
if (k.k && if (k.k &&
!bkey_whiteout(k.k) && !bkey_whiteout(k.k) &&
bkey_cmp(bkey_start_pos(k.k), iter->pos) <= 0) { bkey_cmp(bkey_start_pos(k.k), iter->pos) <= 0) {
/*
* if we skipped forward to find the first non whiteout and
* there _wasn't_ actually a hole, we want the iterator to be
* pointed at the key we found:
*/
l->iter = node_iter;
EBUG_ON(bkey_cmp(k.k->p, iter->pos) < 0); EBUG_ON(bkey_cmp(k.k->p, iter->pos) < 0);
EBUG_ON(bkey_deleted(k.k)); EBUG_ON(bkey_deleted(k.k));
iter->uptodate = BTREE_ITER_UPTODATE; iter->uptodate = BTREE_ITER_UPTODATE;
@@ -1399,28 +1415,21 @@ __bch2_btree_iter_peek_slot(struct btree_iter *iter)
} }
/* hole */ /* hole */
bkey_init(&n);
n.p = iter->pos;
if (iter->flags & BTREE_ITER_IS_EXTENTS) { /* holes can't span inode numbers: */
if (n.p.offset == KEY_OFFSET_MAX) { if (iter->pos.offset == KEY_OFFSET_MAX) {
if (n.p.inode == KEY_INODE_MAX) if (iter->pos.inode == KEY_INODE_MAX)
return bkey_s_c_null; return bkey_s_c_null;
iter->pos = bkey_successor(iter->pos); iter->pos = bkey_successor(iter->pos);
goto recheck; goto recheck;
} }
if (k.k && bkey_whiteout(k.k)) {
struct btree_node_iter node_iter = l->iter;
k = __btree_iter_unpack(iter, l, &iter->k,
bch2_btree_node_iter_peek(&node_iter, l->b));
}
if (!k.k) if (!k.k)
k.k = &l->b->key.k; k.k = &l->b->key.k;
bkey_init(&n);
n.p = iter->pos;
bch2_key_resize(&n, bch2_key_resize(&n,
min_t(u64, KEY_SIZE_MAX, min_t(u64, KEY_SIZE_MAX,
(k.k->p.inode == n.p.inode (k.k->p.inode == n.p.inode
@@ -1428,7 +1437,17 @@ __bch2_btree_iter_peek_slot(struct btree_iter *iter)
: KEY_OFFSET_MAX) - : KEY_OFFSET_MAX) -
n.p.offset)); n.p.offset));
EBUG_ON(!n.size); //EBUG_ON(!n.size);
if (!n.size) {
char buf[100];
bch2_dump_btree_node(iter->l[0].b);
bch2_bkey_to_text(buf, sizeof(buf), k.k);
panic("iter at %llu:%llu\n"
"next key %s\n",
iter->pos.inode,
iter->pos.offset,
buf);
} }
iter->k = n; iter->k = n;
@@ -1436,6 +1455,50 @@ __bch2_btree_iter_peek_slot(struct btree_iter *iter)
return (struct bkey_s_c) { &iter->k, NULL }; return (struct bkey_s_c) { &iter->k, NULL };
} }
static inline struct bkey_s_c
__bch2_btree_iter_peek_slot(struct btree_iter *iter)
{
struct btree_iter_level *l = &iter->l[0];
struct bkey_s_c k;
int ret;
if (iter->flags & BTREE_ITER_IS_EXTENTS)
return __bch2_btree_iter_peek_slot_extents(iter);
recheck:
while ((k = __btree_iter_peek_all(iter, l, &iter->k)).k &&
bkey_deleted(k.k) &&
bkey_cmp(k.k->p, iter->pos) == 0)
__btree_iter_advance(l);
/*
* If we got to the end of the node, check if we need to traverse to the
* next node:
*/
if (unlikely(!k.k && btree_iter_pos_after_node(iter, l->b))) {
btree_iter_set_dirty(iter, BTREE_ITER_NEED_TRAVERSE);
ret = bch2_btree_iter_traverse(iter);
if (unlikely(ret))
return bkey_s_c_err(ret);
goto recheck;
}
if (k.k &&
!bkey_deleted(k.k) &&
!bkey_cmp(iter->pos, k.k->p)) {
iter->uptodate = BTREE_ITER_UPTODATE;
return k;
} else {
/* hole */
bkey_init(&iter->k);
iter->k.p = iter->pos;
iter->uptodate = BTREE_ITER_UPTODATE;
return (struct bkey_s_c) { &iter->k, NULL };
}
}
struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *iter) struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *iter)
{ {
int ret; int ret;
......
@@ -176,8 +176,6 @@ struct btree_cache {
}; };
struct btree_node_iter { struct btree_node_iter {
u8 is_extents;
struct btree_node_iter_set { struct btree_node_iter_set {
u16 k, end; u16 k, end;
} data[MAX_BSETS]; } data[MAX_BSETS];
@@ -459,9 +457,6 @@ struct btree_root {
* we're holding the write lock and we know what key is about to be overwritten: * we're holding the write lock and we know what key is about to be overwritten:
*/ */
struct btree_iter;
struct btree_node_iter;
enum btree_insert_ret { enum btree_insert_ret {
BTREE_INSERT_OK, BTREE_INSERT_OK,
/* extent spanned multiple leaf nodes: have to traverse to next node: */ /* extent spanned multiple leaf nodes: have to traverse to next node: */
......
@@ -35,7 +35,7 @@ static void btree_node_interior_verify(struct btree *b)
BUG_ON(!b->level); BUG_ON(!b->level);
bch2_btree_node_iter_init(&iter, b, b->key.k.p, false, false); bch2_btree_node_iter_init(&iter, b, b->key.k.p, false);
#if 1 #if 1
BUG_ON(!(k = bch2_btree_node_iter_peek(&iter, b)) || BUG_ON(!(k = bch2_btree_node_iter_peek(&iter, b)) ||
bkey_cmp_left_packed(b, k, &b->key.k.p)); bkey_cmp_left_packed(b, k, &b->key.k.p));
@@ -1322,7 +1322,7 @@ static void btree_split_insert_keys(struct btree_update *as, struct btree *b,
BUG_ON(btree_node_type(b) != BKEY_TYPE_BTREE); BUG_ON(btree_node_type(b) != BKEY_TYPE_BTREE);
bch2_btree_node_iter_init(&node_iter, b, k->k.p, false, false); bch2_btree_node_iter_init(&node_iter, b, k->k.p, false);
while (!bch2_keylist_empty(keys)) { while (!bch2_keylist_empty(keys)) {
k = bch2_keylist_front(keys); k = bch2_keylist_front(keys);
......
@@ -65,6 +65,7 @@ bool bch2_btree_bset_insert_key(struct btree_iter *iter,
bch2_bset_delete(b, k, clobber_u64s); bch2_bset_delete(b, k, clobber_u64s);
bch2_btree_node_iter_fix(iter, b, node_iter, t, bch2_btree_node_iter_fix(iter, b, node_iter, t,
k, clobber_u64s, 0); k, clobber_u64s, 0);
bch2_btree_iter_verify(iter, b);
return true; return true;
} }
@@ -74,6 +75,7 @@ bool bch2_btree_bset_insert_key(struct btree_iter *iter,
k->type = KEY_TYPE_DELETED; k->type = KEY_TYPE_DELETED;
bch2_btree_node_iter_fix(iter, b, node_iter, t, k, bch2_btree_node_iter_fix(iter, b, node_iter, t, k,
k->u64s, k->u64s); k->u64s, k->u64s);
bch2_btree_iter_verify(iter, b);
if (bkey_whiteout(&insert->k)) { if (bkey_whiteout(&insert->k)) {
reserve_whiteout(b, k); reserve_whiteout(b, k);
@@ -99,6 +101,7 @@ bool bch2_btree_bset_insert_key(struct btree_iter *iter,
if (k->u64s != clobber_u64s || bkey_whiteout(&insert->k)) if (k->u64s != clobber_u64s || bkey_whiteout(&insert->k))
bch2_btree_node_iter_fix(iter, b, node_iter, t, k, bch2_btree_node_iter_fix(iter, b, node_iter, t, k,
clobber_u64s, k->u64s); clobber_u64s, k->u64s);
bch2_btree_iter_verify(iter, b);
return true; return true;
} }
......
@@ -858,30 +858,34 @@ void bch2_key_resize(struct bkey *k,
* that we have to unpack the key, modify the unpacked key - then this * that we have to unpack the key, modify the unpacked key - then this
* copies/repacks the unpacked to the original as necessary. * copies/repacks the unpacked to the original as necessary.
*/ */
static bool __extent_save(struct btree *b, struct btree_node_iter *iter, static void extent_save(struct btree *b, struct bkey_packed *dst,
struct bkey_packed *dst, struct bkey *src) struct bkey *src)
{ {
struct bkey_format *f = &b->format; struct bkey_format *f = &b->format;
struct bkey_i *dst_unpacked; struct bkey_i *dst_unpacked;
bool ret;
if ((dst_unpacked = packed_to_bkey(dst))) { if ((dst_unpacked = packed_to_bkey(dst)))
dst_unpacked->k = *src; dst_unpacked->k = *src;
ret = true; else
} else { BUG_ON(!bch2_bkey_pack_key(dst, src, f));
ret = bch2_bkey_pack_key(dst, src, f);
}
if (ret && iter)
bch2_verify_key_order(b, iter, dst);
return ret;
} }
static void extent_save(struct btree *b, struct btree_node_iter *iter, static bool extent_i_save(struct btree *b, struct bkey_packed *dst,
struct bkey_packed *dst, struct bkey *src) struct bkey_i *src)
{ {
BUG_ON(!__extent_save(b, iter, dst, src)); struct bkey_format *f = &b->format;
struct bkey_i *dst_unpacked;
struct bkey_packed tmp;
if ((dst_unpacked = packed_to_bkey(dst)))
dst_unpacked->k = src->k;
else if (bch2_bkey_pack_key(&tmp, &src->k, f))
memcpy_u64s(dst, &tmp, f->key_u64s);
else
return false;
memcpy_u64s(bkeyp_val(f, dst), &src->v, bkey_val_u64s(&src->k));
return true;
} }
/* /*
@@ -1010,7 +1014,7 @@ struct btree_nr_keys bch2_extent_sort_fix_overlapping(struct bch_fs *c,
sort_key_next(iter, b, _r); sort_key_next(iter, b, _r);
} else { } else {
__bch2_cut_front(l.k->p, r); __bch2_cut_front(l.k->p, r);
extent_save(b, NULL, rk, r.k); extent_save(b, rk, r.k);
} }
extent_sort_sift(iter, b, _r - iter->data); extent_sort_sift(iter, b, _r - iter->data);
@@ -1024,7 +1028,7 @@ struct btree_nr_keys bch2_extent_sort_fix_overlapping(struct bch_fs *c,
bch2_cut_back(bkey_start_pos(r.k), &tmp.k.k); bch2_cut_back(bkey_start_pos(r.k), &tmp.k.k);
__bch2_cut_front(r.k->p, l); __bch2_cut_front(r.k->p, l);
extent_save(b, NULL, lk, l.k); extent_save(b, lk, l.k);
extent_sort_sift(iter, b, 0); extent_sort_sift(iter, b, 0);
@@ -1032,7 +1036,7 @@ struct btree_nr_keys bch2_extent_sort_fix_overlapping(struct bch_fs *c,
bkey_to_packed(&tmp.k)); bkey_to_packed(&tmp.k));
} else { } else {
bch2_cut_back(bkey_start_pos(r.k), l.k); bch2_cut_back(bkey_start_pos(r.k), l.k);
extent_save(b, NULL, lk, l.k); extent_save(b, lk, l.k);
} }
} }
@@ -1135,6 +1139,55 @@ extent_insert_should_stop(struct extent_insert_state *s)
return BTREE_INSERT_OK; return BTREE_INSERT_OK;
} }
static void verify_extent_nonoverlapping(struct btree *b,
struct btree_node_iter *_iter,
struct bkey_i *insert)
{
#ifdef CONFIG_BCACHEFS_DEBUG
struct btree_node_iter iter;
struct bkey_packed *k;
struct bkey uk;
iter = *_iter;
k = bch2_btree_node_iter_prev_filter(&iter, b, KEY_TYPE_DISCARD);
BUG_ON(k &&
(uk = bkey_unpack_key(b, k),
bkey_cmp(uk.p, bkey_start_pos(&insert->k)) > 0));
iter = *_iter;
k = bch2_btree_node_iter_peek_filter(&iter, b, KEY_TYPE_DISCARD);
#if 0
BUG_ON(k &&
(uk = bkey_unpack_key(b, k),
bkey_cmp(insert->k.p, bkey_start_pos(&uk))) > 0);
#else
if (k &&
(uk = bkey_unpack_key(b, k),
bkey_cmp(insert->k.p, bkey_start_pos(&uk))) > 0) {
char buf1[100];
char buf2[100];
bch2_bkey_to_text(buf1, sizeof(buf1), &insert->k);
bch2_bkey_to_text(buf2, sizeof(buf2), &uk);
bch2_dump_btree_node(b);
panic("insert > next :\n"
"insert %s\n"
"next %s\n",
buf1, buf2);
}
#endif
#endif
}
static void verify_modified_extent(struct btree_iter *iter,
struct bkey_packed *k)
{
bch2_btree_iter_verify(iter, iter->l[0].b);
bch2_verify_insert_pos(iter->l[0].b, k, k, k->u64s);
}
static void extent_bset_insert(struct bch_fs *c, struct btree_iter *iter, static void extent_bset_insert(struct bch_fs *c, struct btree_iter *iter,
struct bkey_i *insert) struct bkey_i *insert)
{ {
@@ -1148,6 +1201,14 @@ static void extent_bset_insert(struct bch_fs *c, struct btree_iter *iter,
unsigned clobber_u64s; unsigned clobber_u64s;
EBUG_ON(bkey_deleted(&insert->k) || !insert->k.size); EBUG_ON(bkey_deleted(&insert->k) || !insert->k.size);
verify_extent_nonoverlapping(l->b, &l->iter, insert);
if (!prev) {
while ((prev = bch2_bkey_prev_all(l->b, t, where)) &&
(bkey_cmp_left_packed(l->b, prev, &insert->k.p) ?:
((int) bkey_deleted(&insert->k) - (int) bkey_deleted(prev))) > 0)
where = prev;
}
if (prev) if (prev)
where = bkey_next(prev); where = bkey_next(prev);
@@ -1174,11 +1235,14 @@ static void extent_bset_insert(struct bch_fs *c, struct btree_iter *iter,
bch2_bset_insert(l->b, &l->iter, where, insert, clobber_u64s); bch2_bset_insert(l->b, &l->iter, where, insert, clobber_u64s);
bch2_btree_node_iter_fix(iter, l->b, &l->iter, t, where, bch2_btree_node_iter_fix(iter, l->b, &l->iter, t, where,
clobber_u64s, where->u64s); clobber_u64s, where->u64s);
bch2_verify_key_order(l->b, &l->iter, where);
bch2_btree_iter_verify(iter, l->b);
return; return;
drop_deleted_keys: drop_deleted_keys:
bch2_bset_delete(l->b, where, clobber_u64s); bch2_bset_delete(l->b, where, clobber_u64s);
bch2_btree_node_iter_fix(iter, l->b, &l->iter, t, bch2_btree_node_iter_fix(iter, l->b, &l->iter, t,
where, clobber_u64s, 0); where, clobber_u64s, 0);
bch2_btree_iter_verify(iter, l->b);
} }
static void extent_insert_committed(struct extent_insert_state *s) static void extent_insert_committed(struct extent_insert_state *s)
@@ -1226,8 +1290,10 @@ static void extent_insert_committed(struct extent_insert_state *s)
bch2_btree_journal_key(s->trans, iter, &split.k); bch2_btree_journal_key(s->trans, iter, &split.k);
if (!s->deleting) if (!s->deleting) {
bch2_btree_iter_set_pos_same_leaf(iter, s->committed);
extent_bset_insert(c, iter, &split.k); extent_bset_insert(c, iter, &split.k);
}
done: done:
bch2_btree_iter_set_pos_same_leaf(iter, s->committed); bch2_btree_iter_set_pos_same_leaf(iter, s->committed);
@@ -1345,22 +1411,21 @@ extent_squash(struct extent_insert_state *s, struct bkey_i *insert,
struct btree_iter *iter = s->insert->iter; struct btree_iter *iter = s->insert->iter;
struct btree_iter_level *l = &iter->l[0]; struct btree_iter_level *l = &iter->l[0];
struct btree *b = l->b; struct btree *b = l->b;
struct btree_node_iter *node_iter = &l->iter;
enum btree_insert_ret ret;
switch (overlap) { switch (overlap) {
case BCH_EXTENT_OVERLAP_FRONT: case BCH_EXTENT_OVERLAP_FRONT:
/* insert overlaps with start of k: */ /* insert overlaps with start of k: */
bch2_cut_subtract_front(s, insert->k.p, k); bch2_cut_subtract_front(s, insert->k.p, k);
BUG_ON(bkey_deleted(k.k)); BUG_ON(bkey_deleted(k.k));
extent_save(b, node_iter, _k, k.k); extent_save(b, _k, k.k);
bch2_verify_key_order(b, &l->iter, _k);
break; break;
case BCH_EXTENT_OVERLAP_BACK: case BCH_EXTENT_OVERLAP_BACK:
/* insert overlaps with end of k: */ /* insert overlaps with end of k: */
bch2_cut_subtract_back(s, bkey_start_pos(&insert->k), k); bch2_cut_subtract_back(s, bkey_start_pos(&insert->k), k);
BUG_ON(bkey_deleted(k.k)); BUG_ON(bkey_deleted(k.k));
extent_save(b, node_iter, _k, k.k); extent_save(b, _k, k.k);
/* /*
* As the auxiliary tree is indexed by the end of the * As the auxiliary tree is indexed by the end of the
@@ -1368,46 +1433,31 @@ extent_squash(struct extent_insert_state *s, struct bkey_i *insert,
* auxiliary tree. * auxiliary tree.
*/ */
bch2_bset_fix_invalidated_key(b, t, _k); bch2_bset_fix_invalidated_key(b, t, _k);
bch2_btree_node_iter_fix(iter, b, node_iter, t, bch2_btree_node_iter_fix(iter, b, &l->iter, t,
_k, _k->u64s, _k->u64s); _k, _k->u64s, _k->u64s);
bch2_verify_key_order(b, &l->iter, _k);
break; break;
case BCH_EXTENT_OVERLAP_ALL: { case BCH_EXTENT_OVERLAP_ALL: {
struct bpos orig_pos = k.k->p;
/* The insert key completely covers k, invalidate k */ /* The insert key completely covers k, invalidate k */
if (!bkey_whiteout(k.k)) if (!bkey_whiteout(k.k))
btree_keys_account_key_drop(&b->nr, btree_keys_account_key_drop(&b->nr,
t - b->set, _k); t - b->set, _k);
bch2_drop_subtract(s, k); bch2_drop_subtract(s, k);
k.k->p = bkey_start_pos(&insert->k);
if (!__extent_save(b, node_iter, _k, k.k)) {
/*
* Couldn't repack: we aren't necessarily able
* to repack if the new key is outside the range
* of the old extent, so we have to split
* @insert:
*/
k.k->p = orig_pos;
extent_save(b, node_iter, _k, k.k);
ret = extent_insert_advance_pos(s, k.s_c); if (t == bset_tree_last(l->b)) {
if (ret != BTREE_INSERT_OK) unsigned u64s = _k->u64s;
return ret;
extent_insert_committed(s); bch2_bset_delete(l->b, _k, _k->u64s);
/* bch2_btree_node_iter_fix(iter, b, &l->iter, t,
* We split and inserted upto at k.k->p - that _k, u64s, 0);
* has to coincide with iter->pos, so that we bch2_btree_iter_verify(iter, b);
* don't have anything more we have to insert
* until we recheck our journal reservation:
*/
EBUG_ON(bkey_cmp(s->committed, k.k->p));
} else { } else {
bch2_bset_fix_invalidated_key(b, t, _k); extent_save(b, _k, k.k);
bch2_btree_node_iter_fix(iter, b, node_iter, t, bch2_btree_node_iter_fix(iter, b, &l->iter, t,
_k, _k->u64s, _k->u64s); _k, _k->u64s, _k->u64s);
bch2_verify_key_order(b, &l->iter, _k);
} }
break; break;
@@ -1436,7 +1486,8 @@ extent_squash(struct extent_insert_state *s, struct bkey_i *insert,
bch2_cut_subtract_front(s, insert->k.p, k); bch2_cut_subtract_front(s, insert->k.p, k);
BUG_ON(bkey_deleted(k.k)); BUG_ON(bkey_deleted(k.k));
extent_save(b, node_iter, _k, k.k); extent_save(b, _k, k.k);
bch2_verify_key_order(b, &l->iter, _k);
bch2_add_sectors(s, bkey_i_to_s_c(&split.k), bch2_add_sectors(s, bkey_i_to_s_c(&split.k),
bkey_start_offset(&split.k.k), bkey_start_offset(&split.k.k),
@@ -1450,26 +1501,20 @@ extent_squash(struct extent_insert_state *s, struct bkey_i *insert,
} }
static enum btree_insert_ret static enum btree_insert_ret
__bch2_delete_fixup_extent(struct extent_insert_state *s) __bch2_insert_fixup_extent(struct extent_insert_state *s)
{ {
struct bch_fs *c = s->trans->c; struct bch_fs *c = s->trans->c;
struct btree_iter *iter = s->insert->iter; struct btree_iter *iter = s->insert->iter;
struct btree_iter_level *l = &iter->l[0]; struct btree_iter_level *l = &iter->l[0];
struct btree *b = l->b; struct btree *b = l->b;
struct btree_node_iter *node_iter = &l->iter;
struct bkey_packed *_k; struct bkey_packed *_k;
struct bkey unpacked; struct bkey unpacked;
struct bkey_i *insert = s->insert->k; struct bkey_i *insert = s->insert->k;
enum btree_insert_ret ret = BTREE_INSERT_OK; enum btree_insert_ret ret = BTREE_INSERT_OK;
EBUG_ON(bkey_cmp(iter->pos, bkey_start_pos(&insert->k)));
s->whiteout = *insert;
s->whiteout.k.type = KEY_TYPE_DISCARD;
while (bkey_cmp(s->committed, insert->k.p) < 0 && while (bkey_cmp(s->committed, insert->k.p) < 0 &&
(ret = extent_insert_should_stop(s)) == BTREE_INSERT_OK && (ret = extent_insert_should_stop(s)) == BTREE_INSERT_OK &&
(_k = bch2_btree_node_iter_peek_all(node_iter, b))) { (_k = bch2_btree_node_iter_peek_filter(&l->iter, b, KEY_TYPE_DISCARD))) {
struct bset_tree *t = bch2_bkey_to_bset(b, _k); struct bset_tree *t = bch2_bkey_to_bset(b, _k);
struct bkey_s k = __bkey_disassemble(b, _k, &unpacked); struct bkey_s k = __bkey_disassemble(b, _k, &unpacked);
enum bch_extent_overlap overlap; enum bch_extent_overlap overlap;
@@ -1480,17 +1525,28 @@ __bch2_delete_fixup_extent(struct extent_insert_state *s)
if (bkey_cmp(bkey_start_pos(k.k), insert->k.p) >= 0) if (bkey_cmp(bkey_start_pos(k.k), insert->k.p) >= 0)
break; break;
if (bkey_whiteout(k.k)) {
s->committed = bpos_min(insert->k.p, k.k->p);
goto next;
}
overlap = bch2_extent_overlap(&insert->k, k.k);
ret = extent_insert_advance_pos(s, k.s_c); ret = extent_insert_advance_pos(s, k.s_c);
if (ret) if (ret)
break; break;
overlap = bch2_extent_overlap(&insert->k, k.k);
if (!s->deleting) {
if (k.k->needs_whiteout || bkey_written(b, _k))
insert->k.needs_whiteout = true;
if (overlap == BCH_EXTENT_OVERLAP_ALL &&
bkey_whiteout(k.k) &&
k.k->needs_whiteout) {
unreserve_whiteout(b, _k);
_k->needs_whiteout = false;
}
ret = extent_squash(s, insert, t, _k, k, overlap);
} else {
if (bkey_whiteout(k.k))
goto next;
s->do_journal = true; s->do_journal = true;
if (overlap == BCH_EXTENT_OVERLAP_ALL) { if (overlap == BCH_EXTENT_OVERLAP_ALL) {
@@ -1532,60 +1588,29 @@ __bch2_delete_fixup_extent(struct extent_insert_state *s)
bch2_btree_iter_set_pos_same_leaf(iter, s->committed); bch2_btree_iter_set_pos_same_leaf(iter, s->committed);
} }
return ret; if (ret != BTREE_INSERT_OK ||
} overlap == BCH_EXTENT_OVERLAP_FRONT ||
overlap == BCH_EXTENT_OVERLAP_MIDDLE)
static enum btree_insert_ret
__bch2_insert_fixup_extent(struct extent_insert_state *s)
{
struct btree_iter *iter = s->insert->iter;
struct btree_iter_level *l = &iter->l[0];
struct btree *b = l->b;
struct btree_node_iter *node_iter = &l->iter;
struct bkey_packed *_k;
struct bkey unpacked;
struct bkey_i *insert = s->insert->k;
enum btree_insert_ret ret = BTREE_INSERT_OK;
while (bkey_cmp(s->committed, insert->k.p) < 0 &&
(ret = extent_insert_should_stop(s)) == BTREE_INSERT_OK &&
(_k = bch2_btree_node_iter_peek_all(node_iter, b))) {
struct bset_tree *t = bch2_bkey_to_bset(b, _k);
struct bkey_s k = __bkey_disassemble(b, _k, &unpacked);
enum bch_extent_overlap overlap;
EBUG_ON(bkey_cmp(iter->pos, bkey_start_pos(&insert->k)));
EBUG_ON(bkey_cmp(iter->pos, k.k->p) >= 0);
if (bkey_cmp(bkey_start_pos(k.k), insert->k.p) >= 0)
break; break;
}
overlap = bch2_extent_overlap(&insert->k, k.k); if (ret == BTREE_INSERT_OK &&
bkey_cmp(s->committed, insert->k.p) < 0)
if (!k.k->size) ret = extent_insert_advance_pos(s, bkey_s_c_null);
goto squash;
/* /*
* Only call advance pos & call hook for nonzero size extents: * may have skipped past some deleted extents greater than the insert
* key, before we got to a non deleted extent and knew we could bail out
* rewind the iterator a bit if necessary:
*/ */
ret = extent_insert_advance_pos(s, k.s_c); {
if (ret) struct btree_node_iter node_iter = l->iter;
break; struct bkey uk;
if (k.k->size &&
(k.k->needs_whiteout || bkey_written(b, _k)))
insert->k.needs_whiteout = true;
if (overlap == BCH_EXTENT_OVERLAP_ALL && while ((_k = bch2_btree_node_iter_prev_all(&node_iter, l->b)) &&
bkey_whiteout(k.k) && (uk = bkey_unpack_key(l->b, _k),
k.k->needs_whiteout) { bkey_cmp(uk.p, s->committed) > 0))
unreserve_whiteout(b, _k); l->iter = node_iter;
_k->needs_whiteout = false;
}
squash:
ret = extent_squash(s, insert, t, _k, k, overlap);
if (ret != BTREE_INSERT_OK)
break;
} }
return ret; return ret;
@@ -1647,6 +1672,11 @@ bch2_insert_fixup_extent(struct btree_insert *trans,
.deleting = bkey_whiteout(&insert->k->k), .deleting = bkey_whiteout(&insert->k->k),
}; };
if (s.deleting) {
s.whiteout = *insert->k;
s.whiteout.k.type = KEY_TYPE_DISCARD;
}
EBUG_ON(iter->level); EBUG_ON(iter->level);
EBUG_ON(!insert->k->k.size); EBUG_ON(!insert->k->k.size);
@@ -1657,6 +1687,7 @@ bch2_insert_fixup_extent(struct btree_insert *trans,
* @insert->k and the node iterator that we're advancing: * @insert->k and the node iterator that we're advancing:
*/ */
EBUG_ON(bkey_cmp(iter->pos, bkey_start_pos(&insert->k->k))); EBUG_ON(bkey_cmp(iter->pos, bkey_start_pos(&insert->k->k)));
bch2_btree_iter_verify(iter, b);
if (!s.deleting &&
!(trans->flags & BTREE_INSERT_JOURNAL_REPLAY))
...@@ -1664,13 +1695,7 @@ bch2_insert_fixup_extent(struct btree_insert *trans,
bkey_start_offset(&insert->k->k),
insert->k->k.size);
ret = !s.deleting
? __bch2_insert_fixup_extent(&s)
: __bch2_delete_fixup_extent(&s);

if (ret == BTREE_INSERT_OK &&
bkey_cmp(s.committed, insert->k->k.p) < 0)
ret = extent_insert_advance_pos(&s, bkey_s_c_null);

ret = __bch2_insert_fixup_extent(&s);
extent_insert_committed(&s);
...@@ -2172,130 +2197,6 @@ enum merge_result bch2_extent_merge(struct bch_fs *c, struct btree *b,
return BCH_MERGE_MERGE;
}
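bch2_extent_merge() can report three outcomes - no merge, a partial merge, or a full merge - and the inline-merge path further down acts differently on each. A rough, standalone illustration of what those outcomes mean for the two keys involved, with simplified types; the real function also has to check, among other things, that the pointers are physically contiguous and that the merged key does not exceed the maximum extent size:

#include <stdbool.h>
#include <stdint.h>

enum merge_result { MERGE_NOMERGE, MERGE_PARTIAL, MERGE_MERGE };

struct extent { uint64_t start, end; };	/* [start, end) */

/* try to absorb r into l; max_size stands in for the limits the real code
 * enforces (size field width, compression boundaries, etc.) */
static enum merge_result try_merge(struct extent *l, struct extent *r,
				   uint64_t max_size)
{
	if (l->end != r->start)
		return MERGE_NOMERGE;		/* not adjacent, keep both keys */

	if (r->end - l->start <= max_size) {
		l->end = r->end;		/* r fully absorbed */
		r->start = r->end;		/* r is now empty and can be dropped */
		return MERGE_MERGE;
	}

	/* grow l as far as allowed and leave the remainder in r */
	l->end = l->start + max_size;
	r->start = l->end;
	return MERGE_PARTIAL;
}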
static void extent_i_save(struct btree *b, struct bkey_packed *dst,
struct bkey_i *src)
{
struct bkey_format *f = &b->format;
struct bkey_i *dst_unpacked;
BUG_ON(bkeyp_val_u64s(f, dst) != bkey_val_u64s(&src->k));
/*
* We don't want the bch2_verify_key_order() call in extent_save(),
* because we may be out of order with deleted keys that are about to be
* removed by extent_bset_insert()
*/
if ((dst_unpacked = packed_to_bkey(dst)))
bkey_copy(dst_unpacked, src);
else
BUG_ON(!bch2_bkey_pack(dst, src, f));
}
static bool extent_merge_one_overlapping(struct btree_iter *iter,
struct bpos new_pos,
struct bset_tree *t,
struct bkey_packed *k, struct bkey uk,
bool check, bool could_pack)
{
struct btree_iter_level *l = &iter->l[0];
BUG_ON(!bkey_deleted(k));
if (check) {
return !bkey_packed(k) || could_pack;
} else {
uk.p = new_pos;
extent_save(l->b, &l->iter, k, &uk);
bch2_bset_fix_invalidated_key(l->b, t, k);
bch2_btree_node_iter_fix(iter, l->b, &l->iter, t,
k, k->u64s, k->u64s);
return true;
}
}
static bool extent_merge_do_overlapping(struct btree_iter *iter,
struct bkey *m, bool back_merge)
{
struct btree_iter_level *l = &iter->l[0];
struct btree *b = l->b;
struct btree_node_iter *node_iter = &l->iter;
struct bset_tree *t;
struct bkey_packed *k;
struct bkey uk;
struct bpos new_pos = back_merge ? m->p : bkey_start_pos(m);
bool could_pack = bkey_pack_pos((void *) &uk, new_pos, b);
bool check = true;
/*
* @m is the new merged extent:
*
* The merge took place in the last bset; we know there can't be any 0
* size extents overlapping with m there because if so they would have
* been between the two extents we merged.
*
* But in the other bsets, we have to check for and fix such extents:
*/
do_fixup:
for_each_bset(b, t) {
if (t == bset_tree_last(b))
break;
/*
* if we don't find this bset in the iterator we already got to
* the end of that bset, so start searching from the end.
*/
k = bch2_btree_node_iter_bset_pos(node_iter, b, t);
if (k == btree_bkey_last(b, t))
k = bch2_bkey_prev_all(b, t, k);
if (!k)
continue;
if (back_merge) {
/*
* Back merge: 0 size extents will be before the key
* that was just inserted (and thus the iterator
* position) - walk backwards to find them
*/
for (;
k &&
(uk = bkey_unpack_key(b, k),
bkey_cmp(uk.p, bkey_start_pos(m)) > 0);
k = bch2_bkey_prev_all(b, t, k)) {
if (bkey_cmp(uk.p, m->p) >= 0)
continue;
if (!extent_merge_one_overlapping(iter, new_pos,
t, k, uk, check, could_pack))
return false;
}
} else {
/* Front merge - walk forwards */
for (;
k != btree_bkey_last(b, t) &&
(uk = bkey_unpack_key(b, k),
bkey_cmp(uk.p, m->p) < 0);
k = bkey_next(k)) {
if (bkey_cmp(uk.p,
bkey_start_pos(m)) <= 0)
continue;
if (!extent_merge_one_overlapping(iter, new_pos,
t, k, uk, check, could_pack))
return false;
}
}
}
if (check) {
check = false;
goto do_fixup;
}
return true;
}
/*
* When merging an extent that we're inserting into a btree node, the new merged
* extent could overlap with an existing 0 size extent - if we don't fix that,
...@@ -2312,13 +2213,13 @@ static bool bch2_extent_merge_inline(struct bch_fs *c,
{
struct btree *b = iter->l[0].b;
struct btree_node_iter *node_iter = &iter->l[0].iter;
const struct bkey_format *f = &b->format;
struct bset_tree *t = bset_tree_last(b);
struct bkey_packed *m;
BKEY_PADDED(k) li;
BKEY_PADDED(k) ri;
struct bkey_i *mi;
struct bkey tmp;

BKEY_PADDED(k) li, ri;
struct bkey_packed *m = back_merge ? l : r;
struct bkey_i *mi = back_merge ? &li.k : &ri.k;
struct bset_tree *t = bch2_bkey_to_bset(b, m);
enum merge_result ret;

EBUG_ON(bkey_written(b, m));
/*
* We need to save copies of both l and r, because we might get a
...@@ -2327,57 +2228,49 @@ static bool bch2_extent_merge_inline(struct bch_fs *c,
bch2_bkey_unpack(b, &li.k, l);
bch2_bkey_unpack(b, &ri.k, r);
m = back_merge ? l : r;
mi = back_merge ? &li.k : &ri.k;

/* l & r should be in last bset: */
EBUG_ON(bch2_bkey_to_bset(b, m) != t);

switch (bch2_extent_merge(c, b, &li.k, &ri.k)) {
case BCH_MERGE_NOMERGE:
return false;
case BCH_MERGE_PARTIAL:
if (bkey_packed(m) && !bch2_bkey_pack_key((void *) &tmp, &mi->k, f))
return false;

if (!extent_merge_do_overlapping(iter, &li.k.k, back_merge))
return false;

extent_i_save(b, m, mi);
bch2_bset_fix_invalidated_key(b, t, m);

/*
* Update iterator to reflect what we just inserted - otherwise,
* the iter_fix() call is going to put us _before_ the key we
* just partially merged with:
*/
if (back_merge)
bch2_btree_iter_set_pos_same_leaf(iter, li.k.k.p);

bch2_btree_node_iter_fix(iter, b, node_iter,
t, m, m->u64s, m->u64s);

if (!back_merge)
bkey_copy(packed_to_bkey(l), &li.k);
else
bkey_copy(packed_to_bkey(r), &ri.k);
return false;
case BCH_MERGE_MERGE:
if (bkey_packed(m) && !bch2_bkey_pack_key((void *) &tmp, &li.k.k, f))
return false;

if (!extent_merge_do_overlapping(iter, &li.k.k, back_merge))
return false;

extent_i_save(b, m, &li.k);
bch2_bset_fix_invalidated_key(b, t, m);
bch2_btree_node_iter_fix(iter, b, node_iter,
t, m, m->u64s, m->u64s);
return true;
default:
BUG();
}

ret = bch2_extent_merge(c, b, &li.k, &ri.k);
if (ret == BCH_MERGE_NOMERGE)
return false;

/*
* check if we overlap with deleted extents - would break the sort
* order:
*/
if (back_merge) {
struct bkey_packed *n = bkey_next(m);

if (n != btree_bkey_last(b, t) &&
bkey_cmp_left_packed(b, n, &li.k.k.p) <= 0 &&
bkey_deleted(n))
return false;
} else if (ret == BCH_MERGE_MERGE) {
struct bkey_packed *prev = bch2_bkey_prev_all(b, t, m);

if (prev &&
bkey_cmp_left_packed_byval(b, prev,
bkey_start_pos(&li.k.k)) > 0)
return false;
}

if (ret == BCH_MERGE_PARTIAL) {
if (!extent_i_save(b, m, mi))
return false;

if (!back_merge)
bkey_copy(packed_to_bkey(l), &li.k);
else
bkey_copy(packed_to_bkey(r), &ri.k);
} else {
if (!extent_i_save(b, m, &li.k))
return false;
}

bch2_bset_fix_invalidated_key(b, t, m);
bch2_btree_node_iter_fix(iter, b, node_iter,
t, m, m->u64s, m->u64s);
verify_modified_extent(iter, m);

return ret == BCH_MERGE_MERGE;
}
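With 0 size extents now allowed to overlap live extents, the rewritten bch2_extent_merge_inline() no longer has to walk other bsets repositioning overlapping whiteouts; it only refuses a merge when growing or moving the key in place would put it out of order with a neighbouring key. A standalone sketch of that neighbour check over a sorted array of simplified keys (names here are illustrative, not the bcachefs API):

#include <stdbool.h>
#include <stdint.h>

struct skey {
	uint64_t pos;		/* sort key: the extent's end position */
	bool	 deleted;	/* 0 size whiteout */
};

/* back merge: the key at idx grows forward so its end becomes new_end;
 * refuse if the next key is a deleted key that would no longer sort after us */
static bool back_merge_keeps_order(const struct skey *keys, int nr,
				   int idx, uint64_t new_end)
{
	return idx + 1 >= nr ||
	       !keys[idx + 1].deleted ||
	       keys[idx + 1].pos > new_end;
}

/* full merge in the front direction: the key at idx absorbs the previous
 * extent, so its start moves back to new_start; refuse if the previous key
 * would then sort after the merged key's start */
static bool front_merge_keeps_order(const struct skey *keys, int idx,
				    uint64_t new_start)
{
	return idx == 0 || keys[idx - 1].pos <= new_start;
}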
int bch2_check_range_allocated(struct bch_fs *c, struct bpos pos, u64 size)
......