Commit c6dd04f8 authored by Kent Overstreet, committed by Kent Overstreet

bcachefs: Mark overwrites from journal replay in initial gc

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
parent d0734356
@@ -273,11 +273,40 @@ static inline int btree_id_gc_phase_cmp(enum btree_id l, enum btree_id r)
                 (int) btree_id_to_gc_phase(r);
 }
 
+static int mark_journal_key(struct bch_fs *c, enum btree_id id,
+                            struct bkey_i *insert)
+{
+        struct btree_trans trans;
+        struct btree_iter *iter;
+        struct bkey_s_c k;
+        u8 max_stale;
+        int ret = 0;
+
+        ret = bch2_gc_mark_key(c, bkey_i_to_s_c(insert), &max_stale, true);
+        if (ret)
+                return ret;
+
+        bch2_trans_init(&trans, c);
+
+        for_each_btree_key(&trans, iter, id, bkey_start_pos(&insert->k),
+                           BTREE_ITER_SLOTS, k) {
+                percpu_down_read(&c->mark_lock);
+                ret = bch2_mark_overwrite(&trans, iter, k, insert, NULL,
+                                          BCH_BUCKET_MARK_GC|
+                                          BCH_BUCKET_MARK_NOATOMIC);
+                percpu_up_read(&c->mark_lock);
+
+                if (!ret)
+                        break;
+        }
+
+        return bch2_trans_exit(&trans);
+}
+
 static int bch2_gc_btrees(struct bch_fs *c, struct journal_keys *journal_keys,
                           bool initial, bool metadata_only)
 {
         enum btree_id ids[BTREE_ID_NR];
-        u8 max_stale;
         unsigned i;
 
         for (i = 0; i < BTREE_ID_NR; i++)
@@ -299,9 +328,7 @@ static int bch2_gc_btrees(struct bch_fs *c, struct journal_keys *journal_keys,
                         for_each_journal_key(*journal_keys, j)
                                 if (j->btree_id == id) {
-                                        ret = bch2_gc_mark_key(c,
-                                                        bkey_i_to_s_c(j->k),
-                                                        &max_stale, initial);
+                                        ret = mark_journal_key(c, id, j->k);
                                         if (ret)
                                                 return ret;
                                 }
...
@@ -43,6 +43,7 @@ enum {
         __BTREE_INSERT_USE_ALLOC_RESERVE,
         __BTREE_INSERT_JOURNAL_REPLAY,
         __BTREE_INSERT_JOURNAL_RESERVED,
+        __BTREE_INSERT_NOMARK_OVERWRITES,
         __BTREE_INSERT_NOMARK,
         __BTREE_INSERT_NOWAIT,
         __BTREE_INSERT_GC_LOCK_HELD,
@@ -76,6 +77,9 @@ enum {
 #define BTREE_INSERT_JOURNAL_RESERVED   (1 << __BTREE_INSERT_JOURNAL_RESERVED)
 
+/* Don't mark overwrites, just new key: */
+#define BTREE_INSERT_NOMARK_OVERWRITES  (1 << __BTREE_INSERT_NOMARK_OVERWRITES)
+
 /* Don't call bch2_mark_key: */
 #define BTREE_INSERT_NOMARK             (1 << __BTREE_INSERT_NOMARK)
...
@@ -542,6 +542,7 @@ static inline int do_btree_insert_at(struct btree_trans *trans,
 
         btree_trans_lock_write(c, trans);
 
+        if (likely(!(trans->flags & BTREE_INSERT_NOMARK))) {
         trans_for_each_update_iter(trans, i) {
                 if (i->deferred ||
                     !btree_node_type_needs_gc(i->iter->btree_id))
@@ -558,6 +559,7 @@ static inline int do_btree_insert_at(struct btree_trans *trans,
                         goto out;
                 }
         }
+        }
 
         if (race_fault()) {
                 ret = -EINTR;
@@ -602,6 +604,7 @@ static inline int do_btree_insert_at(struct btree_trans *trans,
                         linked->flags |= BTREE_ITER_NOUNLOCK;
         }
 
+        if (likely(!(trans->flags & BTREE_INSERT_NOMARK))) {
         trans_for_each_update_iter(trans, i)
                 bch2_mark_update(trans, i, fs_usage, 0);
         if (fs_usage)
@@ -613,6 +616,7 @@ static inline int do_btree_insert_at(struct btree_trans *trans,
                         bch2_mark_update(trans, i, NULL,
                                          BCH_BUCKET_MARK_GC);
         }
+        }
 
         trans_for_each_update(trans, i)
                 do_btree_insert_one(trans, i);
...
@@ -1035,70 +1035,86 @@ int bch2_mark_key(struct bch_fs *c, struct bkey_s_c k,
         return ret;
 }
 
-void bch2_mark_update(struct btree_trans *trans,
-                      struct btree_insert_entry *insert,
+inline bool bch2_mark_overwrite(struct btree_trans *trans,
+                                struct btree_iter *iter,
+                                struct bkey_s_c old,
+                                struct bkey_i *new,
                                 struct bch_fs_usage *fs_usage,
                                 unsigned flags)
 {
         struct bch_fs *c = trans->c;
-        struct btree_iter *iter = insert->iter;
         struct btree *b = iter->l[0].b;
-        struct btree_node_iter node_iter = iter->l[0].iter;
-        struct bkey_packed *_k;
-
-        if (!btree_node_type_needs_gc(iter->btree_id))
-                return;
-
-        if (!(trans->flags & BTREE_INSERT_NOMARK))
-                bch2_mark_key_locked(c, bkey_i_to_s_c(insert->k), true,
-                        bpos_min(insert->k->k.p, b->key.k.p).offset -
-                        bkey_start_offset(&insert->k->k),
-                        fs_usage, trans->journal_res.seq, flags);
-
-        while ((_k = bch2_btree_node_iter_peek_filter(&node_iter, b,
-                                                      KEY_TYPE_discard))) {
-                struct bkey unpacked;
-                struct bkey_s_c k;
         s64 sectors = 0;
 
-                k = bkey_disassemble(b, _k, &unpacked);
-
         if (btree_node_is_extents(b)
-            ? bkey_cmp(insert->k->k.p, bkey_start_pos(k.k)) <= 0
-            : bkey_cmp(insert->k->k.p, k.k->p))
-                break;
+            ? bkey_cmp(new->k.p, bkey_start_pos(old.k)) <= 0
+            : bkey_cmp(new->k.p, old.k->p))
+                return false;
 
         if (btree_node_is_extents(b)) {
-                switch (bch2_extent_overlap(&insert->k->k, k.k)) {
+                switch (bch2_extent_overlap(&new->k, old.k)) {
                 case BCH_EXTENT_OVERLAP_ALL:
-                        sectors = -((s64) k.k->size);
+                        sectors = -((s64) old.k->size);
                         break;
                 case BCH_EXTENT_OVERLAP_BACK:
-                        sectors = bkey_start_offset(&insert->k->k) -
-                                k.k->p.offset;
+                        sectors = bkey_start_offset(&new->k) -
+                                old.k->p.offset;
                         break;
                 case BCH_EXTENT_OVERLAP_FRONT:
-                        sectors = bkey_start_offset(k.k) -
-                                insert->k->k.p.offset;
+                        sectors = bkey_start_offset(old.k) -
+                                new->k.p.offset;
                         break;
                 case BCH_EXTENT_OVERLAP_MIDDLE:
-                        sectors = k.k->p.offset - insert->k->k.p.offset;
+                        sectors = old.k->p.offset - new->k.p.offset;
                         BUG_ON(sectors <= 0);
 
-                        bch2_mark_key_locked(c, k, true, sectors,
+                        bch2_mark_key_locked(c, old, true, sectors,
                                              fs_usage, trans->journal_res.seq,
                                              flags);
 
-                        sectors = bkey_start_offset(&insert->k->k) -
-                                k.k->p.offset;
+                        sectors = bkey_start_offset(&new->k) -
+                                old.k->p.offset;
                         break;
                 }
 
                 BUG_ON(sectors >= 0);
         }
 
-        bch2_mark_key_locked(c, k, false, sectors,
+        bch2_mark_key_locked(c, old, false, sectors,
                              fs_usage, trans->journal_res.seq, flags);
 
+        return true;
+}
+
+void bch2_mark_update(struct btree_trans *trans,
+                      struct btree_insert_entry *insert,
+                      struct bch_fs_usage *fs_usage,
+                      unsigned flags)
+{
+        struct bch_fs *c = trans->c;
+        struct btree_iter *iter = insert->iter;
+        struct btree *b = iter->l[0].b;
+        struct btree_node_iter node_iter = iter->l[0].iter;
+        struct bkey_packed *_k;
+
+        if (!btree_node_type_needs_gc(iter->btree_id))
+                return;
+
+        bch2_mark_key_locked(c, bkey_i_to_s_c(insert->k), true,
+                bpos_min(insert->k->k.p, b->key.k.p).offset -
+                bkey_start_offset(&insert->k->k),
+                fs_usage, trans->journal_res.seq, flags);
+
+        if (unlikely(trans->flags & BTREE_INSERT_NOMARK_OVERWRITES))
+                return;
+
+        while ((_k = bch2_btree_node_iter_peek_filter(&node_iter, b,
+                                                      KEY_TYPE_discard))) {
+                struct bkey unpacked;
+                struct bkey_s_c k = bkey_disassemble(b, _k, &unpacked);
+
+                if (!bch2_mark_overwrite(trans, iter, k, insert->k,
+                                         fs_usage, flags))
+                        break;
+
                 bch2_btree_node_iter_advance(&node_iter, b);
         }
...
@@ -254,6 +254,9 @@ int bch2_mark_key(struct bch_fs *, struct bkey_s_c,
 int bch2_fs_usage_apply(struct bch_fs *, struct bch_fs_usage *,
                         struct disk_reservation *);
 
+bool bch2_mark_overwrite(struct btree_trans *, struct btree_iter *,
+                         struct bkey_s_c, struct bkey_i *,
+                         struct bch_fs_usage *, unsigned);
 void bch2_mark_update(struct btree_trans *, struct btree_insert_entry *,
                       struct bch_fs_usage *, unsigned);
 void bch2_trans_fs_usage_apply(struct btree_trans *, struct bch_fs_usage *);
...
@@ -203,63 +203,94 @@ static void replay_now_at(struct journal *j, u64 seq)
 static int bch2_extent_replay_key(struct bch_fs *c, struct bkey_i *k)
 {
         struct btree_trans trans;
-        struct btree_iter *iter;
+        struct btree_iter *iter, *split_iter;
         /*
-         * We might cause compressed extents to be
-         * split, so we need to pass in a
-         * disk_reservation:
+         * We might cause compressed extents to be split, so we need to pass in
+         * a disk_reservation:
          */
         struct disk_reservation disk_res =
                 bch2_disk_reservation_init(c, 0);
-        BKEY_PADDED(k) split;
+        struct bkey_i *split;
+        bool split_compressed = false;
+        unsigned flags = BTREE_INSERT_ATOMIC|
+                BTREE_INSERT_NOFAIL|
+                BTREE_INSERT_LAZY_RW|
+                BTREE_INSERT_JOURNAL_REPLAY|
+                BTREE_INSERT_NOMARK;
         int ret;
 
         bch2_trans_init(&trans, c);
+        bch2_trans_preload_iters(&trans);
+retry:
+        bch2_trans_begin(&trans);
 
         iter = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS,
                                    bkey_start_pos(&k->k),
                                    BTREE_ITER_INTENT);
 
         do {
                 ret = bch2_btree_iter_traverse(iter);
                 if (ret)
-                        break;
+                        goto err;
 
-                bkey_copy(&split.k, k);
-                bch2_cut_front(iter->pos, &split.k);
-                bch2_extent_trim_atomic(&split.k, iter);
+                split_iter = bch2_trans_copy_iter(&trans, iter);
+                ret = PTR_ERR_OR_ZERO(split_iter);
+                if (ret)
+                        goto err;
+
+                split = bch2_trans_kmalloc(&trans, bkey_bytes(&k->k));
+                ret = PTR_ERR_OR_ZERO(split);
+                if (ret)
+                        goto err;
+
+                if (!split_compressed &&
+                    bch2_extent_is_compressed(bkey_i_to_s_c(k)) &&
+                    !bch2_extent_is_atomic(k, split_iter)) {
                         ret = bch2_disk_reservation_add(c, &disk_res,
-                                split.k.k.size *
-                                bch2_bkey_nr_dirty_ptrs(bkey_i_to_s_c(&split.k)),
+                                        k->k.size *
+                                        bch2_bkey_nr_dirty_ptrs(bkey_i_to_s_c(k)),
                                         BCH_DISK_RESERVATION_NOFAIL);
                         BUG_ON(ret);
 
-                bch2_trans_update(&trans, BTREE_INSERT_ENTRY(iter, &split.k));
-                ret = bch2_trans_commit(&trans, &disk_res, NULL,
-                                        BTREE_INSERT_ATOMIC|
-                                        BTREE_INSERT_NOFAIL|
-                                        BTREE_INSERT_LAZY_RW|
-                                        BTREE_INSERT_JOURNAL_REPLAY);
-        } while ((!ret || ret == -EINTR) &&
-                 bkey_cmp(k->k.p, iter->pos));
-
-        bch2_disk_reservation_put(c, &disk_res);
+                        flags &= ~BTREE_INSERT_JOURNAL_REPLAY;
+                        flags &= ~BTREE_INSERT_NOMARK;
+                        flags |= BTREE_INSERT_NOMARK_OVERWRITES;
+                        split_compressed = true;
+                }
+
+                bkey_copy(split, k);
+                bch2_cut_front(split_iter->pos, split);
+                bch2_extent_trim_atomic(split, split_iter);
+
+                bch2_trans_update(&trans, BTREE_INSERT_ENTRY(split_iter, split));
+                bch2_btree_iter_set_pos(iter, split->k.p);
+        } while (bkey_cmp(iter->pos, k->k.p) < 0);
+
+        ret = bch2_trans_commit(&trans, &disk_res, NULL, flags);
+        if (ret)
+                goto err;
 
+        if (split_compressed) {
                 /*
-         * This isn't strictly correct - we should only be relying on the btree
-         * node lock for synchronization with gc when we've got a write lock
-         * held.
+                 * This isn't strictly correct - we should only be relying on
+                 * the btree node lock for synchronization with gc when we've
+                 * got a write lock held.
                  *
-         * but - there are other correctness issues if btree gc were to run
-         * before journal replay finishes
+                 * but - there are other correctness issues if btree gc were to
+                 * run before journal replay finishes
                  */
                 BUG_ON(c->gc_pos.phase);
                 bch2_mark_key(c, bkey_i_to_s_c(k), false, -((s64) k->k.size),
                               NULL, 0, 0);
-        bch2_trans_exit(&trans);
-
-        return ret;
+        }
+err:
+        if (ret == -EINTR)
+                goto retry;
+
+        bch2_disk_reservation_put(c, &disk_res);
+        return bch2_trans_exit(&trans) ?: ret;
 }
 
 static int bch2_journal_replay(struct bch_fs *c,
...