Commit ad520141 authored by Kent Overstreet

bcachefs: Fix corruption with writeable snapshots

When partially overwriting an extent in an older snapshot, the existing
extent has to be split.

If the existing extent was overwritten in a different (sibling)
snapshot, we have to ensure that the split won't be visible in the
sibling snapshot.

data_update.c already has code for this,
bch2_insert_snapshot_whiteouts() - we just need to move it into
btree_update_leaf.c and change bch2_trans_update_extent() to use it as
well.
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
parent e47a390a
...@@ -4,6 +4,7 @@ ...@@ -4,6 +4,7 @@
#include "btree_iter.h" #include "btree_iter.h"
#include "journal.h" #include "journal.h"
#include "journal.h"
struct bch_fs; struct bch_fs;
struct btree; struct btree;
...@@ -83,6 +84,28 @@ int bch2_btree_node_update_key(struct btree_trans *, struct btree_iter *, ...@@ -83,6 +84,28 @@ int bch2_btree_node_update_key(struct btree_trans *, struct btree_iter *,
int bch2_btree_node_update_key_get_iter(struct btree_trans *, int bch2_btree_node_update_key_get_iter(struct btree_trans *,
struct btree *, struct bkey_i *, bool); struct btree *, struct bkey_i *, bool);
int __bch2_insert_snapshot_whiteouts(struct btree_trans *, enum btree_id,
struct bpos, struct bpos);
/*
 * Used when an extent in an existing snapshot is being split:
 *
 * When @old_pos belongs to an interior snapshot node, the descendant snapshot
 * nodes are walked, and every descendant in which @old_pos has been
 * overwritten (and is therefore not visible) gets a whiteout emitted at
 * @new_pos.
 *
 * This wrapper only filters out the trivial cases - a btree without
 * snapshots, or @old_pos equal to @new_pos - and otherwise hands off to
 * __bch2_insert_snapshot_whiteouts().
 *
 * Returns 0 when there is nothing to do, else whatever
 * __bch2_insert_snapshot_whiteouts() returns.
 */
static inline int bch2_insert_snapshot_whiteouts(struct btree_trans *trans,
						 enum btree_id btree,
						 struct bpos old_pos,
						 struct bpos new_pos)
{
	/* Only snapshotted btrees with a key that actually moved need work */
	if (btree_type_has_snapshots(btree) &&
	    !bkey_eq(old_pos, new_pos))
		return __bch2_insert_snapshot_whiteouts(trans, btree,
							old_pos, new_pos);

	return 0;
}
int bch2_trans_update_extent(struct btree_trans *, struct btree_iter *, int bch2_trans_update_extent(struct btree_trans *, struct btree_iter *,
struct bkey_i *, enum btree_update_flags); struct bkey_i *, enum btree_update_flags);
......
...@@ -1343,6 +1343,69 @@ static int need_whiteout_for_snapshot(struct btree_trans *trans, ...@@ -1343,6 +1343,69 @@ static int need_whiteout_for_snapshot(struct btree_trans *trans,
return ret; return ret;
} }
int __bch2_insert_snapshot_whiteouts(struct btree_trans *trans,
enum btree_id id,
struct bpos old_pos,
struct bpos new_pos)
{
struct bch_fs *c = trans->c;
struct btree_iter old_iter, new_iter;
struct bkey_s_c old_k, new_k;
snapshot_id_list s;
struct bkey_i *update;
int ret;
if (!bch2_snapshot_has_children(c, old_pos.snapshot))
return 0;
darray_init(&s);
bch2_trans_iter_init(trans, &old_iter, id, old_pos,
BTREE_ITER_NOT_EXTENTS|
BTREE_ITER_ALL_SNAPSHOTS);
while ((old_k = bch2_btree_iter_prev(&old_iter)).k &&
!(ret = bkey_err(old_k)) &&
bkey_eq(old_pos, old_k.k->p)) {
struct bpos whiteout_pos =
SPOS(new_pos.inode, new_pos.offset, old_k.k->p.snapshot);;
if (!bch2_snapshot_is_ancestor(c, old_k.k->p.snapshot, old_pos.snapshot) ||
snapshot_list_has_ancestor(c, &s, old_k.k->p.snapshot))
continue;
new_k = bch2_bkey_get_iter(trans, &new_iter, id, whiteout_pos,
BTREE_ITER_NOT_EXTENTS|
BTREE_ITER_INTENT);
ret = bkey_err(new_k);
if (ret)
break;
if (new_k.k->type == KEY_TYPE_deleted) {
update = bch2_trans_kmalloc(trans, sizeof(struct bkey_i));
ret = PTR_ERR_OR_ZERO(update);
if (ret)
break;
bkey_init(&update->k);
update->k.p = whiteout_pos;
update->k.type = KEY_TYPE_whiteout;
ret = bch2_trans_update(trans, &new_iter, update,
BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE);
}
bch2_trans_iter_exit(trans, &new_iter);
ret = snapshot_list_add(c, &s, old_k.k->p.snapshot);
if (ret)
break;
}
bch2_trans_iter_exit(trans, &old_iter);
darray_exit(&s);
return ret;
}
int bch2_trans_update_extent(struct btree_trans *trans, int bch2_trans_update_extent(struct btree_trans *trans,
struct btree_iter *orig_iter, struct btree_iter *orig_iter,
struct bkey_i *insert, struct bkey_i *insert,
...@@ -1396,8 +1459,10 @@ int bch2_trans_update_extent(struct btree_trans *trans, ...@@ -1396,8 +1459,10 @@ int bch2_trans_update_extent(struct btree_trans *trans,
bch2_cut_back(start, update); bch2_cut_back(start, update);
ret = bch2_btree_insert_nonextent(trans, btree_id, update, ret = bch2_insert_snapshot_whiteouts(trans, btree_id,
BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE|flags); k.k->p, update->k.p) ?:
bch2_btree_insert_nonextent(trans, btree_id, update,
BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE|flags);
if (ret) if (ret)
goto err; goto err;
} }
...@@ -1411,7 +1476,9 @@ int bch2_trans_update_extent(struct btree_trans *trans, ...@@ -1411,7 +1476,9 @@ int bch2_trans_update_extent(struct btree_trans *trans,
bch2_cut_front(start, update); bch2_cut_front(start, update);
bch2_cut_back(insert->k.p, update); bch2_cut_back(insert->k.p, update);
ret = bch2_btree_insert_nonextent(trans, btree_id, update, ret = bch2_insert_snapshot_whiteouts(trans, btree_id,
k.k->p, update->k.p) ?:
bch2_btree_insert_nonextent(trans, btree_id, update,
BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE|flags); BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE|flags);
if (ret) if (ret)
goto err; goto err;
......
...@@ -16,81 +16,6 @@ ...@@ -16,81 +16,6 @@
#include "subvolume.h" #include "subvolume.h"
#include "trace.h" #include "trace.h"
/*
 * For a key at @old_pos that is being trimmed/moved to @new_pos: walk
 * backwards from @old_pos over keys in all snapshots while they still compare
 * equal to @old_pos under bkey_eq(), and for each one that passes the ancestor
 * checks below, queue a KEY_TYPE_whiteout at @new_pos in that key's snapshot -
 * unless something other than KEY_TYPE_deleted already exists there.
 *
 * Returns 0 on success, or when @id has no snapshots or old_pos.snapshot has
 * no children; otherwise the first error from the iterator, allocation,
 * update or snapshot list helpers.
 */
static int insert_snapshot_whiteouts(struct btree_trans *trans,
				     enum btree_id id,
				     struct bpos old_pos,
				     struct bpos new_pos)
{
	struct bch_fs *c = trans->c;
	struct btree_iter iter, iter2;
	struct bkey_s_c k, k2;
	snapshot_id_list s;
	struct bkey_i *update;
	int ret;

	if (!btree_type_has_snapshots(id))
		return 0;

	if (!bch2_snapshot_has_children(c, old_pos.snapshot))
		return 0;

	/* Set up the list only once we know we will reach darray_exit() */
	darray_init(&s);

	bch2_trans_iter_init(trans, &iter, id, old_pos,
			     BTREE_ITER_NOT_EXTENTS|
			     BTREE_ITER_ALL_SNAPSHOTS);
	while (1) {
		k = bch2_btree_iter_prev(&iter);
		ret = bkey_err(k);
		if (ret)
			break;

		if (!k.k)
			break;

		/* Stop once we have walked past @old_pos */
		if (!bkey_eq(old_pos, k.k->p))
			break;

		if (bch2_snapshot_is_ancestor(c, k.k->p.snapshot, old_pos.snapshot) &&
		    !snapshot_list_has_ancestor(c, &s, k.k->p.snapshot)) {
			/* @new_pos, but in the snapshot of the key we found */
			struct bpos whiteout_pos = new_pos;

			whiteout_pos.snapshot = k.k->p.snapshot;

			k2 = bch2_bkey_get_iter(trans, &iter2, id, whiteout_pos,
						BTREE_ITER_NOT_EXTENTS|
						BTREE_ITER_INTENT);
			ret = bkey_err(k2);

			if (!ret && k2.k->type == KEY_TYPE_deleted) {
				update = bch2_trans_kmalloc(trans, sizeof(struct bkey_i));
				ret = PTR_ERR_OR_ZERO(update);
				/*
				 * On allocation failure fall through rather
				 * than break, so that iter2 is still released
				 * below.
				 */
				if (!ret) {
					bkey_init(&update->k);
					update->k.p		= whiteout_pos;
					update->k.type		= KEY_TYPE_whiteout;

					ret = bch2_trans_update(trans, &iter2, update,
								BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE);
				}
			}
			bch2_trans_iter_exit(trans, &iter2);

			if (ret)
				break;

			ret = snapshot_list_add(c, &s, k.k->p.snapshot);
			if (ret)
				break;
		}
	}
	bch2_trans_iter_exit(trans, &iter);
	darray_exit(&s);

	return ret;
}
static void trace_move_extent_finish2(struct bch_fs *c, struct bkey_s_c k) static void trace_move_extent_finish2(struct bch_fs *c, struct bkey_s_c k)
{ {
if (trace_move_extent_finish_enabled()) { if (trace_move_extent_finish_enabled()) {
...@@ -327,19 +252,12 @@ static int __bch2_data_update_index_update(struct btree_trans *trans, ...@@ -327,19 +252,12 @@ static int __bch2_data_update_index_update(struct btree_trans *trans,
next_pos = insert->k.p; next_pos = insert->k.p;
if (!bkey_eq(bkey_start_pos(&insert->k), bkey_start_pos(k.k))) { ret = bch2_insert_snapshot_whiteouts(trans, m->btree_id,
ret = insert_snapshot_whiteouts(trans, m->btree_id, k.k->p, k.k->p, bkey_start_pos(&insert->k)) ?:
bkey_start_pos(&insert->k)); bch2_insert_snapshot_whiteouts(trans, m->btree_id,
if (ret) k.k->p, insert->k.p);
goto err; if (ret)
} goto err;
if (!bkey_eq(insert->k.p, k.k->p)) {
ret = insert_snapshot_whiteouts(trans, m->btree_id,
k.k->p, insert->k.p);
if (ret)
goto err;
}
ret = bch2_trans_update(trans, &iter, insert, ret = bch2_trans_update(trans, &iter, insert,
BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE) ?: BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE) ?:
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment