Commit d5c88b73 authored by Jan Schmidt's avatar Jan Schmidt

Btrfs: bugfix: ignore the wrong key for indirect tree block backrefs

The key we store with a tree block backref is only a hint. It is set when
the ref is created and can remain correct for a long time. As the tree is
rebalanced, however, eventually the key no longer points to the correct
destination.

With this patch, we change find_parent_nodes to no longer add keys unless it
knows for sure they're correct (e.g. because they're for an extent data
backref). Then when we later encounter a backref with no parent and no
key set, we grab the block and take the first key from the block itself.
Signed-off-by: Jan Schmidt <list.btrfs@jan-o-sch.net>
parent dadcaf78
...@@ -30,16 +30,55 @@ ...@@ -30,16 +30,55 @@
struct __prelim_ref { struct __prelim_ref {
struct list_head list; struct list_head list;
u64 root_id; u64 root_id;
struct btrfs_key key; struct btrfs_key key_for_search;
int level; int level;
int count; int count;
u64 parent; u64 parent;
u64 wanted_disk_byte; u64 wanted_disk_byte;
}; };
/*
* the rules for all callers of this function are:
* - obtaining the parent is the goal
* - if you add a key, you must know that it is a correct key
* - if you cannot add the parent or a correct key, then we will look into the
* block later to set a correct key
*
* delayed refs
* ============
* backref type | shared | indirect | shared | indirect
* information | tree | tree | data | data
* --------------------+--------+----------+--------+----------
* parent logical | y | - | - | -
* key to resolve | - | y | y | y
* tree block logical | - | - | - | -
* root for resolving | y | y | y | y
*
* - column 1: we have the parent -> done
* - column 2, 3, 4: we use the key to find the parent
*
* on disk refs (inline or keyed)
* ==============================
* backref type | shared | indirect | shared | indirect
* information | tree | tree | data | data
* --------------------+--------+----------+--------+----------
* parent logical | y | - | y | -
* key to resolve | - | - | - | y
* tree block logical | y | y | y | y
* root for resolving | - | y | y | y
*
* - column 1, 3: we have the parent -> done
* - column 2: we take the first key from the block to find the parent
* (see __add_missing_keys)
* - column 4: we use the key to find the parent
*
* additional information that's available but not required to find the parent
* block might help in merging entries to gain some speed.
*/
static int __add_prelim_ref(struct list_head *head, u64 root_id, static int __add_prelim_ref(struct list_head *head, u64 root_id,
struct btrfs_key *key, int level, u64 parent, struct btrfs_key *key, int level,
u64 wanted_disk_byte, int count) u64 parent, u64 wanted_disk_byte, int count)
{ {
struct __prelim_ref *ref; struct __prelim_ref *ref;
...@@ -50,9 +89,9 @@ static int __add_prelim_ref(struct list_head *head, u64 root_id, ...@@ -50,9 +89,9 @@ static int __add_prelim_ref(struct list_head *head, u64 root_id,
ref->root_id = root_id; ref->root_id = root_id;
if (key) if (key)
ref->key = *key; ref->key_for_search = *key;
else else
memset(&ref->key, 0, sizeof(ref->key)); memset(&ref->key_for_search, 0, sizeof(ref->key_for_search));
ref->level = level; ref->level = level;
ref->count = count; ref->count = count;
...@@ -152,12 +191,13 @@ static int __resolve_indirect_ref(struct btrfs_fs_info *fs_info, ...@@ -152,12 +191,13 @@ static int __resolve_indirect_ref(struct btrfs_fs_info *fs_info,
goto out; goto out;
path->lowest_level = level; path->lowest_level = level;
ret = btrfs_search_slot(NULL, root, &ref->key, path, 0, 0); ret = btrfs_search_slot(NULL, root, &ref->key_for_search, path, 0, 0);
pr_debug("search slot in root %llu (level %d, ref count %d) returned " pr_debug("search slot in root %llu (level %d, ref count %d) returned "
"%d for key (%llu %u %llu)\n", "%d for key (%llu %u %llu)\n",
(unsigned long long)ref->root_id, level, ref->count, ret, (unsigned long long)ref->root_id, level, ref->count, ret,
(unsigned long long)ref->key.objectid, ref->key.type, (unsigned long long)ref->key_for_search.objectid,
(unsigned long long)ref->key.offset); ref->key_for_search.type,
(unsigned long long)ref->key_for_search.offset);
if (ret < 0) if (ret < 0)
goto out; goto out;
...@@ -248,10 +288,65 @@ static int __resolve_indirect_refs(struct btrfs_fs_info *fs_info, ...@@ -248,10 +288,65 @@ static int __resolve_indirect_refs(struct btrfs_fs_info *fs_info,
return ret; return ret;
} }
/*
 * Compare two preliminary refs field by field.
 *
 * Returns 1 when level, root_id, every component of key_for_search
 * (type, objectid, offset) and parent are all equal, 0 otherwise.
 */
static inline int ref_for_same_block(struct __prelim_ref *ref1,
				     struct __prelim_ref *ref2)
{
	return ref1->level == ref2->level &&
	       ref1->root_id == ref2->root_id &&
	       ref1->key_for_search.type == ref2->key_for_search.type &&
	       ref1->key_for_search.objectid ==
			ref2->key_for_search.objectid &&
	       ref1->key_for_search.offset == ref2->key_for_search.offset &&
	       ref1->parent == ref2->parent;
}
/*
 * read tree blocks and add keys where required.
 *
 * Walks every preliminary ref on @head. A ref that already has a parent, or
 * whose key_for_search.type is non-zero, is left alone. For the remaining
 * refs the block at ref->wanted_disk_byte is read and the first key found in
 * that block is copied into ref->key_for_search.
 *
 * Returns 0 on success or -EIO when a tree block could not be read; in the
 * latter case the refs visited so far keep the keys already filled in.
 */
static int __add_missing_keys(struct btrfs_fs_info *fs_info,
			      struct list_head *head)
{
	struct list_head *pos;
	struct extent_buffer *eb;

	list_for_each(pos, head) {
		struct __prelim_ref *ref;

		ref = list_entry(pos, struct __prelim_ref, list);
		if (ref->parent)
			continue;
		if (ref->key_for_search.type)
			continue;
		/* without a parent or a key, the block address is all we have */
		BUG_ON(!ref->wanted_disk_byte);
		eb = read_tree_block(fs_info->tree_root, ref->wanted_disk_byte,
				     fs_info->tree_root->leafsize, 0);
		/*
		 * A failed read is a runtime error, not a broken invariant:
		 * report it to the caller instead of crashing the kernel.
		 */
		if (!eb)
			return -EIO;
		btrfs_tree_read_lock(eb);
		/* level 0 blocks hold items, higher levels hold node pointers */
		if (btrfs_header_level(eb) == 0)
			btrfs_item_key_to_cpu(eb, &ref->key_for_search, 0);
		else
			btrfs_node_key_to_cpu(eb, &ref->key_for_search, 0);
		btrfs_tree_read_unlock(eb);
		free_extent_buffer(eb);
	}
	return 0;
}
/* /*
* merge two lists of backrefs and adjust counts accordingly * merge two lists of backrefs and adjust counts accordingly
* *
* mode = 1: merge identical keys, if key is set * mode = 1: merge identical keys, if key is set
* FIXME: if we add more keys in __add_prelim_ref, we can merge more here.
* additionally, we could even add a key range for the blocks we
* looked into to merge even more (-> replace unresolved refs by those
* having a parent).
* mode = 2: merge identical parents * mode = 2: merge identical parents
*/ */
static int __merge_refs(struct list_head *head, int mode) static int __merge_refs(struct list_head *head, int mode)
...@@ -265,20 +360,21 @@ static int __merge_refs(struct list_head *head, int mode) ...@@ -265,20 +360,21 @@ static int __merge_refs(struct list_head *head, int mode)
ref1 = list_entry(pos1, struct __prelim_ref, list); ref1 = list_entry(pos1, struct __prelim_ref, list);
if (mode == 1 && ref1->key.type == 0)
continue;
for (pos2 = pos1->next, n2 = pos2->next; pos2 != head; for (pos2 = pos1->next, n2 = pos2->next; pos2 != head;
pos2 = n2, n2 = pos2->next) { pos2 = n2, n2 = pos2->next) {
struct __prelim_ref *ref2; struct __prelim_ref *ref2;
struct __prelim_ref *xchg;
ref2 = list_entry(pos2, struct __prelim_ref, list); ref2 = list_entry(pos2, struct __prelim_ref, list);
if (mode == 1) { if (mode == 1) {
if (memcmp(&ref1->key, &ref2->key, if (!ref_for_same_block(ref1, ref2))
sizeof(ref1->key)) ||
ref1->level != ref2->level ||
ref1->root_id != ref2->root_id)
continue; continue;
if (!ref1->parent && ref2->parent) {
xchg = ref1;
ref1 = ref2;
ref2 = xchg;
}
ref1->count += ref2->count; ref1->count += ref2->count;
} else { } else {
if (ref1->parent != ref2->parent) if (ref1->parent != ref2->parent)
...@@ -298,16 +394,17 @@ static int __merge_refs(struct list_head *head, int mode) ...@@ -298,16 +394,17 @@ static int __merge_refs(struct list_head *head, int mode)
* smaller or equal that seq to the list * smaller or equal that seq to the list
*/ */
static int __add_delayed_refs(struct btrfs_delayed_ref_head *head, u64 seq, static int __add_delayed_refs(struct btrfs_delayed_ref_head *head, u64 seq,
struct btrfs_key *info_key,
struct list_head *prefs) struct list_head *prefs)
{ {
struct btrfs_delayed_extent_op *extent_op = head->extent_op; struct btrfs_delayed_extent_op *extent_op = head->extent_op;
struct rb_node *n = &head->node.rb_node; struct rb_node *n = &head->node.rb_node;
struct btrfs_key key;
struct btrfs_key op_key = {0};
int sgn; int sgn;
int ret = 0; int ret = 0;
if (extent_op && extent_op->update_key) if (extent_op && extent_op->update_key)
btrfs_disk_key_to_cpu(info_key, &extent_op->key); btrfs_disk_key_to_cpu(&op_key, &extent_op->key);
while ((n = rb_prev(n))) { while ((n = rb_prev(n))) {
struct btrfs_delayed_ref_node *node; struct btrfs_delayed_ref_node *node;
...@@ -339,7 +436,7 @@ static int __add_delayed_refs(struct btrfs_delayed_ref_head *head, u64 seq, ...@@ -339,7 +436,7 @@ static int __add_delayed_refs(struct btrfs_delayed_ref_head *head, u64 seq,
struct btrfs_delayed_tree_ref *ref; struct btrfs_delayed_tree_ref *ref;
ref = btrfs_delayed_node_to_tree_ref(node); ref = btrfs_delayed_node_to_tree_ref(node);
ret = __add_prelim_ref(prefs, ref->root, info_key, ret = __add_prelim_ref(prefs, ref->root, &op_key,
ref->level + 1, 0, node->bytenr, ref->level + 1, 0, node->bytenr,
node->ref_mod * sgn); node->ref_mod * sgn);
break; break;
...@@ -348,7 +445,7 @@ static int __add_delayed_refs(struct btrfs_delayed_ref_head *head, u64 seq, ...@@ -348,7 +445,7 @@ static int __add_delayed_refs(struct btrfs_delayed_ref_head *head, u64 seq,
struct btrfs_delayed_tree_ref *ref; struct btrfs_delayed_tree_ref *ref;
ref = btrfs_delayed_node_to_tree_ref(node); ref = btrfs_delayed_node_to_tree_ref(node);
ret = __add_prelim_ref(prefs, ref->root, info_key, ret = __add_prelim_ref(prefs, ref->root, NULL,
ref->level + 1, ref->parent, ref->level + 1, ref->parent,
node->bytenr, node->bytenr,
node->ref_mod * sgn); node->ref_mod * sgn);
...@@ -356,8 +453,6 @@ static int __add_delayed_refs(struct btrfs_delayed_ref_head *head, u64 seq, ...@@ -356,8 +453,6 @@ static int __add_delayed_refs(struct btrfs_delayed_ref_head *head, u64 seq,
} }
case BTRFS_EXTENT_DATA_REF_KEY: { case BTRFS_EXTENT_DATA_REF_KEY: {
struct btrfs_delayed_data_ref *ref; struct btrfs_delayed_data_ref *ref;
struct btrfs_key key;
ref = btrfs_delayed_node_to_data_ref(node); ref = btrfs_delayed_node_to_data_ref(node);
key.objectid = ref->objectid; key.objectid = ref->objectid;
...@@ -370,7 +465,6 @@ static int __add_delayed_refs(struct btrfs_delayed_ref_head *head, u64 seq, ...@@ -370,7 +465,6 @@ static int __add_delayed_refs(struct btrfs_delayed_ref_head *head, u64 seq,
} }
case BTRFS_SHARED_DATA_REF_KEY: { case BTRFS_SHARED_DATA_REF_KEY: {
struct btrfs_delayed_data_ref *ref; struct btrfs_delayed_data_ref *ref;
struct btrfs_key key;
ref = btrfs_delayed_node_to_data_ref(node); ref = btrfs_delayed_node_to_data_ref(node);
...@@ -396,8 +490,7 @@ static int __add_delayed_refs(struct btrfs_delayed_ref_head *head, u64 seq, ...@@ -396,8 +490,7 @@ static int __add_delayed_refs(struct btrfs_delayed_ref_head *head, u64 seq,
*/ */
static int __add_inline_refs(struct btrfs_fs_info *fs_info, static int __add_inline_refs(struct btrfs_fs_info *fs_info,
struct btrfs_path *path, u64 bytenr, struct btrfs_path *path, u64 bytenr,
struct btrfs_key *info_key, int *info_level, int *info_level, struct list_head *prefs)
struct list_head *prefs)
{ {
int ret = 0; int ret = 0;
int slot; int slot;
...@@ -426,12 +519,9 @@ static int __add_inline_refs(struct btrfs_fs_info *fs_info, ...@@ -426,12 +519,9 @@ static int __add_inline_refs(struct btrfs_fs_info *fs_info,
if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) { if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
struct btrfs_tree_block_info *info; struct btrfs_tree_block_info *info;
struct btrfs_disk_key disk_key;
info = (struct btrfs_tree_block_info *)ptr; info = (struct btrfs_tree_block_info *)ptr;
*info_level = btrfs_tree_block_level(leaf, info); *info_level = btrfs_tree_block_level(leaf, info);
btrfs_tree_block_key(leaf, info, &disk_key);
btrfs_disk_key_to_cpu(info_key, &disk_key);
ptr += sizeof(struct btrfs_tree_block_info); ptr += sizeof(struct btrfs_tree_block_info);
BUG_ON(ptr > end); BUG_ON(ptr > end);
} else { } else {
...@@ -449,7 +539,7 @@ static int __add_inline_refs(struct btrfs_fs_info *fs_info, ...@@ -449,7 +539,7 @@ static int __add_inline_refs(struct btrfs_fs_info *fs_info,
switch (type) { switch (type) {
case BTRFS_SHARED_BLOCK_REF_KEY: case BTRFS_SHARED_BLOCK_REF_KEY:
ret = __add_prelim_ref(prefs, 0, info_key, ret = __add_prelim_ref(prefs, 0, NULL,
*info_level + 1, offset, *info_level + 1, offset,
bytenr, 1); bytenr, 1);
break; break;
...@@ -464,8 +554,9 @@ static int __add_inline_refs(struct btrfs_fs_info *fs_info, ...@@ -464,8 +554,9 @@ static int __add_inline_refs(struct btrfs_fs_info *fs_info,
break; break;
} }
case BTRFS_TREE_BLOCK_REF_KEY: case BTRFS_TREE_BLOCK_REF_KEY:
ret = __add_prelim_ref(prefs, offset, info_key, ret = __add_prelim_ref(prefs, offset, NULL,
*info_level + 1, 0, bytenr, 1); *info_level + 1, 0,
bytenr, 1);
break; break;
case BTRFS_EXTENT_DATA_REF_KEY: { case BTRFS_EXTENT_DATA_REF_KEY: {
struct btrfs_extent_data_ref *dref; struct btrfs_extent_data_ref *dref;
...@@ -479,8 +570,8 @@ static int __add_inline_refs(struct btrfs_fs_info *fs_info, ...@@ -479,8 +570,8 @@ static int __add_inline_refs(struct btrfs_fs_info *fs_info,
key.type = BTRFS_EXTENT_DATA_KEY; key.type = BTRFS_EXTENT_DATA_KEY;
key.offset = btrfs_extent_data_ref_offset(leaf, dref); key.offset = btrfs_extent_data_ref_offset(leaf, dref);
root = btrfs_extent_data_ref_root(leaf, dref); root = btrfs_extent_data_ref_root(leaf, dref);
ret = __add_prelim_ref(prefs, root, &key, 0, 0, bytenr, ret = __add_prelim_ref(prefs, root, &key, 0, 0,
count); bytenr, count);
break; break;
} }
default: default:
...@@ -498,8 +589,7 @@ static int __add_inline_refs(struct btrfs_fs_info *fs_info, ...@@ -498,8 +589,7 @@ static int __add_inline_refs(struct btrfs_fs_info *fs_info,
*/ */
static int __add_keyed_refs(struct btrfs_fs_info *fs_info, static int __add_keyed_refs(struct btrfs_fs_info *fs_info,
struct btrfs_path *path, u64 bytenr, struct btrfs_path *path, u64 bytenr,
struct btrfs_key *info_key, int info_level, int info_level, struct list_head *prefs)
struct list_head *prefs)
{ {
struct btrfs_root *extent_root = fs_info->extent_root; struct btrfs_root *extent_root = fs_info->extent_root;
int ret; int ret;
...@@ -529,7 +619,7 @@ static int __add_keyed_refs(struct btrfs_fs_info *fs_info, ...@@ -529,7 +619,7 @@ static int __add_keyed_refs(struct btrfs_fs_info *fs_info,
switch (key.type) { switch (key.type) {
case BTRFS_SHARED_BLOCK_REF_KEY: case BTRFS_SHARED_BLOCK_REF_KEY:
ret = __add_prelim_ref(prefs, 0, info_key, ret = __add_prelim_ref(prefs, 0, NULL,
info_level + 1, key.offset, info_level + 1, key.offset,
bytenr, 1); bytenr, 1);
break; break;
...@@ -545,8 +635,9 @@ static int __add_keyed_refs(struct btrfs_fs_info *fs_info, ...@@ -545,8 +635,9 @@ static int __add_keyed_refs(struct btrfs_fs_info *fs_info,
break; break;
} }
case BTRFS_TREE_BLOCK_REF_KEY: case BTRFS_TREE_BLOCK_REF_KEY:
ret = __add_prelim_ref(prefs, key.offset, info_key, ret = __add_prelim_ref(prefs, key.offset, NULL,
info_level + 1, 0, bytenr, 1); info_level + 1, 0,
bytenr, 1);
break; break;
case BTRFS_EXTENT_DATA_REF_KEY: { case BTRFS_EXTENT_DATA_REF_KEY: {
struct btrfs_extent_data_ref *dref; struct btrfs_extent_data_ref *dref;
...@@ -562,7 +653,7 @@ static int __add_keyed_refs(struct btrfs_fs_info *fs_info, ...@@ -562,7 +653,7 @@ static int __add_keyed_refs(struct btrfs_fs_info *fs_info,
key.offset = btrfs_extent_data_ref_offset(leaf, dref); key.offset = btrfs_extent_data_ref_offset(leaf, dref);
root = btrfs_extent_data_ref_root(leaf, dref); root = btrfs_extent_data_ref_root(leaf, dref);
ret = __add_prelim_ref(prefs, root, &key, 0, 0, ret = __add_prelim_ref(prefs, root, &key, 0, 0,
bytenr, count); bytenr, count);
break; break;
} }
default: default:
...@@ -588,7 +679,6 @@ static int find_parent_nodes(struct btrfs_trans_handle *trans, ...@@ -588,7 +679,6 @@ static int find_parent_nodes(struct btrfs_trans_handle *trans,
{ {
struct btrfs_key key; struct btrfs_key key;
struct btrfs_path *path; struct btrfs_path *path;
struct btrfs_key info_key = { 0 };
struct btrfs_delayed_ref_root *delayed_refs = NULL; struct btrfs_delayed_ref_root *delayed_refs = NULL;
struct btrfs_delayed_ref_head *head; struct btrfs_delayed_ref_head *head;
int info_level = 0; int info_level = 0;
...@@ -647,8 +737,7 @@ static int find_parent_nodes(struct btrfs_trans_handle *trans, ...@@ -647,8 +737,7 @@ static int find_parent_nodes(struct btrfs_trans_handle *trans,
btrfs_put_delayed_ref(&head->node); btrfs_put_delayed_ref(&head->node);
goto again; goto again;
} }
ret = __add_delayed_refs(head, seq, &info_key, ret = __add_delayed_refs(head, seq, &prefs_delayed);
&prefs_delayed);
if (ret) { if (ret) {
spin_unlock(&delayed_refs->lock); spin_unlock(&delayed_refs->lock);
goto out; goto out;
...@@ -668,10 +757,10 @@ static int find_parent_nodes(struct btrfs_trans_handle *trans, ...@@ -668,10 +757,10 @@ static int find_parent_nodes(struct btrfs_trans_handle *trans,
if (key.objectid == bytenr && if (key.objectid == bytenr &&
key.type == BTRFS_EXTENT_ITEM_KEY) { key.type == BTRFS_EXTENT_ITEM_KEY) {
ret = __add_inline_refs(fs_info, path, bytenr, ret = __add_inline_refs(fs_info, path, bytenr,
&info_key, &info_level, &prefs); &info_level, &prefs);
if (ret) if (ret)
goto out; goto out;
ret = __add_keyed_refs(fs_info, path, bytenr, &info_key, ret = __add_keyed_refs(fs_info, path, bytenr,
info_level, &prefs); info_level, &prefs);
if (ret) if (ret)
goto out; goto out;
...@@ -679,16 +768,12 @@ static int find_parent_nodes(struct btrfs_trans_handle *trans, ...@@ -679,16 +768,12 @@ static int find_parent_nodes(struct btrfs_trans_handle *trans,
} }
btrfs_release_path(path); btrfs_release_path(path);
/*
* when adding the delayed refs above, the info_key might not have
* been known yet. Go over the list and replace the missing keys
*/
list_for_each_entry(ref, &prefs_delayed, list) {
if ((ref->key.offset | ref->key.type | ref->key.objectid) == 0)
memcpy(&ref->key, &info_key, sizeof(ref->key));
}
list_splice_init(&prefs_delayed, &prefs); list_splice_init(&prefs_delayed, &prefs);
ret = __add_missing_keys(fs_info, &prefs);
if (ret)
goto out;
ret = __merge_refs(&prefs, 1); ret = __merge_refs(&prefs, 1);
if (ret) if (ret)
goto out; goto out;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment