Commit 619104ba authored by Filipe Manana's avatar Filipe Manana Committed by David Sterba

btrfs: move common NOCOW checks against a file extent into a helper

Verifying if we can do a NOCOW write against a range fully or partially
covered by a file extent item requires verifying several constraints, and
these are currently duplicated at two different places: can_nocow_extent()
and run_delalloc_nocow().

This change moves those checks into a common helper function to avoid
duplication. It adds some comments and also preserves all existing
behaviour like for example can_nocow_extent() treating errors from the
calls to btrfs_cross_ref_exist() and csum_exist_in_range() as meaning
we can not NOCOW, instead of propagating the error back to the caller.
That specific behaviour is questionable but also reasonable to some
degree.
Signed-off-by: default avatarFilipe Manana <fdmanana@suse.com>
Signed-off-by: default avatarDavid Sterba <dsterba@suse.com>
parent 395cb57e
......@@ -1617,6 +1617,141 @@ static int fallback_to_cow(struct btrfs_inode *inode, struct page *locked_page,
nr_written, 1);
}
struct can_nocow_file_extent_args {
/* Input fields. */
/* Start file offset of the range we want to NOCOW. */
u64 start;
/* End file offset (inclusive) of the range we want to NOCOW. */
u64 end;
bool writeback_path;
bool strict;
/*
* Free the path passed to can_nocow_file_extent() once it's not needed
* anymore.
*/
bool free_path;
/* Output fields. Only set when can_nocow_file_extent() returns 1. */
u64 disk_bytenr;
u64 disk_num_bytes;
u64 extent_offset;
/* Number of bytes that can be written to in NOCOW mode. */
u64 num_bytes;
};
/*
* Check if we can NOCOW the file extent that the path points to.
* This function may return with the path released, so the caller should check
* if path->nodes[0] is NULL or not if it needs to use the path afterwards.
*
* Returns: < 0 on error
* 0 if we can not NOCOW
* 1 if we can NOCOW
*/
static int can_nocow_file_extent(struct btrfs_path *path,
struct btrfs_key *key,
struct btrfs_inode *inode,
struct can_nocow_file_extent_args *args)
{
const bool is_freespace_inode = btrfs_is_free_space_inode(inode);
struct extent_buffer *leaf = path->nodes[0];
struct btrfs_root *root = inode->root;
struct btrfs_file_extent_item *fi;
u64 extent_end;
u8 extent_type;
int can_nocow = 0;
int ret = 0;
fi = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_file_extent_item);
extent_type = btrfs_file_extent_type(leaf, fi);
if (extent_type == BTRFS_FILE_EXTENT_INLINE)
goto out;
/* Can't access these fields unless we know it's not an inline extent. */
args->disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
args->disk_num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
args->extent_offset = btrfs_file_extent_offset(leaf, fi);
if (!(inode->flags & BTRFS_INODE_NODATACOW) &&
extent_type == BTRFS_FILE_EXTENT_REG)
goto out;
/*
* If the extent was created before the generation where the last snapshot
* for its subvolume was created, then this implies the extent is shared,
* hence we must COW.
*/
if (!args->strict && !is_freespace_inode &&
btrfs_file_extent_generation(leaf, fi) <=
btrfs_root_last_snapshot(&root->root_item))
goto out;
/* An explicit hole, must COW. */
if (args->disk_bytenr == 0)
goto out;
/* Compressed/encrypted/encoded extents must be COWed. */
if (btrfs_file_extent_compression(leaf, fi) ||
btrfs_file_extent_encryption(leaf, fi) ||
btrfs_file_extent_other_encoding(leaf, fi))
goto out;
extent_end = btrfs_file_extent_end(path);
/*
* The following checks can be expensive, as they need to take other
* locks and do btree or rbtree searches, so release the path to avoid
* blocking other tasks for too long.
*/
btrfs_release_path(path);
ret = btrfs_cross_ref_exist(root, btrfs_ino(inode),
key->offset - args->extent_offset,
args->disk_bytenr, false, path);
WARN_ON_ONCE(ret > 0 && is_freespace_inode);
if (ret != 0)
goto out;
if (args->free_path) {
/*
* We don't need the path anymore, plus through the
* csum_exist_in_range() call below we will end up allocating
* another path. So free the path to avoid unnecessary extra
* memory usage.
*/
btrfs_free_path(path);
path = NULL;
}
/* If there are pending snapshots for this root, we must COW. */
if (args->writeback_path && !is_freespace_inode &&
atomic_read(&root->snapshot_force_cow))
goto out;
args->disk_bytenr += args->extent_offset;
args->disk_bytenr += args->start - key->offset;
args->num_bytes = min(args->end + 1, extent_end) - args->start;
/*
* Force COW if csums exist in the range. This ensures that csums for a
* given extent are either valid or do not exist.
*/
ret = csum_exist_in_range(root->fs_info, args->disk_bytenr, args->num_bytes);
WARN_ON_ONCE(ret > 0 && is_freespace_inode);
if (ret != 0)
goto out;
can_nocow = 1;
out:
if (args->free_path && path)
btrfs_free_path(path);
return ret < 0 ? ret : can_nocow;
}
/*
* when nowcow writeback call back. This checks for snapshots or COW copies
* of the extents that exist in the file, and COWs the file as required.
......@@ -1637,11 +1772,9 @@ static noinline int run_delalloc_nocow(struct btrfs_inode *inode,
u64 cur_offset = start;
int ret;
bool check_prev = true;
const bool freespace_inode = btrfs_is_free_space_inode(inode);
u64 ino = btrfs_ino(inode);
bool nocow = false;
u64 disk_bytenr = 0;
const bool force = inode->flags & BTRFS_INODE_NODATACOW;
struct can_nocow_file_extent_args nocow_args = { 0 };
path = btrfs_alloc_path();
if (!path) {
......@@ -1654,15 +1787,16 @@ static noinline int run_delalloc_nocow(struct btrfs_inode *inode,
return -ENOMEM;
}
nocow_args.end = end;
nocow_args.writeback_path = true;
while (1) {
struct btrfs_key found_key;
struct btrfs_file_extent_item *fi;
struct extent_buffer *leaf;
u64 extent_end;
u64 extent_offset;
u64 num_bytes = 0;
u64 disk_num_bytes;
u64 ram_bytes;
u64 nocow_end;
int extent_type;
nocow = false;
......@@ -1738,117 +1872,37 @@ static noinline int run_delalloc_nocow(struct btrfs_inode *inode,
fi = btrfs_item_ptr(leaf, path->slots[0],
struct btrfs_file_extent_item);
extent_type = btrfs_file_extent_type(leaf, fi);
/* If this is triggered then we have a memory corruption. */
ASSERT(extent_type < BTRFS_NR_FILE_EXTENT_TYPES);
if (WARN_ON(extent_type >= BTRFS_NR_FILE_EXTENT_TYPES)) {
ret = -EUCLEAN;
goto error;
}
ram_bytes = btrfs_file_extent_ram_bytes(leaf, fi);
if (extent_type == BTRFS_FILE_EXTENT_REG ||
extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
extent_offset = btrfs_file_extent_offset(leaf, fi);
extent_end = found_key.offset +
btrfs_file_extent_num_bytes(leaf, fi);
disk_num_bytes =
btrfs_file_extent_disk_num_bytes(leaf, fi);
extent_end = btrfs_file_extent_end(path);
/*
* If the extent we got ends before our current offset,
* skip to the next extent.
* If the extent we got ends before our current offset, skip to
* the next extent.
*/
if (extent_end <= cur_offset) {
path->slots[0]++;
goto next_slot;
}
/* Skip holes */
if (disk_bytenr == 0)
goto out_check;
/* Skip compressed/encrypted/encoded extents */
if (btrfs_file_extent_compression(leaf, fi) ||
btrfs_file_extent_encryption(leaf, fi) ||
btrfs_file_extent_other_encoding(leaf, fi))
goto out_check;
/*
* If extent is created before the last volume's snapshot
* this implies the extent is shared, hence we can't do
* nocow. This is the same check as in
* btrfs_cross_ref_exist but without calling
* btrfs_search_slot.
*/
if (!freespace_inode &&
btrfs_file_extent_generation(leaf, fi) <=
btrfs_root_last_snapshot(&root->root_item))
goto out_check;
if (extent_type == BTRFS_FILE_EXTENT_REG && !force)
goto out_check;
/*
* The following checks can be expensive, as they need to
* take other locks and do btree or rbtree searches, so
* release the path to avoid blocking other tasks for too
* long.
*/
btrfs_release_path(path);
ret = btrfs_cross_ref_exist(root, ino,
found_key.offset -
extent_offset, disk_bytenr,
false, path);
if (ret) {
/*
* ret could be -EIO if the above fails to read
* metadata.
*/
nocow_args.start = cur_offset;
ret = can_nocow_file_extent(path, &found_key, inode, &nocow_args);
if (ret < 0) {
if (cow_start != (u64)-1)
cur_offset = cow_start;
goto error;
}
WARN_ON_ONCE(freespace_inode);
} else if (ret == 0) {
goto out_check;
}
disk_bytenr += extent_offset;
disk_bytenr += cur_offset - found_key.offset;
num_bytes = min(end + 1, extent_end) - cur_offset;
/*
* If there are pending snapshots for this root, we
* fall into common COW way
*/
if (!freespace_inode && atomic_read(&root->snapshot_force_cow))
goto out_check;
/*
* force cow if csum exists in the range.
* this ensure that csum for a given extent are
* either valid or do not exist.
*/
ret = csum_exist_in_range(fs_info, disk_bytenr,
num_bytes);
if (ret) {
/*
* ret could be -EIO if the above fails to read
* metadata.
*/
if (ret < 0) {
if (cow_start != (u64)-1)
cur_offset = cow_start;
goto error;
}
WARN_ON_ONCE(freespace_inode);
goto out_check;
}
/* If the extent's block group is RO, we must COW */
if (!btrfs_inc_nocow_writers(fs_info, disk_bytenr))
goto out_check;
ret = 0;
if (btrfs_inc_nocow_writers(fs_info, nocow_args.disk_bytenr))
nocow = true;
} else if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
extent_end = found_key.offset + ram_bytes;
extent_end = ALIGN(extent_end, fs_info->sectorsize);
/* Skip extents outside of our requested range */
if (extent_end <= start) {
path->slots[0]++;
goto next_slot;
}
} else {
/* If this triggers then we have a memory corruption */
BUG();
}
out_check:
/*
* If nocow is false then record the beginning of the range
......@@ -1880,15 +1934,17 @@ static noinline int run_delalloc_nocow(struct btrfs_inode *inode,
cow_start = (u64)-1;
}
nocow_end = cur_offset + nocow_args.num_bytes - 1;
if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
u64 orig_start = found_key.offset - extent_offset;
u64 orig_start = found_key.offset - nocow_args.extent_offset;
struct extent_map *em;
em = create_io_em(inode, cur_offset, num_bytes,
em = create_io_em(inode, cur_offset, nocow_args.num_bytes,
orig_start,
disk_bytenr, /* block_start */
num_bytes, /* block_len */
disk_num_bytes, /* orig_block_len */
nocow_args.disk_bytenr, /* block_start */
nocow_args.num_bytes, /* block_len */
nocow_args.disk_num_bytes, /* orig_block_len */
ram_bytes, BTRFS_COMPRESS_NONE,
BTRFS_ORDERED_PREALLOC);
if (IS_ERR(em)) {
......@@ -1897,20 +1953,23 @@ static noinline int run_delalloc_nocow(struct btrfs_inode *inode,
}
free_extent_map(em);
ret = btrfs_add_ordered_extent(inode,
cur_offset, num_bytes, num_bytes,
disk_bytenr, num_bytes, 0,
cur_offset, nocow_args.num_bytes,
nocow_args.num_bytes,
nocow_args.disk_bytenr,
nocow_args.num_bytes, 0,
1 << BTRFS_ORDERED_PREALLOC,
BTRFS_COMPRESS_NONE);
if (ret) {
btrfs_drop_extent_cache(inode, cur_offset,
cur_offset + num_bytes - 1,
0);
nocow_end, 0);
goto error;
}
} else {
ret = btrfs_add_ordered_extent(inode, cur_offset,
num_bytes, num_bytes,
disk_bytenr, num_bytes,
nocow_args.num_bytes,
nocow_args.num_bytes,
nocow_args.disk_bytenr,
nocow_args.num_bytes,
0,
1 << BTRFS_ORDERED_NOCOW,
BTRFS_COMPRESS_NONE);
......@@ -1919,7 +1978,7 @@ static noinline int run_delalloc_nocow(struct btrfs_inode *inode,
}
if (nocow)
btrfs_dec_nocow_writers(fs_info, disk_bytenr);
btrfs_dec_nocow_writers(fs_info, nocow_args.disk_bytenr);
nocow = false;
if (btrfs_is_data_reloc_root(root))
......@@ -1929,10 +1988,9 @@ static noinline int run_delalloc_nocow(struct btrfs_inode *inode,
* from freeing metadata of created ordered extent.
*/
ret = btrfs_reloc_clone_csums(inode, cur_offset,
num_bytes);
nocow_args.num_bytes);
extent_clear_unlock_delalloc(inode, cur_offset,
cur_offset + num_bytes - 1,
extent_clear_unlock_delalloc(inode, cur_offset, nocow_end,
locked_page, EXTENT_LOCKED |
EXTENT_DELALLOC |
EXTENT_CLEAR_DATA_RESV,
......@@ -1965,7 +2023,7 @@ static noinline int run_delalloc_nocow(struct btrfs_inode *inode,
error:
if (nocow)
btrfs_dec_nocow_writers(fs_info, disk_bytenr);
btrfs_dec_nocow_writers(fs_info, nocow_args.disk_bytenr);
if (ret && cur_offset < end)
extent_clear_unlock_delalloc(inode, cur_offset, end,
......@@ -7107,6 +7165,7 @@ noinline int can_nocow_extent(struct inode *inode, u64 offset, u64 *len,
u64 *ram_bytes, bool strict)
{
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
struct can_nocow_file_extent_args nocow_args = { 0 };
struct btrfs_path *path;
int ret;
struct extent_buffer *leaf;
......@@ -7114,13 +7173,7 @@ noinline int can_nocow_extent(struct inode *inode, u64 offset, u64 *len,
struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
struct btrfs_file_extent_item *fi;
struct btrfs_key key;
u64 disk_bytenr;
u64 backref_offset;
u64 extent_end;
u64 num_bytes;
int slot;
int found_type;
bool nocow = (BTRFS_I(inode)->flags & BTRFS_INODE_NODATACOW);
path = btrfs_alloc_path();
if (!path)
......@@ -7131,18 +7184,17 @@ noinline int can_nocow_extent(struct inode *inode, u64 offset, u64 *len,
if (ret < 0)
goto out;
slot = path->slots[0];
if (ret == 1) {
if (slot == 0) {
if (path->slots[0] == 0) {
/* can't find the item, must cow */
ret = 0;
goto out;
}
slot--;
path->slots[0]--;
}
ret = 0;
leaf = path->nodes[0];
btrfs_item_key_to_cpu(leaf, &key, slot);
btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
if (key.objectid != btrfs_ino(BTRFS_I(inode)) ||
key.type != BTRFS_EXTENT_DATA_KEY) {
/* not our file or wrong item type, must cow */
......@@ -7154,57 +7206,38 @@ noinline int can_nocow_extent(struct inode *inode, u64 offset, u64 *len,
goto out;
}
fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
found_type = btrfs_file_extent_type(leaf, fi);
if (found_type != BTRFS_FILE_EXTENT_REG &&
found_type != BTRFS_FILE_EXTENT_PREALLOC) {
/* not a regular extent, must cow */
if (btrfs_file_extent_end(path) <= offset)
goto out;
}
if (!nocow && found_type == BTRFS_FILE_EXTENT_REG)
goto out;
extent_end = key.offset + btrfs_file_extent_num_bytes(leaf, fi);
if (extent_end <= offset)
goto out;
fi = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_file_extent_item);
found_type = btrfs_file_extent_type(leaf, fi);
if (ram_bytes)
*ram_bytes = btrfs_file_extent_ram_bytes(leaf, fi);
disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
if (disk_bytenr == 0)
goto out;
nocow_args.start = offset;
nocow_args.end = offset + *len - 1;
nocow_args.strict = strict;
nocow_args.free_path = true;
if (btrfs_file_extent_compression(leaf, fi) ||
btrfs_file_extent_encryption(leaf, fi) ||
btrfs_file_extent_other_encoding(leaf, fi))
goto out;
ret = can_nocow_file_extent(path, &key, BTRFS_I(inode), &nocow_args);
/* can_nocow_file_extent() has freed the path. */
path = NULL;
/*
* Do the same check as in btrfs_cross_ref_exist but without the
* unnecessary search.
*/
if (!strict &&
(btrfs_file_extent_generation(leaf, fi) <=
btrfs_root_last_snapshot(&root->root_item)))
if (ret != 1) {
/* Treat errors as not being able to NOCOW. */
ret = 0;
goto out;
backref_offset = btrfs_file_extent_offset(leaf, fi);
if (orig_start) {
*orig_start = key.offset - backref_offset;
*orig_block_len = btrfs_file_extent_disk_num_bytes(leaf, fi);
*ram_bytes = btrfs_file_extent_ram_bytes(leaf, fi);
}
btrfs_release_path(path);
if (btrfs_extent_readonly(fs_info, disk_bytenr))
ret = 0;
if (btrfs_extent_readonly(fs_info, nocow_args.disk_bytenr))
goto out;
num_bytes = min(offset + *len, extent_end) - offset;
if (!nocow && found_type == BTRFS_FILE_EXTENT_PREALLOC) {
if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NODATACOW) &&
found_type == BTRFS_FILE_EXTENT_PREALLOC) {
u64 range_end;
range_end = round_up(offset + num_bytes,
range_end = round_up(offset + nocow_args.num_bytes,
root->fs_info->sectorsize) - 1;
ret = test_range_bit(io_tree, offset, range_end,
EXTENT_DELALLOC, 0, NULL);
......@@ -7214,42 +7247,12 @@ noinline int can_nocow_extent(struct inode *inode, u64 offset, u64 *len,
}
}
/*
* look for other files referencing this extent, if we
* find any we must cow
*/
ret = btrfs_cross_ref_exist(root, btrfs_ino(BTRFS_I(inode)),
key.offset - backref_offset, disk_bytenr,
strict, path);
if (ret) {
ret = 0;
goto out;
}
if (orig_start)
*orig_start = key.offset - nocow_args.extent_offset;
if (orig_block_len)
*orig_block_len = nocow_args.disk_num_bytes;
/*
* We don't need the path anymore, plus through the csum_exist_in_range()
* call below we will end up allocating another path. So free the path
* to avoid unnecessary extra memory usage.
*/
btrfs_free_path(path);
path = NULL;
/*
* adjust disk_bytenr and num_bytes to cover just the bytes
* in this extent we are about to write. If there
* are any csums in that range we have to cow in order
* to keep the csums correct
*/
disk_bytenr += backref_offset;
disk_bytenr += offset - key.offset;
if (csum_exist_in_range(fs_info, disk_bytenr, num_bytes))
goto out;
/*
* all of the above have passed, it is safe to overwrite this extent
* without cow
*/
*len = num_bytes;
*len = nocow_args.num_bytes;
ret = 1;
out:
btrfs_free_path(path);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment