Commit 8da8d884 authored by Linus Torvalds

Merge tag 'for-6.8-rc5-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux

Pull btrfs fixes from David Sterba:

 - Fix a deadlock in fiemap.

   There was a big lock around the whole operation that could interfere
   with a page fault and mkwrite.

   Reducing the lock scope can also speed up fiemap (a userspace sketch
   of the pattern follows this list).

 - Fix the range condition for extent defragmentation, which could lead
   to a worse layout in some cases (see the illustrative sketch after
   the defrag hunk below).
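
For context, a minimal userspace sketch of the pattern involved (an
illustration based on the description above, not a verified reproducer
taken from the patch): FS_IOC_FIEMAP is the entry point into
extent_fiemap(), and if the result buffer is mmap'ed from the same
file, the kernel's copy-out of each extent record can fault and
mkwrite the very file being mapped.

#include <stdio.h>
#include <string.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <linux/fs.h>
#include <linux/fiemap.h>

int main(int argc, char **argv)
{
	if (argc != 2) {
		fprintf(stderr, "usage: %s <file on btrfs, >= 4KiB>\n", argv[0]);
		return 1;
	}

	int fd = open(argv[1], O_RDWR);
	if (fd < 0) {
		perror("open");
		return 1;
	}

	/* Use a page of the file itself as the ioctl result buffer. */
	struct fiemap *fm = mmap(NULL, 4096, PROT_READ | PROT_WRITE,
				 MAP_SHARED, fd, 0);
	if (fm == MAP_FAILED) {
		perror("mmap");
		return 1;
	}

	memset(fm, 0, 4096);
	fm->fm_start = 0;
	fm->fm_length = FIEMAP_MAX_OFFSET;	/* map the whole file */
	fm->fm_extent_count = (4096 - sizeof(*fm)) /
			      sizeof(struct fiemap_extent);

	/*
	 * Filling fm->fm_extents writes to mmap'ed file pages, so the
	 * copy-out can fault and mkwrite the file while fiemap runs;
	 * with the old whole-range extent lock that is the deadlock
	 * shape the first fix addresses.
	 */
	if (ioctl(fd, FS_IOC_FIEMAP, fm) < 0)
		perror("FS_IOC_FIEMAP");
	else
		printf("%u extents mapped\n", fm->fm_mapped_extents);

	munmap(fm, 4096);
	close(fd);
	return 0;
}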

* tag 'for-6.8-rc5-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux:
  btrfs: fix deadlock with fiemap and extent locking
  btrfs: defrag: avoid unnecessary defrag caused by incorrect extent size
parents d8be5a55 b0ad381f
@@ -1046,7 +1046,7 @@ static int defrag_collect_targets(struct btrfs_inode *inode,
 			goto add;
 
 		/* Skip too large extent */
-		if (range_len >= extent_thresh)
+		if (em->len >= extent_thresh)
 			goto next;
 
 		/*
...
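
As a standalone illustration of the one-line defrag change above
(hypothetical numbers, userspace rework, not kernel code): range_len
is the extent's length clamped to the current defrag range, so a large
extent straddling the range boundary can appear small, dodge the
"skip too large extent" check, and get needlessly defragged, while
em->len is the extent's real size. Assuming the default 256KiB
threshold:

#include <stdint.h>
#include <stdio.h>

#define MIN(a, b) ((a) < (b) ? (a) : (b))

int main(void)
{
	const uint64_t extent_thresh = 256 * 1024; /* assumed default */
	const uint64_t em_start = 0;		   /* extent start */
	const uint64_t em_len = 1024 * 1024;	   /* a 1 MiB extent */
	const uint64_t cur = 0;			   /* current position */
	const uint64_t len = 128 * 1024;	   /* defrag only 128 KiB */

	/* Old check: extent length clamped to the defrag range. */
	const uint64_t range_len = MIN(em_start + em_len, cur + len) - cur;

	printf("range_len = %llu -> %s\n", (unsigned long long)range_len,
	       range_len >= extent_thresh ? "skip" : "defrag (bogus)");
	/* New check: the extent's true size, immune to range clamping. */
	printf("em->len   = %llu -> %s\n", (unsigned long long)em_len,
	       em_len >= extent_thresh ? "skip" : "defrag");
	return 0;
}

With the clamped value each pass could re-defragment an already large
extent at its range boundary; checking em->len makes the skip decision
independent of how the defrag range is sliced.
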
@@ -2689,16 +2689,34 @@ static int fiemap_process_hole(struct btrfs_inode *inode,
 	 * it beyond i_size.
 	 */
 	while (cur_offset < end && cur_offset < i_size) {
+		struct extent_state *cached_state = NULL;
 		u64 delalloc_start;
 		u64 delalloc_end;
 		u64 prealloc_start;
+		u64 lockstart;
+		u64 lockend;
 		u64 prealloc_len = 0;
 		bool delalloc;
 
+		lockstart = round_down(cur_offset, inode->root->fs_info->sectorsize);
+		lockend = round_up(end, inode->root->fs_info->sectorsize);
+
+		/*
+		 * We are only locking for the delalloc range because that's the
+		 * only thing that can change here. With fiemap we have a lock
+		 * on the inode, so no buffered or direct writes can happen.
+		 *
+		 * However mmaps and normal page writeback will cause this to
+		 * change arbitrarily. We have to lock the extent lock here to
+		 * make sure that nobody messes with the tree while we're doing
+		 * btrfs_find_delalloc_in_range.
+		 */
+		lock_extent(&inode->io_tree, lockstart, lockend, &cached_state);
 		delalloc = btrfs_find_delalloc_in_range(inode, cur_offset, end,
 							delalloc_cached_state,
 							&delalloc_start,
 							&delalloc_end);
+		unlock_extent(&inode->io_tree, lockstart, lockend, &cached_state);
 		if (!delalloc)
 			break;
 
@@ -2866,15 +2884,15 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo,
 		  u64 start, u64 len)
 {
 	const u64 ino = btrfs_ino(inode);
-	struct extent_state *cached_state = NULL;
 	struct extent_state *delalloc_cached_state = NULL;
 	struct btrfs_path *path;
 	struct fiemap_cache cache = { 0 };
 	struct btrfs_backref_share_check_ctx *backref_ctx;
 	u64 last_extent_end;
 	u64 prev_extent_end;
-	u64 lockstart;
-	u64 lockend;
+	u64 range_start;
+	u64 range_end;
+	const u64 sectorsize = inode->root->fs_info->sectorsize;
 	bool stopped = false;
 	int ret;
 
@@ -2885,12 +2903,11 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo,
 		goto out;
 	}
 
-	lockstart = round_down(start, inode->root->fs_info->sectorsize);
-	lockend = round_up(start + len, inode->root->fs_info->sectorsize);
-	prev_extent_end = lockstart;
+	range_start = round_down(start, sectorsize);
+	range_end = round_up(start + len, sectorsize);
+	prev_extent_end = range_start;
 
 	btrfs_inode_lock(inode, BTRFS_ILOCK_SHARED);
-	lock_extent(&inode->io_tree, lockstart, lockend, &cached_state);
 
 	ret = fiemap_find_last_extent_offset(inode, path, &last_extent_end);
 	if (ret < 0)
@@ -2898,7 +2915,7 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo,
 
 	btrfs_release_path(path);
 	path->reada = READA_FORWARD;
-	ret = fiemap_search_slot(inode, path, lockstart);
+	ret = fiemap_search_slot(inode, path, range_start);
 	if (ret < 0) {
 		goto out_unlock;
 	} else if (ret > 0) {
@@ -2910,7 +2927,7 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo,
 		goto check_eof_delalloc;
 	}
 
-	while (prev_extent_end < lockend) {
+	while (prev_extent_end < range_end) {
 		struct extent_buffer *leaf = path->nodes[0];
 		struct btrfs_file_extent_item *ei;
 		struct btrfs_key key;
@@ -2933,19 +2950,19 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo,
 		 * The first iteration can leave us at an extent item that ends
 		 * before our range's start. Move to the next item.
 		 */
-		if (extent_end <= lockstart)
+		if (extent_end <= range_start)
 			goto next_item;
 
 		backref_ctx->curr_leaf_bytenr = leaf->start;
 
 		/* We have in implicit hole (NO_HOLES feature enabled). */
 		if (prev_extent_end < key.offset) {
-			const u64 range_end = min(key.offset, lockend) - 1;
+			const u64 hole_end = min(key.offset, range_end) - 1;
 
 			ret = fiemap_process_hole(inode, fieinfo, &cache,
 						  &delalloc_cached_state,
 						  backref_ctx, 0, 0, 0,
-						  prev_extent_end, range_end);
+						  prev_extent_end, hole_end);
 			if (ret < 0) {
 				goto out_unlock;
 			} else if (ret > 0) {
@@ -2955,7 +2972,7 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo,
 		}
 
 		/* We've reached the end of the fiemap range, stop. */
-		if (key.offset >= lockend) {
+		if (key.offset >= range_end) {
 			stopped = true;
 			break;
 		}
@@ -3049,29 +3066,41 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo,
 	btrfs_free_path(path);
 	path = NULL;
 
-	if (!stopped && prev_extent_end < lockend) {
+	if (!stopped && prev_extent_end < range_end) {
 		ret = fiemap_process_hole(inode, fieinfo, &cache,
 					  &delalloc_cached_state, backref_ctx,
-					  0, 0, 0, prev_extent_end, lockend - 1);
+					  0, 0, 0, prev_extent_end, range_end - 1);
 		if (ret < 0)
 			goto out_unlock;
-		prev_extent_end = lockend;
+		prev_extent_end = range_end;
 	}
 
 	if (cache.cached && cache.offset + cache.len >= last_extent_end) {
 		const u64 i_size = i_size_read(&inode->vfs_inode);
 
 		if (prev_extent_end < i_size) {
+			struct extent_state *cached_state = NULL;
 			u64 delalloc_start;
 			u64 delalloc_end;
+			u64 lockstart;
+			u64 lockend;
 			bool delalloc;
 
+			lockstart = round_down(prev_extent_end, sectorsize);
+			lockend = round_up(i_size, sectorsize);
+
+			/*
+			 * See the comment in fiemap_process_hole as to why
+			 * we're doing the locking here.
+			 */
+			lock_extent(&inode->io_tree, lockstart, lockend, &cached_state);
 			delalloc = btrfs_find_delalloc_in_range(inode,
 								prev_extent_end,
 								i_size - 1,
 								&delalloc_cached_state,
 								&delalloc_start,
 								&delalloc_end);
+			unlock_extent(&inode->io_tree, lockstart, lockend, &cached_state);
 			if (!delalloc)
 				cache.flags |= FIEMAP_EXTENT_LAST;
 		} else {
@@ -3082,7 +3111,6 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo,
 	ret = emit_last_fiemap_cache(fieinfo, &cache);
 
 out_unlock:
-	unlock_extent(&inode->io_tree, lockstart, lockend, &cached_state);
 	btrfs_inode_unlock(inode, BTRFS_ILOCK_SHARED);
 out:
 	free_extent_state(delalloc_cached_state);
...
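
A side note on the lock boundaries: the patch consistently widens them
to whole sectors with round_down()/round_up(). A quick standalone
re-implementation of that arithmetic (userspace stand-ins, assuming
power-of-two alignments such as a 4096-byte sectorsize):

#include <stdint.h>
#include <stdio.h>

/* Userspace stand-ins for the kernel's round_down()/round_up(). */
#define ROUND_DOWN(x, a) ((x) & ~((uint64_t)(a) - 1))
#define ROUND_UP(x, a)	 (((x) + (a) - 1) & ~((uint64_t)(a) - 1))

int main(void)
{
	const uint64_t sectorsize = 4096;	/* common btrfs sectorsize */
	const uint64_t cur_offset = 5000;
	const uint64_t end = 9000;

	/* The locked range covers whole sectors around [cur_offset, end]. */
	printf("lockstart = %llu\n",
	       (unsigned long long)ROUND_DOWN(cur_offset, sectorsize)); /* 4096 */
	printf("lockend   = %llu\n",
	       (unsigned long long)ROUND_UP(end, sectorsize));		/* 12288 */
	return 0;
}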