Commit c0419188 authored by Linus Torvalds

Merge tag 'for-5.17-rc5-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux

Pull btrfs fixes from David Sterba:
 "This is a hopefully last batch of fixes for defrag that got broken in
  5.16, all stable material.

  The remaining reported problem is excessive IO with autodefrag due to
  various conditions in the defrag code not met or missing"

* tag 'for-5.17-rc5-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux:
  btrfs: reduce extent threshold for autodefrag
  btrfs: autodefrag: only scan one inode once
  btrfs: defrag: don't use merged extent map for their generation check
  btrfs: defrag: bring back the old file extent search behavior
  btrfs: defrag: remove an ambiguous condition for rejection
  btrfs: defrag: don't defrag extents which are already at max capacity
  btrfs: defrag: don't try to merge regular extents with preallocated extents
  btrfs: defrag: allow defrag_one_cluster() to skip large extent which is not a target
  btrfs: prevent copying too big compressed lzo segment
parents ca745723 558732df
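
For orientation, the autodefrag path these fixes touch works roughly as follows (a simplified summary sketch, not a quote of the kernel code):

/*
 * Rough flow of btrfs autodefrag (illustrative summary only):
 *
 * 1. A small buffered write lands inside EOF.
 * 2. inode_should_defrag() queues the inode with btrfs_add_inode_defrag().
 * 3. The cleaner thread later runs btrfs_run_defrag_inodes(), which picks
 *    queued inodes and defrags them in BTRFS_DEFRAG_BATCH sized chunks
 *    via btrfs_defrag_file().
 *
 * The 5.16 defrag rework broke several of the conditions that keep step 3
 * cheap, which is the "excessive IO" mentioned in the pull message above.
 */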
@@ -3291,7 +3291,7 @@ void btrfs_exclop_balance(struct btrfs_fs_info *fs_info,
 int __init btrfs_auto_defrag_init(void);
 void __cold btrfs_auto_defrag_exit(void);
 int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans,
-			   struct btrfs_inode *inode);
+			   struct btrfs_inode *inode, u32 extent_thresh);
 int btrfs_run_defrag_inodes(struct btrfs_fs_info *fs_info);
 void btrfs_cleanup_defrag_inodes(struct btrfs_fs_info *fs_info);
 int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync);
...
@@ -261,6 +261,7 @@ static void try_merge_map(struct extent_map_tree *tree, struct extent_map *em)
 		em->mod_len = (em->mod_len + em->mod_start) - merge->mod_start;
 		em->mod_start = merge->mod_start;
 		em->generation = max(em->generation, merge->generation);
+		set_bit(EXTENT_FLAG_MERGED, &em->flags);

 		rb_erase_cached(&merge->rb_node, &tree->map);
 		RB_CLEAR_NODE(&merge->rb_node);
@@ -278,6 +279,7 @@ static void try_merge_map(struct extent_map_tree *tree, struct extent_map *em)
 		RB_CLEAR_NODE(&merge->rb_node);
 		em->mod_len = (merge->mod_start + merge->mod_len) - em->mod_start;
 		em->generation = max(em->generation, merge->generation);
+		set_bit(EXTENT_FLAG_MERGED, &em->flags);
 		free_extent_map(merge);
 	}
 }
...
@@ -25,6 +25,8 @@ enum {
 	EXTENT_FLAG_FILLING,
 	/* filesystem extent mapping type */
 	EXTENT_FLAG_FS_MAPPING,
+	/* This em is merged from two or more physically adjacent ems */
+	EXTENT_FLAG_MERGED,
 };

 struct extent_map {
@@ -40,6 +42,12 @@ struct extent_map {
 	u64 ram_bytes;
 	u64 block_start;
 	u64 block_len;
+
+	/*
+	 * Generation of the extent map, for merged em it's the highest
+	 * generation of all merged ems.
+	 * For non-merged extents, it's from btrfs_file_extent_item::generation.
+	 */
 	u64 generation;
 	unsigned long flags;
 	/* Used for chunk mappings, flag EXTENT_FLAG_FS_MAPPING must be set */
...
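
A concrete example of why EXTENT_FLAG_MERGED matters to defrag: if adjacent ems with generations 5 and 30 are merged, the merged em reports generation 30, so a bare `em->generation >= newer_than` test would treat the gen-5 half as recently written and rewrite data that does not need it. A minimal sketch of a flag-aware check follows; the helper name is hypothetical, and the actual fix in this series goes further by re-reading on-disk file extent items rather than trusting merged ems at all:

/*
 * Hypothetical sketch: a merged em carries the highest generation of all
 * of its parts, so its generation cannot prove that every byte it covers
 * was written after @newer_than.
 */
static bool em_newer_than(struct extent_map *em, u64 newer_than)
{
	if (test_bit(EXTENT_FLAG_MERGED, &em->flags))
		return false;	/* unreliable, must check on-disk items */
	return em->generation >= newer_than;
}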
@@ -50,11 +50,14 @@ struct inode_defrag {
 	/* root objectid */
 	u64 root;

-	/* last offset we were able to defrag */
-	u64 last_offset;
-
-	/* if we've wrapped around back to zero once already */
-	int cycled;
+	/*
+	 * The extent size threshold for autodefrag.
+	 *
+	 * This value is different for compressed/non-compressed extents,
+	 * thus needs to be passed from higher layer.
+	 * (aka, inode_should_defrag())
+	 */
+	u32 extent_thresh;
 };

 static int __compare_inode_defrag(struct inode_defrag *defrag1,
@@ -107,8 +110,8 @@ static int __btrfs_add_inode_defrag(struct btrfs_inode *inode,
 		 */
 		if (defrag->transid < entry->transid)
 			entry->transid = defrag->transid;
-		if (defrag->last_offset > entry->last_offset)
-			entry->last_offset = defrag->last_offset;
+		entry->extent_thresh = min(defrag->extent_thresh,
+					   entry->extent_thresh);
 		return -EEXIST;
 	}
 }
@@ -134,7 +137,7 @@ static inline int __need_auto_defrag(struct btrfs_fs_info *fs_info)
  * enabled
  */
 int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans,
-			   struct btrfs_inode *inode)
+			   struct btrfs_inode *inode, u32 extent_thresh)
 {
 	struct btrfs_root *root = inode->root;
 	struct btrfs_fs_info *fs_info = root->fs_info;
@@ -160,6 +163,7 @@ int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans,
 	defrag->ino = btrfs_ino(inode);
 	defrag->transid = transid;
 	defrag->root = root->root_key.objectid;
+	defrag->extent_thresh = extent_thresh;

 	spin_lock(&fs_info->defrag_inodes_lock);
 	if (!test_bit(BTRFS_INODE_IN_DEFRAG, &inode->runtime_flags)) {
@@ -178,34 +182,6 @@ int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans,
 	return 0;
 }

-/*
- * Requeue the defrag object. If there is a defrag object that points to
- * the same inode in the tree, we will merge them together (by
- * __btrfs_add_inode_defrag()) and free the one that we want to requeue.
- */
-static void btrfs_requeue_inode_defrag(struct btrfs_inode *inode,
-				       struct inode_defrag *defrag)
-{
-	struct btrfs_fs_info *fs_info = inode->root->fs_info;
-	int ret;
-
-	if (!__need_auto_defrag(fs_info))
-		goto out;
-
-	/*
-	 * Here we don't check the IN_DEFRAG flag, because we need merge
-	 * them together.
-	 */
-	spin_lock(&fs_info->defrag_inodes_lock);
-	ret = __btrfs_add_inode_defrag(inode, defrag);
-	spin_unlock(&fs_info->defrag_inodes_lock);
-	if (ret)
-		goto out;
-	return;
-out:
-	kmem_cache_free(btrfs_inode_defrag_cachep, defrag);
-}
-
 /*
  * pick the defragable inode that we want, if it doesn't exist, we will get
  * the next one.
@@ -278,8 +254,14 @@ static int __btrfs_run_defrag_inode(struct btrfs_fs_info *fs_info,
 	struct btrfs_root *inode_root;
 	struct inode *inode;
 	struct btrfs_ioctl_defrag_range_args range;
-	int num_defrag;
-	int ret;
+	int ret = 0;
+	u64 cur = 0;
+
+again:
+	if (test_bit(BTRFS_FS_STATE_REMOUNTING, &fs_info->fs_state))
+		goto cleanup;
+	if (!__need_auto_defrag(fs_info))
+		goto cleanup;

 	/* get the inode */
 	inode_root = btrfs_get_fs_root(fs_info, defrag->root, true);
@@ -295,39 +277,30 @@ static int __btrfs_run_defrag_inode(struct btrfs_fs_info *fs_info,
 		goto cleanup;
 	}

+	if (cur >= i_size_read(inode)) {
+		iput(inode);
+		goto cleanup;
+	}
+
 	/* do a chunk of defrag */
 	clear_bit(BTRFS_INODE_IN_DEFRAG, &BTRFS_I(inode)->runtime_flags);
 	memset(&range, 0, sizeof(range));
 	range.len = (u64)-1;
-	range.start = defrag->last_offset;
+	range.start = cur;
+	range.extent_thresh = defrag->extent_thresh;

 	sb_start_write(fs_info->sb);
-	num_defrag = btrfs_defrag_file(inode, NULL, &range, defrag->transid,
-				       BTRFS_DEFRAG_BATCH);
+	ret = btrfs_defrag_file(inode, NULL, &range, defrag->transid,
+				BTRFS_DEFRAG_BATCH);
 	sb_end_write(fs_info->sb);
-
-	/*
-	 * if we filled the whole defrag batch, there
-	 * must be more work to do.  Queue this defrag
-	 * again
-	 */
-	if (num_defrag == BTRFS_DEFRAG_BATCH) {
-		defrag->last_offset = range.start;
-		btrfs_requeue_inode_defrag(BTRFS_I(inode), defrag);
-	} else if (defrag->last_offset && !defrag->cycled) {
-		/*
-		 * we didn't fill our defrag batch, but
-		 * we didn't start at zero.  Make sure we loop
-		 * around to the start of the file.
-		 */
-		defrag->last_offset = 0;
-		defrag->cycled = 1;
-		btrfs_requeue_inode_defrag(BTRFS_I(inode), defrag);
-	} else {
-		kmem_cache_free(btrfs_inode_defrag_cachep, defrag);
-	}
-
 	iput(inode);
-	return 0;
+
+	if (ret < 0)
+		goto cleanup;
+
+	cur = max(cur + fs_info->sectorsize, range.start);
+	goto again;
+
 cleanup:
 	kmem_cache_free(btrfs_inode_defrag_cachep, defrag);
 	return ret;
...
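
Stripped of error handling, the rewritten __btrfs_run_defrag_inode() above is a single forward scan. A simplified sketch of its loop structure (illustrative only; `autodefrag_still_enabled` is a hypothetical stand-in for the remounting and __need_auto_defrag() checks):

/* Simplified shape of the rewritten scan loop (not the kernel function). */
u64 cur = 0;

while (autodefrag_still_enabled(fs_info) && cur < i_size_read(inode)) {
	memset(&range, 0, sizeof(range));
	range.len = (u64)-1;
	range.start = cur;
	range.extent_thresh = defrag->extent_thresh;

	ret = btrfs_defrag_file(inode, NULL, &range, defrag->transid,
				BTRFS_DEFRAG_BATCH);
	if (ret < 0)
		break;
	/*
	 * Advance by at least one sector so the loop always terminates,
	 * even when btrfs_defrag_file() found nothing to do.
	 */
	cur = max(cur + fs_info->sectorsize, range.start);
}

The behavioral change versus the old requeue/cycle logic: the inode is scanned at most once per queueing, front to back, instead of being requeued (and potentially wrapped back to offset zero) indefinitely, which was one source of the runaway IO.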
@@ -560,12 +560,12 @@ static inline int inode_need_compress(struct btrfs_inode *inode, u64 start,
 }

 static inline void inode_should_defrag(struct btrfs_inode *inode,
-		u64 start, u64 end, u64 num_bytes, u64 small_write)
+		u64 start, u64 end, u64 num_bytes, u32 small_write)
 {
 	/* If this is a small write inside eof, kick off a defrag */
 	if (num_bytes < small_write &&
 	    (start > 0 || end + 1 < inode->disk_i_size))
-		btrfs_add_inode_defrag(NULL, inode);
+		btrfs_add_inode_defrag(NULL, inode, small_write);
 }

 /*
...
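
With this change, small_write does double duty: it is both the "this write is small" cutoff and the extent size threshold stored in the queue entry, which is why inode_defrag now carries the threshold per entry instead of using one global value. The call sites pass different thresholds for compressed and regular writes, roughly as below (the exact constants are from memory of the 5.17-era callers and should be treated as illustrative):

/* Compressed writes produce smaller extents, so use a lower threshold. */
inode_should_defrag(inode, start, end, total_compressed, SZ_16K);

/* Regular COW writes: anything under 64K counts as a small write. */
inode_should_defrag(inode, start, end, num_bytes, SZ_64K);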
(One file's diff is collapsed in this view and not shown.)
@@ -380,6 +380,17 @@ int lzo_decompress_bio(struct list_head *ws, struct compressed_bio *cb)
 		kunmap(cur_page);
 		cur_in += LZO_LEN;

+		if (seg_len > lzo1x_worst_compress(PAGE_SIZE)) {
+			/*
+			 * seg_len shouldn't be larger than we have allocated
+			 * for workspace->cbuf
+			 */
+			btrfs_err(fs_info, "unexpectedly large lzo segment len %u",
+				  seg_len);
+			ret = -EIO;
+			goto out;
+		}
+
 		/* Copy the compressed segment payload into workspace */
 		copy_compressed_segment(cb, workspace->cbuf, seg_len, &cur_in);
...
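
The lzo fix above is the standard "validate a media-supplied length before copying" pattern: seg_len is read from disk, while workspace->cbuf was sized for at most lzo1x_worst_compress(PAGE_SIZE) bytes when the workspace was allocated, so any larger value signals corruption. The same pattern in a self-contained, hypothetical form (names invented for illustration, kernel-style types assumed):

/*
 * Hypothetical sketch: bound an untrusted on-media length field by the
 * destination buffer before memcpy(), returning -EIO on corruption.
 */
static int copy_bounded_segment(const u8 *media, size_t media_len,
				u8 *dst, size_t dst_len, u32 *out_len)
{
	u32 seg_len;

	if (media_len < sizeof(seg_len))
		return -EIO;
	memcpy(&seg_len, media, sizeof(seg_len));	/* untrusted length */
	if (seg_len > dst_len || seg_len > media_len - sizeof(seg_len))
		return -EIO;				/* corrupt or hostile */
	memcpy(dst, media + sizeof(seg_len), seg_len);
	*out_len = seg_len;
	return 0;
}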