Commit 578def7c authored by Filipe Manana

Btrfs: don't wait for unrelated IO to finish before relocation

Before the relocation process of a block group starts, it sets the block
group to readonly mode, then flushes all delalloc writes and finally waits
for all ordered extents to complete. This last step includes waiting for
ordered extents destined for extents allocated in other block groups,
which wastes time unnecessarily.

So improve this by waiting only for ordered extents that fall into the
block group's range.
Signed-off-by: Filipe Manana <fdmanana@suse.com>
Reviewed-by: Josef Bacik <jbacik@fb.com>
Reviewed-by: Liu Bo <bo.li.liu@oracle.com>
parent 3f9749f6
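For illustration, the core of the change is a range filter: an ordered extent whose disk range [start, start + disk_len) does not overlap the requested [range_start, range_start + range_len) is skipped instead of waited on. Below is a minimal standalone sketch of that overlap test (hypothetical helper name, not code from the patch itself):

#include <stdbool.h>
#include <stdint.h>

/* Illustrative only: half-open interval overlap test conceptually used by
 * the patch. Returns true when an ordered extent covering
 * [start, start + disk_len) intersects [range_start, range_start + range_len).
 */
static bool ordered_overlaps_range(uint64_t start, uint64_t disk_len,
                                   uint64_t range_start, uint64_t range_len)
{
        uint64_t range_end = range_start + range_len;

        /* No overlap if the extent ends at or before the range start,
         * or begins at or after the range end; otherwise it overlaps. */
        return !(range_end <= start || start + disk_len <= range_start);
}

In the patch, extents for which this test fails are moved to a local "skipped" list and spliced back afterwards, so only IO inside the requested range is waited for.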
@@ -403,7 +403,7 @@ int btrfs_dev_replace_start(struct btrfs_root *root,
 	if (ret)
 		btrfs_err(root->fs_info, "kobj add dev failed %d\n", ret);
-	btrfs_wait_ordered_roots(root->fs_info, -1);
+	btrfs_wait_ordered_roots(root->fs_info, -1, 0, (u64)-1);
 	/* force writing the updated state information to disk */
 	trans = btrfs_start_transaction(root, 0);
@@ -495,7 +495,7 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
 		mutex_unlock(&dev_replace->lock_finishing_cancel_unmount);
 		return ret;
 	}
-	btrfs_wait_ordered_roots(root->fs_info, -1);
+	btrfs_wait_ordered_roots(root->fs_info, -1, 0, (u64)-1);
 	trans = btrfs_start_transaction(root, 0);
 	if (IS_ERR(trans)) {
......
@@ -4141,7 +4141,7 @@ int btrfs_alloc_data_chunk_ondemand(struct inode *inode, u64 bytes)
 		if (need_commit > 0) {
 			btrfs_start_delalloc_roots(fs_info, 0, -1);
-			btrfs_wait_ordered_roots(fs_info, -1);
+			btrfs_wait_ordered_roots(fs_info, -1, 0, (u64)-1);
 		}
 		trans = btrfs_join_transaction(root);
@@ -4583,7 +4583,8 @@ static void btrfs_writeback_inodes_sb_nr(struct btrfs_root *root,
 		 */
 		btrfs_start_delalloc_roots(root->fs_info, 0, nr_items);
 		if (!current->journal_info)
-			btrfs_wait_ordered_roots(root->fs_info, nr_items);
+			btrfs_wait_ordered_roots(root->fs_info, nr_items,
+						 0, (u64)-1);
 	}
 }
@@ -4632,7 +4633,8 @@ static void shrink_delalloc(struct btrfs_root *root, u64 to_reclaim, u64 orig,
 		if (trans)
 			return;
 		if (wait_ordered)
-			btrfs_wait_ordered_roots(root->fs_info, items);
+			btrfs_wait_ordered_roots(root->fs_info, items,
+						 0, (u64)-1);
 		return;
 	}
@@ -4671,7 +4673,8 @@ static void shrink_delalloc(struct btrfs_root *root, u64 to_reclaim, u64 orig,
 		loops++;
 		if (wait_ordered && !trans) {
-			btrfs_wait_ordered_roots(root->fs_info, items);
+			btrfs_wait_ordered_roots(root->fs_info, items,
+						 0, (u64)-1);
 		} else {
 			time_left = schedule_timeout_killable(1);
 			if (time_left)
......
@@ -681,7 +681,7 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir,
 	if (ret)
 		goto dec_and_free;
-	btrfs_wait_ordered_extents(root, -1);
+	btrfs_wait_ordered_extents(root, -1, 0, (u64)-1);
 	btrfs_init_block_rsv(&pending_snapshot->block_rsv,
 			     BTRFS_BLOCK_RSV_TEMP);
......
@@ -661,14 +661,15 @@ static void btrfs_run_ordered_extent_work(struct btrfs_work *work)
  * wait for all the ordered extents in a root. This is done when balancing
  * space between drives.
  */
-int btrfs_wait_ordered_extents(struct btrfs_root *root, int nr)
+int btrfs_wait_ordered_extents(struct btrfs_root *root, int nr,
+			       const u64 range_start, const u64 range_len)
 {
-	struct list_head splice, works;
+	LIST_HEAD(splice);
+	LIST_HEAD(skipped);
+	LIST_HEAD(works);
 	struct btrfs_ordered_extent *ordered, *next;
 	int count = 0;
+	const u64 range_end = range_start + range_len;
-	INIT_LIST_HEAD(&splice);
-	INIT_LIST_HEAD(&works);
 	mutex_lock(&root->ordered_extent_mutex);
 	spin_lock(&root->ordered_extent_lock);
@@ -676,6 +677,14 @@ int btrfs_wait_ordered_extents(struct btrfs_root *root, int nr)
 	while (!list_empty(&splice) && nr) {
 		ordered = list_first_entry(&splice, struct btrfs_ordered_extent,
 					   root_extent_list);
+		if (range_end <= ordered->start ||
+		    ordered->start + ordered->disk_len <= range_start) {
+			list_move_tail(&ordered->root_extent_list, &skipped);
+			cond_resched_lock(&root->ordered_extent_lock);
+			continue;
+		}
 		list_move_tail(&ordered->root_extent_list,
 			       &root->ordered_extents);
 		atomic_inc(&ordered->refs);
@@ -694,6 +703,7 @@ int btrfs_wait_ordered_extents(struct btrfs_root *root, int nr)
 		nr--;
 		count++;
 	}
+	list_splice_tail(&skipped, &root->ordered_extents);
 	list_splice_tail(&splice, &root->ordered_extents);
 	spin_unlock(&root->ordered_extent_lock);
@@ -708,7 +718,8 @@ int btrfs_wait_ordered_extents(struct btrfs_root *root, int nr)
 	return count;
 }
-void btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, int nr)
+void btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, int nr,
+			      const u64 range_start, const u64 range_len)
 {
 	struct btrfs_root *root;
 	struct list_head splice;
@@ -728,7 +739,8 @@ void btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, int nr)
 			       &fs_info->ordered_roots);
 		spin_unlock(&fs_info->ordered_root_lock);
-		done = btrfs_wait_ordered_extents(root, nr);
+		done = btrfs_wait_ordered_extents(root, nr,
+						  range_start, range_len);
 		btrfs_put_fs_root(root);
 		spin_lock(&fs_info->ordered_root_lock);
......
@@ -197,8 +197,10 @@ int btrfs_ordered_update_i_size(struct inode *inode, u64 offset,
 			   struct btrfs_ordered_extent *ordered);
 int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr,
 			   u32 *sum, int len);
-int btrfs_wait_ordered_extents(struct btrfs_root *root, int nr);
-void btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, int nr);
+int btrfs_wait_ordered_extents(struct btrfs_root *root, int nr,
+			       const u64 range_start, const u64 range_len);
+void btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, int nr,
+			      const u64 range_start, const u64 range_len);
 void btrfs_get_logged_extents(struct inode *inode,
 			      struct list_head *logged_list,
 			      const loff_t start,
......
@@ -4259,7 +4259,9 @@ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start)
 		err = ret;
 		goto out;
 	}
-	btrfs_wait_ordered_roots(fs_info, -1);
+	btrfs_wait_ordered_roots(fs_info, -1,
+				 rc->block_group->key.objectid,
+				 rc->block_group->key.offset);
 	while (1) {
 		mutex_lock(&fs_info->cleaner_mutex);
......
@@ -1160,7 +1160,7 @@ int btrfs_sync_fs(struct super_block *sb, int wait)
 			return 0;
 		}
-	btrfs_wait_ordered_roots(fs_info, -1);
+	btrfs_wait_ordered_roots(fs_info, -1, 0, (u64)-1);
 	trans = btrfs_attach_transaction_barrier(root);
 	if (IS_ERR(trans)) {
......
@@ -1821,7 +1821,7 @@ static inline int btrfs_start_delalloc_flush(struct btrfs_fs_info *fs_info)
 static inline void btrfs_wait_delalloc_flush(struct btrfs_fs_info *fs_info)
 {
 	if (btrfs_test_opt(fs_info->tree_root, FLUSHONCOMMIT))
-		btrfs_wait_ordered_roots(fs_info, -1);
+		btrfs_wait_ordered_roots(fs_info, -1, 0, (u64)-1);
 }
 static inline void
......
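As a usage note (a sketch summarizing the calls in this patch, using the identifiers from the callers above): callers that still want to wait on all ordered extents pass the full range, while relocation now restricts the wait to the block group being relocated:

	/* wait for ordered extents anywhere (previous behaviour, e.g. the
	 * sync and transaction-commit paths above) */
	btrfs_wait_ordered_roots(fs_info, -1, 0, (u64)-1);

	/* wait only for ordered extents inside the block group being relocated */
	btrfs_wait_ordered_roots(fs_info, -1,
				 rc->block_group->key.objectid,
				 rc->block_group->key.offset);

A block group's key.objectid is its start offset and key.offset is its length, so the pair describes exactly the block group's byte range.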