Commit 21c7e756 authored by Miao Xie's avatar Miao Xie Committed by Chris Mason

Btrfs: reclaim the reserved metadata space at background

Before applying this patch, the task had to reclaim the metadata space
by itself if the metadata space was not enough. And When the task started
the space reclamation, all the other tasks which wanted to reserve the
metadata space were blocked. At some cases, they would be blocked for
a long time, it made the performance fluctuate wildly.

So we introduce the background metadata space reclamation, when the space
is about to be exhausted, we insert a reclaim work into the workqueue, the
worker of the workqueue helps us to reclaim the reserved space at the
background. By this way, the tasks needn't reclaim the space by themselves at
most cases, and even if the tasks have to reclaim the space or are blocked
for the space reclamation, they will get enough space more quickly.

Here is my test result(Tested by compilebench):
 Memory:	2GB
 CPU:		2Cores * 1CPU
 Partition:	40GB(SSD)

Test command:
 # compilebench -D <mnt> -m

Without this patch:
 intial create total runs 30 avg 54.36 MB/s (user 0.52s sys 2.44s)
 compile total runs 30 avg 123.72 MB/s (user 0.13s sys 1.17s)
 read compiled tree total runs 3 avg 81.15 MB/s (user 0.74s sys 4.89s)
 delete compiled tree total runs 30 avg 5.32 seconds (user 0.35s sys 4.37s)

With this patch:
 intial create total runs 30 avg 59.80 MB/s (user 0.52s sys 2.53s)
 compile total runs 30 avg 151.44 MB/s (user 0.13s sys 1.11s)
 read compiled tree total runs 3 avg 83.25 MB/s (user 0.76s sys 4.91s)
 delete compiled tree total runs 30 avg 5.29 seconds (user 0.34s sys 4.34s)
Signed-off-by: default avatarMiao Xie <miaox@cn.fujitsu.com>
Signed-off-by: default avatarChris Mason <clm@fb.com>
parent 32d6b47f
...@@ -33,6 +33,7 @@ ...@@ -33,6 +33,7 @@
#include <asm/kmap_types.h> #include <asm/kmap_types.h>
#include <linux/pagemap.h> #include <linux/pagemap.h>
#include <linux/btrfs.h> #include <linux/btrfs.h>
#include <linux/workqueue.h>
#include "extent_io.h" #include "extent_io.h"
#include "extent_map.h" #include "extent_map.h"
#include "async-thread.h" #include "async-thread.h"
...@@ -1322,6 +1323,8 @@ struct btrfs_stripe_hash_table { ...@@ -1322,6 +1323,8 @@ struct btrfs_stripe_hash_table {
#define BTRFS_STRIPE_HASH_TABLE_BITS 11 #define BTRFS_STRIPE_HASH_TABLE_BITS 11
void btrfs_init_async_reclaim_work(struct work_struct *work);
/* fs_info */ /* fs_info */
struct reloc_control; struct reloc_control;
struct btrfs_device; struct btrfs_device;
...@@ -1697,6 +1700,9 @@ struct btrfs_fs_info { ...@@ -1697,6 +1700,9 @@ struct btrfs_fs_info {
struct semaphore uuid_tree_rescan_sem; struct semaphore uuid_tree_rescan_sem;
unsigned int update_uuid_tree_gen:1; unsigned int update_uuid_tree_gen:1;
/* Used to reclaim the metadata space in the background. */
struct work_struct async_reclaim_work;
}; };
struct btrfs_subvolume_writers { struct btrfs_subvolume_writers {
......
...@@ -2291,6 +2291,7 @@ int open_ctree(struct super_block *sb, ...@@ -2291,6 +2291,7 @@ int open_ctree(struct super_block *sb,
atomic_set(&fs_info->balance_cancel_req, 0); atomic_set(&fs_info->balance_cancel_req, 0);
fs_info->balance_ctl = NULL; fs_info->balance_ctl = NULL;
init_waitqueue_head(&fs_info->balance_wait_q); init_waitqueue_head(&fs_info->balance_wait_q);
btrfs_init_async_reclaim_work(&fs_info->async_reclaim_work);
sb->s_blocksize = 4096; sb->s_blocksize = 4096;
sb->s_blocksize_bits = blksize_bits(4096); sb->s_blocksize_bits = blksize_bits(4096);
...@@ -3603,6 +3604,8 @@ int close_ctree(struct btrfs_root *root) ...@@ -3603,6 +3604,8 @@ int close_ctree(struct btrfs_root *root)
/* clear out the rbtree of defraggable inodes */ /* clear out the rbtree of defraggable inodes */
btrfs_cleanup_defrag_inodes(fs_info); btrfs_cleanup_defrag_inodes(fs_info);
cancel_work_sync(&fs_info->async_reclaim_work);
if (!(fs_info->sb->s_flags & MS_RDONLY)) { if (!(fs_info->sb->s_flags & MS_RDONLY)) {
ret = btrfs_commit_super(root); ret = btrfs_commit_super(root);
if (ret) if (ret)
......
...@@ -4204,6 +4204,104 @@ static int flush_space(struct btrfs_root *root, ...@@ -4204,6 +4204,104 @@ static int flush_space(struct btrfs_root *root,
return ret; return ret;
} }
static inline u64
btrfs_calc_reclaim_metadata_size(struct btrfs_root *root,
struct btrfs_space_info *space_info)
{
u64 used;
u64 expected;
u64 to_reclaim;
to_reclaim = min_t(u64, num_online_cpus() * 1024 * 1024,
16 * 1024 * 1024);
spin_lock(&space_info->lock);
if (can_overcommit(root, space_info, to_reclaim,
BTRFS_RESERVE_FLUSH_ALL)) {
to_reclaim = 0;
goto out;
}
used = space_info->bytes_used + space_info->bytes_reserved +
space_info->bytes_pinned + space_info->bytes_readonly +
space_info->bytes_may_use;
if (can_overcommit(root, space_info, 1024 * 1024,
BTRFS_RESERVE_FLUSH_ALL))
expected = div_factor_fine(space_info->total_bytes, 95);
else
expected = div_factor_fine(space_info->total_bytes, 90);
if (used > expected)
to_reclaim = used - expected;
else
to_reclaim = 0;
to_reclaim = min(to_reclaim, space_info->bytes_may_use +
space_info->bytes_reserved);
out:
spin_unlock(&space_info->lock);
return to_reclaim;
}
static inline int need_do_async_reclaim(struct btrfs_space_info *space_info,
struct btrfs_fs_info *fs_info, u64 used)
{
return (used >= div_factor_fine(space_info->total_bytes, 98) &&
!btrfs_fs_closing(fs_info) &&
!test_bit(BTRFS_FS_STATE_REMOUNTING, &fs_info->fs_state));
}
static int btrfs_need_do_async_reclaim(struct btrfs_space_info *space_info,
struct btrfs_fs_info *fs_info)
{
u64 used;
spin_lock(&space_info->lock);
used = space_info->bytes_used + space_info->bytes_reserved +
space_info->bytes_pinned + space_info->bytes_readonly +
space_info->bytes_may_use;
if (need_do_async_reclaim(space_info, fs_info, used)) {
spin_unlock(&space_info->lock);
return 1;
}
spin_unlock(&space_info->lock);
return 0;
}
static void btrfs_async_reclaim_metadata_space(struct work_struct *work)
{
struct btrfs_fs_info *fs_info;
struct btrfs_space_info *space_info;
u64 to_reclaim;
int flush_state;
fs_info = container_of(work, struct btrfs_fs_info, async_reclaim_work);
space_info = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA);
to_reclaim = btrfs_calc_reclaim_metadata_size(fs_info->fs_root,
space_info);
if (!to_reclaim)
return;
flush_state = FLUSH_DELAYED_ITEMS_NR;
do {
flush_space(fs_info->fs_root, space_info, to_reclaim,
to_reclaim, flush_state);
flush_state++;
if (!btrfs_need_do_async_reclaim(space_info, fs_info))
return;
} while (flush_state <= COMMIT_TRANS);
if (btrfs_need_do_async_reclaim(space_info, fs_info))
queue_work(system_unbound_wq, work);
}
void btrfs_init_async_reclaim_work(struct work_struct *work)
{
INIT_WORK(work, btrfs_async_reclaim_metadata_space);
}
/** /**
* reserve_metadata_bytes - try to reserve bytes from the block_rsv's space * reserve_metadata_bytes - try to reserve bytes from the block_rsv's space
* @root - the root we're allocating for * @root - the root we're allocating for
...@@ -4311,8 +4409,13 @@ static int reserve_metadata_bytes(struct btrfs_root *root, ...@@ -4311,8 +4409,13 @@ static int reserve_metadata_bytes(struct btrfs_root *root,
if (ret && flush != BTRFS_RESERVE_NO_FLUSH) { if (ret && flush != BTRFS_RESERVE_NO_FLUSH) {
flushing = true; flushing = true;
space_info->flush = 1; space_info->flush = 1;
} else if (!ret && space_info->flags & BTRFS_BLOCK_GROUP_METADATA) {
used += orig_bytes;
if (need_do_async_reclaim(space_info, root->fs_info, used) &&
!work_busy(&root->fs_info->async_reclaim_work))
queue_work(system_unbound_wq,
&root->fs_info->async_reclaim_work);
} }
spin_unlock(&space_info->lock); spin_unlock(&space_info->lock);
if (!ret || flush == BTRFS_RESERVE_NO_FLUSH) if (!ret || flush == BTRFS_RESERVE_NO_FLUSH)
......
...@@ -1413,6 +1413,7 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data) ...@@ -1413,6 +1413,7 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data)
* this also happens on 'umount -rf' or on shutdown, when * this also happens on 'umount -rf' or on shutdown, when
* the filesystem is busy. * the filesystem is busy.
*/ */
cancel_work_sync(&fs_info->async_reclaim_work);
/* wait for the uuid_scan task to finish */ /* wait for the uuid_scan task to finish */
down(&fs_info->uuid_tree_rescan_sem); down(&fs_info->uuid_tree_rescan_sem);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment