Commit 59641015 authored by Ilya Dryomov

Btrfs: recover balance on mount

On mount, if a balance item is found, resume the balance in a separate
kernel thread.

Try to be smart and continue roughly where the previous balance (or convert)
was interrupted.  For chunk types that were being converted to some
profile we turn on soft convert; in the case of a simple balance we turn on
the usage filter and relocate only less-than-90%-full chunks of that type.
These are just heuristics, but they help quite a bit and can be improved
in the future.
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
parent 0940ebf6
...@@ -2427,6 +2427,10 @@ struct btrfs_root *open_ctree(struct super_block *sb, ...@@ -2427,6 +2427,10 @@ struct btrfs_root *open_ctree(struct super_block *sb,
if (!err) if (!err)
err = btrfs_orphan_cleanup(fs_info->tree_root); err = btrfs_orphan_cleanup(fs_info->tree_root);
up_read(&fs_info->cleanup_work_sem); up_read(&fs_info->cleanup_work_sem);
if (!err)
err = btrfs_recover_balance(fs_info->tree_root);
if (err) { if (err) {
close_ctree(tree_root); close_ctree(tree_root);
return ERR_PTR(err); return ERR_PTR(err);
......
...@@ -23,6 +23,7 @@ ...@@ -23,6 +23,7 @@
#include <linux/random.h> #include <linux/random.h>
#include <linux/iocontext.h> #include <linux/iocontext.h>
#include <linux/capability.h> #include <linux/capability.h>
#include <linux/kthread.h>
#include <asm/div64.h> #include <asm/div64.h>
#include "compat.h" #include "compat.h"
#include "ctree.h" #include "ctree.h"
...@@ -2164,6 +2165,46 @@ static int del_balance_item(struct btrfs_root *root) ...@@ -2164,6 +2165,46 @@ static int del_balance_item(struct btrfs_root *root)
return ret; return ret;
} }
/*
 * Heuristic applied when an interrupted balance is resumed: tighten the
 * filters in @bctl so that fewer chunks have to be balanced again.
 *
 * Each of the data, system and metadata argument sets is handled the
 * same way:
 *
 *  - If that chunk type was being converted, soft mode is switched on
 *    and nothing else is touched.  A usage filter is deliberately not
 *    added here, since it would keep us from relocating chunks that are
 *    full but still unconverted.
 *
 *  - Otherwise, unless a usage filter is already present, one is added
 *    with usage set to 90.  The idea is that chunks which were already
 *    balanced before the interruption should be reasonably full.
 */
static void update_balance_args(struct btrfs_balance_control *bctl)
{
	/* data chunks */
	if (bctl->data.flags & BTRFS_BALANCE_ARGS_CONVERT) {
		bctl->data.flags |= BTRFS_BALANCE_ARGS_SOFT;
	} else if (!(bctl->data.flags & BTRFS_BALANCE_ARGS_USAGE)) {
		bctl->data.flags |= BTRFS_BALANCE_ARGS_USAGE;
		bctl->data.usage = 90;
	}

	/* system chunks */
	if (bctl->sys.flags & BTRFS_BALANCE_ARGS_CONVERT) {
		bctl->sys.flags |= BTRFS_BALANCE_ARGS_SOFT;
	} else if (!(bctl->sys.flags & BTRFS_BALANCE_ARGS_USAGE)) {
		bctl->sys.flags |= BTRFS_BALANCE_ARGS_USAGE;
		bctl->sys.usage = 90;
	}

	/* metadata chunks */
	if (bctl->meta.flags & BTRFS_BALANCE_ARGS_CONVERT) {
		bctl->meta.flags |= BTRFS_BALANCE_ARGS_SOFT;
	} else if (!(bctl->meta.flags & BTRFS_BALANCE_ARGS_USAGE)) {
		bctl->meta.flags |= BTRFS_BALANCE_ARGS_USAGE;
		bctl->meta.usage = 90;
	}
}
/* /*
* Should be called with both balance and volume mutexes held to * Should be called with both balance and volume mutexes held to
* serialize other volume operations (add_dev/rm_dev/resize) with * serialize other volume operations (add_dev/rm_dev/resize) with
...@@ -2626,10 +2667,18 @@ int btrfs_balance(struct btrfs_balance_control *bctl, ...@@ -2626,10 +2667,18 @@ int btrfs_balance(struct btrfs_balance_control *bctl,
do_balance: do_balance:
ret = insert_balance_item(fs_info->tree_root, bctl); ret = insert_balance_item(fs_info->tree_root, bctl);
if (ret) if (ret && ret != -EEXIST)
goto out; goto out;
if (!(bctl->flags & BTRFS_BALANCE_RESUME)) {
BUG_ON(ret == -EEXIST);
set_balance_control(bctl); set_balance_control(bctl);
} else {
BUG_ON(ret != -EEXIST);
spin_lock(&fs_info->balance_lock);
update_balance_args(bctl);
spin_unlock(&fs_info->balance_lock);
}
mutex_unlock(&fs_info->balance_mutex); mutex_unlock(&fs_info->balance_mutex);
...@@ -2646,10 +2695,92 @@ int btrfs_balance(struct btrfs_balance_control *bctl, ...@@ -2646,10 +2695,92 @@ int btrfs_balance(struct btrfs_balance_control *bctl,
return ret; return ret;
out: out:
if (bctl->flags & BTRFS_BALANCE_RESUME)
__cancel_balance(fs_info);
else
kfree(bctl); kfree(bctl);
return ret; return ret;
} }
/*
 * Thread function that resumes a balance.
 *
 * @data is the struct btrfs_balance_control handed to kthread_run().
 *
 * Takes volume_mutex and then balance_mutex, installs the control
 * structure with set_balance_control(), and runs btrfs_balance() on it.
 * Both mutexes are dropped again, in reverse order, before returning.
 *
 * Returns whatever btrfs_balance() returned.
 */
static int balance_kthread(void *data)
{
	struct btrfs_balance_control *bctl = data;
	struct btrfs_fs_info *fs_info = bctl->fs_info;
	int err;

	mutex_lock(&fs_info->volume_mutex);
	mutex_lock(&fs_info->balance_mutex);

	set_balance_control(bctl);

	printk(KERN_INFO "btrfs: continuing balance\n");
	err = btrfs_balance(bctl, NULL);

	mutex_unlock(&fs_info->balance_mutex);
	mutex_unlock(&fs_info->volume_mutex);

	return err;
}
/*
 * Look for a balance item in @tree_root and, if one exists, start a
 * "btrfs-balance" kernel thread to resume that balance.
 *
 * The flags and the data/meta/sys arguments are read from the item into
 * a newly allocated balance control, with BTRFS_BALANCE_RESUME added to
 * the flags.
 *
 * Returns 0 if there is no balance item or the thread was started,
 * -ENOMEM on allocation failure, or the error from btrfs_search_slot()
 * or kthread_run().
 */
int btrfs_recover_balance(struct btrfs_root *tree_root)
{
	struct btrfs_balance_control *bctl;
	struct btrfs_disk_balance_args disk_bargs;
	struct btrfs_balance_item *item;
	struct extent_buffer *leaf;
	struct task_struct *worker;
	struct btrfs_path *path;
	struct btrfs_key key;
	int ret;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	bctl = kzalloc(sizeof(*bctl), GFP_NOFS);
	if (!bctl) {
		ret = -ENOMEM;
		goto free_path;
	}

	key.objectid = BTRFS_BALANCE_OBJECTID;
	key.type = BTRFS_BALANCE_ITEM_KEY;
	key.offset = 0;

	ret = btrfs_search_slot(NULL, tree_root, &key, path, 0, 0);
	if (ret != 0) {
		/*
		 * A positive result means no balance item was found, which
		 * is not an error: there is simply nothing to resume.
		 * A negative result is passed up unchanged.
		 */
		if (ret > 0)
			ret = 0;
		goto free_bctl;
	}

	leaf = path->nodes[0];
	item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_balance_item);

	bctl->fs_info = tree_root->fs_info;
	bctl->flags = btrfs_balance_flags(leaf, item) | BTRFS_BALANCE_RESUME;

	/* Convert each on-disk argument set into its in-memory form. */
	btrfs_balance_data(leaf, item, &disk_bargs);
	btrfs_disk_balance_args_to_cpu(&bctl->data, &disk_bargs);
	btrfs_balance_meta(leaf, item, &disk_bargs);
	btrfs_disk_balance_args_to_cpu(&bctl->meta, &disk_bargs);
	btrfs_balance_sys(leaf, item, &disk_bargs);
	btrfs_disk_balance_args_to_cpu(&bctl->sys, &disk_bargs);

	worker = kthread_run(balance_kthread, bctl, "btrfs-balance");
	if (!IS_ERR(worker)) {
		/* bctl was handed to the thread, so it is not freed here. */
		goto free_path;
	}
	ret = PTR_ERR(worker);

free_bctl:
	kfree(bctl);
free_path:
	btrfs_free_path(path);
	return ret;
}
/* /*
* shrinking a device means finding all of the device extents past * shrinking a device means finding all of the device extents past
* the new size, and then following the back refs to the chunks. * the new size, and then following the back refs to the chunks.
......
...@@ -198,6 +198,7 @@ struct map_lookup { ...@@ -198,6 +198,7 @@ struct map_lookup {
BTRFS_BALANCE_METADATA) BTRFS_BALANCE_METADATA)
#define BTRFS_BALANCE_FORCE (1ULL << 3) #define BTRFS_BALANCE_FORCE (1ULL << 3)
#define BTRFS_BALANCE_RESUME (1ULL << 4)
/* /*
* Balance filters * Balance filters
...@@ -271,6 +272,7 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size); ...@@ -271,6 +272,7 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size);
int btrfs_init_new_device(struct btrfs_root *root, char *path); int btrfs_init_new_device(struct btrfs_root *root, char *path);
int btrfs_balance(struct btrfs_balance_control *bctl, int btrfs_balance(struct btrfs_balance_control *bctl,
struct btrfs_ioctl_balance_args *bargs); struct btrfs_ioctl_balance_args *bargs);
int btrfs_recover_balance(struct btrfs_root *tree_root);
int btrfs_chunk_readonly(struct btrfs_root *root, u64 chunk_offset); int btrfs_chunk_readonly(struct btrfs_root *root, u64 chunk_offset);
int find_free_dev_extent(struct btrfs_trans_handle *trans, int find_free_dev_extent(struct btrfs_trans_handle *trans,
struct btrfs_device *device, u64 num_bytes, struct btrfs_device *device, u64 num_bytes,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment