Commit 837d5b6e authored by Ilya Dryomov

Btrfs: allow for pausing restriper

Implement an ioctl for pausing restriper.  This pauses the relocation,
but balance is still considered to be "in progress": balance item is
not deleted, other volume operations cannot be started, etc.  If paused
in the middle of a profile-changing operation, we will continue making
allocations with the target profile.

Add a hook to close_ctree() to pause restriper and free its data
structures on unmount.  (It's safe to unmount when restriper is in
"paused" state; we will resume with the same parameters on the next
mount.)
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
parent 9555c6c1
...@@ -1214,7 +1214,10 @@ struct btrfs_fs_info { ...@@ -1214,7 +1214,10 @@ struct btrfs_fs_info {
/* restriper state */ /* restriper state */
spinlock_t balance_lock; spinlock_t balance_lock;
struct mutex balance_mutex; struct mutex balance_mutex;
atomic_t balance_running;
atomic_t balance_pause_req;
struct btrfs_balance_control *balance_ctl; struct btrfs_balance_control *balance_ctl;
wait_queue_head_t balance_wait_q;
unsigned data_chunk_allocations; unsigned data_chunk_allocations;
unsigned metadata_ratio; unsigned metadata_ratio;
...@@ -2658,6 +2661,7 @@ static inline int btrfs_fs_closing(struct btrfs_fs_info *fs_info) ...@@ -2658,6 +2661,7 @@ static inline int btrfs_fs_closing(struct btrfs_fs_info *fs_info)
} }
static inline void free_fs_info(struct btrfs_fs_info *fs_info) static inline void free_fs_info(struct btrfs_fs_info *fs_info)
{ {
kfree(fs_info->balance_ctl);
kfree(fs_info->delayed_root); kfree(fs_info->delayed_root);
kfree(fs_info->extent_root); kfree(fs_info->extent_root);
kfree(fs_info->tree_root); kfree(fs_info->tree_root);
......
...@@ -2004,7 +2004,10 @@ struct btrfs_root *open_ctree(struct super_block *sb, ...@@ -2004,7 +2004,10 @@ struct btrfs_root *open_ctree(struct super_block *sb,
spin_lock_init(&fs_info->balance_lock); spin_lock_init(&fs_info->balance_lock);
mutex_init(&fs_info->balance_mutex); mutex_init(&fs_info->balance_mutex);
atomic_set(&fs_info->balance_running, 0);
atomic_set(&fs_info->balance_pause_req, 0);
fs_info->balance_ctl = NULL; fs_info->balance_ctl = NULL;
init_waitqueue_head(&fs_info->balance_wait_q);
sb->s_blocksize = 4096; sb->s_blocksize = 4096;
sb->s_blocksize_bits = blksize_bits(4096); sb->s_blocksize_bits = blksize_bits(4096);
...@@ -2980,6 +2983,9 @@ int close_ctree(struct btrfs_root *root) ...@@ -2980,6 +2983,9 @@ int close_ctree(struct btrfs_root *root)
fs_info->closing = 1; fs_info->closing = 1;
smp_mb(); smp_mb();
/* pause restriper - we want to resume on mount */
btrfs_pause_balance(root->fs_info);
btrfs_scrub_cancel(root); btrfs_scrub_cancel(root);
/* wait for any defraggers to finish */ /* wait for any defraggers to finish */
......
...@@ -3072,6 +3072,11 @@ void update_ioctl_balance_args(struct btrfs_fs_info *fs_info, ...@@ -3072,6 +3072,11 @@ void update_ioctl_balance_args(struct btrfs_fs_info *fs_info,
bargs->flags = bctl->flags; bargs->flags = bctl->flags;
if (atomic_read(&fs_info->balance_running))
bargs->state |= BTRFS_BALANCE_STATE_RUNNING;
if (atomic_read(&fs_info->balance_pause_req))
bargs->state |= BTRFS_BALANCE_STATE_PAUSE_REQ;
memcpy(&bargs->data, &bctl->data, sizeof(bargs->data)); memcpy(&bargs->data, &bctl->data, sizeof(bargs->data));
memcpy(&bargs->meta, &bctl->meta, sizeof(bargs->meta)); memcpy(&bargs->meta, &bctl->meta, sizeof(bargs->meta));
memcpy(&bargs->sys, &bctl->sys, sizeof(bargs->sys)); memcpy(&bargs->sys, &bctl->sys, sizeof(bargs->sys));
...@@ -3103,6 +3108,11 @@ static long btrfs_ioctl_balance(struct btrfs_root *root, void __user *arg) ...@@ -3103,6 +3108,11 @@ static long btrfs_ioctl_balance(struct btrfs_root *root, void __user *arg)
bargs = NULL; bargs = NULL;
} }
if (fs_info->balance_ctl) {
ret = -EINPROGRESS;
goto out_bargs;
}
bctl = kzalloc(sizeof(*bctl), GFP_NOFS); bctl = kzalloc(sizeof(*bctl), GFP_NOFS);
if (!bctl) { if (!bctl) {
ret = -ENOMEM; ret = -ENOMEM;
...@@ -3123,7 +3133,8 @@ static long btrfs_ioctl_balance(struct btrfs_root *root, void __user *arg) ...@@ -3123,7 +3133,8 @@ static long btrfs_ioctl_balance(struct btrfs_root *root, void __user *arg)
ret = btrfs_balance(bctl, bargs); ret = btrfs_balance(bctl, bargs);
/* /*
* bctl is freed in __cancel_balance * bctl is freed in __cancel_balance or in free_fs_info if
* restriper was paused all the way until unmount
*/ */
if (arg) { if (arg) {
if (copy_to_user(arg, bargs, sizeof(*bargs))) if (copy_to_user(arg, bargs, sizeof(*bargs)))
...@@ -3138,6 +3149,19 @@ static long btrfs_ioctl_balance(struct btrfs_root *root, void __user *arg) ...@@ -3138,6 +3149,19 @@ static long btrfs_ioctl_balance(struct btrfs_root *root, void __user *arg)
return ret; return ret;
} }
/*
 * Handle a balance control request.
 *
 * @root: root whose fs_info carries the balance state
 * @cmd:  one of the BTRFS_BALANCE_CTL_* modes
 *
 * Returns -EPERM when the caller lacks CAP_SYS_ADMIN, the result of
 * btrfs_pause_balance() for BTRFS_BALANCE_CTL_PAUSE, and -EINVAL for
 * any other @cmd.
 */
static long btrfs_ioctl_balance_ctl(struct btrfs_root *root, int cmd)
{
	long err;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	switch (cmd) {
	case BTRFS_BALANCE_CTL_PAUSE:
		err = btrfs_pause_balance(root->fs_info);
		break;
	default:
		/* unknown control mode */
		err = -EINVAL;
		break;
	}

	return err;
}
long btrfs_ioctl(struct file *file, unsigned int long btrfs_ioctl(struct file *file, unsigned int
cmd, unsigned long arg) cmd, unsigned long arg)
{ {
...@@ -3216,6 +3240,8 @@ long btrfs_ioctl(struct file *file, unsigned int ...@@ -3216,6 +3240,8 @@ long btrfs_ioctl(struct file *file, unsigned int
return btrfs_ioctl_scrub_progress(root, argp); return btrfs_ioctl_scrub_progress(root, argp);
case BTRFS_IOC_BALANCE_V2: case BTRFS_IOC_BALANCE_V2:
return btrfs_ioctl_balance(root, argp); return btrfs_ioctl_balance(root, argp);
case BTRFS_IOC_BALANCE_CTL:
return btrfs_ioctl_balance_ctl(root, arg);
} }
return -ENOTTY; return -ENOTTY;
......
...@@ -109,6 +109,9 @@ struct btrfs_ioctl_fs_info_args { ...@@ -109,6 +109,9 @@ struct btrfs_ioctl_fs_info_args {
__u64 reserved[124]; /* pad to 1k */ __u64 reserved[124]; /* pad to 1k */
}; };
/* balance control ioctl modes */
#define BTRFS_BALANCE_CTL_PAUSE 1
/* /*
* this is packed, because it should be exactly the same as its disk * this is packed, because it should be exactly the same as its disk
* byte order counterpart (struct btrfs_disk_balance_args) * byte order counterpart (struct btrfs_disk_balance_args)
...@@ -137,6 +140,9 @@ struct btrfs_balance_progress { ...@@ -137,6 +140,9 @@ struct btrfs_balance_progress {
__u64 completed; /* # of chunks relocated so far */ __u64 completed; /* # of chunks relocated so far */
}; };
/*
 * bits OR-ed into btrfs_ioctl_balance_args.state by
 * update_ioctl_balance_args(): RUNNING when balance_running is nonzero,
 * PAUSE_REQ when balance_pause_req is nonzero
 */
#define BTRFS_BALANCE_STATE_RUNNING (1ULL << 0)
#define BTRFS_BALANCE_STATE_PAUSE_REQ (1ULL << 1)
struct btrfs_ioctl_balance_args { struct btrfs_ioctl_balance_args {
__u64 flags; /* in/out */ __u64 flags; /* in/out */
__u64 state; /* out */ __u64 state; /* out */
...@@ -315,6 +321,7 @@ struct btrfs_ioctl_logical_ino_args { ...@@ -315,6 +321,7 @@ struct btrfs_ioctl_logical_ino_args {
struct btrfs_ioctl_fs_info_args) struct btrfs_ioctl_fs_info_args)
#define BTRFS_IOC_BALANCE_V2 _IOWR(BTRFS_IOCTL_MAGIC, 32, \ #define BTRFS_IOC_BALANCE_V2 _IOWR(BTRFS_IOCTL_MAGIC, 32, \
struct btrfs_ioctl_balance_args) struct btrfs_ioctl_balance_args)
#define BTRFS_IOC_BALANCE_CTL _IOW(BTRFS_IOCTL_MAGIC, 33, int)
#define BTRFS_IOC_INO_PATHS _IOWR(BTRFS_IOCTL_MAGIC, 35, \ #define BTRFS_IOC_INO_PATHS _IOWR(BTRFS_IOCTL_MAGIC, 35, \
struct btrfs_ioctl_ino_path_args) struct btrfs_ioctl_ino_path_args)
#define BTRFS_IOC_LOGICAL_INO _IOWR(BTRFS_IOCTL_MAGIC, 36, \ #define BTRFS_IOC_LOGICAL_INO _IOWR(BTRFS_IOCTL_MAGIC, 36, \
......
...@@ -2492,6 +2492,11 @@ static int __btrfs_balance(struct btrfs_fs_info *fs_info) ...@@ -2492,6 +2492,11 @@ static int __btrfs_balance(struct btrfs_fs_info *fs_info)
key.type = BTRFS_CHUNK_ITEM_KEY; key.type = BTRFS_CHUNK_ITEM_KEY;
while (1) { while (1) {
if (atomic_read(&fs_info->balance_pause_req)) {
ret = -ECANCELED;
goto error;
}
ret = btrfs_search_slot(NULL, chunk_root, &key, path, 0, 0); ret = btrfs_search_slot(NULL, chunk_root, &key, path, 0, 0);
if (ret < 0) if (ret < 0)
goto error; goto error;
...@@ -2553,6 +2558,11 @@ static int __btrfs_balance(struct btrfs_fs_info *fs_info) ...@@ -2553,6 +2558,11 @@ static int __btrfs_balance(struct btrfs_fs_info *fs_info)
return ret; return ret;
} }
/*
 * Returns 1 when no pause request is pending (balance_pause_req reads
 * as zero) and 0 otherwise.  btrfs_balance() consults this to decide
 * whether to call __cancel_balance() once __btrfs_balance() returns.
 */
static inline int balance_need_close(struct btrfs_fs_info *fs_info)
{
	return !atomic_read(&fs_info->balance_pause_req);
}
static void __cancel_balance(struct btrfs_fs_info *fs_info) static void __cancel_balance(struct btrfs_fs_info *fs_info)
{ {
int ret; int ret;
...@@ -2575,7 +2585,8 @@ int btrfs_balance(struct btrfs_balance_control *bctl, ...@@ -2575,7 +2585,8 @@ int btrfs_balance(struct btrfs_balance_control *bctl,
u64 allowed; u64 allowed;
int ret; int ret;
if (btrfs_fs_closing(fs_info)) { if (btrfs_fs_closing(fs_info) ||
atomic_read(&fs_info->balance_pause_req)) {
ret = -EINVAL; ret = -EINVAL;
goto out; goto out;
} }
...@@ -2680,18 +2691,25 @@ int btrfs_balance(struct btrfs_balance_control *bctl, ...@@ -2680,18 +2691,25 @@ int btrfs_balance(struct btrfs_balance_control *bctl,
spin_unlock(&fs_info->balance_lock); spin_unlock(&fs_info->balance_lock);
} }
atomic_inc(&fs_info->balance_running);
mutex_unlock(&fs_info->balance_mutex); mutex_unlock(&fs_info->balance_mutex);
ret = __btrfs_balance(fs_info); ret = __btrfs_balance(fs_info);
mutex_lock(&fs_info->balance_mutex); mutex_lock(&fs_info->balance_mutex);
atomic_dec(&fs_info->balance_running);
if (bargs) { if (bargs) {
memset(bargs, 0, sizeof(*bargs)); memset(bargs, 0, sizeof(*bargs));
update_ioctl_balance_args(fs_info, bargs); update_ioctl_balance_args(fs_info, bargs);
} }
__cancel_balance(fs_info); if ((ret && ret != -ECANCELED && ret != -ENOSPC) ||
balance_need_close(fs_info)) {
__cancel_balance(fs_info);
}
wake_up(&fs_info->balance_wait_q);
return ret; return ret;
out: out:
...@@ -2785,6 +2803,35 @@ int btrfs_recover_balance(struct btrfs_root *tree_root) ...@@ -2785,6 +2803,35 @@ int btrfs_recover_balance(struct btrfs_root *tree_root)
return ret; return ret;
} }
/*
 * Ask a running balance to pause and wait until it has stopped.
 *
 * Returns 0 once a running balance has dropped balance_running back to
 * zero, or -ENOTCONN when there is no balance_ctl or the balance is not
 * currently running.
 */
int btrfs_pause_balance(struct btrfs_fs_info *fs_info)
{
	int err = -ENOTCONN;

	mutex_lock(&fs_info->balance_mutex);

	/* nothing to pause: no balance set up, or it is not running */
	if (!fs_info->balance_ctl ||
	    !atomic_read(&fs_info->balance_running))
		goto out_unlock;

	/* __btrfs_balance() polls this counter and bails out when it is set */
	atomic_inc(&fs_info->balance_pause_req);

	/*
	 * Drop the mutex while sleeping: btrfs_balance() retakes it before
	 * decrementing balance_running and waking balance_wait_q.
	 */
	mutex_unlock(&fs_info->balance_mutex);
	wait_event(fs_info->balance_wait_q,
		   atomic_read(&fs_info->balance_running) == 0);
	mutex_lock(&fs_info->balance_mutex);

	/* we are good with balance_ctl ripped off from under us */
	BUG_ON(atomic_read(&fs_info->balance_running));
	atomic_dec(&fs_info->balance_pause_req);
	err = 0;

out_unlock:
	mutex_unlock(&fs_info->balance_mutex);
	return err;
}
/* /*
* shrinking a device means finding all of the device extents past * shrinking a device means finding all of the device extents past
* the new size, and then following the back refs to the chunks. * the new size, and then following the back refs to the chunks.
......
...@@ -273,6 +273,7 @@ int btrfs_init_new_device(struct btrfs_root *root, char *path); ...@@ -273,6 +273,7 @@ int btrfs_init_new_device(struct btrfs_root *root, char *path);
int btrfs_balance(struct btrfs_balance_control *bctl, int btrfs_balance(struct btrfs_balance_control *bctl,
struct btrfs_ioctl_balance_args *bargs); struct btrfs_ioctl_balance_args *bargs);
int btrfs_recover_balance(struct btrfs_root *tree_root); int btrfs_recover_balance(struct btrfs_root *tree_root);
int btrfs_pause_balance(struct btrfs_fs_info *fs_info);
int btrfs_chunk_readonly(struct btrfs_root *root, u64 chunk_offset); int btrfs_chunk_readonly(struct btrfs_root *root, u64 chunk_offset);
int find_free_dev_extent(struct btrfs_trans_handle *trans, int find_free_dev_extent(struct btrfs_trans_handle *trans,
struct btrfs_device *device, u64 num_bytes, struct btrfs_device *device, u64 num_bytes,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment