Commit 27263e28 authored by Chris Mason's avatar Chris Mason

Merge branch 'restriper' of git://github.com/idryomov/btrfs-unstable into integration

parents 64e05503 19a39dce
......@@ -86,6 +86,9 @@ struct btrfs_ordered_sum;
/* holds checksums of all the data extents */
#define BTRFS_CSUM_TREE_OBJECTID 7ULL
/* for storing balance parameters in the root tree */
#define BTRFS_BALANCE_OBJECTID -4ULL
/* orhpan objectid for tracking unlinked/truncated files */
#define BTRFS_ORPHAN_OBJECTID -5ULL
......@@ -692,6 +695,54 @@ struct btrfs_root_ref {
__le16 name_len;
} __attribute__ ((__packed__));
struct btrfs_disk_balance_args {
/*
* profiles to operate on, single is denoted by
* BTRFS_AVAIL_ALLOC_BIT_SINGLE
*/
__le64 profiles;
/* usage filter */
__le64 usage;
/* devid filter */
__le64 devid;
/* devid subset filter [pstart..pend) */
__le64 pstart;
__le64 pend;
/* btrfs virtual address space subset filter [vstart..vend) */
__le64 vstart;
__le64 vend;
/*
* profile to convert to, single is denoted by
* BTRFS_AVAIL_ALLOC_BIT_SINGLE
*/
__le64 target;
/* BTRFS_BALANCE_ARGS_* */
__le64 flags;
__le64 unused[8];
} __attribute__ ((__packed__));
/*
* store balance parameters to disk so that balance can be properly
* resumed after crash or unmount
*/
struct btrfs_balance_item {
/* BTRFS_BALANCE_* */
__le64 flags;
struct btrfs_disk_balance_args data;
struct btrfs_disk_balance_args meta;
struct btrfs_disk_balance_args sys;
__le64 unused[4];
} __attribute__ ((__packed__));
#define BTRFS_FILE_EXTENT_INLINE 0
#define BTRFS_FILE_EXTENT_REG 1
#define BTRFS_FILE_EXTENT_PREALLOC 2
......@@ -751,15 +802,33 @@ struct btrfs_csum_item {
} __attribute__ ((__packed__));
/* different types of block groups (and chunks) */
#define BTRFS_BLOCK_GROUP_DATA (1 << 0)
#define BTRFS_BLOCK_GROUP_SYSTEM (1 << 1)
#define BTRFS_BLOCK_GROUP_METADATA (1 << 2)
#define BTRFS_BLOCK_GROUP_RAID0 (1 << 3)
#define BTRFS_BLOCK_GROUP_RAID1 (1 << 4)
#define BTRFS_BLOCK_GROUP_DUP (1 << 5)
#define BTRFS_BLOCK_GROUP_RAID10 (1 << 6)
#define BTRFS_BLOCK_GROUP_DATA (1ULL << 0)
#define BTRFS_BLOCK_GROUP_SYSTEM (1ULL << 1)
#define BTRFS_BLOCK_GROUP_METADATA (1ULL << 2)
#define BTRFS_BLOCK_GROUP_RAID0 (1ULL << 3)
#define BTRFS_BLOCK_GROUP_RAID1 (1ULL << 4)
#define BTRFS_BLOCK_GROUP_DUP (1ULL << 5)
#define BTRFS_BLOCK_GROUP_RAID10 (1ULL << 6)
#define BTRFS_BLOCK_GROUP_RESERVED BTRFS_AVAIL_ALLOC_BIT_SINGLE
#define BTRFS_NR_RAID_TYPES 5
#define BTRFS_BLOCK_GROUP_TYPE_MASK (BTRFS_BLOCK_GROUP_DATA | \
BTRFS_BLOCK_GROUP_SYSTEM | \
BTRFS_BLOCK_GROUP_METADATA)
#define BTRFS_BLOCK_GROUP_PROFILE_MASK (BTRFS_BLOCK_GROUP_RAID0 | \
BTRFS_BLOCK_GROUP_RAID1 | \
BTRFS_BLOCK_GROUP_DUP | \
BTRFS_BLOCK_GROUP_RAID10)
/*
* We need a bit for restriper to be able to tell when chunks of type
* SINGLE are available. This "extended" profile format is used in
* fs_info->avail_*_alloc_bits (in-memory) and balance item fields
* (on-disk). The corresponding on-disk bit in chunk.type is reserved
* to avoid remappings between two formats in future.
*/
#define BTRFS_AVAIL_ALLOC_BIT_SINGLE (1ULL << 48)
struct btrfs_block_group_item {
__le64 used;
__le64 chunk_objectid;
......@@ -916,6 +985,7 @@ struct btrfs_block_group_cache {
struct reloc_control;
struct btrfs_device;
struct btrfs_fs_devices;
struct btrfs_balance_control;
struct btrfs_delayed_root;
struct btrfs_fs_info {
u8 fsid[BTRFS_FSID_SIZE];
......@@ -1132,12 +1202,23 @@ struct btrfs_fs_info {
spinlock_t ref_cache_lock;
u64 total_ref_cache_size;
/*
* these three are in extended format (availability of single
* chunks is denoted by BTRFS_AVAIL_ALLOC_BIT_SINGLE bit, other
* types are denoted by corresponding BTRFS_BLOCK_GROUP_* bits)
*/
u64 avail_data_alloc_bits;
u64 avail_metadata_alloc_bits;
u64 avail_system_alloc_bits;
u64 data_alloc_profile;
u64 metadata_alloc_profile;
u64 system_alloc_profile;
/* restriper state */
spinlock_t balance_lock;
struct mutex balance_mutex;
atomic_t balance_running;
atomic_t balance_pause_req;
atomic_t balance_cancel_req;
struct btrfs_balance_control *balance_ctl;
wait_queue_head_t balance_wait_q;
unsigned data_chunk_allocations;
unsigned metadata_ratio;
......@@ -1383,6 +1464,8 @@ struct btrfs_ioctl_defrag_range_args {
#define BTRFS_DEV_ITEM_KEY 216
#define BTRFS_CHUNK_ITEM_KEY 228
#define BTRFS_BALANCE_ITEM_KEY 248
/*
* string items are for debugging. They just store a short string of
* data in the FS
......@@ -1413,6 +1496,7 @@ struct btrfs_ioctl_defrag_range_args {
#define BTRFS_MOUNT_AUTO_DEFRAG (1 << 16)
#define BTRFS_MOUNT_INODE_MAP_CACHE (1 << 17)
#define BTRFS_MOUNT_RECOVERY (1 << 18)
#define BTRFS_MOUNT_SKIP_BALANCE (1 << 19)
#define btrfs_clear_opt(o, opt) ((o) &= ~BTRFS_MOUNT_##opt)
#define btrfs_set_opt(o, opt) ((o) |= BTRFS_MOUNT_##opt)
......@@ -2077,8 +2161,86 @@ BTRFS_SETGET_STACK_FUNCS(backup_bytes_used, struct btrfs_root_backup,
BTRFS_SETGET_STACK_FUNCS(backup_num_devices, struct btrfs_root_backup,
num_devices, 64);
/* struct btrfs_super_block */
/* struct btrfs_balance_item */
BTRFS_SETGET_FUNCS(balance_flags, struct btrfs_balance_item, flags, 64);
static inline void btrfs_balance_data(struct extent_buffer *eb,
struct btrfs_balance_item *bi,
struct btrfs_disk_balance_args *ba)
{
read_eb_member(eb, bi, struct btrfs_balance_item, data, ba);
}
static inline void btrfs_set_balance_data(struct extent_buffer *eb,
struct btrfs_balance_item *bi,
struct btrfs_disk_balance_args *ba)
{
write_eb_member(eb, bi, struct btrfs_balance_item, data, ba);
}
static inline void btrfs_balance_meta(struct extent_buffer *eb,
struct btrfs_balance_item *bi,
struct btrfs_disk_balance_args *ba)
{
read_eb_member(eb, bi, struct btrfs_balance_item, meta, ba);
}
static inline void btrfs_set_balance_meta(struct extent_buffer *eb,
struct btrfs_balance_item *bi,
struct btrfs_disk_balance_args *ba)
{
write_eb_member(eb, bi, struct btrfs_balance_item, meta, ba);
}
static inline void btrfs_balance_sys(struct extent_buffer *eb,
struct btrfs_balance_item *bi,
struct btrfs_disk_balance_args *ba)
{
read_eb_member(eb, bi, struct btrfs_balance_item, sys, ba);
}
static inline void btrfs_set_balance_sys(struct extent_buffer *eb,
struct btrfs_balance_item *bi,
struct btrfs_disk_balance_args *ba)
{
write_eb_member(eb, bi, struct btrfs_balance_item, sys, ba);
}
static inline void
btrfs_disk_balance_args_to_cpu(struct btrfs_balance_args *cpu,
struct btrfs_disk_balance_args *disk)
{
memset(cpu, 0, sizeof(*cpu));
cpu->profiles = le64_to_cpu(disk->profiles);
cpu->usage = le64_to_cpu(disk->usage);
cpu->devid = le64_to_cpu(disk->devid);
cpu->pstart = le64_to_cpu(disk->pstart);
cpu->pend = le64_to_cpu(disk->pend);
cpu->vstart = le64_to_cpu(disk->vstart);
cpu->vend = le64_to_cpu(disk->vend);
cpu->target = le64_to_cpu(disk->target);
cpu->flags = le64_to_cpu(disk->flags);
}
static inline void
btrfs_cpu_balance_args_to_disk(struct btrfs_disk_balance_args *disk,
struct btrfs_balance_args *cpu)
{
memset(disk, 0, sizeof(*disk));
disk->profiles = cpu_to_le64(cpu->profiles);
disk->usage = cpu_to_le64(cpu->usage);
disk->devid = cpu_to_le64(cpu->devid);
disk->pstart = cpu_to_le64(cpu->pstart);
disk->pend = cpu_to_le64(cpu->pend);
disk->vstart = cpu_to_le64(cpu->vstart);
disk->vend = cpu_to_le64(cpu->vend);
disk->target = cpu_to_le64(cpu->target);
disk->flags = cpu_to_le64(cpu->flags);
}
/* struct btrfs_super_block */
BTRFS_SETGET_STACK_FUNCS(super_bytenr, struct btrfs_super_block, bytenr, 64);
BTRFS_SETGET_STACK_FUNCS(super_flags, struct btrfs_super_block, flags, 64);
BTRFS_SETGET_STACK_FUNCS(super_generation, struct btrfs_super_block,
......@@ -2500,6 +2662,7 @@ static inline int btrfs_fs_closing(struct btrfs_fs_info *fs_info)
}
static inline void free_fs_info(struct btrfs_fs_info *fs_info)
{
kfree(fs_info->balance_ctl);
kfree(fs_info->delayed_root);
kfree(fs_info->extent_root);
kfree(fs_info->tree_root);
......@@ -2510,6 +2673,24 @@ static inline void free_fs_info(struct btrfs_fs_info *fs_info)
kfree(fs_info->super_for_commit);
kfree(fs_info);
}
/**
* profile_is_valid - tests whether a given profile is valid and reduced
* @flags: profile to validate
* @extended: if true @flags is treated as an extended profile
*/
static inline int profile_is_valid(u64 flags, int extended)
{
u64 mask = ~BTRFS_BLOCK_GROUP_PROFILE_MASK;
flags &= ~BTRFS_BLOCK_GROUP_TYPE_MASK;
if (extended)
mask &= ~BTRFS_AVAIL_ALLOC_BIT_SINGLE;
if (flags & mask)
return 0;
/* true if zero or exactly one bit set */
return (flags & (~flags + 1)) == flags;
}
/* root-item.c */
int btrfs_find_root_ref(struct btrfs_root *tree_root,
......
......@@ -2002,6 +2002,14 @@ struct btrfs_root *open_ctree(struct super_block *sb,
init_rwsem(&fs_info->scrub_super_lock);
fs_info->scrub_workers_refcnt = 0;
spin_lock_init(&fs_info->balance_lock);
mutex_init(&fs_info->balance_mutex);
atomic_set(&fs_info->balance_running, 0);
atomic_set(&fs_info->balance_pause_req, 0);
atomic_set(&fs_info->balance_cancel_req, 0);
fs_info->balance_ctl = NULL;
init_waitqueue_head(&fs_info->balance_wait_q);
sb->s_blocksize = 4096;
sb->s_blocksize_bits = blksize_bits(4096);
sb->s_bdi = &fs_info->bdi;
......@@ -2321,9 +2329,6 @@ struct btrfs_root *open_ctree(struct super_block *sb,
fs_info->generation = generation;
fs_info->last_trans_committed = generation;
fs_info->data_alloc_profile = (u64)-1;
fs_info->metadata_alloc_profile = (u64)-1;
fs_info->system_alloc_profile = fs_info->metadata_alloc_profile;
ret = btrfs_init_space_info(fs_info);
if (ret) {
......@@ -2426,6 +2431,10 @@ struct btrfs_root *open_ctree(struct super_block *sb,
if (!err)
err = btrfs_orphan_cleanup(fs_info->tree_root);
up_read(&fs_info->cleanup_work_sem);
if (!err)
err = btrfs_recover_balance(fs_info->tree_root);
if (err) {
close_ctree(tree_root);
return ERR_PTR(err);
......@@ -2975,6 +2984,9 @@ int close_ctree(struct btrfs_root *root)
fs_info->closing = 1;
smp_mb();
/* pause restriper - we want to resume on mount */
btrfs_pause_balance(root->fs_info);
btrfs_scrub_cancel(root);
/* wait for any defraggers to finish */
......
......@@ -618,8 +618,7 @@ static struct btrfs_space_info *__find_space_info(struct btrfs_fs_info *info,
struct list_head *head = &info->space_info;
struct btrfs_space_info *found;
flags &= BTRFS_BLOCK_GROUP_DATA | BTRFS_BLOCK_GROUP_SYSTEM |
BTRFS_BLOCK_GROUP_METADATA;
flags &= BTRFS_BLOCK_GROUP_TYPE_MASK;
rcu_read_lock();
list_for_each_entry_rcu(found, head, list) {
......@@ -2999,9 +2998,7 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags,
INIT_LIST_HEAD(&found->block_groups[i]);
init_rwsem(&found->groups_sem);
spin_lock_init(&found->lock);
found->flags = flags & (BTRFS_BLOCK_GROUP_DATA |
BTRFS_BLOCK_GROUP_SYSTEM |
BTRFS_BLOCK_GROUP_METADATA);
found->flags = flags & BTRFS_BLOCK_GROUP_TYPE_MASK;
found->total_bytes = total_bytes;
found->disk_total = total_bytes * factor;
found->bytes_used = bytes_used;
......@@ -3022,20 +3019,27 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags,
static void set_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags)
{
u64 extra_flags = flags & (BTRFS_BLOCK_GROUP_RAID0 |
BTRFS_BLOCK_GROUP_RAID1 |
BTRFS_BLOCK_GROUP_RAID10 |
BTRFS_BLOCK_GROUP_DUP);
if (extra_flags) {
u64 extra_flags = flags & BTRFS_BLOCK_GROUP_PROFILE_MASK;
/* chunk -> extended profile */
if (extra_flags == 0)
extra_flags = BTRFS_AVAIL_ALLOC_BIT_SINGLE;
if (flags & BTRFS_BLOCK_GROUP_DATA)
fs_info->avail_data_alloc_bits |= extra_flags;
if (flags & BTRFS_BLOCK_GROUP_METADATA)
fs_info->avail_metadata_alloc_bits |= extra_flags;
if (flags & BTRFS_BLOCK_GROUP_SYSTEM)
fs_info->avail_system_alloc_bits |= extra_flags;
}
}
/*
* @flags: available profiles in extended format (see ctree.h)
*
* Returns reduced profile in chunk format. If profile changing is in
* progress (either running or paused) picks the target profile (if it's
* already available), otherwise falls back to plain reducing.
*/
u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags)
{
/*
......@@ -3046,6 +3050,34 @@ u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags)
u64 num_devices = root->fs_info->fs_devices->rw_devices +
root->fs_info->fs_devices->missing_devices;
/* pick restriper's target profile if it's available */
spin_lock(&root->fs_info->balance_lock);
if (root->fs_info->balance_ctl) {
struct btrfs_balance_control *bctl = root->fs_info->balance_ctl;
u64 tgt = 0;
if ((flags & BTRFS_BLOCK_GROUP_DATA) &&
(bctl->data.flags & BTRFS_BALANCE_ARGS_CONVERT) &&
(flags & bctl->data.target)) {
tgt = BTRFS_BLOCK_GROUP_DATA | bctl->data.target;
} else if ((flags & BTRFS_BLOCK_GROUP_SYSTEM) &&
(bctl->sys.flags & BTRFS_BALANCE_ARGS_CONVERT) &&
(flags & bctl->sys.target)) {
tgt = BTRFS_BLOCK_GROUP_SYSTEM | bctl->sys.target;
} else if ((flags & BTRFS_BLOCK_GROUP_METADATA) &&
(bctl->meta.flags & BTRFS_BALANCE_ARGS_CONVERT) &&
(flags & bctl->meta.target)) {
tgt = BTRFS_BLOCK_GROUP_METADATA | bctl->meta.target;
}
if (tgt) {
spin_unlock(&root->fs_info->balance_lock);
flags = tgt;
goto out;
}
}
spin_unlock(&root->fs_info->balance_lock);
if (num_devices == 1)
flags &= ~(BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID0);
if (num_devices < 4)
......@@ -3065,22 +3097,25 @@ u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags)
if ((flags & BTRFS_BLOCK_GROUP_RAID0) &&
((flags & BTRFS_BLOCK_GROUP_RAID1) |
(flags & BTRFS_BLOCK_GROUP_RAID10) |
(flags & BTRFS_BLOCK_GROUP_DUP)))
(flags & BTRFS_BLOCK_GROUP_DUP))) {
flags &= ~BTRFS_BLOCK_GROUP_RAID0;
}
out:
/* extended -> chunk profile */
flags &= ~BTRFS_AVAIL_ALLOC_BIT_SINGLE;
return flags;
}
static u64 get_alloc_profile(struct btrfs_root *root, u64 flags)
{
if (flags & BTRFS_BLOCK_GROUP_DATA)
flags |= root->fs_info->avail_data_alloc_bits &
root->fs_info->data_alloc_profile;
flags |= root->fs_info->avail_data_alloc_bits;
else if (flags & BTRFS_BLOCK_GROUP_SYSTEM)
flags |= root->fs_info->avail_system_alloc_bits &
root->fs_info->system_alloc_profile;
flags |= root->fs_info->avail_system_alloc_bits;
else if (flags & BTRFS_BLOCK_GROUP_METADATA)
flags |= root->fs_info->avail_metadata_alloc_bits &
root->fs_info->metadata_alloc_profile;
flags |= root->fs_info->avail_metadata_alloc_bits;
return btrfs_reduce_alloc_profile(root, flags);
}
......@@ -3282,7 +3317,7 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans,
int wait_for_alloc = 0;
int ret = 0;
flags = btrfs_reduce_alloc_profile(extent_root, flags);
BUG_ON(!profile_is_valid(flags, 0));
space_info = __find_space_info(extent_root->fs_info, flags);
if (!space_info) {
......@@ -6792,6 +6827,29 @@ static u64 update_block_group_flags(struct btrfs_root *root, u64 flags)
u64 stripped = BTRFS_BLOCK_GROUP_RAID0 |
BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID10;
if (root->fs_info->balance_ctl) {
struct btrfs_balance_control *bctl = root->fs_info->balance_ctl;
u64 tgt = 0;
/* pick restriper's target profile and return */
if (flags & BTRFS_BLOCK_GROUP_DATA &&
bctl->data.flags & BTRFS_BALANCE_ARGS_CONVERT) {
tgt = BTRFS_BLOCK_GROUP_DATA | bctl->data.target;
} else if (flags & BTRFS_BLOCK_GROUP_SYSTEM &&
bctl->sys.flags & BTRFS_BALANCE_ARGS_CONVERT) {
tgt = BTRFS_BLOCK_GROUP_SYSTEM | bctl->sys.target;
} else if (flags & BTRFS_BLOCK_GROUP_METADATA &&
bctl->meta.flags & BTRFS_BALANCE_ARGS_CONVERT) {
tgt = BTRFS_BLOCK_GROUP_METADATA | bctl->meta.target;
}
if (tgt) {
/* extended -> chunk profile */
tgt &= ~BTRFS_AVAIL_ALLOC_BIT_SINGLE;
return tgt;
}
}
/*
* we add in the count of missing devices because we want
* to make sure that any RAID levels on a degraded FS
......@@ -7466,6 +7524,22 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans,
return 0;
}
static void clear_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags)
{
u64 extra_flags = flags & BTRFS_BLOCK_GROUP_PROFILE_MASK;
/* chunk -> extended profile */
if (extra_flags == 0)
extra_flags = BTRFS_AVAIL_ALLOC_BIT_SINGLE;
if (flags & BTRFS_BLOCK_GROUP_DATA)
fs_info->avail_data_alloc_bits &= ~extra_flags;
if (flags & BTRFS_BLOCK_GROUP_METADATA)
fs_info->avail_metadata_alloc_bits &= ~extra_flags;
if (flags & BTRFS_BLOCK_GROUP_SYSTEM)
fs_info->avail_system_alloc_bits &= ~extra_flags;
}
int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
struct btrfs_root *root, u64 group_start)
{
......@@ -7476,6 +7550,7 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
struct btrfs_key key;
struct inode *inode;
int ret;
int index;
int factor;
root = root->fs_info->extent_root;
......@@ -7491,6 +7566,7 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
free_excluded_extents(root, block_group);
memcpy(&key, &block_group->key, sizeof(key));
index = get_block_group_index(block_group);
if (block_group->flags & (BTRFS_BLOCK_GROUP_DUP |
BTRFS_BLOCK_GROUP_RAID1 |
BTRFS_BLOCK_GROUP_RAID10))
......@@ -7565,6 +7641,8 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
* are still on the list after taking the semaphore
*/
list_del_init(&block_group->list);
if (list_empty(&block_group->space_info->block_groups[index]))
clear_avail_alloc_bits(root->fs_info, block_group->flags);
up_write(&block_group->space_info->groups_sem);
if (block_group->cached == BTRFS_CACHE_STARTED)
......
......@@ -1203,13 +1203,21 @@ static noinline int btrfs_ioctl_resize(struct btrfs_root *root,
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
mutex_lock(&root->fs_info->volume_mutex);
if (root->fs_info->balance_ctl) {
printk(KERN_INFO "btrfs: balance in progress\n");
ret = -EINVAL;
goto out;
}
vol_args = memdup_user(arg, sizeof(*vol_args));
if (IS_ERR(vol_args))
return PTR_ERR(vol_args);
if (IS_ERR(vol_args)) {
ret = PTR_ERR(vol_args);
goto out;
}
vol_args->name[BTRFS_PATH_NAME_MAX] = '\0';
mutex_lock(&root->fs_info->volume_mutex);
sizestr = vol_args->name;
devstr = strchr(sizestr, ':');
if (devstr) {
......@@ -1226,7 +1234,7 @@ static noinline int btrfs_ioctl_resize(struct btrfs_root *root,
printk(KERN_INFO "btrfs: resizer unable to find device %llu\n",
(unsigned long long)devid);
ret = -EINVAL;
goto out_unlock;
goto out_free;
}
if (!strcmp(sizestr, "max"))
new_size = device->bdev->bd_inode->i_size;
......@@ -1241,7 +1249,7 @@ static noinline int btrfs_ioctl_resize(struct btrfs_root *root,
new_size = memparse(sizestr, NULL);
if (new_size == 0) {
ret = -EINVAL;
goto out_unlock;
goto out_free;
}
}
......@@ -1250,7 +1258,7 @@ static noinline int btrfs_ioctl_resize(struct btrfs_root *root,
if (mod < 0) {
if (new_size > old_size) {
ret = -EINVAL;
goto out_unlock;
goto out_free;
}
new_size = old_size - new_size;
} else if (mod > 0) {
......@@ -1259,11 +1267,11 @@ static noinline int btrfs_ioctl_resize(struct btrfs_root *root,
if (new_size < 256 * 1024 * 1024) {
ret = -EINVAL;
goto out_unlock;
goto out_free;
}
if (new_size > device->bdev->bd_inode->i_size) {
ret = -EFBIG;
goto out_unlock;
goto out_free;
}
do_div(new_size, root->sectorsize);
......@@ -1276,7 +1284,7 @@ static noinline int btrfs_ioctl_resize(struct btrfs_root *root,
trans = btrfs_start_transaction(root, 0);
if (IS_ERR(trans)) {
ret = PTR_ERR(trans);
goto out_unlock;
goto out_free;
}
ret = btrfs_grow_device(trans, device, new_size);
btrfs_commit_transaction(trans, root);
......@@ -1284,9 +1292,10 @@ static noinline int btrfs_ioctl_resize(struct btrfs_root *root,
ret = btrfs_shrink_device(device, new_size);
}
out_unlock:
mutex_unlock(&root->fs_info->volume_mutex);
out_free:
kfree(vol_args);
out:
mutex_unlock(&root->fs_info->volume_mutex);
return ret;
}
......@@ -2052,14 +2061,25 @@ static long btrfs_ioctl_add_dev(struct btrfs_root *root, void __user *arg)
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
mutex_lock(&root->fs_info->volume_mutex);
if (root->fs_info->balance_ctl) {
printk(KERN_INFO "btrfs: balance in progress\n");
ret = -EINVAL;
goto out;
}
vol_args = memdup_user(arg, sizeof(*vol_args));
if (IS_ERR(vol_args))
return PTR_ERR(vol_args);
if (IS_ERR(vol_args)) {
ret = PTR_ERR(vol_args);
goto out;
}
vol_args->name[BTRFS_PATH_NAME_MAX] = '\0';
ret = btrfs_init_new_device(root, vol_args->name);
kfree(vol_args);
out:
mutex_unlock(&root->fs_info->volume_mutex);
return ret;
}
......@@ -2074,14 +2094,25 @@ static long btrfs_ioctl_rm_dev(struct btrfs_root *root, void __user *arg)
if (root->fs_info->sb->s_flags & MS_RDONLY)
return -EROFS;
mutex_lock(&root->fs_info->volume_mutex);
if (root->fs_info->balance_ctl) {
printk(KERN_INFO "btrfs: balance in progress\n");
ret = -EINVAL;
goto out;
}
vol_args = memdup_user(arg, sizeof(*vol_args));
if (IS_ERR(vol_args))
return PTR_ERR(vol_args);
if (IS_ERR(vol_args)) {
ret = PTR_ERR(vol_args);
goto out;
}
vol_args->name[BTRFS_PATH_NAME_MAX] = '\0';
ret = btrfs_rm_device(root, vol_args->name);
kfree(vol_args);
out:
mutex_unlock(&root->fs_info->volume_mutex);
return ret;
}
......@@ -3034,6 +3065,163 @@ static long btrfs_ioctl_logical_to_ino(struct btrfs_root *root,
return ret;
}
void update_ioctl_balance_args(struct btrfs_fs_info *fs_info, int lock,
struct btrfs_ioctl_balance_args *bargs)
{
struct btrfs_balance_control *bctl = fs_info->balance_ctl;
bargs->flags = bctl->flags;
if (atomic_read(&fs_info->balance_running))
bargs->state |= BTRFS_BALANCE_STATE_RUNNING;
if (atomic_read(&fs_info->balance_pause_req))
bargs->state |= BTRFS_BALANCE_STATE_PAUSE_REQ;
if (atomic_read(&fs_info->balance_cancel_req))
bargs->state |= BTRFS_BALANCE_STATE_CANCEL_REQ;
memcpy(&bargs->data, &bctl->data, sizeof(bargs->data));
memcpy(&bargs->meta, &bctl->meta, sizeof(bargs->meta));
memcpy(&bargs->sys, &bctl->sys, sizeof(bargs->sys));
if (lock) {
spin_lock(&fs_info->balance_lock);
memcpy(&bargs->stat, &bctl->stat, sizeof(bargs->stat));
spin_unlock(&fs_info->balance_lock);
} else {
memcpy(&bargs->stat, &bctl->stat, sizeof(bargs->stat));
}
}
static long btrfs_ioctl_balance(struct btrfs_root *root, void __user *arg)
{
struct btrfs_fs_info *fs_info = root->fs_info;
struct btrfs_ioctl_balance_args *bargs;
struct btrfs_balance_control *bctl;
int ret;
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
if (fs_info->sb->s_flags & MS_RDONLY)
return -EROFS;
mutex_lock(&fs_info->volume_mutex);
mutex_lock(&fs_info->balance_mutex);
if (arg) {
bargs = memdup_user(arg, sizeof(*bargs));
if (IS_ERR(bargs)) {
ret = PTR_ERR(bargs);
goto out;
}
if (bargs->flags & BTRFS_BALANCE_RESUME) {
if (!fs_info->balance_ctl) {
ret = -ENOTCONN;
goto out_bargs;
}
bctl = fs_info->balance_ctl;
spin_lock(&fs_info->balance_lock);
bctl->flags |= BTRFS_BALANCE_RESUME;
spin_unlock(&fs_info->balance_lock);
goto do_balance;
}
} else {
bargs = NULL;
}
if (fs_info->balance_ctl) {
ret = -EINPROGRESS;
goto out_bargs;
}
bctl = kzalloc(sizeof(*bctl), GFP_NOFS);
if (!bctl) {
ret = -ENOMEM;
goto out_bargs;
}
bctl->fs_info = fs_info;
if (arg) {
memcpy(&bctl->data, &bargs->data, sizeof(bctl->data));
memcpy(&bctl->meta, &bargs->meta, sizeof(bctl->meta));
memcpy(&bctl->sys, &bargs->sys, sizeof(bctl->sys));
bctl->flags = bargs->flags;
} else {
/* balance everything - no filters */
bctl->flags |= BTRFS_BALANCE_TYPE_MASK;
}
do_balance:
ret = btrfs_balance(bctl, bargs);
/*
* bctl is freed in __cancel_balance or in free_fs_info if
* restriper was paused all the way until unmount
*/
if (arg) {
if (copy_to_user(arg, bargs, sizeof(*bargs)))
ret = -EFAULT;
}
out_bargs:
kfree(bargs);
out:
mutex_unlock(&fs_info->balance_mutex);
mutex_unlock(&fs_info->volume_mutex);
return ret;
}
static long btrfs_ioctl_balance_ctl(struct btrfs_root *root, int cmd)
{
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
switch (cmd) {
case BTRFS_BALANCE_CTL_PAUSE:
return btrfs_pause_balance(root->fs_info);
case BTRFS_BALANCE_CTL_CANCEL:
return btrfs_cancel_balance(root->fs_info);
}
return -EINVAL;
}
static long btrfs_ioctl_balance_progress(struct btrfs_root *root,
void __user *arg)
{
struct btrfs_fs_info *fs_info = root->fs_info;
struct btrfs_ioctl_balance_args *bargs;
int ret = 0;
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
mutex_lock(&fs_info->balance_mutex);
if (!fs_info->balance_ctl) {
ret = -ENOTCONN;
goto out;
}
bargs = kzalloc(sizeof(*bargs), GFP_NOFS);
if (!bargs) {
ret = -ENOMEM;
goto out;
}
update_ioctl_balance_args(fs_info, 1, bargs);
if (copy_to_user(arg, bargs, sizeof(*bargs)))
ret = -EFAULT;
kfree(bargs);
out:
mutex_unlock(&fs_info->balance_mutex);
return ret;
}
long btrfs_ioctl(struct file *file, unsigned int
cmd, unsigned long arg)
{
......@@ -3078,7 +3266,7 @@ long btrfs_ioctl(struct file *file, unsigned int
case BTRFS_IOC_DEV_INFO:
return btrfs_ioctl_dev_info(root, argp);
case BTRFS_IOC_BALANCE:
return btrfs_balance(root->fs_info->dev_root);
return btrfs_ioctl_balance(root, NULL);
case BTRFS_IOC_CLONE:
return btrfs_ioctl_clone(file, arg, 0, 0, 0);
case BTRFS_IOC_CLONE_RANGE:
......@@ -3110,6 +3298,12 @@ long btrfs_ioctl(struct file *file, unsigned int
return btrfs_ioctl_scrub_cancel(root, argp);
case BTRFS_IOC_SCRUB_PROGRESS:
return btrfs_ioctl_scrub_progress(root, argp);
case BTRFS_IOC_BALANCE_V2:
return btrfs_ioctl_balance(root, argp);
case BTRFS_IOC_BALANCE_CTL:
return btrfs_ioctl_balance_ctl(root, arg);
case BTRFS_IOC_BALANCE_PROGRESS:
return btrfs_ioctl_balance_progress(root, argp);
}
return -ENOTTY;
......
......@@ -109,6 +109,55 @@ struct btrfs_ioctl_fs_info_args {
__u64 reserved[124]; /* pad to 1k */
};
/* balance control ioctl modes */
#define BTRFS_BALANCE_CTL_PAUSE 1
#define BTRFS_BALANCE_CTL_CANCEL 2
/*
* this is packed, because it should be exactly the same as its disk
* byte order counterpart (struct btrfs_disk_balance_args)
*/
struct btrfs_balance_args {
__u64 profiles;
__u64 usage;
__u64 devid;
__u64 pstart;
__u64 pend;
__u64 vstart;
__u64 vend;
__u64 target;
__u64 flags;
__u64 unused[8];
} __attribute__ ((__packed__));
/* report balance progress to userspace */
struct btrfs_balance_progress {
__u64 expected; /* estimated # of chunks that will be
* relocated to fulfill the request */
__u64 considered; /* # of chunks we have considered so far */
__u64 completed; /* # of chunks relocated so far */
};
#define BTRFS_BALANCE_STATE_RUNNING (1ULL << 0)
#define BTRFS_BALANCE_STATE_PAUSE_REQ (1ULL << 1)
#define BTRFS_BALANCE_STATE_CANCEL_REQ (1ULL << 2)
struct btrfs_ioctl_balance_args {
__u64 flags; /* in/out */
__u64 state; /* out */
struct btrfs_balance_args data; /* in/out */
struct btrfs_balance_args meta; /* in/out */
struct btrfs_balance_args sys; /* in/out */
struct btrfs_balance_progress stat; /* out */
__u64 unused[72]; /* pad to 1k */
};
#define BTRFS_INO_LOOKUP_PATH_MAX 4080
struct btrfs_ioctl_ino_lookup_args {
__u64 treeid;
......@@ -272,6 +321,11 @@ struct btrfs_ioctl_logical_ino_args {
struct btrfs_ioctl_dev_info_args)
#define BTRFS_IOC_FS_INFO _IOR(BTRFS_IOCTL_MAGIC, 31, \
struct btrfs_ioctl_fs_info_args)
#define BTRFS_IOC_BALANCE_V2 _IOWR(BTRFS_IOCTL_MAGIC, 32, \
struct btrfs_ioctl_balance_args)
#define BTRFS_IOC_BALANCE_CTL _IOW(BTRFS_IOCTL_MAGIC, 33, int)
#define BTRFS_IOC_BALANCE_PROGRESS _IOR(BTRFS_IOCTL_MAGIC, 34, \
struct btrfs_ioctl_balance_args)
#define BTRFS_IOC_INO_PATHS _IOWR(BTRFS_IOCTL_MAGIC, 35, \
struct btrfs_ioctl_ino_path_args)
#define BTRFS_IOC_LOGICAL_INO _IOWR(BTRFS_IOCTL_MAGIC, 36, \
......
......@@ -164,8 +164,9 @@ enum {
Opt_compress_type, Opt_compress_force, Opt_compress_force_type,
Opt_notreelog, Opt_ratio, Opt_flushoncommit, Opt_discard,
Opt_space_cache, Opt_clear_cache, Opt_user_subvol_rm_allowed,
Opt_enospc_debug, Opt_subvolrootid, Opt_defrag,
Opt_inode_cache, Opt_no_space_cache, Opt_recovery, Opt_err,
Opt_enospc_debug, Opt_subvolrootid, Opt_defrag, Opt_inode_cache,
Opt_no_space_cache, Opt_recovery, Opt_skip_balance,
Opt_err,
};
static match_table_t tokens = {
......@@ -200,6 +201,7 @@ static match_table_t tokens = {
{Opt_inode_cache, "inode_cache"},
{Opt_no_space_cache, "nospace_cache"},
{Opt_recovery, "recovery"},
{Opt_skip_balance, "skip_balance"},
{Opt_err, NULL},
};
......@@ -398,6 +400,9 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
printk(KERN_INFO "btrfs: enabling auto recovery");
btrfs_set_opt(info->mount_opt, RECOVERY);
break;
case Opt_skip_balance:
btrfs_set_opt(info->mount_opt, SKIP_BALANCE);
break;
case Opt_err:
printk(KERN_INFO "btrfs: unrecognized mount option "
"'%s'\n", p);
......@@ -723,6 +728,8 @@ static int btrfs_show_options(struct seq_file *seq, struct vfsmount *vfs)
seq_puts(seq, ",autodefrag");
if (btrfs_test_opt(root, INODE_MAP_CACHE))
seq_puts(seq, ",inode_cache");
if (btrfs_test_opt(root, SKIP_BALANCE))
seq_puts(seq, ",skip_balance");
return 0;
}
......
This diff is collapsed.
......@@ -186,6 +186,51 @@ struct map_lookup {
#define map_lookup_size(n) (sizeof(struct map_lookup) + \
(sizeof(struct btrfs_bio_stripe) * (n)))
/*
* Restriper's general type filter
*/
#define BTRFS_BALANCE_DATA (1ULL << 0)
#define BTRFS_BALANCE_SYSTEM (1ULL << 1)
#define BTRFS_BALANCE_METADATA (1ULL << 2)
#define BTRFS_BALANCE_TYPE_MASK (BTRFS_BALANCE_DATA | \
BTRFS_BALANCE_SYSTEM | \
BTRFS_BALANCE_METADATA)
#define BTRFS_BALANCE_FORCE (1ULL << 3)
#define BTRFS_BALANCE_RESUME (1ULL << 4)
/*
* Balance filters
*/
#define BTRFS_BALANCE_ARGS_PROFILES (1ULL << 0)
#define BTRFS_BALANCE_ARGS_USAGE (1ULL << 1)
#define BTRFS_BALANCE_ARGS_DEVID (1ULL << 2)
#define BTRFS_BALANCE_ARGS_DRANGE (1ULL << 3)
#define BTRFS_BALANCE_ARGS_VRANGE (1ULL << 4)
/*
* Profile changing flags. When SOFT is set we won't relocate chunk if
* it already has the target profile (even though it may be
* half-filled).
*/
#define BTRFS_BALANCE_ARGS_CONVERT (1ULL << 8)
#define BTRFS_BALANCE_ARGS_SOFT (1ULL << 9)
struct btrfs_balance_args;
struct btrfs_balance_progress;
struct btrfs_balance_control {
struct btrfs_fs_info *fs_info;
struct btrfs_balance_args data;
struct btrfs_balance_args meta;
struct btrfs_balance_args sys;
u64 flags;
struct btrfs_balance_progress stat;
};
int btrfs_account_dev_extents_size(struct btrfs_device *device, u64 start,
u64 end, u64 *length);
......@@ -228,7 +273,11 @@ struct btrfs_device *btrfs_find_device(struct btrfs_root *root, u64 devid,
u8 *uuid, u8 *fsid);
int btrfs_shrink_device(struct btrfs_device *device, u64 new_size);
int btrfs_init_new_device(struct btrfs_root *root, char *path);
int btrfs_balance(struct btrfs_root *dev_root);
int btrfs_balance(struct btrfs_balance_control *bctl,
struct btrfs_ioctl_balance_args *bargs);
int btrfs_recover_balance(struct btrfs_root *tree_root);
int btrfs_pause_balance(struct btrfs_fs_info *fs_info);
int btrfs_cancel_balance(struct btrfs_fs_info *fs_info);
int btrfs_chunk_readonly(struct btrfs_root *root, u64 chunk_offset);
int find_free_dev_extent(struct btrfs_trans_handle *trans,
struct btrfs_device *device, u64 num_bytes,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment