Commit 8dabb742 authored by Stefan Behrens's avatar Stefan Behrens Committed by Josef Bacik

Btrfs: change core code of btrfs to support the device replace operations

This commit contains all the essential changes to the core code
of Btrfs for support of the device replace procedure.
Signed-off-by: default avatarStefan Behrens <sbehrens@giantdisaster.de>
Signed-off-by: default avatarChris Mason <chris.mason@fusionio.com>
parent e93c89c1
......@@ -45,6 +45,7 @@
#include "inode-map.h"
#include "check-integrity.h"
#include "rcu-string.h"
#include "dev-replace.h"
#ifdef CONFIG_X86
#include <asm/cpufeature.h>
......@@ -2438,7 +2439,11 @@ int open_ctree(struct super_block *sb,
goto fail_tree_roots;
}
btrfs_close_extra_devices(fs_devices);
/*
* keep the device that is marked to be the target device for the
* dev_replace procedure
*/
btrfs_close_extra_devices(fs_info, fs_devices, 0);
if (!fs_devices->latest_bdev) {
printk(KERN_CRIT "btrfs: failed to read devices on %s\n",
......@@ -2510,6 +2515,14 @@ int open_ctree(struct super_block *sb,
goto fail_block_groups;
}
ret = btrfs_init_dev_replace(fs_info);
if (ret) {
pr_err("btrfs: failed to init dev_replace: %d\n", ret);
goto fail_block_groups;
}
btrfs_close_extra_devices(fs_info, fs_devices, 1);
ret = btrfs_init_space_info(fs_info);
if (ret) {
printk(KERN_ERR "Failed to initial space info: %d\n", ret);
......@@ -2658,6 +2671,13 @@ int open_ctree(struct super_block *sb,
return ret;
}
ret = btrfs_resume_dev_replace_async(fs_info);
if (ret) {
pr_warn("btrfs: failed to resume dev_replace\n");
close_ctree(tree_root);
return ret;
}
return 0;
fail_qgroup:
......@@ -3300,6 +3320,8 @@ int close_ctree(struct btrfs_root *root)
/* pause restriper - we want to resume on mount */
btrfs_pause_balance(fs_info);
btrfs_dev_replace_suspend_for_unmount(fs_info);
btrfs_scrub_cancel(fs_info);
/* wait for any defraggers to finish */
......
......@@ -27,6 +27,7 @@
#include "volumes.h"
#include "disk-io.h"
#include "transaction.h"
#include "dev-replace.h"
#undef DEBUG
......@@ -331,6 +332,7 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root,
int nzones = 0;
int i;
unsigned long index = logical >> PAGE_CACHE_SHIFT;
int dev_replace_is_ongoing;
spin_lock(&fs_info->reada_lock);
re = radix_tree_lookup(&fs_info->reada_tree, index);
......@@ -392,6 +394,7 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root,
}
/* insert extent in reada_tree + all per-device trees, all or nothing */
btrfs_dev_replace_lock(&fs_info->dev_replace);
spin_lock(&fs_info->reada_lock);
ret = radix_tree_insert(&fs_info->reada_tree, index, re);
if (ret == -EEXIST) {
......@@ -399,13 +402,17 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root,
BUG_ON(!re_exist);
re_exist->refcnt++;
spin_unlock(&fs_info->reada_lock);
btrfs_dev_replace_unlock(&fs_info->dev_replace);
goto error;
}
if (ret) {
spin_unlock(&fs_info->reada_lock);
btrfs_dev_replace_unlock(&fs_info->dev_replace);
goto error;
}
prev_dev = NULL;
dev_replace_is_ongoing = btrfs_dev_replace_is_ongoing(
&fs_info->dev_replace);
for (i = 0; i < nzones; ++i) {
dev = bbio->stripes[i].dev;
if (dev == prev_dev) {
......@@ -422,6 +429,14 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root,
/* cannot read ahead on missing device */
continue;
}
if (dev_replace_is_ongoing &&
dev == fs_info->dev_replace.tgtdev) {
/*
* as this device is selected for reading only as
* a last resort, skip it for read ahead.
*/
continue;
}
prev_dev = dev;
ret = radix_tree_insert(&dev->reada_extents, index, re);
if (ret) {
......@@ -434,10 +449,12 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root,
BUG_ON(fs_info == NULL);
radix_tree_delete(&fs_info->reada_tree, index);
spin_unlock(&fs_info->reada_lock);
btrfs_dev_replace_unlock(&fs_info->dev_replace);
goto error;
}
}
spin_unlock(&fs_info->reada_lock);
btrfs_dev_replace_unlock(&fs_info->dev_replace);
kfree(bbio);
return re;
......
......@@ -2843,12 +2843,17 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start,
return -EIO;
}
if (dev->scrub_device) {
btrfs_dev_replace_lock(&fs_info->dev_replace);
if (dev->scrub_device ||
(!is_dev_replace &&
btrfs_dev_replace_is_ongoing(&fs_info->dev_replace))) {
btrfs_dev_replace_unlock(&fs_info->dev_replace);
mutex_unlock(&fs_info->scrub_lock);
mutex_unlock(&fs_info->fs_devices->device_list_mutex);
scrub_workers_put(fs_info);
return -EINPROGRESS;
}
btrfs_dev_replace_unlock(&fs_info->dev_replace);
sctx = scrub_setup_ctx(dev, is_dev_replace);
if (IS_ERR(sctx)) {
mutex_unlock(&fs_info->scrub_lock);
......
......@@ -55,6 +55,7 @@
#include "export.h"
#include "compression.h"
#include "rcu-string.h"
#include "dev-replace.h"
#define CREATE_TRACE_POINTS
#include <trace/events/btrfs.h>
......@@ -1225,8 +1226,15 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data)
return 0;
if (*flags & MS_RDONLY) {
/*
* this also happens on 'umount -rf' or on shutdown, when
* the filesystem is busy.
*/
sb->s_flags |= MS_RDONLY;
btrfs_dev_replace_suspend_for_unmount(fs_info);
btrfs_scrub_cancel(fs_info);
ret = btrfs_commit_super(root);
if (ret)
goto restore;
......@@ -1263,6 +1271,11 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data)
if (ret)
goto restore;
ret = btrfs_resume_dev_replace_async(fs_info);
if (ret) {
pr_warn("btrfs: failed to resume dev_replace\n");
goto restore;
}
sb->s_flags &= ~MS_RDONLY;
}
......
......@@ -30,6 +30,7 @@
#include "tree-log.h"
#include "inode-map.h"
#include "volumes.h"
#include "dev-replace.h"
#define BTRFS_ROOT_TRANS_TAG 0
......@@ -845,7 +846,9 @@ static noinline int commit_cowonly_roots(struct btrfs_trans_handle *trans,
return ret;
ret = btrfs_run_dev_stats(trans, root->fs_info);
BUG_ON(ret);
WARN_ON(ret);
ret = btrfs_run_dev_replace(trans, root->fs_info);
WARN_ON(ret);
ret = btrfs_run_qgroups(trans, root->fs_info);
BUG_ON(ret);
......@@ -868,6 +871,8 @@ static noinline int commit_cowonly_roots(struct btrfs_trans_handle *trans,
switch_commit_root(fs_info->extent_root);
up_write(&fs_info->extent_commit_sem);
btrfs_after_dev_replace_commit(fs_info);
return 0;
}
......
......@@ -36,6 +36,7 @@
#include "check-integrity.h"
#include "rcu-string.h"
#include "math.h"
#include "dev-replace.h"
static int init_first_rw_device(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
......@@ -505,7 +506,8 @@ static struct btrfs_fs_devices *clone_fs_devices(struct btrfs_fs_devices *orig)
return ERR_PTR(-ENOMEM);
}
void btrfs_close_extra_devices(struct btrfs_fs_devices *fs_devices)
void btrfs_close_extra_devices(struct btrfs_fs_info *fs_info,
struct btrfs_fs_devices *fs_devices, int step)
{
struct btrfs_device *device, *next;
......@@ -528,6 +530,21 @@ void btrfs_close_extra_devices(struct btrfs_fs_devices *fs_devices)
continue;
}
if (device->devid == BTRFS_DEV_REPLACE_DEVID) {
/*
* In the first step, keep the device which has
* the correct fsid and the devid that is used
* for the dev_replace procedure.
* In the second step, the dev_replace state is
* read from the device tree and it is known
* whether the procedure is really active or
* not, which means whether this device is
* used or whether it should be removed.
*/
if (step == 0 || device->is_tgtdev_for_dev_replace) {
continue;
}
}
if (device->bdev) {
blkdev_put(device->bdev, device->mode);
device->bdev = NULL;
......@@ -536,7 +553,8 @@ void btrfs_close_extra_devices(struct btrfs_fs_devices *fs_devices)
if (device->writeable) {
list_del_init(&device->dev_alloc_list);
device->writeable = 0;
fs_devices->rw_devices--;
if (!device->is_tgtdev_for_dev_replace)
fs_devices->rw_devices--;
}
list_del_init(&device->dev_list);
fs_devices->num_devices--;
......@@ -594,7 +612,7 @@ static int __btrfs_close_devices(struct btrfs_fs_devices *fs_devices)
if (device->bdev)
fs_devices->open_devices--;
if (device->writeable) {
if (device->writeable && !device->is_tgtdev_for_dev_replace) {
list_del_init(&device->dev_alloc_list);
fs_devices->rw_devices--;
}
......@@ -718,7 +736,7 @@ static int __btrfs_open_devices(struct btrfs_fs_devices *fs_devices,
fs_devices->rotating = 1;
fs_devices->open_devices++;
if (device->writeable) {
if (device->writeable && !device->is_tgtdev_for_dev_replace) {
fs_devices->rw_devices++;
list_add(&device->dev_alloc_list,
&fs_devices->alloc_list);
......@@ -1350,16 +1368,22 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path)
root->fs_info->avail_system_alloc_bits |
root->fs_info->avail_metadata_alloc_bits;
if ((all_avail & BTRFS_BLOCK_GROUP_RAID10) &&
root->fs_info->fs_devices->num_devices <= 4) {
num_devices = root->fs_info->fs_devices->num_devices;
btrfs_dev_replace_lock(&root->fs_info->dev_replace);
if (btrfs_dev_replace_is_ongoing(&root->fs_info->dev_replace)) {
WARN_ON(num_devices < 1);
num_devices--;
}
btrfs_dev_replace_unlock(&root->fs_info->dev_replace);
if ((all_avail & BTRFS_BLOCK_GROUP_RAID10) && num_devices <= 4) {
printk(KERN_ERR "btrfs: unable to go below four devices "
"on raid10\n");
ret = -EINVAL;
goto out;
}
if ((all_avail & BTRFS_BLOCK_GROUP_RAID1) &&
root->fs_info->fs_devices->num_devices <= 2) {
if ((all_avail & BTRFS_BLOCK_GROUP_RAID1) && num_devices <= 2) {
printk(KERN_ERR "btrfs: unable to go below two "
"devices on raid1\n");
ret = -EINVAL;
......@@ -2935,6 +2959,7 @@ int btrfs_balance(struct btrfs_balance_control *bctl,
u64 allowed;
int mixed = 0;
int ret;
u64 num_devices;
if (btrfs_fs_closing(fs_info) ||
atomic_read(&fs_info->balance_pause_req) ||
......@@ -2963,10 +2988,17 @@ int btrfs_balance(struct btrfs_balance_control *bctl,
}
}
num_devices = fs_info->fs_devices->num_devices;
btrfs_dev_replace_lock(&fs_info->dev_replace);
if (btrfs_dev_replace_is_ongoing(&fs_info->dev_replace)) {
BUG_ON(num_devices < 1);
num_devices--;
}
btrfs_dev_replace_unlock(&fs_info->dev_replace);
allowed = BTRFS_AVAIL_ALLOC_BIT_SINGLE;
if (fs_info->fs_devices->num_devices == 1)
if (num_devices == 1)
allowed |= BTRFS_BLOCK_GROUP_DUP;
else if (fs_info->fs_devices->num_devices < 4)
else if (num_devices < 4)
allowed |= (BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID1);
else
allowed |= (BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID1 |
......@@ -3591,6 +3623,7 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
devices_info[ndevs].total_avail = total_avail;
devices_info[ndevs].dev = device;
++ndevs;
WARN_ON(ndevs > fs_devices->rw_devices);
}
/*
......@@ -4773,6 +4806,7 @@ static void fill_device_from_item(struct extent_buffer *leaf,
device->io_align = btrfs_device_io_align(leaf, dev_item);
device->io_width = btrfs_device_io_width(leaf, dev_item);
device->sector_size = btrfs_device_sector_size(leaf, dev_item);
WARN_ON(device->devid == BTRFS_DEV_REPLACE_DEVID);
device->is_tgtdev_for_dev_replace = 0;
ptr = (unsigned long)btrfs_device_uuid(dev_item);
......
......@@ -268,7 +268,8 @@ int btrfs_open_devices(struct btrfs_fs_devices *fs_devices,
int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder,
struct btrfs_fs_devices **fs_devices_ret);
int btrfs_close_devices(struct btrfs_fs_devices *fs_devices);
void btrfs_close_extra_devices(struct btrfs_fs_devices *fs_devices);
void btrfs_close_extra_devices(struct btrfs_fs_info *fs_info,
struct btrfs_fs_devices *fs_devices, int step);
int btrfs_find_device_missing_or_by_path(struct btrfs_root *root,
char *device_path,
struct btrfs_device **device);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment