Commit f5d80856 authored by Linus Torvalds

Merge tag 'for-5.10-rc1-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux

Pull btrfs fixes from David Sterba:

 - lockdep fixes:
     - drop path locks before manipulating sysfs objects or qgroups
     - preliminary fixes before tree locks get switched to rwsem
     - use annotated seqlock

 - build warning fixes (printk format)

 - fix relocation vs fallocate race

 - tree checker properly validates number of stripes and parity

 - readahead vs device replace fixes

 - iomap dio fix for unnecessary buffered io fallback

* tag 'for-5.10-rc1-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux:
  btrfs: convert data_seqcount to seqcount_mutex_t
  btrfs: don't fallback to buffered read if we don't need to
  btrfs: add a helper to read the tree_root commit root for backref lookup
  btrfs: drop the path before adding qgroup items when enabling qgroups
  btrfs: fix readahead hang and use-after-free after removing a device
  btrfs: fix use-after-free on readahead extent after failure to create it
  btrfs: tree-checker: validate number of chunk stripes and parity
  btrfs: tree-checker: fix incorrect printk format
  btrfs: drop the path before adding block group sysfs files
  btrfs: fix relocation failure due to race with fallocate
parents aab6bf50 d5c82388
@@ -544,7 +544,18 @@ static int resolve_indirect_ref(struct btrfs_fs_info *fs_info,
 	int level = ref->level;
 	struct btrfs_key search_key = ref->key_for_search;
 
-	root = btrfs_get_fs_root(fs_info, ref->root_id, false);
+	/*
+	 * If we're search_commit_root we could possibly be holding locks on
+	 * other tree nodes.  This happens when qgroups does backref walks when
+	 * adding new delayed refs.  To deal with this we need to look in cache
+	 * for the root, and if we don't find it then we need to search the
+	 * tree_root's commit root, thus the btrfs_get_fs_root_commit_root usage
+	 * here.
+	 */
+	if (path->search_commit_root)
+		root = btrfs_get_fs_root_commit_root(fs_info, path, ref->root_id);
+	else
+		root = btrfs_get_fs_root(fs_info, ref->root_id, false);
 	if (IS_ERR(root)) {
 		ret = PTR_ERR(root);
 		goto out_free;
...
@@ -2024,6 +2024,7 @@ int btrfs_read_block_groups(struct btrfs_fs_info *info)
 		key.offset = 0;
 		btrfs_release_path(path);
 	}
+	btrfs_release_path(path);
 
 	list_for_each_entry(space_info, &info->space_info, list) {
 		int i;
...
@@ -3564,6 +3564,8 @@ struct reada_control *btrfs_reada_add(struct btrfs_root *root,
 int btrfs_reada_wait(void *handle);
 void btrfs_reada_detach(void *handle);
 int btree_readahead_hook(struct extent_buffer *eb, int err);
+void btrfs_reada_remove_dev(struct btrfs_device *dev);
+void btrfs_reada_undo_remove_dev(struct btrfs_device *dev);
 
 static inline int is_fstree(u64 rootid)
 {
...
@@ -688,6 +688,9 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
 	}
 	btrfs_wait_ordered_roots(fs_info, U64_MAX, 0, (u64)-1);
 
+	if (!scrub_ret)
+		btrfs_reada_remove_dev(src_device);
+
 	/*
 	 * We have to use this loop approach because at this point src_device
 	 * has to be available for transaction commit to complete, yet new
@@ -696,6 +699,7 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
 	while (1) {
 		trans = btrfs_start_transaction(root, 0);
 		if (IS_ERR(trans)) {
+			btrfs_reada_undo_remove_dev(src_device);
 			mutex_unlock(&dev_replace->lock_finishing_cancel_unmount);
 			return PTR_ERR(trans);
 		}
@@ -746,6 +750,7 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
 	up_write(&dev_replace->rwsem);
 	mutex_unlock(&fs_info->chunk_mutex);
 	mutex_unlock(&fs_info->fs_devices->device_list_mutex);
+	btrfs_reada_undo_remove_dev(src_device);
 	btrfs_rm_dev_replace_blocked(fs_info);
 	if (tgt_device)
 		btrfs_destroy_dev_replace_tgtdev(tgt_device);
...
@@ -1281,32 +1281,26 @@ int btrfs_add_log_tree(struct btrfs_trans_handle *trans,
 	return 0;
 }
 
-struct btrfs_root *btrfs_read_tree_root(struct btrfs_root *tree_root,
-					struct btrfs_key *key)
+static struct btrfs_root *read_tree_root_path(struct btrfs_root *tree_root,
+					      struct btrfs_path *path,
+					      struct btrfs_key *key)
 {
 	struct btrfs_root *root;
 	struct btrfs_fs_info *fs_info = tree_root->fs_info;
-	struct btrfs_path *path;
 	u64 generation;
 	int ret;
 	int level;
 
-	path = btrfs_alloc_path();
-	if (!path)
-		return ERR_PTR(-ENOMEM);
-
 	root = btrfs_alloc_root(fs_info, key->objectid, GFP_NOFS);
-	if (!root) {
-		ret = -ENOMEM;
-		goto alloc_fail;
-	}
+	if (!root)
+		return ERR_PTR(-ENOMEM);
 
 	ret = btrfs_find_root(tree_root, key, path,
 			      &root->root_item, &root->root_key);
 	if (ret) {
 		if (ret > 0)
 			ret = -ENOENT;
-		goto find_fail;
+		goto fail;
 	}
 
 	generation = btrfs_root_generation(&root->root_item);
@@ -1317,21 +1311,31 @@ struct btrfs_root *btrfs_read_tree_root(struct btrfs_root *tree_root,
 	if (IS_ERR(root->node)) {
 		ret = PTR_ERR(root->node);
 		root->node = NULL;
-		goto find_fail;
+		goto fail;
 	} else if (!btrfs_buffer_uptodate(root->node, generation, 0)) {
 		ret = -EIO;
-		goto find_fail;
+		goto fail;
 	}
 	root->commit_root = btrfs_root_node(root);
-out:
-	btrfs_free_path(path);
 	return root;
-
-find_fail:
+fail:
 	btrfs_put_root(root);
-alloc_fail:
-	root = ERR_PTR(ret);
-	goto out;
+	return ERR_PTR(ret);
+}
+
+struct btrfs_root *btrfs_read_tree_root(struct btrfs_root *tree_root,
+					struct btrfs_key *key)
+{
+	struct btrfs_root *root;
+	struct btrfs_path *path;
+
+	path = btrfs_alloc_path();
+	if (!path)
+		return ERR_PTR(-ENOMEM);
+	root = read_tree_root_path(tree_root, path, key);
+	btrfs_free_path(path);
+
+	return root;
 }
 
 /*
@@ -1419,6 +1423,31 @@ static struct btrfs_root *btrfs_lookup_fs_root(struct btrfs_fs_info *fs_info,
 	return root;
 }
 
+static struct btrfs_root *btrfs_get_global_root(struct btrfs_fs_info *fs_info,
+						u64 objectid)
+{
+	if (objectid == BTRFS_ROOT_TREE_OBJECTID)
+		return btrfs_grab_root(fs_info->tree_root);
+	if (objectid == BTRFS_EXTENT_TREE_OBJECTID)
+		return btrfs_grab_root(fs_info->extent_root);
+	if (objectid == BTRFS_CHUNK_TREE_OBJECTID)
+		return btrfs_grab_root(fs_info->chunk_root);
+	if (objectid == BTRFS_DEV_TREE_OBJECTID)
+		return btrfs_grab_root(fs_info->dev_root);
+	if (objectid == BTRFS_CSUM_TREE_OBJECTID)
+		return btrfs_grab_root(fs_info->csum_root);
+	if (objectid == BTRFS_QUOTA_TREE_OBJECTID)
+		return btrfs_grab_root(fs_info->quota_root) ?
+			fs_info->quota_root : ERR_PTR(-ENOENT);
+	if (objectid == BTRFS_UUID_TREE_OBJECTID)
+		return btrfs_grab_root(fs_info->uuid_root) ?
+			fs_info->uuid_root : ERR_PTR(-ENOENT);
+	if (objectid == BTRFS_FREE_SPACE_TREE_OBJECTID)
+		return btrfs_grab_root(fs_info->free_space_root) ?
+			fs_info->free_space_root : ERR_PTR(-ENOENT);
+	return NULL;
+}
+
 int btrfs_insert_fs_root(struct btrfs_fs_info *fs_info,
 			 struct btrfs_root *root)
 {
@@ -1518,25 +1547,9 @@ static struct btrfs_root *btrfs_get_root_ref(struct btrfs_fs_info *fs_info,
 	struct btrfs_key key;
 	int ret;
 
-	if (objectid == BTRFS_ROOT_TREE_OBJECTID)
-		return btrfs_grab_root(fs_info->tree_root);
-	if (objectid == BTRFS_EXTENT_TREE_OBJECTID)
-		return btrfs_grab_root(fs_info->extent_root);
-	if (objectid == BTRFS_CHUNK_TREE_OBJECTID)
-		return btrfs_grab_root(fs_info->chunk_root);
-	if (objectid == BTRFS_DEV_TREE_OBJECTID)
-		return btrfs_grab_root(fs_info->dev_root);
-	if (objectid == BTRFS_CSUM_TREE_OBJECTID)
-		return btrfs_grab_root(fs_info->csum_root);
-	if (objectid == BTRFS_QUOTA_TREE_OBJECTID)
-		return btrfs_grab_root(fs_info->quota_root) ?
-			fs_info->quota_root : ERR_PTR(-ENOENT);
-	if (objectid == BTRFS_UUID_TREE_OBJECTID)
-		return btrfs_grab_root(fs_info->uuid_root) ?
-			fs_info->uuid_root : ERR_PTR(-ENOENT);
-	if (objectid == BTRFS_FREE_SPACE_TREE_OBJECTID)
-		return btrfs_grab_root(fs_info->free_space_root) ?
-			fs_info->free_space_root : ERR_PTR(-ENOENT);
+	root = btrfs_get_global_root(fs_info, objectid);
+	if (root)
+		return root;
 again:
 	root = btrfs_lookup_fs_root(fs_info, objectid);
 	if (root) {
@@ -1621,6 +1634,52 @@ struct btrfs_root *btrfs_get_new_fs_root(struct btrfs_fs_info *fs_info,
 	return btrfs_get_root_ref(fs_info, objectid, anon_dev, true);
 }
 
+/*
+ * btrfs_get_fs_root_commit_root - return a root for the given objectid
+ * @fs_info:	the fs_info
+ * @objectid:	the objectid we need to look up
+ *
+ * This is exclusively used for backref walking, and exists specifically
+ * because of how qgroups does lookups.  Qgroups will do a backref lookup at
+ * delayed ref creation time, which means we may have to read the tree_root
+ * in order to look up a fs root that is not in memory.  If the root is not
+ * in memory we will read the tree root's commit root and look up the fs
+ * root from there.  This is a temporary root; it will not be inserted into
+ * the radix tree, as it doesn't have the most up-to-date information, and
+ * it will simply be discarded once the backref code is finished using it.
+ */
+struct btrfs_root *btrfs_get_fs_root_commit_root(struct btrfs_fs_info *fs_info,
+						 struct btrfs_path *path,
+						 u64 objectid)
+{
+	struct btrfs_root *root;
+	struct btrfs_key key;
+
+	ASSERT(path->search_commit_root && path->skip_locking);
+
+	/*
+	 * This can return -ENOENT if we ask for a root that doesn't exist,
+	 * but since this is called via the backref walking code we won't be
+	 * looking up a root that doesn't exist, unless there's corruption.
+	 * So if root != NULL just return it.
+	 */
+	root = btrfs_get_global_root(fs_info, objectid);
+	if (root)
+		return root;
+
+	root = btrfs_lookup_fs_root(fs_info, objectid);
+	if (root)
+		return root;
+
+	key.objectid = objectid;
+	key.type = BTRFS_ROOT_ITEM_KEY;
+	key.offset = (u64)-1;
+	root = read_tree_root_path(fs_info->tree_root, path, &key);
+	btrfs_release_path(path);
+
+	return root;
+}
+
 /*
  * called by the kthread helper functions to finally call the bio end_io
  * functions.  This is where read checksum verification actually happens.
...
@@ -69,6 +69,9 @@ struct btrfs_root *btrfs_get_fs_root(struct btrfs_fs_info *fs_info,
 				     u64 objectid, bool check_ref);
 struct btrfs_root *btrfs_get_new_fs_root(struct btrfs_fs_info *fs_info,
 					 u64 objectid, dev_t anon_dev);
+struct btrfs_root *btrfs_get_fs_root_commit_root(struct btrfs_fs_info *fs_info,
+						 struct btrfs_path *path,
+						 u64 objectid);
 void btrfs_free_fs_info(struct btrfs_fs_info *fs_info);
 int btrfs_cleanup_fs_roots(struct btrfs_fs_info *fs_info);
...
@@ -3185,7 +3185,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
 		struct btrfs_tree_block_info *bi;
 		if (item_size < sizeof(*ei) + sizeof(*bi)) {
 			btrfs_crit(info,
-"invalid extent item size for key (%llu, %u, %llu) owner %llu, has %u expect >= %lu",
+"invalid extent item size for key (%llu, %u, %llu) owner %llu, has %u expect >= %zu",
 				   key.objectid, key.type, key.offset,
 				   owner_objectid, item_size,
 				   sizeof(*ei) + sizeof(*bi));
...
@@ -3628,7 +3628,8 @@ static ssize_t btrfs_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
 		inode_lock_shared(inode);
 		ret = btrfs_direct_IO(iocb, to);
 		inode_unlock_shared(inode);
-		if (ret < 0)
+		if (ret < 0 || !iov_iter_count(to) ||
+		    iocb->ki_pos >= i_size_read(file_inode(iocb->ki_filp)))
 			return ret;
 	}
...
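For clarity, the new early-return condition can be restated as a standalone predicate. Below is a minimal sketch under the assumption of a hypothetical helper name, need_buffered_fallback (the patch itself open-codes the check): fall back to buffered reading only when the direct IO read came up short for a reason buffered IO could actually fix.

#include <linux/fs.h>
#include <linux/uio.h>

/* Hypothetical restatement of the check added above; not part of the patch. */
static bool need_buffered_fallback(ssize_t ret, struct kiocb *iocb,
				   struct iov_iter *to)
{
	if (ret < 0)
		return false;	/* real error: propagate it, don't retry */
	if (!iov_iter_count(to))
		return false;	/* direct IO already read everything requested */
	if (iocb->ki_pos >= i_size_read(file_inode(iocb->ki_filp)))
		return false;	/* at or past EOF: nothing more to read */
	return true;		/* short read inside the file: try buffered */
}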
@@ -9672,10 +9672,16 @@ static int __btrfs_prealloc_file_range(struct inode *inode, int mode,
 		 * clear_offset by our extent size.
 		 */
 		clear_offset += ins.offset;
-		btrfs_dec_block_group_reservations(fs_info, ins.objectid);
 
 		last_alloc = ins.offset;
 		trans = insert_prealloc_file_extent(trans, inode, &ins, cur_offset);
+		/*
+		 * Now that we inserted the prealloc extent we can finally
+		 * decrement the number of reservations in the block group.
+		 * If we did it before, we could race with relocation and have
+		 * relocation miss the reserved extent, making it fail later.
+		 */
+		btrfs_dec_block_group_reservations(fs_info, ins.objectid);
 		if (IS_ERR(trans)) {
 			ret = PTR_ERR(trans);
 			btrfs_free_reserved_extent(fs_info, ins.objectid,
...
@@ -1026,6 +1026,10 @@ int btrfs_quota_enable(struct btrfs_fs_info *fs_info)
 		btrfs_item_key_to_cpu(leaf, &found_key, slot);
 
 		if (found_key.type == BTRFS_ROOT_REF_KEY) {
+
+			/* Release locks on tree_root before we access quota_root */
+			btrfs_release_path(path);
+
 			ret = add_qgroup_item(trans, quota_root,
 					      found_key.offset);
 			if (ret) {
@@ -1044,6 +1048,20 @@ int btrfs_quota_enable(struct btrfs_fs_info *fs_info)
 				btrfs_abort_transaction(trans, ret);
 				goto out_free_path;
 			}
+			ret = btrfs_search_slot_for_read(tree_root, &found_key,
+							 path, 1, 0);
+			if (ret < 0) {
+				btrfs_abort_transaction(trans, ret);
+				goto out_free_path;
+			}
+			if (ret > 0) {
+				/*
+				 * Shouldn't happen, but in case it does we
+				 * don't need to do the btrfs_next_item, just
+				 * continue.
+				 */
+				continue;
+			}
 		}
 		ret = btrfs_next_item(tree_root, path);
 		if (ret < 0) {
...
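The shape of the fix is a common btrfs locking pattern: release the locks a path holds before modifying a different tree, then re-search for the saved key to restore the iteration position. A condensed sketch of that pattern, with the surrounding loop and error handling elided:

	/* Condensed sketch of the release-then-reposition pattern used above. */
	btrfs_release_path(path);			/* drop tree_root leaf locks */
	ret = add_qgroup_item(trans, quota_root,	/* now safe to lock quota_root */
			      found_key.offset);
	/* ... error handling ... */
	ret = btrfs_search_slot_for_read(tree_root, &found_key,
					 path, 1, 0);	/* reposition on the saved key */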
@@ -421,6 +421,9 @@ static struct reada_extent *reada_find_extent(struct btrfs_fs_info *fs_info,
 		if (!dev->bdev)
 			continue;
 
+		if (test_bit(BTRFS_DEV_STATE_NO_READA, &dev->dev_state))
+			continue;
+
 		if (dev_replace_is_ongoing &&
 		    dev == fs_info->dev_replace.tgtdev) {
 			/*
@@ -445,6 +448,8 @@ static struct reada_extent *reada_find_extent(struct btrfs_fs_info *fs_info,
 		}
 		have_zone = 1;
 	}
+	if (!have_zone)
+		radix_tree_delete(&fs_info->reada_tree, index);
 	spin_unlock(&fs_info->reada_lock);
 	up_read(&fs_info->dev_replace.rwsem);
 
@@ -1020,3 +1025,45 @@ void btrfs_reada_detach(void *handle)
 
 	kref_put(&rc->refcnt, reada_control_release);
 }
+
+/*
+ * Before removing a device (device replace or device remove ioctls), call this
+ * function to wait for all existing readahead requests on the device and to
+ * make sure no one queues more readahead requests for the device.
+ *
+ * Must be called without holding the device list mutex or the device replace
+ * semaphore, otherwise it will deadlock.
+ */
+void btrfs_reada_remove_dev(struct btrfs_device *dev)
+{
+	struct btrfs_fs_info *fs_info = dev->fs_info;
+
+	/* Serialize with readahead extent creation at reada_find_extent(). */
+	spin_lock(&fs_info->reada_lock);
+	set_bit(BTRFS_DEV_STATE_NO_READA, &dev->dev_state);
+	spin_unlock(&fs_info->reada_lock);
+
+	/*
+	 * There might be readahead requests added to the radix trees which
+	 * were not yet added to the readahead work queue.  We need to start
+	 * them and wait for their completion, otherwise we can end up with
+	 * use-after-free problems when dropping the last reference on the
+	 * readahead extents and their zones, as they need to access the
+	 * device structure.
+	 */
+	reada_start_machine(fs_info);
+	btrfs_flush_workqueue(fs_info->readahead_workers);
+}
+
+/*
+ * If an error happens while removing a device (device replace or device
+ * remove ioctls) after calling btrfs_reada_remove_dev(), call this to undo
+ * what that function did.  It is safe to call even if
+ * btrfs_reada_remove_dev() was not called before.
+ */
+void btrfs_reada_undo_remove_dev(struct btrfs_device *dev)
+{
+	spin_lock(&dev->fs_info->reada_lock);
+	clear_bit(BTRFS_DEV_STATE_NO_READA, &dev->dev_state);
+	spin_unlock(&dev->fs_info->reada_lock);
+}
@@ -760,18 +760,36 @@ int btrfs_check_chunk_valid(struct extent_buffer *leaf,
 	u64 type;
 	u64 features;
 	bool mixed = false;
+	int raid_index;
+	int nparity;
+	int ncopies;
 
 	length = btrfs_chunk_length(leaf, chunk);
 	stripe_len = btrfs_chunk_stripe_len(leaf, chunk);
 	num_stripes = btrfs_chunk_num_stripes(leaf, chunk);
 	sub_stripes = btrfs_chunk_sub_stripes(leaf, chunk);
 	type = btrfs_chunk_type(leaf, chunk);
+	raid_index = btrfs_bg_flags_to_raid_index(type);
+	ncopies = btrfs_raid_array[raid_index].ncopies;
+	nparity = btrfs_raid_array[raid_index].nparity;
 
 	if (!num_stripes) {
 		chunk_err(leaf, chunk, logical,
 			  "invalid chunk num_stripes, have %u", num_stripes);
 		return -EUCLEAN;
 	}
+	if (num_stripes < ncopies) {
+		chunk_err(leaf, chunk, logical,
+			  "invalid chunk num_stripes < ncopies, have %u < %d",
+			  num_stripes, ncopies);
+		return -EUCLEAN;
+	}
+	if (nparity && num_stripes == nparity) {
+		chunk_err(leaf, chunk, logical,
+			  "invalid chunk num_stripes == nparity, have %u == %d",
+			  num_stripes, nparity);
+		return -EUCLEAN;
+	}
 	if (!IS_ALIGNED(logical, fs_info->sectorsize)) {
 		chunk_err(leaf, chunk, logical,
 			  "invalid chunk logical, have %llu should aligned to %u",
...
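As a worked example of the two new checks, take per-profile values assumed here from btrfs_raid_array (RAID1: ncopies=2, nparity=0; RAID6: ncopies=1, nparity=2). A RAID1 chunk with a single stripe fails the first check, since one stripe cannot hold two copies; a RAID6 chunk with exactly two stripes fails the second, since both stripes would be parity and the chunk could hold no data. A standalone sketch of just the rules:

#include <linux/types.h>

/*
 * Sketch of the two stripe-count rules added above, decoupled from the
 * tree-checker context; the example values in the lead-in are assumptions.
 */
static bool stripe_count_valid(u16 num_stripes, int ncopies, int nparity)
{
	/* Each copy of the data needs at least one stripe to live on. */
	if (num_stripes < ncopies)
		return false;
	/* A parity profile whose stripes are all parity can hold no data. */
	if (nparity && num_stripes == nparity)
		return false;
	return true;
}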
@@ -431,7 +431,7 @@ static struct btrfs_device *__alloc_device(struct btrfs_fs_info *fs_info)
 	atomic_set(&dev->reada_in_flight, 0);
 	atomic_set(&dev->dev_stats_ccnt, 0);
-	btrfs_device_data_ordered_init(dev);
+	btrfs_device_data_ordered_init(dev, fs_info);
 	INIT_RADIX_TREE(&dev->reada_zones, GFP_NOFS & ~__GFP_DIRECT_RECLAIM);
 	INIT_RADIX_TREE(&dev->reada_extents, GFP_NOFS & ~__GFP_DIRECT_RECLAIM);
 	extent_io_tree_init(fs_info, &dev->alloc_state,
@@ -2099,6 +2099,8 @@ int btrfs_rm_device(struct btrfs_fs_info *fs_info, const char *device_path,
 	mutex_unlock(&uuid_mutex);
 	ret = btrfs_shrink_device(device, 0);
+	if (!ret)
+		btrfs_reada_remove_dev(device);
 	mutex_lock(&uuid_mutex);
 	if (ret)
 		goto error_undo;
@@ -2179,6 +2181,7 @@ int btrfs_rm_device(struct btrfs_fs_info *fs_info, const char *device_path,
 	return ret;
 
 error_undo:
+	btrfs_reada_undo_remove_dev(device);
 	if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state)) {
 		mutex_lock(&fs_info->chunk_mutex);
 		list_add(&device->dev_alloc_list,
...
@@ -39,10 +39,10 @@ struct btrfs_io_geometry {
 #if BITS_PER_LONG==32 && defined(CONFIG_SMP)
 #include <linux/seqlock.h>
 #define __BTRFS_NEED_DEVICE_DATA_ORDERED
-#define btrfs_device_data_ordered_init(device)	\
-	seqcount_init(&device->data_seqcount)
+#define btrfs_device_data_ordered_init(device, info)	\
+	seqcount_mutex_init(&device->data_seqcount, &info->chunk_mutex)
 #else
-#define btrfs_device_data_ordered_init(device) do { } while (0)
+#define btrfs_device_data_ordered_init(device, info) do { } while (0)
 #endif
 
 #define BTRFS_DEV_STATE_WRITEABLE	(0)
@@ -50,6 +50,7 @@ struct btrfs_io_geometry {
 #define BTRFS_DEV_STATE_MISSING		(2)
 #define BTRFS_DEV_STATE_REPLACE_TGT	(3)
 #define BTRFS_DEV_STATE_FLUSH_SENT	(4)
+#define BTRFS_DEV_STATE_NO_READA	(5)
 
 struct btrfs_device {
 	struct list_head dev_list; /* device_list_mutex */
@@ -71,7 +72,8 @@ struct btrfs_device {
 	blk_status_t last_flush_error;
 
 #ifdef __BTRFS_NEED_DEVICE_DATA_ORDERED
-	seqcount_t data_seqcount;
+	/* A seqcount_t with associated chunk_mutex (for lockdep) */
+	seqcount_mutex_t data_seqcount;
 #endif
 
 	/* the internal btrfs device id */
@@ -162,11 +164,9 @@ btrfs_device_get_##name(const struct btrfs_device *dev)	\
 static inline void						\
 btrfs_device_set_##name(struct btrfs_device *dev, u64 size)	\
 {								\
-	preempt_disable();					\
 	write_seqcount_begin(&dev->data_seqcount);		\
 	dev->name = size;					\
 	write_seqcount_end(&dev->data_seqcount);		\
-	preempt_enable();					\
 }
 #elif BITS_PER_LONG==32 && defined(CONFIG_PREEMPTION)
 #define BTRFS_DEVICE_GETSET_FUNCS(name)				\
...
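For reference, a minimal sketch of the seqcount_mutex_t pattern this conversion relies on, using <linux/seqlock.h>; the struct and function names here are illustrative, not from the patch. Writers serialize on the associated mutex, which lockdep can now verify, so the manual preempt_disable()/preempt_enable() bracketing of the write section becomes unnecessary.

#include <linux/mutex.h>
#include <linux/seqlock.h>

struct example_dev {
	struct mutex lock;
	seqcount_mutex_t seq;	/* associated with 'lock' for lockdep */
	u64 bytes_used;
};

static void example_init(struct example_dev *d)
{
	mutex_init(&d->lock);
	seqcount_mutex_init(&d->seq, &d->lock);
}

static void example_set(struct example_dev *d, u64 val)
{
	mutex_lock(&d->lock);	/* lockdep checks this is held on write */
	write_seqcount_begin(&d->seq);
	d->bytes_used = val;
	write_seqcount_end(&d->seq);
	mutex_unlock(&d->lock);
}

static u64 example_get(struct example_dev *d)
{
	unsigned int seq;
	u64 val;

	/* Lockless read: retry if a writer raced with us. */
	do {
		seq = read_seqcount_begin(&d->seq);
		val = d->bytes_used;
	} while (read_seqcount_retry(&d->seq, seq));
	return val;
}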