Commit 73beece9 authored by Liu Bo, committed by David Sterba

Btrfs: fix lockdep deadlock warning due to dev_replace

Xfstests btrfs/011 triggers a lockdep deadlock warning:

[ 1226.649039] =========================================================
[ 1226.649039] [ INFO: possible irq lock inversion dependency detected ]
[ 1226.649039] 4.1.0+ #270 Not tainted
[ 1226.649039] ---------------------------------------------------------
[ 1226.652955] kswapd0/46 just changed the state of lock:
[ 1226.652955]  (&delayed_node->mutex){+.+.-.}, at: [<ffffffff81458735>] __btrfs_release_delayed_node+0x45/0x1d0
[ 1226.652955] but this lock took another, RECLAIM_FS-unsafe lock in the past:
[ 1226.652955]  (&fs_info->dev_replace.lock){+.+.+.}

and interrupts could create inverse lock ordering between them.

[ 1226.652955]
other info that might help us debug this:
[ 1226.652955] Chain exists of:
  &delayed_node->mutex --> &found->groups_sem --> &fs_info->dev_replace.lock

[ 1226.652955]  Possible interrupt unsafe locking scenario:

[ 1226.652955]        CPU0                    CPU1
[ 1226.652955]        ----                    ----
[ 1226.652955]   lock(&fs_info->dev_replace.lock);
[ 1226.652955]                                local_irq_disable();
[ 1226.652955]                                lock(&delayed_node->mutex);
[ 1226.652955]                                lock(&found->groups_sem);
[ 1226.652955]   <Interrupt>
[ 1226.652955]     lock(&delayed_node->mutex);
[ 1226.652955]
 *** DEADLOCK ***

Commit 084b6e7c ("btrfs: Fix a lockdep warning when running xfstest.") tried
to fix a similar problem that produces exactly the same warning, but even with
that fix we still run into this one.

The above lock chain comes from
btrfs_commit_transaction
  ->btrfs_run_delayed_items
    ...
    ->__btrfs_update_delayed_inode
      ...
      ->__btrfs_cow_block
         ...
         ->find_free_extent
            ->cache_block_group
              ->load_free_space_cache
                ->btrfs_readpages
                  ->submit_one_bio
                    ...
                    ->__btrfs_map_block
                      ->btrfs_dev_replace_lock

However, under high memory pressure, tasks which hold dev_replace.lock can
be interrupted by kswapd, which then tries to release memory occupied by the
superblock, inodes and dentries; there we may call evict_inode, which leads
to

[ 1226.652955]  [<ffffffff81458735>] __btrfs_release_delayed_node+0x45/0x1d0
[ 1226.652955]  [<ffffffff81459e74>] btrfs_remove_delayed_node+0x24/0x30
[ 1226.652955]  [<ffffffff8140c5fe>] btrfs_evict_inode+0x34e/0x700

delayed_node->mutex may be acquired in __btrfs_release_delayed_node(), which leads
to an ABBA deadlock.

To fix this, we can use the "blocking rwlock" scheme used for extent_buffer, but
things are simpler here since we only need to turn the read-side spinlock into a blocking lock.

With this, btrfs/011 no longer produces warnings in dmesg.
Signed-off-by: Liu Bo <bo.li.liu@oracle.com>
Signed-off-by: David Sterba <dsterba@suse.com>
parent 18558cae
...@@ -1002,8 +1002,10 @@ struct btrfs_dev_replace { ...@@ -1002,8 +1002,10 @@ struct btrfs_dev_replace {
pid_t lock_owner; pid_t lock_owner;
atomic_t nesting_level; atomic_t nesting_level;
struct mutex lock_finishing_cancel_unmount; struct mutex lock_finishing_cancel_unmount;
struct mutex lock_management_lock; rwlock_t lock;
struct mutex lock; atomic_t read_locks;
atomic_t blocking_readers;
wait_queue_head_t read_lock_wq;
struct btrfs_scrub_progress scrub_progress; struct btrfs_scrub_progress scrub_progress;
}; };
......
...@@ -202,13 +202,13 @@ int btrfs_run_dev_replace(struct btrfs_trans_handle *trans, ...@@ -202,13 +202,13 @@ int btrfs_run_dev_replace(struct btrfs_trans_handle *trans,
struct btrfs_dev_replace_item *ptr; struct btrfs_dev_replace_item *ptr;
struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace; struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace;
btrfs_dev_replace_lock(dev_replace); btrfs_dev_replace_lock(dev_replace, 0);
if (!dev_replace->is_valid || if (!dev_replace->is_valid ||
!dev_replace->item_needs_writeback) { !dev_replace->item_needs_writeback) {
btrfs_dev_replace_unlock(dev_replace); btrfs_dev_replace_unlock(dev_replace, 0);
return 0; return 0;
} }
btrfs_dev_replace_unlock(dev_replace); btrfs_dev_replace_unlock(dev_replace, 0);
key.objectid = 0; key.objectid = 0;
key.type = BTRFS_DEV_REPLACE_KEY; key.type = BTRFS_DEV_REPLACE_KEY;
...@@ -264,7 +264,7 @@ int btrfs_run_dev_replace(struct btrfs_trans_handle *trans, ...@@ -264,7 +264,7 @@ int btrfs_run_dev_replace(struct btrfs_trans_handle *trans,
ptr = btrfs_item_ptr(eb, path->slots[0], ptr = btrfs_item_ptr(eb, path->slots[0],
struct btrfs_dev_replace_item); struct btrfs_dev_replace_item);
btrfs_dev_replace_lock(dev_replace); btrfs_dev_replace_lock(dev_replace, 1);
if (dev_replace->srcdev) if (dev_replace->srcdev)
btrfs_set_dev_replace_src_devid(eb, ptr, btrfs_set_dev_replace_src_devid(eb, ptr,
dev_replace->srcdev->devid); dev_replace->srcdev->devid);
...@@ -287,7 +287,7 @@ int btrfs_run_dev_replace(struct btrfs_trans_handle *trans, ...@@ -287,7 +287,7 @@ int btrfs_run_dev_replace(struct btrfs_trans_handle *trans,
btrfs_set_dev_replace_cursor_right(eb, ptr, btrfs_set_dev_replace_cursor_right(eb, ptr,
dev_replace->cursor_right); dev_replace->cursor_right);
dev_replace->item_needs_writeback = 0; dev_replace->item_needs_writeback = 0;
btrfs_dev_replace_unlock(dev_replace); btrfs_dev_replace_unlock(dev_replace, 1);
btrfs_mark_buffer_dirty(eb); btrfs_mark_buffer_dirty(eb);
...@@ -356,7 +356,7 @@ int btrfs_dev_replace_start(struct btrfs_root *root, ...@@ -356,7 +356,7 @@ int btrfs_dev_replace_start(struct btrfs_root *root,
return PTR_ERR(trans); return PTR_ERR(trans);
} }
btrfs_dev_replace_lock(dev_replace); btrfs_dev_replace_lock(dev_replace, 1);
switch (dev_replace->replace_state) { switch (dev_replace->replace_state) {
case BTRFS_IOCTL_DEV_REPLACE_STATE_NEVER_STARTED: case BTRFS_IOCTL_DEV_REPLACE_STATE_NEVER_STARTED:
case BTRFS_IOCTL_DEV_REPLACE_STATE_FINISHED: case BTRFS_IOCTL_DEV_REPLACE_STATE_FINISHED:
...@@ -395,7 +395,7 @@ int btrfs_dev_replace_start(struct btrfs_root *root, ...@@ -395,7 +395,7 @@ int btrfs_dev_replace_start(struct btrfs_root *root,
dev_replace->is_valid = 1; dev_replace->is_valid = 1;
dev_replace->item_needs_writeback = 1; dev_replace->item_needs_writeback = 1;
args->result = BTRFS_IOCTL_DEV_REPLACE_RESULT_NO_ERROR; args->result = BTRFS_IOCTL_DEV_REPLACE_RESULT_NO_ERROR;
btrfs_dev_replace_unlock(dev_replace); btrfs_dev_replace_unlock(dev_replace, 1);
ret = btrfs_sysfs_add_device_link(tgt_device->fs_devices, tgt_device); ret = btrfs_sysfs_add_device_link(tgt_device->fs_devices, tgt_device);
if (ret) if (ret)
...@@ -407,7 +407,7 @@ int btrfs_dev_replace_start(struct btrfs_root *root, ...@@ -407,7 +407,7 @@ int btrfs_dev_replace_start(struct btrfs_root *root,
trans = btrfs_start_transaction(root, 0); trans = btrfs_start_transaction(root, 0);
if (IS_ERR(trans)) { if (IS_ERR(trans)) {
ret = PTR_ERR(trans); ret = PTR_ERR(trans);
btrfs_dev_replace_lock(dev_replace); btrfs_dev_replace_lock(dev_replace, 1);
goto leave; goto leave;
} }
...@@ -433,7 +433,7 @@ int btrfs_dev_replace_start(struct btrfs_root *root, ...@@ -433,7 +433,7 @@ int btrfs_dev_replace_start(struct btrfs_root *root,
leave: leave:
dev_replace->srcdev = NULL; dev_replace->srcdev = NULL;
dev_replace->tgtdev = NULL; dev_replace->tgtdev = NULL;
btrfs_dev_replace_unlock(dev_replace); btrfs_dev_replace_unlock(dev_replace, 1);
btrfs_destroy_dev_replace_tgtdev(fs_info, tgt_device); btrfs_destroy_dev_replace_tgtdev(fs_info, tgt_device);
return ret; return ret;
} }
...@@ -471,18 +471,18 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info, ...@@ -471,18 +471,18 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
/* don't allow cancel or unmount to disturb the finishing procedure */ /* don't allow cancel or unmount to disturb the finishing procedure */
mutex_lock(&dev_replace->lock_finishing_cancel_unmount); mutex_lock(&dev_replace->lock_finishing_cancel_unmount);
btrfs_dev_replace_lock(dev_replace); btrfs_dev_replace_lock(dev_replace, 0);
/* was the operation canceled, or is it finished? */ /* was the operation canceled, or is it finished? */
if (dev_replace->replace_state != if (dev_replace->replace_state !=
BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED) { BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED) {
btrfs_dev_replace_unlock(dev_replace); btrfs_dev_replace_unlock(dev_replace, 0);
mutex_unlock(&dev_replace->lock_finishing_cancel_unmount); mutex_unlock(&dev_replace->lock_finishing_cancel_unmount);
return 0; return 0;
} }
tgt_device = dev_replace->tgtdev; tgt_device = dev_replace->tgtdev;
src_device = dev_replace->srcdev; src_device = dev_replace->srcdev;
btrfs_dev_replace_unlock(dev_replace); btrfs_dev_replace_unlock(dev_replace, 0);
/* /*
* flush all outstanding I/O and inode extent mappings before the * flush all outstanding I/O and inode extent mappings before the
...@@ -507,7 +507,7 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info, ...@@ -507,7 +507,7 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
/* keep away write_all_supers() during the finishing procedure */ /* keep away write_all_supers() during the finishing procedure */
mutex_lock(&root->fs_info->fs_devices->device_list_mutex); mutex_lock(&root->fs_info->fs_devices->device_list_mutex);
mutex_lock(&root->fs_info->chunk_mutex); mutex_lock(&root->fs_info->chunk_mutex);
btrfs_dev_replace_lock(dev_replace); btrfs_dev_replace_lock(dev_replace, 1);
dev_replace->replace_state = dev_replace->replace_state =
scrub_ret ? BTRFS_IOCTL_DEV_REPLACE_STATE_CANCELED scrub_ret ? BTRFS_IOCTL_DEV_REPLACE_STATE_CANCELED
: BTRFS_IOCTL_DEV_REPLACE_STATE_FINISHED; : BTRFS_IOCTL_DEV_REPLACE_STATE_FINISHED;
...@@ -528,7 +528,7 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info, ...@@ -528,7 +528,7 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
rcu_str_deref(src_device->name), rcu_str_deref(src_device->name),
src_device->devid, src_device->devid,
rcu_str_deref(tgt_device->name), scrub_ret); rcu_str_deref(tgt_device->name), scrub_ret);
btrfs_dev_replace_unlock(dev_replace); btrfs_dev_replace_unlock(dev_replace, 1);
mutex_unlock(&root->fs_info->chunk_mutex); mutex_unlock(&root->fs_info->chunk_mutex);
mutex_unlock(&root->fs_info->fs_devices->device_list_mutex); mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
mutex_unlock(&uuid_mutex); mutex_unlock(&uuid_mutex);
...@@ -565,7 +565,7 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info, ...@@ -565,7 +565,7 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
list_add(&tgt_device->dev_alloc_list, &fs_info->fs_devices->alloc_list); list_add(&tgt_device->dev_alloc_list, &fs_info->fs_devices->alloc_list);
fs_info->fs_devices->rw_devices++; fs_info->fs_devices->rw_devices++;
btrfs_dev_replace_unlock(dev_replace); btrfs_dev_replace_unlock(dev_replace, 1);
btrfs_rm_dev_replace_blocked(fs_info); btrfs_rm_dev_replace_blocked(fs_info);
...@@ -649,7 +649,7 @@ void btrfs_dev_replace_status(struct btrfs_fs_info *fs_info, ...@@ -649,7 +649,7 @@ void btrfs_dev_replace_status(struct btrfs_fs_info *fs_info,
struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace; struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace;
struct btrfs_device *srcdev; struct btrfs_device *srcdev;
btrfs_dev_replace_lock(dev_replace); btrfs_dev_replace_lock(dev_replace, 0);
/* even if !dev_replace_is_valid, the values are good enough for /* even if !dev_replace_is_valid, the values are good enough for
* the replace_status ioctl */ * the replace_status ioctl */
args->result = BTRFS_IOCTL_DEV_REPLACE_RESULT_NO_ERROR; args->result = BTRFS_IOCTL_DEV_REPLACE_RESULT_NO_ERROR;
...@@ -675,7 +675,7 @@ void btrfs_dev_replace_status(struct btrfs_fs_info *fs_info, ...@@ -675,7 +675,7 @@ void btrfs_dev_replace_status(struct btrfs_fs_info *fs_info,
div_u64(btrfs_device_get_total_bytes(srcdev), 1000)); div_u64(btrfs_device_get_total_bytes(srcdev), 1000));
break; break;
} }
btrfs_dev_replace_unlock(dev_replace); btrfs_dev_replace_unlock(dev_replace, 0);
} }
int btrfs_dev_replace_cancel(struct btrfs_fs_info *fs_info, int btrfs_dev_replace_cancel(struct btrfs_fs_info *fs_info,
...@@ -698,13 +698,13 @@ static u64 __btrfs_dev_replace_cancel(struct btrfs_fs_info *fs_info) ...@@ -698,13 +698,13 @@ static u64 __btrfs_dev_replace_cancel(struct btrfs_fs_info *fs_info)
return -EROFS; return -EROFS;
mutex_lock(&dev_replace->lock_finishing_cancel_unmount); mutex_lock(&dev_replace->lock_finishing_cancel_unmount);
btrfs_dev_replace_lock(dev_replace); btrfs_dev_replace_lock(dev_replace, 1);
switch (dev_replace->replace_state) { switch (dev_replace->replace_state) {
case BTRFS_IOCTL_DEV_REPLACE_STATE_NEVER_STARTED: case BTRFS_IOCTL_DEV_REPLACE_STATE_NEVER_STARTED:
case BTRFS_IOCTL_DEV_REPLACE_STATE_FINISHED: case BTRFS_IOCTL_DEV_REPLACE_STATE_FINISHED:
case BTRFS_IOCTL_DEV_REPLACE_STATE_CANCELED: case BTRFS_IOCTL_DEV_REPLACE_STATE_CANCELED:
result = BTRFS_IOCTL_DEV_REPLACE_RESULT_NOT_STARTED; result = BTRFS_IOCTL_DEV_REPLACE_RESULT_NOT_STARTED;
btrfs_dev_replace_unlock(dev_replace); btrfs_dev_replace_unlock(dev_replace, 1);
goto leave; goto leave;
case BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED: case BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED:
case BTRFS_IOCTL_DEV_REPLACE_STATE_SUSPENDED: case BTRFS_IOCTL_DEV_REPLACE_STATE_SUSPENDED:
...@@ -717,7 +717,7 @@ static u64 __btrfs_dev_replace_cancel(struct btrfs_fs_info *fs_info) ...@@ -717,7 +717,7 @@ static u64 __btrfs_dev_replace_cancel(struct btrfs_fs_info *fs_info)
dev_replace->replace_state = BTRFS_IOCTL_DEV_REPLACE_STATE_CANCELED; dev_replace->replace_state = BTRFS_IOCTL_DEV_REPLACE_STATE_CANCELED;
dev_replace->time_stopped = get_seconds(); dev_replace->time_stopped = get_seconds();
dev_replace->item_needs_writeback = 1; dev_replace->item_needs_writeback = 1;
btrfs_dev_replace_unlock(dev_replace); btrfs_dev_replace_unlock(dev_replace, 1);
btrfs_scrub_cancel(fs_info); btrfs_scrub_cancel(fs_info);
trans = btrfs_start_transaction(root, 0); trans = btrfs_start_transaction(root, 0);
...@@ -740,7 +740,7 @@ void btrfs_dev_replace_suspend_for_unmount(struct btrfs_fs_info *fs_info) ...@@ -740,7 +740,7 @@ void btrfs_dev_replace_suspend_for_unmount(struct btrfs_fs_info *fs_info)
struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace; struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace;
mutex_lock(&dev_replace->lock_finishing_cancel_unmount); mutex_lock(&dev_replace->lock_finishing_cancel_unmount);
btrfs_dev_replace_lock(dev_replace); btrfs_dev_replace_lock(dev_replace, 1);
switch (dev_replace->replace_state) { switch (dev_replace->replace_state) {
case BTRFS_IOCTL_DEV_REPLACE_STATE_NEVER_STARTED: case BTRFS_IOCTL_DEV_REPLACE_STATE_NEVER_STARTED:
case BTRFS_IOCTL_DEV_REPLACE_STATE_FINISHED: case BTRFS_IOCTL_DEV_REPLACE_STATE_FINISHED:
...@@ -756,7 +756,7 @@ void btrfs_dev_replace_suspend_for_unmount(struct btrfs_fs_info *fs_info) ...@@ -756,7 +756,7 @@ void btrfs_dev_replace_suspend_for_unmount(struct btrfs_fs_info *fs_info)
break; break;
} }
btrfs_dev_replace_unlock(dev_replace); btrfs_dev_replace_unlock(dev_replace, 1);
mutex_unlock(&dev_replace->lock_finishing_cancel_unmount); mutex_unlock(&dev_replace->lock_finishing_cancel_unmount);
} }
...@@ -766,12 +766,12 @@ int btrfs_resume_dev_replace_async(struct btrfs_fs_info *fs_info) ...@@ -766,12 +766,12 @@ int btrfs_resume_dev_replace_async(struct btrfs_fs_info *fs_info)
struct task_struct *task; struct task_struct *task;
struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace; struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace;
btrfs_dev_replace_lock(dev_replace); btrfs_dev_replace_lock(dev_replace, 1);
switch (dev_replace->replace_state) { switch (dev_replace->replace_state) {
case BTRFS_IOCTL_DEV_REPLACE_STATE_NEVER_STARTED: case BTRFS_IOCTL_DEV_REPLACE_STATE_NEVER_STARTED:
case BTRFS_IOCTL_DEV_REPLACE_STATE_FINISHED: case BTRFS_IOCTL_DEV_REPLACE_STATE_FINISHED:
case BTRFS_IOCTL_DEV_REPLACE_STATE_CANCELED: case BTRFS_IOCTL_DEV_REPLACE_STATE_CANCELED:
btrfs_dev_replace_unlock(dev_replace); btrfs_dev_replace_unlock(dev_replace, 1);
return 0; return 0;
case BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED: case BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED:
break; break;
...@@ -784,10 +784,10 @@ int btrfs_resume_dev_replace_async(struct btrfs_fs_info *fs_info) ...@@ -784,10 +784,10 @@ int btrfs_resume_dev_replace_async(struct btrfs_fs_info *fs_info)
btrfs_info(fs_info, "cannot continue dev_replace, tgtdev is missing"); btrfs_info(fs_info, "cannot continue dev_replace, tgtdev is missing");
btrfs_info(fs_info, btrfs_info(fs_info,
"you may cancel the operation after 'mount -o degraded'"); "you may cancel the operation after 'mount -o degraded'");
btrfs_dev_replace_unlock(dev_replace); btrfs_dev_replace_unlock(dev_replace, 1);
return 0; return 0;
} }
btrfs_dev_replace_unlock(dev_replace); btrfs_dev_replace_unlock(dev_replace, 1);
WARN_ON(atomic_xchg( WARN_ON(atomic_xchg(
&fs_info->mutually_exclusive_operation_running, 1)); &fs_info->mutually_exclusive_operation_running, 1));
...@@ -865,48 +865,58 @@ int btrfs_dev_replace_is_ongoing(struct btrfs_dev_replace *dev_replace) ...@@ -865,48 +865,58 @@ int btrfs_dev_replace_is_ongoing(struct btrfs_dev_replace *dev_replace)
return 1; return 1;
} }
void btrfs_dev_replace_lock(struct btrfs_dev_replace *dev_replace) void btrfs_dev_replace_lock(struct btrfs_dev_replace *dev_replace, int rw)
{ {
/* the beginning is just an optimization for the typical case */ if (rw == 1) {
if (atomic_read(&dev_replace->nesting_level) == 0) { /* write */
acquire_lock: again:
/* this is not a nested case where the same thread wait_event(dev_replace->read_lock_wq,
* is trying to acqurire the same lock twice */ atomic_read(&dev_replace->blocking_readers) == 0);
mutex_lock(&dev_replace->lock); write_lock(&dev_replace->lock);
mutex_lock(&dev_replace->lock_management_lock); if (atomic_read(&dev_replace->blocking_readers)) {
dev_replace->lock_owner = current->pid; write_unlock(&dev_replace->lock);
atomic_inc(&dev_replace->nesting_level); goto again;
mutex_unlock(&dev_replace->lock_management_lock); }
return; } else {
read_lock(&dev_replace->lock);
atomic_inc(&dev_replace->read_locks);
} }
}
mutex_lock(&dev_replace->lock_management_lock); void btrfs_dev_replace_unlock(struct btrfs_dev_replace *dev_replace, int rw)
if (atomic_read(&dev_replace->nesting_level) > 0 && {
dev_replace->lock_owner == current->pid) { if (rw == 1) {
WARN_ON(!mutex_is_locked(&dev_replace->lock)); /* write */
atomic_inc(&dev_replace->nesting_level); ASSERT(atomic_read(&dev_replace->blocking_readers) == 0);
mutex_unlock(&dev_replace->lock_management_lock); write_unlock(&dev_replace->lock);
return; } else {
ASSERT(atomic_read(&dev_replace->read_locks) > 0);
atomic_dec(&dev_replace->read_locks);
read_unlock(&dev_replace->lock);
} }
}
mutex_unlock(&dev_replace->lock_management_lock); /* inc blocking cnt and release read lock */
goto acquire_lock; void btrfs_dev_replace_set_lock_blocking(
struct btrfs_dev_replace *dev_replace)
{
/* only set blocking for read lock */
ASSERT(atomic_read(&dev_replace->read_locks) > 0);
atomic_inc(&dev_replace->blocking_readers);
read_unlock(&dev_replace->lock);
} }
void btrfs_dev_replace_unlock(struct btrfs_dev_replace *dev_replace) /* acquire read lock and dec blocking cnt */
void btrfs_dev_replace_clear_lock_blocking(
struct btrfs_dev_replace *dev_replace)
{ {
WARN_ON(!mutex_is_locked(&dev_replace->lock)); /* only set blocking for read lock */
mutex_lock(&dev_replace->lock_management_lock); ASSERT(atomic_read(&dev_replace->read_locks) > 0);
WARN_ON(atomic_read(&dev_replace->nesting_level) < 1); ASSERT(atomic_read(&dev_replace->blocking_readers) > 0);
WARN_ON(dev_replace->lock_owner != current->pid); read_lock(&dev_replace->lock);
atomic_dec(&dev_replace->nesting_level); if (atomic_dec_and_test(&dev_replace->blocking_readers) &&
if (atomic_read(&dev_replace->nesting_level) == 0) { waitqueue_active(&dev_replace->read_lock_wq))
dev_replace->lock_owner = 0; wake_up(&dev_replace->read_lock_wq);
mutex_unlock(&dev_replace->lock_management_lock);
mutex_unlock(&dev_replace->lock);
} else {
mutex_unlock(&dev_replace->lock_management_lock);
}
} }
void btrfs_bio_counter_inc_noblocked(struct btrfs_fs_info *fs_info) void btrfs_bio_counter_inc_noblocked(struct btrfs_fs_info *fs_info)
......
...@@ -34,8 +34,11 @@ int btrfs_dev_replace_cancel(struct btrfs_fs_info *fs_info, ...@@ -34,8 +34,11 @@ int btrfs_dev_replace_cancel(struct btrfs_fs_info *fs_info,
void btrfs_dev_replace_suspend_for_unmount(struct btrfs_fs_info *fs_info); void btrfs_dev_replace_suspend_for_unmount(struct btrfs_fs_info *fs_info);
int btrfs_resume_dev_replace_async(struct btrfs_fs_info *fs_info); int btrfs_resume_dev_replace_async(struct btrfs_fs_info *fs_info);
int btrfs_dev_replace_is_ongoing(struct btrfs_dev_replace *dev_replace); int btrfs_dev_replace_is_ongoing(struct btrfs_dev_replace *dev_replace);
void btrfs_dev_replace_lock(struct btrfs_dev_replace *dev_replace); void btrfs_dev_replace_lock(struct btrfs_dev_replace *dev_replace, int rw);
void btrfs_dev_replace_unlock(struct btrfs_dev_replace *dev_replace); void btrfs_dev_replace_unlock(struct btrfs_dev_replace *dev_replace, int rw);
void btrfs_dev_replace_set_lock_blocking(struct btrfs_dev_replace *dev_replace);
void btrfs_dev_replace_clear_lock_blocking(
struct btrfs_dev_replace *dev_replace);
static inline void btrfs_dev_replace_stats_inc(atomic64_t *stat_value) static inline void btrfs_dev_replace_stats_inc(atomic64_t *stat_value)
{ {
......
...@@ -2272,9 +2272,11 @@ static void btrfs_init_dev_replace_locks(struct btrfs_fs_info *fs_info) ...@@ -2272,9 +2272,11 @@ static void btrfs_init_dev_replace_locks(struct btrfs_fs_info *fs_info)
fs_info->dev_replace.lock_owner = 0; fs_info->dev_replace.lock_owner = 0;
atomic_set(&fs_info->dev_replace.nesting_level, 0); atomic_set(&fs_info->dev_replace.nesting_level, 0);
mutex_init(&fs_info->dev_replace.lock_finishing_cancel_unmount); mutex_init(&fs_info->dev_replace.lock_finishing_cancel_unmount);
mutex_init(&fs_info->dev_replace.lock_management_lock); rwlock_init(&fs_info->dev_replace.lock);
mutex_init(&fs_info->dev_replace.lock); atomic_set(&fs_info->dev_replace.read_locks, 0);
atomic_set(&fs_info->dev_replace.blocking_readers, 0);
init_waitqueue_head(&fs_info->replace_wait); init_waitqueue_head(&fs_info->replace_wait);
init_waitqueue_head(&fs_info->dev_replace.read_lock_wq);
} }
static void btrfs_init_qgroup(struct btrfs_fs_info *fs_info) static void btrfs_init_qgroup(struct btrfs_fs_info *fs_info)
......
...@@ -396,7 +396,7 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root, ...@@ -396,7 +396,7 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root,
} }
/* insert extent in reada_tree + all per-device trees, all or nothing */ /* insert extent in reada_tree + all per-device trees, all or nothing */
btrfs_dev_replace_lock(&fs_info->dev_replace); btrfs_dev_replace_lock(&fs_info->dev_replace, 0);
spin_lock(&fs_info->reada_lock); spin_lock(&fs_info->reada_lock);
ret = radix_tree_insert(&fs_info->reada_tree, index, re); ret = radix_tree_insert(&fs_info->reada_tree, index, re);
if (ret == -EEXIST) { if (ret == -EEXIST) {
...@@ -404,12 +404,12 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root, ...@@ -404,12 +404,12 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root,
BUG_ON(!re_exist); BUG_ON(!re_exist);
re_exist->refcnt++; re_exist->refcnt++;
spin_unlock(&fs_info->reada_lock); spin_unlock(&fs_info->reada_lock);
btrfs_dev_replace_unlock(&fs_info->dev_replace); btrfs_dev_replace_unlock(&fs_info->dev_replace, 0);
goto error; goto error;
} }
if (ret) { if (ret) {
spin_unlock(&fs_info->reada_lock); spin_unlock(&fs_info->reada_lock);
btrfs_dev_replace_unlock(&fs_info->dev_replace); btrfs_dev_replace_unlock(&fs_info->dev_replace, 0);
goto error; goto error;
} }
prev_dev = NULL; prev_dev = NULL;
...@@ -456,12 +456,12 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root, ...@@ -456,12 +456,12 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root,
BUG_ON(fs_info == NULL); BUG_ON(fs_info == NULL);
radix_tree_delete(&fs_info->reada_tree, index); radix_tree_delete(&fs_info->reada_tree, index);
spin_unlock(&fs_info->reada_lock); spin_unlock(&fs_info->reada_lock);
btrfs_dev_replace_unlock(&fs_info->dev_replace); btrfs_dev_replace_unlock(&fs_info->dev_replace, 0);
goto error; goto error;
} }
} }
spin_unlock(&fs_info->reada_lock); spin_unlock(&fs_info->reada_lock);
btrfs_dev_replace_unlock(&fs_info->dev_replace); btrfs_dev_replace_unlock(&fs_info->dev_replace, 0);
btrfs_put_bbio(bbio); btrfs_put_bbio(bbio);
return re; return re;
......
...@@ -3857,16 +3857,16 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start, ...@@ -3857,16 +3857,16 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start,
return -EIO; return -EIO;
} }
btrfs_dev_replace_lock(&fs_info->dev_replace); btrfs_dev_replace_lock(&fs_info->dev_replace, 0);
if (dev->scrub_device || if (dev->scrub_device ||
(!is_dev_replace && (!is_dev_replace &&
btrfs_dev_replace_is_ongoing(&fs_info->dev_replace))) { btrfs_dev_replace_is_ongoing(&fs_info->dev_replace))) {
btrfs_dev_replace_unlock(&fs_info->dev_replace); btrfs_dev_replace_unlock(&fs_info->dev_replace, 0);
mutex_unlock(&fs_info->scrub_lock); mutex_unlock(&fs_info->scrub_lock);
mutex_unlock(&fs_info->fs_devices->device_list_mutex); mutex_unlock(&fs_info->fs_devices->device_list_mutex);
return -EINPROGRESS; return -EINPROGRESS;
} }
btrfs_dev_replace_unlock(&fs_info->dev_replace); btrfs_dev_replace_unlock(&fs_info->dev_replace, 0);
ret = scrub_workers_get(fs_info, is_dev_replace); ret = scrub_workers_get(fs_info, is_dev_replace);
if (ret) { if (ret) {
......
...@@ -1714,12 +1714,12 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path) ...@@ -1714,12 +1714,12 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path)
} while (read_seqretry(&root->fs_info->profiles_lock, seq)); } while (read_seqretry(&root->fs_info->profiles_lock, seq));
num_devices = root->fs_info->fs_devices->num_devices; num_devices = root->fs_info->fs_devices->num_devices;
btrfs_dev_replace_lock(&root->fs_info->dev_replace); btrfs_dev_replace_lock(&root->fs_info->dev_replace, 0);
if (btrfs_dev_replace_is_ongoing(&root->fs_info->dev_replace)) { if (btrfs_dev_replace_is_ongoing(&root->fs_info->dev_replace)) {
WARN_ON(num_devices < 1); WARN_ON(num_devices < 1);
num_devices--; num_devices--;
} }
btrfs_dev_replace_unlock(&root->fs_info->dev_replace); btrfs_dev_replace_unlock(&root->fs_info->dev_replace, 0);
if ((all_avail & BTRFS_BLOCK_GROUP_RAID10) && num_devices <= 4) { if ((all_avail & BTRFS_BLOCK_GROUP_RAID10) && num_devices <= 4) {
ret = BTRFS_ERROR_DEV_RAID10_MIN_NOT_MET; ret = BTRFS_ERROR_DEV_RAID10_MIN_NOT_MET;
...@@ -3686,12 +3686,12 @@ int btrfs_balance(struct btrfs_balance_control *bctl, ...@@ -3686,12 +3686,12 @@ int btrfs_balance(struct btrfs_balance_control *bctl,
} }
num_devices = fs_info->fs_devices->num_devices; num_devices = fs_info->fs_devices->num_devices;
btrfs_dev_replace_lock(&fs_info->dev_replace); btrfs_dev_replace_lock(&fs_info->dev_replace, 0);
if (btrfs_dev_replace_is_ongoing(&fs_info->dev_replace)) { if (btrfs_dev_replace_is_ongoing(&fs_info->dev_replace)) {
BUG_ON(num_devices < 1); BUG_ON(num_devices < 1);
num_devices--; num_devices--;
} }
btrfs_dev_replace_unlock(&fs_info->dev_replace); btrfs_dev_replace_unlock(&fs_info->dev_replace, 0);
allowed = BTRFS_AVAIL_ALLOC_BIT_SINGLE; allowed = BTRFS_AVAIL_ALLOC_BIT_SINGLE;
if (num_devices == 1) if (num_devices == 1)
allowed |= BTRFS_BLOCK_GROUP_DUP; allowed |= BTRFS_BLOCK_GROUP_DUP;
...@@ -5062,10 +5062,10 @@ int btrfs_num_copies(struct btrfs_fs_info *fs_info, u64 logical, u64 len) ...@@ -5062,10 +5062,10 @@ int btrfs_num_copies(struct btrfs_fs_info *fs_info, u64 logical, u64 len)
ret = 1; ret = 1;
free_extent_map(em); free_extent_map(em);
btrfs_dev_replace_lock(&fs_info->dev_replace); btrfs_dev_replace_lock(&fs_info->dev_replace, 0);
if (btrfs_dev_replace_is_ongoing(&fs_info->dev_replace)) if (btrfs_dev_replace_is_ongoing(&fs_info->dev_replace))
ret++; ret++;
btrfs_dev_replace_unlock(&fs_info->dev_replace); btrfs_dev_replace_unlock(&fs_info->dev_replace, 0);
return ret; return ret;
} }
...@@ -5325,10 +5325,12 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw, ...@@ -5325,10 +5325,12 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
if (!bbio_ret) if (!bbio_ret)
goto out; goto out;
btrfs_dev_replace_lock(dev_replace); btrfs_dev_replace_lock(dev_replace, 0);
dev_replace_is_ongoing = btrfs_dev_replace_is_ongoing(dev_replace); dev_replace_is_ongoing = btrfs_dev_replace_is_ongoing(dev_replace);
if (!dev_replace_is_ongoing) if (!dev_replace_is_ongoing)
btrfs_dev_replace_unlock(dev_replace); btrfs_dev_replace_unlock(dev_replace, 0);
else
btrfs_dev_replace_set_lock_blocking(dev_replace);
if (dev_replace_is_ongoing && mirror_num == map->num_stripes + 1 && if (dev_replace_is_ongoing && mirror_num == map->num_stripes + 1 &&
!(rw & (REQ_WRITE | REQ_DISCARD | REQ_GET_READ_MIRRORS)) && !(rw & (REQ_WRITE | REQ_DISCARD | REQ_GET_READ_MIRRORS)) &&
...@@ -5751,8 +5753,10 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw, ...@@ -5751,8 +5753,10 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
bbio->mirror_num = map->num_stripes + 1; bbio->mirror_num = map->num_stripes + 1;
} }
out: out:
if (dev_replace_is_ongoing) if (dev_replace_is_ongoing) {
btrfs_dev_replace_unlock(dev_replace); btrfs_dev_replace_clear_lock_blocking(dev_replace);
btrfs_dev_replace_unlock(dev_replace, 0);
}
free_extent_map(em); free_extent_map(em);
return ret; return ret;
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment