Commit 034f784d authored by Josef Bacik, committed by David Sterba

btrfs: replace cleaner_delayed_iput_mutex with a waitqueue

The throttle path doesn't take cleaner_delayed_iput_mutex, which means
the data space reservation path can conclude that all iputs have been
flushed while a throttler is still doing one.  There's no real reason
to serialize the delayed iput flushing, so instead of taking the
cleaner_delayed_iput_mutex whenever we flush the delayed iputs, just
replace it with an atomic counter and a waitqueue.  This removes the
window (short, or long depending on how big the inode is) where we
think there are no more pending iputs when there really are some.

The wait is killable, as it can be reached indirectly from user
operations like fallocate or zero-range. Such call sites should handle
the error, but otherwise it's not necessary; e.g. flush_space just
needs to attempt to make space by waiting on iputs.
Signed-off-by: Josef Bacik <josef@toxicpanda.com>
[ add killable comment and changelog parts ]
Signed-off-by: David Sterba <dsterba@suse.com>
parent 3ece54e5
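The changelog above describes a general pattern: replace a serializing
lock with an atomic pending-work counter plus a waitqueue. Below is a
minimal userspace C sketch of that pattern (not part of the commit); a
pthread condition variable stands in for the kernel waitqueue, and all
names (nr_pending, all_done, and the three functions) are hypothetical.

#include <pthread.h>
#include <stdatomic.h>

static atomic_int nr_pending;	/* plays the role of fs_info->nr_delayed_iputs */
static pthread_mutex_t wait_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t all_done = PTHREAD_COND_INITIALIZER; /* plays the role of delayed_iputs_wait */

/* Producer side: bump the counter before queuing, as btrfs_add_delayed_iput() does. */
static void add_pending(void)
{
	atomic_fetch_add(&nr_pending, 1);
	/* ... queue the item on a list under its own lock ... */
}

/* Worker side: mirrors the loop in btrfs_run_delayed_iputs(). */
static void complete_one(void)
{
	/* ... perform the deferred work (the iput) ... */
	if (atomic_fetch_sub(&nr_pending, 1) == 1) {
		/* Counter hit zero: wake all waiters (kernel: wake_up()). */
		pthread_mutex_lock(&wait_lock);
		pthread_cond_broadcast(&all_done);
		pthread_mutex_unlock(&wait_lock);
	}
}

/* Waiter side: mirrors btrfs_wait_on_delayed_iputs(), minus killability. */
static void wait_until_drained(void)
{
	pthread_mutex_lock(&wait_lock);
	while (atomic_load(&nr_pending) != 0)
		pthread_cond_wait(&all_done, &wait_lock);
	pthread_mutex_unlock(&wait_lock);
}

Taking wait_lock around the broadcast closes the race between the
waiter's counter check and its sleep; in the kernel, wait_event() and
wake_up() handle that ordering internally. Because producers increment
before queuing, a waiter that sees zero knows nothing is pending, which
is exactly the guarantee the old mutex failed to give for the throttle
path.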
@@ -934,7 +934,8 @@ struct btrfs_fs_info {
 	spinlock_t delayed_iput_lock;
 	struct list_head delayed_iputs;
-	struct mutex cleaner_delayed_iput_mutex;
+	atomic_t nr_delayed_iputs;
+	wait_queue_head_t delayed_iputs_wait;
 
 	/* this protects tree_mod_seq_list */
 	spinlock_t tree_mod_seq_lock;
@@ -3282,6 +3283,7 @@ int btrfs_orphan_cleanup(struct btrfs_root *root);
 int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size);
 void btrfs_add_delayed_iput(struct inode *inode);
 void btrfs_run_delayed_iputs(struct btrfs_fs_info *fs_info);
+int btrfs_wait_on_delayed_iputs(struct btrfs_fs_info *fs_info);
 int btrfs_prealloc_file_range(struct inode *inode, int mode,
 			      u64 start, u64 num_bytes, u64 min_size,
 			      loff_t actual_len, u64 *alloc_hint);
...
@@ -1717,9 +1717,7 @@ static int cleaner_kthread(void *arg)
 			goto sleep;
 		}
 
-		mutex_lock(&fs_info->cleaner_delayed_iput_mutex);
 		btrfs_run_delayed_iputs(fs_info);
-		mutex_unlock(&fs_info->cleaner_delayed_iput_mutex);
 
 		again = btrfs_clean_one_deleted_snapshot(root);
 		mutex_unlock(&fs_info->cleaner_mutex);
@@ -2676,7 +2674,6 @@ int open_ctree(struct super_block *sb,
 	mutex_init(&fs_info->delete_unused_bgs_mutex);
 	mutex_init(&fs_info->reloc_mutex);
 	mutex_init(&fs_info->delalloc_root_mutex);
-	mutex_init(&fs_info->cleaner_delayed_iput_mutex);
 	seqlock_init(&fs_info->profiles_lock);
 
 	INIT_LIST_HEAD(&fs_info->dirty_cowonly_roots);
@@ -2698,6 +2695,7 @@ int open_ctree(struct super_block *sb,
 	atomic_set(&fs_info->defrag_running, 0);
 	atomic_set(&fs_info->qgroup_op_seq, 0);
 	atomic_set(&fs_info->reada_works_cnt, 0);
+	atomic_set(&fs_info->nr_delayed_iputs, 0);
 	atomic64_set(&fs_info->tree_mod_seq, 0);
 	fs_info->sb = sb;
 	fs_info->max_inline = BTRFS_DEFAULT_MAX_INLINE;
@@ -2775,6 +2773,7 @@ int open_ctree(struct super_block *sb,
 	init_waitqueue_head(&fs_info->transaction_wait);
 	init_waitqueue_head(&fs_info->transaction_blocked_wait);
 	init_waitqueue_head(&fs_info->async_submit_wait);
+	init_waitqueue_head(&fs_info->delayed_iputs_wait);
 
 	INIT_LIST_HEAD(&fs_info->pinned_chunks);
...
@@ -4279,10 +4279,14 @@ int btrfs_alloc_data_chunk_ondemand(struct btrfs_inode *inode, u64 bytes)
 			/*
 			 * The cleaner kthread might still be doing iput
 			 * operations. Wait for it to finish so that
-			 * more space is released.
+			 * more space is released. We don't need to
+			 * explicitly run the delayed iputs here because
+			 * the commit_transaction would have woken up
+			 * the cleaner.
 			 */
-			mutex_lock(&fs_info->cleaner_delayed_iput_mutex);
-			mutex_unlock(&fs_info->cleaner_delayed_iput_mutex);
+			ret = btrfs_wait_on_delayed_iputs(fs_info);
+			if (ret)
+				return ret;
 			goto again;
 		} else {
 			btrfs_end_transaction(trans);
@@ -4967,9 +4971,8 @@ static void flush_space(struct btrfs_fs_info *fs_info,
 		 * bunch of pinned space, so make sure we run the iputs before
 		 * we do our pinned bytes check below.
 		 */
-		mutex_lock(&fs_info->cleaner_delayed_iput_mutex);
 		btrfs_run_delayed_iputs(fs_info);
-		mutex_unlock(&fs_info->cleaner_delayed_iput_mutex);
+		btrfs_wait_on_delayed_iputs(fs_info);
 
 		ret = may_commit_transaction(fs_info, space_info);
 		break;
...
@@ -3256,6 +3256,7 @@ void btrfs_add_delayed_iput(struct inode *inode)
 	if (atomic_add_unless(&inode->i_count, -1, 1))
 		return;
 
+	atomic_inc(&fs_info->nr_delayed_iputs);
 	spin_lock(&fs_info->delayed_iput_lock);
 	ASSERT(list_empty(&binode->delayed_iput));
 	list_add_tail(&binode->delayed_iput, &fs_info->delayed_iputs);
@@ -3276,11 +3277,32 @@ void btrfs_run_delayed_iputs(struct btrfs_fs_info *fs_info)
 		list_del_init(&inode->delayed_iput);
 		spin_unlock(&fs_info->delayed_iput_lock);
 		iput(&inode->vfs_inode);
+		if (atomic_dec_and_test(&fs_info->nr_delayed_iputs))
+			wake_up(&fs_info->delayed_iputs_wait);
 		spin_lock(&fs_info->delayed_iput_lock);
 	}
 	spin_unlock(&fs_info->delayed_iput_lock);
 }
 
+/**
+ * btrfs_wait_on_delayed_iputs - wait on the delayed iputs to be done running
+ * @fs_info - the fs_info for this fs
+ * @return - EINTR if we were killed, 0 if nothing's pending
+ *
+ * This will wait on any delayed iputs that are currently running with KILLABLE
+ * set. Once they are all done running we will return, unless we are killed in
+ * which case we return EINTR. This helps in user operations like fallocate etc
+ * that might get blocked on the iputs.
+ */
+int btrfs_wait_on_delayed_iputs(struct btrfs_fs_info *fs_info)
+{
+	int ret = wait_event_killable(fs_info->delayed_iputs_wait,
+			atomic_read(&fs_info->nr_delayed_iputs) == 0);
+	if (ret)
+		return -EINTR;
+	return 0;
+}
+
 /*
  * This creates an orphan entry for the given inode in case something goes wrong
  * in the middle of an unlink.
...
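For illustration, the two call-site styles the changelog distinguishes
look roughly like this. This is a sketch, not part of the commit; both
callers are hypothetical, and only btrfs_run_delayed_iputs() and
btrfs_wait_on_delayed_iputs() come from the patch.

/* Reached from user operations (fallocate, zero-range): a fatal
 * signal must abort the wait, so propagate -EINTR to the caller. */
static int hypothetical_reserve_path(struct btrfs_fs_info *fs_info)
{
	int ret;

	ret = btrfs_wait_on_delayed_iputs(fs_info);
	if (ret)
		return ret;
	/* ... retry the space reservation ... */
	return 0;
}

/* Best-effort space flushing (like flush_space): run the iputs, then
 * wait; if the wait is interrupted we simply fall through to the
 * checks below, so the return value can be ignored. */
static void hypothetical_flush(struct btrfs_fs_info *fs_info)
{
	btrfs_run_delayed_iputs(fs_info);
	btrfs_wait_on_delayed_iputs(fs_info);
	/* ... pinned bytes check / may_commit_transaction() ... */
}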