Commit a4abeea4 authored by Josef Bacik

Btrfs: kill trans_mutex

We use trans_mutex for lots of things; here's a basic list:

1) To serialize trans_handles joining the currently running transaction
2) To make sure that no new trans handles are started while we are committing
3) To protect the dead_roots list and the transaction lists

Really, serializing trans_handles joining the transaction is not too hard, yet
it can get bogged down in acquiring a reference to the transaction.  So replace
the trans_mutex with a trans_lock spinlock and use it to do the following:

1) Protect fs_info->running_transaction.  All trans handles have to do is check
this, and then take a reference of the transaction and keep on going.
2) Protect the fs_info->trans_list.  This doesn't get used too much, basically
it just holds the current transactions, which will usually just be the currently
committing transaction and the currently running transaction at most.
3) Protect the dead roots list.  This is only ever processed by splicing the
list so this is relatively simple.
4) Protect the fs_info->reloc_ctl stuff.  This is very lightweight and was using
the trans_mutex before, so this is a pretty straightforward change.
5) Protect fs_info->trans_no_join.  Because we don't hold the trans_lock over
the entirety of the commit we need to have a way to block new people from
creating a new transaction while we're doing our work.  So we set trans_no_join
and in join_transaction we test to see if that is set, and if it is we do a
wait_on_commit.
6) Make the transaction use count atomic so we don't need to take locks to
modify it when we're dropping references.
7) Add a commit_lock to the transaction to make sure multiple people trying to
commit the same transaction don't race and commit at the same time.
8) Make open_ioctl_trans an atomic so we don't have to take any locks for ioctl
trans.

I have tested this with xfstests, but obviously it is a pretty hairy change so
lots of testing is greatly appreciated.  Thanks,
Signed-off-by: Josef Bacik <josef@redhat.com>
parent 2a1eb461
......@@ -919,7 +919,6 @@ struct btrfs_fs_info {
* is required instead of the faster short fsync log commits
*/
u64 last_trans_log_full_commit;
u64 open_ioctl_trans;
unsigned long mount_opt:20;
unsigned long compress_type:4;
u64 max_inline;
......@@ -936,7 +935,6 @@ struct btrfs_fs_info {
struct super_block *sb;
struct inode *btree_inode;
struct backing_dev_info bdi;
struct mutex trans_mutex;
struct mutex tree_log_mutex;
struct mutex transaction_kthread_mutex;
struct mutex cleaner_mutex;
......@@ -957,6 +955,7 @@ struct btrfs_fs_info {
struct rw_semaphore subvol_sem;
struct srcu_struct subvol_srcu;
spinlock_t trans_lock;
struct list_head trans_list;
struct list_head hashers;
struct list_head dead_roots;
......@@ -969,6 +968,7 @@ struct btrfs_fs_info {
atomic_t async_submit_draining;
atomic_t nr_async_bios;
atomic_t async_delalloc_pages;
atomic_t open_ioctl_trans;
/*
* this is used by the balancing code to wait for all the pending
......@@ -1032,6 +1032,7 @@ struct btrfs_fs_info {
int closing;
int log_root_recovering;
int enospc_unlink;
int trans_no_join;
u64 total_pinned;
......@@ -1053,7 +1054,6 @@ struct btrfs_fs_info {
struct reloc_control *reloc_ctl;
spinlock_t delalloc_lock;
spinlock_t new_trans_lock;
u64 delalloc_bytes;
/* data_alloc_cluster is only used in ssd mode */
......
......@@ -1551,22 +1551,22 @@ static int transaction_kthread(void *arg)
vfs_check_frozen(root->fs_info->sb, SB_FREEZE_WRITE);
mutex_lock(&root->fs_info->transaction_kthread_mutex);
spin_lock(&root->fs_info->new_trans_lock);
spin_lock(&root->fs_info->trans_lock);
cur = root->fs_info->running_transaction;
if (!cur) {
spin_unlock(&root->fs_info->new_trans_lock);
spin_unlock(&root->fs_info->trans_lock);
goto sleep;
}
now = get_seconds();
if (!cur->blocked &&
(now < cur->start_time || now - cur->start_time < 30)) {
spin_unlock(&root->fs_info->new_trans_lock);
spin_unlock(&root->fs_info->trans_lock);
delay = HZ * 5;
goto sleep;
}
transid = cur->transid;
spin_unlock(&root->fs_info->new_trans_lock);
spin_unlock(&root->fs_info->trans_lock);
trans = btrfs_join_transaction(root);
BUG_ON(IS_ERR(trans));
......@@ -1658,7 +1658,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
INIT_LIST_HEAD(&fs_info->ordered_operations);
INIT_LIST_HEAD(&fs_info->caching_block_groups);
spin_lock_init(&fs_info->delalloc_lock);
spin_lock_init(&fs_info->new_trans_lock);
spin_lock_init(&fs_info->trans_lock);
spin_lock_init(&fs_info->ref_cache_lock);
spin_lock_init(&fs_info->fs_roots_radix_lock);
spin_lock_init(&fs_info->delayed_iput_lock);
......@@ -1687,6 +1687,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
fs_info->sb = sb;
fs_info->max_inline = 8192 * 1024;
fs_info->metadata_ratio = 0;
fs_info->trans_no_join = 0;
fs_info->thread_pool_size = min_t(unsigned long,
num_online_cpus() + 2, 8);
......@@ -1735,7 +1736,6 @@ struct btrfs_root *open_ctree(struct super_block *sb,
fs_info->do_barriers = 1;
mutex_init(&fs_info->trans_mutex);
mutex_init(&fs_info->ordered_operations_mutex);
mutex_init(&fs_info->tree_log_mutex);
mutex_init(&fs_info->chunk_mutex);
......@@ -3006,10 +3006,13 @@ static int btrfs_cleanup_transaction(struct btrfs_root *root)
WARN_ON(1);
mutex_lock(&root->fs_info->trans_mutex);
mutex_lock(&root->fs_info->transaction_kthread_mutex);
spin_lock(&root->fs_info->trans_lock);
list_splice_init(&root->fs_info->trans_list, &list);
root->fs_info->trans_no_join = 1;
spin_unlock(&root->fs_info->trans_lock);
while (!list_empty(&list)) {
t = list_entry(list.next, struct btrfs_transaction, list);
if (!t)
......@@ -3034,23 +3037,18 @@ static int btrfs_cleanup_transaction(struct btrfs_root *root)
t->blocked = 0;
if (waitqueue_active(&root->fs_info->transaction_wait))
wake_up(&root->fs_info->transaction_wait);
mutex_unlock(&root->fs_info->trans_mutex);
mutex_lock(&root->fs_info->trans_mutex);
t->commit_done = 1;
if (waitqueue_active(&t->commit_wait))
wake_up(&t->commit_wait);
mutex_unlock(&root->fs_info->trans_mutex);
mutex_lock(&root->fs_info->trans_mutex);
btrfs_destroy_pending_snapshots(t);
btrfs_destroy_delalloc_inodes(root);
spin_lock(&root->fs_info->new_trans_lock);
spin_lock(&root->fs_info->trans_lock);
root->fs_info->running_transaction = NULL;
spin_unlock(&root->fs_info->new_trans_lock);
spin_unlock(&root->fs_info->trans_lock);
btrfs_destroy_marked_extents(root, &t->dirty_pages,
EXTENT_DIRTY);
......@@ -3064,8 +3062,10 @@ static int btrfs_cleanup_transaction(struct btrfs_root *root)
kmem_cache_free(btrfs_transaction_cachep, t);
}
spin_lock(&root->fs_info->trans_lock);
root->fs_info->trans_no_join = 0;
spin_unlock(&root->fs_info->trans_lock);
mutex_unlock(&root->fs_info->transaction_kthread_mutex);
mutex_unlock(&root->fs_info->trans_mutex);
return 0;
}
......
......@@ -3200,7 +3200,8 @@ int btrfs_check_data_free_space(struct inode *inode, u64 bytes)
/* commit the current transaction and try again */
commit_trans:
if (!committed && !root->fs_info->open_ioctl_trans) {
if (!committed &&
!atomic_read(&root->fs_info->open_ioctl_trans)) {
committed = 1;
trans = btrfs_join_transaction(root);
if (IS_ERR(trans))
......
......@@ -1222,14 +1222,12 @@ int btrfs_sync_file(struct file *file, int datasync)
* the current transaction, we can bail out now without any
* syncing
*/
mutex_lock(&root->fs_info->trans_mutex);
smp_mb();
if (BTRFS_I(inode)->last_trans <=
root->fs_info->last_trans_committed) {
BTRFS_I(inode)->last_trans = 0;
mutex_unlock(&root->fs_info->trans_mutex);
goto out;
}
mutex_unlock(&root->fs_info->trans_mutex);
/*
* ok we haven't committed the transaction yet, lets do a commit
......
......@@ -2177,9 +2177,7 @@ static long btrfs_ioctl_trans_start(struct file *file)
if (ret)
goto out;
mutex_lock(&root->fs_info->trans_mutex);
root->fs_info->open_ioctl_trans++;
mutex_unlock(&root->fs_info->trans_mutex);
atomic_inc(&root->fs_info->open_ioctl_trans);
ret = -ENOMEM;
trans = btrfs_start_ioctl_transaction(root);
......@@ -2190,9 +2188,7 @@ static long btrfs_ioctl_trans_start(struct file *file)
return 0;
out_drop:
mutex_lock(&root->fs_info->trans_mutex);
root->fs_info->open_ioctl_trans--;
mutex_unlock(&root->fs_info->trans_mutex);
atomic_dec(&root->fs_info->open_ioctl_trans);
mnt_drop_write(file->f_path.mnt);
out:
return ret;
......@@ -2426,9 +2422,7 @@ long btrfs_ioctl_trans_end(struct file *file)
btrfs_end_transaction(trans, root);
mutex_lock(&root->fs_info->trans_mutex);
root->fs_info->open_ioctl_trans--;
mutex_unlock(&root->fs_info->trans_mutex);
atomic_dec(&root->fs_info->open_ioctl_trans);
mnt_drop_write(file->f_path.mnt);
return 0;
......
......@@ -2136,10 +2136,10 @@ int prepare_to_merge(struct reloc_control *rc, int err)
u64 num_bytes = 0;
int ret;
mutex_lock(&root->fs_info->trans_mutex);
spin_lock(&root->fs_info->trans_lock);
rc->merging_rsv_size += root->nodesize * (BTRFS_MAX_LEVEL - 1) * 2;
rc->merging_rsv_size += rc->nodes_relocated * 2;
mutex_unlock(&root->fs_info->trans_mutex);
spin_unlock(&root->fs_info->trans_lock);
again:
if (!err) {
num_bytes = rc->merging_rsv_size;
......@@ -2208,9 +2208,9 @@ int merge_reloc_roots(struct reloc_control *rc)
int ret;
again:
root = rc->extent_root;
mutex_lock(&root->fs_info->trans_mutex);
spin_lock(&root->fs_info->trans_lock);
list_splice_init(&rc->reloc_roots, &reloc_roots);
mutex_unlock(&root->fs_info->trans_mutex);
spin_unlock(&root->fs_info->trans_lock);
while (!list_empty(&reloc_roots)) {
found = 1;
......@@ -3583,17 +3583,17 @@ int find_next_extent(struct btrfs_trans_handle *trans,
static void set_reloc_control(struct reloc_control *rc)
{
struct btrfs_fs_info *fs_info = rc->extent_root->fs_info;
mutex_lock(&fs_info->trans_mutex);
spin_lock(&fs_info->trans_lock);
fs_info->reloc_ctl = rc;
mutex_unlock(&fs_info->trans_mutex);
spin_unlock(&fs_info->trans_lock);
}
static void unset_reloc_control(struct reloc_control *rc)
{
struct btrfs_fs_info *fs_info = rc->extent_root->fs_info;
mutex_lock(&fs_info->trans_mutex);
spin_lock(&fs_info->trans_lock);
fs_info->reloc_ctl = NULL;
mutex_unlock(&fs_info->trans_mutex);
spin_unlock(&fs_info->trans_lock);
}
static int check_extent_flags(u64 flags)
......
This diff is collapsed.
......@@ -28,10 +28,12 @@ struct btrfs_transaction {
* transaction can end
*/
atomic_t num_writers;
atomic_t use_count;
unsigned long num_joined;
spinlock_t commit_lock;
int in_commit;
atomic_t use_count;
int commit_done;
int blocked;
struct list_head list;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment