Commit 7237f183 authored by Yan Zheng's avatar Yan Zheng Committed by Chris Mason

Btrfs: fix tree logs parallel sync

To improve performance, btrfs_sync_log merges tree log sync
requests. But it wrongly merges sync requests for different
tree logs. If multiple tree logs are synced at the same time,
only one of them actually gets synced.

This patch has following changes to fix the bug:

Move most tree log related fields in btrfs_fs_info to
btrfs_root. This allows merging sync requests separately
for each tree log.

Don't insert root item into the log root tree immediately
after log tree is allocated. Root item for log tree is
inserted when log tree get synced for the first time. This
allows syncing the log root tree without first syncing all
log trees.

At tree-log sync, btrfs_sync_log first sync the log tree;
then updates corresponding root item in the log root tree;
sync the log root tree; then update the super block.
Signed-off-by: default avatarYan Zheng <zheng.yan@oracle.com>
parent 7e662854
......@@ -695,9 +695,7 @@ struct btrfs_fs_info {
struct btrfs_transaction *running_transaction;
wait_queue_head_t transaction_throttle;
wait_queue_head_t transaction_wait;
wait_queue_head_t async_submit_wait;
wait_queue_head_t tree_log_wait;
struct btrfs_super_block super_copy;
struct btrfs_super_block super_for_commit;
......@@ -724,10 +722,6 @@ struct btrfs_fs_info {
atomic_t async_submit_draining;
atomic_t nr_async_bios;
atomic_t async_delalloc_pages;
atomic_t tree_log_writers;
atomic_t tree_log_commit;
unsigned long tree_log_batch;
u64 tree_log_transid;
/*
* this is used by the balancing code to wait for all the pending
......@@ -827,7 +821,14 @@ struct btrfs_root {
struct kobject root_kobj;
struct completion kobj_unregister;
struct mutex objectid_mutex;
struct mutex log_mutex;
wait_queue_head_t log_writer_wait;
wait_queue_head_t log_commit_wait[2];
atomic_t log_writers;
atomic_t log_commit[2];
unsigned long log_transid;
unsigned long log_batch;
u64 objectid;
u64 last_trans;
......
......@@ -849,6 +849,14 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize,
spin_lock_init(&root->list_lock);
mutex_init(&root->objectid_mutex);
mutex_init(&root->log_mutex);
init_waitqueue_head(&root->log_writer_wait);
init_waitqueue_head(&root->log_commit_wait[0]);
init_waitqueue_head(&root->log_commit_wait[1]);
atomic_set(&root->log_commit[0], 0);
atomic_set(&root->log_commit[1], 0);
atomic_set(&root->log_writers, 0);
root->log_batch = 0;
root->log_transid = 0;
extent_io_tree_init(&root->dirty_log_pages,
fs_info->btree_inode->i_mapping, GFP_NOFS);
......@@ -933,15 +941,16 @@ int btrfs_free_log_root_tree(struct btrfs_trans_handle *trans,
return 0;
}
int btrfs_init_log_root_tree(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info)
static struct btrfs_root *alloc_log_tree(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info)
{
struct btrfs_root *root;
struct btrfs_root *tree_root = fs_info->tree_root;
struct extent_buffer *leaf;
root = kzalloc(sizeof(*root), GFP_NOFS);
if (!root)
return -ENOMEM;
return ERR_PTR(-ENOMEM);
__setup_root(tree_root->nodesize, tree_root->leafsize,
tree_root->sectorsize, tree_root->stripesize,
......@@ -950,12 +959,23 @@ int btrfs_init_log_root_tree(struct btrfs_trans_handle *trans,
root->root_key.objectid = BTRFS_TREE_LOG_OBJECTID;
root->root_key.type = BTRFS_ROOT_ITEM_KEY;
root->root_key.offset = BTRFS_TREE_LOG_OBJECTID;
/*
* log trees do not get reference counted because they go away
* before a real commit is actually done. They do store pointers
* to file data extents, and those reference counts still get
* updated (along with back refs to the log tree).
*/
root->ref_cows = 0;
root->node = btrfs_alloc_free_block(trans, root, root->leafsize,
0, BTRFS_TREE_LOG_OBJECTID,
trans->transid, 0, 0, 0);
leaf = btrfs_alloc_free_block(trans, root, root->leafsize,
0, BTRFS_TREE_LOG_OBJECTID,
trans->transid, 0, 0, 0);
if (IS_ERR(leaf)) {
kfree(root);
return ERR_CAST(leaf);
}
root->node = leaf;
btrfs_set_header_nritems(root->node, 0);
btrfs_set_header_level(root->node, 0);
btrfs_set_header_bytenr(root->node, root->node->start);
......@@ -967,7 +987,48 @@ int btrfs_init_log_root_tree(struct btrfs_trans_handle *trans,
BTRFS_FSID_SIZE);
btrfs_mark_buffer_dirty(root->node);
btrfs_tree_unlock(root->node);
fs_info->log_root_tree = root;
return root;
}
int btrfs_init_log_root_tree(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info)
{
struct btrfs_root *log_root;
log_root = alloc_log_tree(trans, fs_info);
if (IS_ERR(log_root))
return PTR_ERR(log_root);
WARN_ON(fs_info->log_root_tree);
fs_info->log_root_tree = log_root;
return 0;
}
int btrfs_add_log_tree(struct btrfs_trans_handle *trans,
struct btrfs_root *root)
{
struct btrfs_root *log_root;
struct btrfs_inode_item *inode_item;
log_root = alloc_log_tree(trans, root->fs_info);
if (IS_ERR(log_root))
return PTR_ERR(log_root);
log_root->last_trans = trans->transid;
log_root->root_key.offset = root->root_key.objectid;
inode_item = &log_root->root_item.inode;
inode_item->generation = cpu_to_le64(1);
inode_item->size = cpu_to_le64(3);
inode_item->nlink = cpu_to_le32(1);
inode_item->nbytes = cpu_to_le64(root->leafsize);
inode_item->mode = cpu_to_le32(S_IFDIR | 0755);
btrfs_set_root_bytenr(&log_root->root_item, log_root->node->start);
btrfs_set_root_generation(&log_root->root_item, trans->transid);
WARN_ON(root->log_root);
root->log_root = log_root;
root->log_transid = 0;
return 0;
}
......@@ -1530,10 +1591,6 @@ struct btrfs_root *open_ctree(struct super_block *sb,
init_waitqueue_head(&fs_info->transaction_throttle);
init_waitqueue_head(&fs_info->transaction_wait);
init_waitqueue_head(&fs_info->async_submit_wait);
init_waitqueue_head(&fs_info->tree_log_wait);
atomic_set(&fs_info->tree_log_commit, 0);
atomic_set(&fs_info->tree_log_writers, 0);
fs_info->tree_log_transid = 0;
__setup_root(4096, 4096, 4096, 4096, tree_root,
fs_info, BTRFS_ROOT_TREE_OBJECTID);
......
......@@ -98,5 +98,7 @@ int btrfs_free_log_root_tree(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info);
int btrfs_init_log_root_tree(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info);
int btrfs_add_log_tree(struct btrfs_trans_handle *trans,
struct btrfs_root *root);
int btree_lock_page_hook(struct page *page);
#endif
......@@ -2698,13 +2698,9 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
/* if metadata always pin */
if (owner_objectid < BTRFS_FIRST_FREE_OBJECTID) {
if (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID) {
struct btrfs_block_group_cache *cache;
/* btrfs_free_reserved_extent */
cache = btrfs_lookup_block_group(root->fs_info, bytenr);
BUG_ON(!cache);
btrfs_add_free_space(cache, bytenr, num_bytes);
put_block_group(cache);
mutex_lock(&root->fs_info->pinned_mutex);
btrfs_update_pinned_extents(root, bytenr, num_bytes, 1);
mutex_unlock(&root->fs_info->pinned_mutex);
update_reserved_extents(root, bytenr, num_bytes, 0);
return 0;
}
......
......@@ -1214,10 +1214,10 @@ int btrfs_sync_file(struct file *file, struct dentry *dentry, int datasync)
}
mutex_unlock(&root->fs_info->trans_mutex);
root->fs_info->tree_log_batch++;
root->log_batch++;
filemap_fdatawrite(inode->i_mapping);
btrfs_wait_ordered_range(inode, 0, (u64)-1);
root->fs_info->tree_log_batch++;
root->log_batch++;
/*
* ok we haven't committed the transaction yet, lets do a commit
......
This diff is collapsed.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment