Commit f87f057b authored by Chris Mason

Btrfs: Improve and cleanup locking done by walk_down_tree

While dropping snapshots, walk_down_tree does most of the work of checking
reference counts and limiting tree traversal to just the blocks that
we are freeing.

It dropped and held the allocation mutex in strange and confusing ways;
this commit changes it to only hold the mutex while actually freeing a block.

The rest of the checks around reference counts should be safe without the lock
because we only allow one process in btrfs_drop_snapshot at a time.  Other
processes dropping reference counts should not drop it to 1 because
their tree roots already have an extra ref on the block.
Signed-off-by: Chris Mason <chris.mason@oracle.com>
parent 492bb6de
...@@ -2333,8 +2333,6 @@ static int noinline drop_leaf_ref_no_cache(struct btrfs_trans_handle *trans, ...@@ -2333,8 +2333,6 @@ static int noinline drop_leaf_ref_no_cache(struct btrfs_trans_handle *trans,
leaf_owner = btrfs_header_owner(leaf); leaf_owner = btrfs_header_owner(leaf);
leaf_generation = btrfs_header_generation(leaf); leaf_generation = btrfs_header_generation(leaf);
mutex_unlock(&root->fs_info->alloc_mutex);
for (i = 0; i < nritems; i++) { for (i = 0; i < nritems; i++) {
u64 disk_bytenr; u64 disk_bytenr;
cond_resched(); cond_resched();
...@@ -2362,8 +2360,6 @@ static int noinline drop_leaf_ref_no_cache(struct btrfs_trans_handle *trans, ...@@ -2362,8 +2360,6 @@ static int noinline drop_leaf_ref_no_cache(struct btrfs_trans_handle *trans,
mutex_unlock(&root->fs_info->alloc_mutex); mutex_unlock(&root->fs_info->alloc_mutex);
BUG_ON(ret); BUG_ON(ret);
} }
mutex_lock(&root->fs_info->alloc_mutex);
return 0; return 0;
} }
...@@ -2375,7 +2371,6 @@ static int noinline drop_leaf_ref(struct btrfs_trans_handle *trans, ...@@ -2375,7 +2371,6 @@ static int noinline drop_leaf_ref(struct btrfs_trans_handle *trans,
int ret; int ret;
struct btrfs_extent_info *info = ref->extents; struct btrfs_extent_info *info = ref->extents;
mutex_unlock(&root->fs_info->alloc_mutex);
for (i = 0; i < ref->nritems; i++) { for (i = 0; i < ref->nritems; i++) {
mutex_lock(&root->fs_info->alloc_mutex); mutex_lock(&root->fs_info->alloc_mutex);
ret = __btrfs_free_extent(trans, root, ret = __btrfs_free_extent(trans, root,
...@@ -2386,7 +2381,6 @@ static int noinline drop_leaf_ref(struct btrfs_trans_handle *trans, ...@@ -2386,7 +2381,6 @@ static int noinline drop_leaf_ref(struct btrfs_trans_handle *trans,
BUG_ON(ret); BUG_ON(ret);
info++; info++;
} }
mutex_lock(&root->fs_info->alloc_mutex);
return 0; return 0;
} }
...@@ -2440,10 +2434,39 @@ int drop_snap_lookup_refcount(struct btrfs_root *root, u64 start, u64 len, ...@@ -2440,10 +2434,39 @@ int drop_snap_lookup_refcount(struct btrfs_root *root, u64 start, u64 len,
u32 *refs) u32 *refs)
{ {
int ret; int ret;
mutex_unlock(&root->fs_info->alloc_mutex);
ret = lookup_extent_ref(NULL, root, start, len, refs); ret = lookup_extent_ref(NULL, root, start, len, refs);
BUG_ON(ret);
#if 0 // some debugging code in case we see problems here
/* if the refs count is one, it won't get increased again. But
* if the ref count is > 1, someone may be decreasing it at
* the same time we are.
*/
if (*refs != 1) {
struct extent_buffer *eb = NULL;
eb = btrfs_find_create_tree_block(root, start, len);
if (eb)
btrfs_tree_lock(eb);
mutex_lock(&root->fs_info->alloc_mutex);
ret = lookup_extent_ref(NULL, root, start, len, refs);
BUG_ON(ret);
mutex_unlock(&root->fs_info->alloc_mutex);
if (eb) {
btrfs_tree_unlock(eb);
free_extent_buffer(eb);
}
if (*refs == 1) {
printk("block %llu went down to one during drop_snap\n",
(unsigned long long)start);
}
}
#endif
cond_resched(); cond_resched();
mutex_lock(&root->fs_info->alloc_mutex);
return ret; return ret;
} }
...@@ -2467,8 +2490,6 @@ static int noinline walk_down_tree(struct btrfs_trans_handle *trans, ...@@ -2467,8 +2490,6 @@ static int noinline walk_down_tree(struct btrfs_trans_handle *trans,
int ret; int ret;
u32 refs; u32 refs;
mutex_lock(&root->fs_info->alloc_mutex);
WARN_ON(*level < 0); WARN_ON(*level < 0);
WARN_ON(*level >= BTRFS_MAX_LEVEL); WARN_ON(*level >= BTRFS_MAX_LEVEL);
ret = drop_snap_lookup_refcount(root, path->nodes[*level]->start, ret = drop_snap_lookup_refcount(root, path->nodes[*level]->start,
...@@ -2507,13 +2528,21 @@ static int noinline walk_down_tree(struct btrfs_trans_handle *trans, ...@@ -2507,13 +2528,21 @@ static int noinline walk_down_tree(struct btrfs_trans_handle *trans,
root_owner = btrfs_header_owner(parent); root_owner = btrfs_header_owner(parent);
root_gen = btrfs_header_generation(parent); root_gen = btrfs_header_generation(parent);
path->slots[*level]++; path->slots[*level]++;
mutex_lock(&root->fs_info->alloc_mutex);
ret = __btrfs_free_extent(trans, root, bytenr, ret = __btrfs_free_extent(trans, root, bytenr,
blocksize, root_owner, blocksize, root_owner,
root_gen, 0, 0, 1); root_gen, 0, 0, 1);
BUG_ON(ret); BUG_ON(ret);
mutex_unlock(&root->fs_info->alloc_mutex);
continue; continue;
} }
/*
* at this point, we have a single ref, and since the
* only place referencing this extent is a dead root
* the reference count should never go higher.
* So, we don't need to check it again
*/
if (*level == 1) { if (*level == 1) {
struct btrfs_key key; struct btrfs_key key;
btrfs_node_key_to_cpu(cur, &key, path->slots[*level]); btrfs_node_key_to_cpu(cur, &key, path->slots[*level]);
...@@ -2533,33 +2562,23 @@ static int noinline walk_down_tree(struct btrfs_trans_handle *trans, ...@@ -2533,33 +2562,23 @@ static int noinline walk_down_tree(struct btrfs_trans_handle *trans,
next = btrfs_find_tree_block(root, bytenr, blocksize); next = btrfs_find_tree_block(root, bytenr, blocksize);
if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) { if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
free_extent_buffer(next); free_extent_buffer(next);
mutex_unlock(&root->fs_info->alloc_mutex);
if (path->slots[*level] == 0) if (path->slots[*level] == 0)
reada_walk_down(root, cur, path->slots[*level]); reada_walk_down(root, cur, path->slots[*level]);
next = read_tree_block(root, bytenr, blocksize, next = read_tree_block(root, bytenr, blocksize,
ptr_gen); ptr_gen);
cond_resched(); cond_resched();
mutex_lock(&root->fs_info->alloc_mutex); #if 0
/*
/* we've dropped the lock, double check */ * this is a debugging check and can go away
* the ref should never go all the way down to 1
* at this point
*/
ret = lookup_extent_ref(NULL, root, bytenr, blocksize, ret = lookup_extent_ref(NULL, root, bytenr, blocksize,
&refs); &refs);
BUG_ON(ret); BUG_ON(ret);
if (refs != 1) { WARN_ON(refs != 1);
parent = path->nodes[*level]; #endif
root_owner = btrfs_header_owner(parent);
root_gen = btrfs_header_generation(parent);
path->slots[*level]++;
free_extent_buffer(next);
ret = __btrfs_free_extent(trans, root, bytenr,
blocksize,
root_owner,
root_gen, 0, 0, 1);
BUG_ON(ret);
continue;
}
} }
WARN_ON(*level <= 0); WARN_ON(*level <= 0);
if (path->nodes[*level-1]) if (path->nodes[*level-1])
...@@ -2584,6 +2603,8 @@ static int noinline walk_down_tree(struct btrfs_trans_handle *trans, ...@@ -2584,6 +2603,8 @@ static int noinline walk_down_tree(struct btrfs_trans_handle *trans,
root_owner = btrfs_header_owner(parent); root_owner = btrfs_header_owner(parent);
root_gen = btrfs_header_generation(parent); root_gen = btrfs_header_generation(parent);
mutex_lock(&root->fs_info->alloc_mutex);
ret = __btrfs_free_extent(trans, root, bytenr, blocksize, ret = __btrfs_free_extent(trans, root, bytenr, blocksize,
root_owner, root_gen, 0, 0, 1); root_owner, root_gen, 0, 0, 1);
free_extent_buffer(path->nodes[*level]); free_extent_buffer(path->nodes[*level]);
...@@ -2591,6 +2612,7 @@ static int noinline walk_down_tree(struct btrfs_trans_handle *trans, ...@@ -2591,6 +2612,7 @@ static int noinline walk_down_tree(struct btrfs_trans_handle *trans,
*level += 1; *level += 1;
BUG_ON(ret); BUG_ON(ret);
mutex_unlock(&root->fs_info->alloc_mutex); mutex_unlock(&root->fs_info->alloc_mutex);
cond_resched(); cond_resched();
return 0; return 0;
} }
...@@ -2834,6 +2856,11 @@ static int noinline relocate_inode_pages(struct inode *inode, u64 start, ...@@ -2834,6 +2856,11 @@ static int noinline relocate_inode_pages(struct inode *inode, u64 start,
} }
set_page_extent_mapped(page); set_page_extent_mapped(page);
/*
* make sure page_mkwrite is called for this page if userland
* wants to change it from mmap
*/
clear_page_dirty_for_io(page);
set_extent_delalloc(io_tree, page_start, set_extent_delalloc(io_tree, page_start,
page_end, GFP_NOFS); page_end, GFP_NOFS);
......
...@@ -338,6 +338,13 @@ static int noinline dirty_and_release_pages(struct btrfs_trans_handle *trans, ...@@ -338,6 +338,13 @@ static int noinline dirty_and_release_pages(struct btrfs_trans_handle *trans,
btrfs_drop_extent_cache(inode, start_pos, aligned_end - 1); btrfs_drop_extent_cache(inode, start_pos, aligned_end - 1);
BUG_ON(err); BUG_ON(err);
mutex_unlock(&BTRFS_I(inode)->extent_mutex); mutex_unlock(&BTRFS_I(inode)->extent_mutex);
/*
* an ugly way to do all the prop accounting around
* the page bits and mapping tags
*/
set_page_writeback(pages[0]);
end_page_writeback(pages[0]);
did_inline = 1; did_inline = 1;
} }
if (end_pos > isize) { if (end_pos > isize) {
...@@ -833,11 +840,7 @@ static int prepare_pages(struct btrfs_root *root, struct file *file, ...@@ -833,11 +840,7 @@ static int prepare_pages(struct btrfs_root *root, struct file *file,
start_pos, last_pos - 1, GFP_NOFS); start_pos, last_pos - 1, GFP_NOFS);
} }
for (i = 0; i < num_pages; i++) { for (i = 0; i < num_pages; i++) {
#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18) clear_page_dirty_for_io(pages[i]);
ClearPageDirty(pages[i]);
#else
cancel_dirty_page(pages[i], PAGE_CACHE_SIZE);
#endif
set_page_extent_mapped(pages[i]); set_page_extent_mapped(pages[i]);
WARN_ON(!PageLocked(pages[i])); WARN_ON(!PageLocked(pages[i]));
} }
......
...@@ -268,6 +268,12 @@ int btrfs_defrag_file(struct file *file) ...@@ -268,6 +268,12 @@ int btrfs_defrag_file(struct file *file)
} }
set_page_extent_mapped(page); set_page_extent_mapped(page);
/*
* this makes sure page_mkwrite is called on the
* page if it is dirtied again later
*/
clear_page_dirty_for_io(page);
set_extent_delalloc(io_tree, page_start, set_extent_delalloc(io_tree, page_start,
page_end, GFP_NOFS); page_end, GFP_NOFS);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment