Commit 96b5179d authored by Chris Mason

Btrfs: Stop using radix trees for the block group cache
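
Block groups are now tracked in one filesystem-wide extent_map_tree,
fs_info->block_group_cache, instead of separate radix trees for
metadata and data groups.  Each group's block range is tagged with
BLOCK_GROUP_DATA or BLOCK_GROUP_METADATA (aliases for existing extent
bits) plus EXTENT_LOCKED to keep adjacent ranges from merging, dirty
groups carry BLOCK_GROUP_DIRTY instead of a radix tag, and the in-ram
struct btrfs_block_group_cache pointer is stored in the extent state's
private field.  The allocator paths (find_search_start,
find_free_extent) are adjusted to match.

In outline (a sketch paraphrasing the hunks below, not verbatim code
from the tree):

	/* setup, from btrfs_read_block_groups(): tag the group's range
	 * and stash the cache pointer; EXTENT_LOCKED prevents merging */
	set_extent_bits(block_group_cache, found_key.objectid,
			found_key.objectid + found_key.offset - 1,
			bit | EXTENT_LOCKED, GFP_NOFS);
	set_state_private(block_group_cache, found_key.objectid,
			  (u64)cache);

	/* lookup, from btrfs_lookup_block_group(): find the first range
	 * at or after blocknr with either group bit, then recover the
	 * pointer stored above */
	ret = find_first_extent_bit(block_group_cache, blocknr,
				    &start, &end,
				    BLOCK_GROUP_DATA | BLOCK_GROUP_METADATA);
	if (!ret)
		ret = get_state_private(block_group_cache, start, &ptr);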

Signed-off-by: Chris Mason <chris.mason@oracle.com>
parent f510cfec
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -271,8 +271,6 @@ struct btrfs_csum_item {
 } __attribute__ ((__packed__));
 
 /* tag for the radix tree of block groups in ram */
-#define BTRFS_BLOCK_GROUP_DIRTY 0
-#define BTRFS_BLOCK_GROUP_AVAIL 1
 #define BTRFS_BLOCK_GROUP_SIZE (256 * 1024 * 1024)
@@ -285,7 +283,6 @@ struct btrfs_block_group_item {
 struct btrfs_block_group_cache {
 	struct btrfs_key key;
 	struct btrfs_block_group_item item;
-	struct radix_tree_root *radix;
 	u64 first_free;
 	u64 last_alloc;
 	u64 pinned;
@@ -301,10 +298,9 @@ struct btrfs_fs_info {
 	struct radix_tree_root fs_roots_radix;
 	struct radix_tree_root pending_del_radix;
 	struct radix_tree_root pinned_radix;
-	struct radix_tree_root block_group_radix;
-	struct radix_tree_root block_group_data_radix;
 	struct radix_tree_root extent_ins_radix;
 	struct extent_map_tree free_space_cache;
+	struct extent_map_tree block_group_cache;
 	u64 generation;
 	u64 last_trans_committed;
 	struct btrfs_transaction *running_transaction;
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -436,8 +436,6 @@ struct btrfs_root *open_ctree(struct super_block *sb)
 	init_bit_radix(&fs_info->pending_del_radix);
 	init_bit_radix(&fs_info->extent_ins_radix);
 	INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_NOFS);
-	INIT_RADIX_TREE(&fs_info->block_group_radix, GFP_KERNEL);
-	INIT_RADIX_TREE(&fs_info->block_group_data_radix, GFP_KERNEL);
 	INIT_LIST_HEAD(&fs_info->trans_list);
 	INIT_LIST_HEAD(&fs_info->dead_roots);
 	memset(&fs_info->super_kobj, 0, sizeof(fs_info->super_kobj));
@@ -458,6 +456,8 @@ struct btrfs_root *open_ctree(struct super_block *sb)
 			     GFP_NOFS);
 	extent_map_tree_init(&fs_info->free_space_cache,
 			     fs_info->btree_inode->i_mapping, GFP_NOFS);
+	extent_map_tree_init(&fs_info->block_group_cache,
+			     fs_info->btree_inode->i_mapping, GFP_NOFS);
 	fs_info->do_barriers = 1;
 	fs_info->closing = 0;
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -22,6 +22,10 @@
 #include "print-tree.h"
 #include "transaction.h"
 
+#define BLOCK_GROUP_DATA EXTENT_WRITEBACK
+#define BLOCK_GROUP_METADATA EXTENT_UPTODATE
+#define BLOCK_GROUP_DIRTY EXTENT_DIRTY
+
 static int finish_current_insert(struct btrfs_trans_handle *trans, struct
				 btrfs_root *extent_root);
 static int del_pending_extents(struct btrfs_trans_handle *trans, struct
@@ -127,25 +131,31 @@ struct btrfs_block_group_cache *btrfs_lookup_block_group(struct
							 btrfs_fs_info *info,
							 u64 blocknr)
 {
-	struct btrfs_block_group_cache *block_group;
+	struct extent_map_tree *block_group_cache;
+	struct btrfs_block_group_cache *block_group = NULL;
+	u64 ptr;
+	u64 start;
+	u64 end;
 	int ret;
 
-	ret = radix_tree_gang_lookup(&info->block_group_radix,
-				     (void **)&block_group,
-				     blocknr, 1);
+	block_group_cache = &info->block_group_cache;
+	ret = find_first_extent_bit(block_group_cache,
+				    blocknr, &start, &end,
+				    BLOCK_GROUP_DATA | BLOCK_GROUP_METADATA);
 	if (ret) {
-		if (block_group->key.objectid <= blocknr && blocknr <=
-		    block_group->key.objectid + block_group->key.offset)
-			return block_group;
-	}
-	ret = radix_tree_gang_lookup(&info->block_group_data_radix,
-				     (void **)&block_group,
-				     blocknr, 1);
-	if (ret) {
-		if (block_group->key.objectid <= blocknr && blocknr <=
-		    block_group->key.objectid + block_group->key.offset)
-			return block_group;
+		return NULL;
 	}
+	ret = get_state_private(block_group_cache, start, &ptr);
+	if (ret)
+		return NULL;
+
+	block_group = (struct btrfs_block_group_cache *)ptr;
+	if (block_group->key.objectid <= blocknr && blocknr <=
+	    block_group->key.objectid + block_group->key.offset)
+		return block_group;
 	return NULL;
 }
@@ -173,7 +183,7 @@ static u64 find_search_start(struct btrfs_root *root,
		last = end + 1;
		if (end + 1 - start < num)
			continue;
-		if (start + num > cache->key.objectid + cache->key.offset)
+		if (start + num >= cache->key.objectid + cache->key.offset)
			goto new_group;
		return start;
	}
@@ -189,6 +199,7 @@ static u64 find_search_start(struct btrfs_root *root,
	cache = btrfs_find_block_group(root, cache,
				       last + cache->key.offset - 1, data, 0);
	*cache_ret = cache;
+	last = min(cache->key.objectid, last);
	goto again;
@@ -204,30 +215,32 @@ struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root,
						 *hint, u64 search_start,
						 int data, int owner)
 {
-	struct btrfs_block_group_cache *cache[8];
+	struct btrfs_block_group_cache *cache;
+	struct extent_map_tree *block_group_cache;
	struct btrfs_block_group_cache *found_group = NULL;
	struct btrfs_fs_info *info = root->fs_info;
-	struct radix_tree_root *radix;
-	struct radix_tree_root *swap_radix;
	u64 used;
	u64 last = 0;
	u64 hint_last;
-	int i;
+	u64 start;
+	u64 end;
+	u64 free_check;
+	u64 ptr;
+	int bit;
	int ret;
	int full_search = 0;
	int factor = 8;
	int data_swap = 0;
 
+	block_group_cache = &info->block_group_cache;
+
	if (!owner)
		factor = 5;
 
-	if (data) {
-		radix = &info->block_group_data_radix;
-		swap_radix = &info->block_group_radix;
-	} else {
-		radix = &info->block_group_radix;
-		swap_radix = &info->block_group_data_radix;
-	}
+	if (data)
+		bit = BLOCK_GROUP_DATA;
+	else
+		bit = BLOCK_GROUP_METADATA;
 
	if (search_start) {
		struct btrfs_block_group_cache *shint;
@@ -246,12 +259,6 @@ struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root,
		    div_factor(hint->key.offset, factor)) {
			return hint;
		}
-		if (used >= div_factor(hint->key.offset, 8)) {
-			radix_tree_tag_clear(radix,
-					     hint->key.objectid +
-					     hint->key.offset - 1,
-					     BTRFS_BLOCK_GROUP_AVAIL);
-		}
		last = hint->key.offset * 3;
		if (hint->key.objectid >= last)
			last = max(search_start + hint->key.offset - 1,
@@ -267,51 +274,29 @@ struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root,
		last = hint_last;
	}
-	while(1) {
-		ret = radix_tree_gang_lookup_tag(radix, (void **)cache,
-						 last, ARRAY_SIZE(cache),
-						 BTRFS_BLOCK_GROUP_AVAIL);
-		if (!ret)
-			break;
-		for (i = 0; i < ret; i++) {
-			last = cache[i]->key.objectid +
-				cache[i]->key.offset;
-			used = btrfs_block_group_used(&cache[i]->item);
-			if (used + cache[i]->pinned <
-			    div_factor(cache[i]->key.offset, factor)) {
-				found_group = cache[i];
-				goto found;
-			}
-			if (used >= div_factor(cache[i]->key.offset, 8)) {
-				radix_tree_tag_clear(radix,
-						     cache[i]->key.objectid +
-						     cache[i]->key.offset - 1,
-						     BTRFS_BLOCK_GROUP_AVAIL);
-			}
-		}
-		cond_resched();
-	}
-	last = hint_last;
 again:
	while(1) {
-		ret = radix_tree_gang_lookup(radix, (void **)cache,
-					     last, ARRAY_SIZE(cache));
-		if (!ret)
+		ret = find_first_extent_bit(block_group_cache, last,
+					    &start, &end, bit);
+		if (ret)
			break;
-		for (i = 0; i < ret; i++) {
-			last = cache[i]->key.objectid +
-				cache[i]->key.offset;
-			used = btrfs_block_group_used(&cache[i]->item);
-			if (used + cache[i]->pinned < cache[i]->key.offset) {
-				found_group = cache[i];
-				goto found;
-			}
-			if (used >= cache[i]->key.offset) {
-				radix_tree_tag_clear(radix,
-						     cache[i]->key.objectid +
-						     cache[i]->key.offset - 1,
-						     BTRFS_BLOCK_GROUP_AVAIL);
-			}
+
+		ret = get_state_private(block_group_cache, start, &ptr);
+		if (ret)
+			break;
+
+		cache = (struct btrfs_block_group_cache *)ptr;
+		last = cache->key.objectid + cache->key.offset;
+		used = btrfs_block_group_used(&cache->item);
+
+		if (full_search)
+			free_check = cache->key.offset;
+		else
+			free_check = div_factor(cache->key.offset, factor);
+		if (used + cache->pinned < free_check) {
+			found_group = cache;
+			goto found;
		}
		cond_resched();
	}
@@ -321,23 +306,11 @@ struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root,
		goto again;
	}
	if (!data_swap) {
-		struct radix_tree_root *tmp = radix;
		data_swap = 1;
-		radix = swap_radix;
-		swap_radix = tmp;
+		bit = BLOCK_GROUP_DATA | BLOCK_GROUP_METADATA;
		last = search_start;
		goto again;
	}
-	if (!found_group) {
-		ret = radix_tree_gang_lookup(radix,
-					     (void **)&found_group, 0, 1);
-		if (ret == 0) {
-			ret = radix_tree_gang_lookup(swap_radix,
-						     (void **)&found_group,
-						     0, 1);
-		}
-		BUG_ON(ret != 1);
-	}
 found:
	return found_group;
 }
@@ -538,68 +511,55 @@ static int write_one_cache_group(struct btrfs_trans_handle *trans,
 }
 
-static int write_dirty_block_radix(struct btrfs_trans_handle *trans,
-				   struct btrfs_root *root,
-				   struct radix_tree_root *radix)
+int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
+				   struct btrfs_root *root)
 {
-	struct btrfs_block_group_cache *cache[8];
+	struct extent_map_tree *block_group_cache;
+	struct btrfs_block_group_cache *cache;
	int ret;
	int err = 0;
	int werr = 0;
-	int i;
	struct btrfs_path *path;
-	unsigned long off = 0;
+	u64 last = 0;
+	u64 start;
+	u64 end;
+	u64 ptr;
 
+	block_group_cache = &root->fs_info->block_group_cache;
	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;
 
	while(1) {
-		ret = radix_tree_gang_lookup_tag(radix, (void **)cache,
-						 off, ARRAY_SIZE(cache),
-						 BTRFS_BLOCK_GROUP_DIRTY);
-		if (!ret)
+		ret = find_first_extent_bit(block_group_cache, last,
+					    &start, &end, BLOCK_GROUP_DIRTY);
+		if (ret)
			break;
-		for (i = 0; i < ret; i++) {
-			err = write_one_cache_group(trans, root,
-						    path, cache[i]);
-			/*
-			 * if we fail to write the cache group, we want
-			 * to keep it marked dirty in hopes that a later
-			 * write will work
-			 */
-			if (err) {
-				werr = err;
-				off = cache[i]->key.objectid +
-					cache[i]->key.offset;
-				continue;
-			}
-			radix_tree_tag_clear(radix, cache[i]->key.objectid +
-					     cache[i]->key.offset - 1,
-					     BTRFS_BLOCK_GROUP_DIRTY);
-		}
+		last = end + 1;
+		ret = get_state_private(block_group_cache, start, &ptr);
+		if (ret)
+			break;
+		cache = (struct btrfs_block_group_cache *)ptr;
+		err = write_one_cache_group(trans, root,
+					    path, cache);
+		/*
+		 * if we fail to write the cache group, we want
+		 * to keep it marked dirty in hopes that a later
+		 * write will work
+		 */
+		if (err) {
+			werr = err;
+			continue;
+		}
+		clear_extent_bits(block_group_cache, start, end,
+				  BLOCK_GROUP_DIRTY, GFP_NOFS);
	}
	btrfs_free_path(path);
	return werr;
 }
-
-int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
-				   struct btrfs_root *root)
-{
-	int ret;
-	int ret2;
-
-	ret = write_dirty_block_radix(trans, root,
-				      &root->fs_info->block_group_radix);
-	ret2 = write_dirty_block_radix(trans, root,
-				       &root->fs_info->block_group_data_radix);
-	if (ret)
-		return ret;
-	if (ret2)
-		return ret2;
-	return 0;
-}
 
 static int update_block_group(struct btrfs_trans_handle *trans,
			      struct btrfs_root *root,
			      u64 blocknr, u64 num, int alloc, int mark_free,
@@ -610,7 +570,8 @@ static int update_block_group(struct btrfs_trans_handle *trans,
	u64 total = num;
	u64 old_val;
	u64 block_in_group;
-	int ret;
+	u64 start;
+	u64 end;
 
	while(total) {
		cache = btrfs_lookup_block_group(info, blocknr);
@@ -619,9 +580,10 @@ static int update_block_group(struct btrfs_trans_handle *trans,
		}
		block_in_group = blocknr - cache->key.objectid;
		WARN_ON(block_in_group > cache->key.offset);
-		radix_tree_tag_set(cache->radix, cache->key.objectid +
-				   cache->key.offset - 1,
-				   BTRFS_BLOCK_GROUP_DIRTY);
+		start = cache->key.objectid;
+		end = start + cache->key.offset - 1;
+		set_extent_bits(&info->block_group_cache, start, end,
+				BLOCK_GROUP_DIRTY, GFP_NOFS);
 
		old_val = btrfs_block_group_used(&cache->item);
		num = min(total, cache->key.offset - block_in_group);
@@ -630,25 +592,27 @@ static int update_block_group(struct btrfs_trans_handle *trans,
				cache->last_alloc = blocknr;
			if (cache->data != data &&
			    old_val < (cache->key.offset >> 1)) {
-				cache->data = data;
-				radix_tree_delete(cache->radix,
-						  cache->key.objectid +
-						  cache->key.offset - 1);
+				int bit_to_clear;
+				int bit_to_set;
 
+				cache->data = data;
				if (data) {
-					cache->radix =
-						&info->block_group_data_radix;
+					bit_to_clear = BLOCK_GROUP_DATA;
+					bit_to_set = BLOCK_GROUP_METADATA;
					cache->item.flags |=
						BTRFS_BLOCK_GROUP_DATA;
				} else {
-					cache->radix = &info->block_group_radix;
+					bit_to_clear = BLOCK_GROUP_METADATA;
+					bit_to_set = BLOCK_GROUP_DATA;
					cache->item.flags &=
						~BTRFS_BLOCK_GROUP_DATA;
				}
-				ret = radix_tree_insert(cache->radix,
-							cache->key.objectid +
-							cache->key.offset - 1,
-							(void *)cache);
+				clear_extent_bits(&info->block_group_cache,
						  start, end, bit_to_clear,
						  GFP_NOFS);
+				set_extent_bits(&info->block_group_cache,
						start, end, bit_to_set,
						GFP_NOFS);
			}
			old_val += num;
		} else {
@@ -660,13 +624,6 @@ static int update_block_group(struct btrfs_trans_handle *trans,
						 blocknr, blocknr + num - 1,
						 GFP_NOFS);
			}
-			if (old_val < (cache->key.offset >> 1) &&
-			    old_val + num >= (cache->key.offset >> 1)) {
-				radix_tree_tag_set(cache->radix,
-						   cache->key.objectid +
-						   cache->key.offset - 1,
-						   BTRFS_BLOCK_GROUP_AVAIL);
-			}
		}
		btrfs_set_block_group_used(&cache->item, old_val);
		total -= num;
@@ -730,11 +687,8 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
			block_group->pinned--;
			if (gang[i] < block_group->last_alloc)
				block_group->last_alloc = gang[i];
-			if (!block_group->data) {
-				set_extent_dirty(free_space_cache,
-						 gang[i], gang[i],
-						 GFP_NOFS);
-			}
+			set_extent_dirty(free_space_cache,
+					 gang[i], gang[i], GFP_NOFS);
		}
	}
 }
@@ -1059,8 +1013,8 @@ static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
			ins->offset = search_end - ins->objectid;
			goto check_pending;
		}
-
		btrfs_item_key_to_cpu(l, &key, slot);
+
		if (key.objectid >= search_start && key.objectid > last_block &&
		    start_found) {
			if (last_block < search_start)
@@ -1072,9 +1026,14 @@ static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
				goto check_pending;
			}
		}
-		if (btrfs_key_type(&key) != BTRFS_EXTENT_ITEM_KEY)
+		if (btrfs_key_type(&key) != BTRFS_EXTENT_ITEM_KEY) {
+			if (!start_found) {
+				last_block = key.objectid;
+				start_found = 1;
+			}
			goto next;
+		}
		start_found = 1;
		last_block = key.objectid + key.offset;
@@ -1120,9 +1079,6 @@ static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
	}
	ins->offset = num_blocks;
	btrfs_free_path(path);
-	if (0 && ins->objectid != cached_search_start) {
-		printk("\tcached was %Lu found %Lu\n", cached_search_start, ins->objectid);
-	}
	return 0;
 
 new_group:
@@ -1529,40 +1485,20 @@ int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root
	return ret;
 }
 
-static int free_block_group_radix(struct radix_tree_root *radix)
+int btrfs_free_block_groups(struct btrfs_fs_info *info)
 {
+	u64 start;
+	u64 end;
	int ret;
-	struct btrfs_block_group_cache *cache[8];
-	int i;
 
	while(1) {
-		ret = radix_tree_gang_lookup(radix, (void **)cache, 0,
-					     ARRAY_SIZE(cache));
-		if (!ret)
+		ret = find_first_extent_bit(&info->block_group_cache, 0,
+					    &start, &end, (unsigned int)-1);
+		if (ret)
			break;
-		for (i = 0; i < ret; i++) {
-			radix_tree_delete(radix, cache[i]->key.objectid +
-					  cache[i]->key.offset - 1);
-			kfree(cache[i]);
-		}
+		clear_extent_bits(&info->block_group_cache, start,
+				  end, (unsigned int)-1, GFP_NOFS);
	}
-	return 0;
-}
-
-int btrfs_free_block_groups(struct btrfs_fs_info *info)
-{
-	int ret;
-	int ret2;
-	u64 start;
-	u64 end;
-
-	ret = free_block_group_radix(&info->block_group_radix);
-	ret2 = free_block_group_radix(&info->block_group_data_radix);
-	if (ret)
-		return ret;
-	if (ret2)
-		return ret2;
-
	while(1) {
		ret = find_first_extent_bit(&info->free_space_cache, 0,
					    &start, &end, EXTENT_DIRTY);
@@ -1579,17 +1515,20 @@ int btrfs_read_block_groups(struct btrfs_root *root)
	struct btrfs_path *path;
	int ret;
	int err = 0;
+	int bit;
	struct btrfs_block_group_cache *cache;
	struct btrfs_fs_info *info = root->fs_info;
-	struct radix_tree_root *radix;
+	struct extent_map_tree *block_group_cache;
	struct btrfs_key key;
	struct btrfs_key found_key;
	struct extent_buffer *leaf;
	u64 group_size_blocks;
-	u64 used;
 
+	block_group_cache = &info->block_group_cache;
	group_size_blocks = BTRFS_BLOCK_GROUP_SIZE >>
-		root->fs_info->sb->s_blocksize_bits;
+		info->sb->s_blocksize_bits;
+	root = info->extent_root;
 
	key.objectid = 0;
	key.offset = group_size_blocks;
@@ -1617,35 +1556,30 @@ int btrfs_read_block_groups(struct btrfs_root *root)
		read_extent_buffer(leaf, &cache->item,
				   btrfs_item_ptr_offset(leaf, path->slots[0]),
				   sizeof(cache->item));
-		if (cache->item.flags & BTRFS_BLOCK_GROUP_DATA) {
-			radix = &info->block_group_data_radix;
-			cache->data = 1;
-		} else {
-			radix = &info->block_group_radix;
-			cache->data = 0;
-		}
		memcpy(&cache->key, &found_key, sizeof(found_key));
		cache->last_alloc = cache->key.objectid;
		cache->first_free = cache->key.objectid;
		cache->pinned = 0;
		cache->cached = 0;
-		cache->radix = radix;
 
		key.objectid = found_key.objectid + found_key.offset;
		btrfs_release_path(root, path);
-		ret = radix_tree_insert(radix, found_key.objectid +
-					found_key.offset - 1,
-					(void *)cache);
-		BUG_ON(ret);
-		used = btrfs_block_group_used(&cache->item);
-		if (used < div_factor(key.offset, 8)) {
-			radix_tree_tag_set(radix, found_key.objectid +
-					   found_key.offset - 1,
-					   BTRFS_BLOCK_GROUP_AVAIL);
+
+		if (cache->item.flags & BTRFS_BLOCK_GROUP_DATA) {
+			bit = BLOCK_GROUP_DATA;
+			cache->data = 1;
+		} else {
+			bit = BLOCK_GROUP_METADATA;
+			cache->data = 0;
		}
+		/* use EXTENT_LOCKED to prevent merging */
+		set_extent_bits(block_group_cache, found_key.objectid,
+				found_key.objectid + found_key.offset - 1,
+				bit | EXTENT_LOCKED, GFP_NOFS);
+		set_state_private(block_group_cache, found_key.objectid,
+				  (u64)cache);
+
		if (key.objectid >=
		    btrfs_super_total_blocks(&info->super_copy))
			break;
--- a/fs/btrfs/extent_map.c
+++ b/fs/btrfs/extent_map.c
@@ -574,7 +574,7 @@ int clear_extent_bit(struct extent_map_tree *tree, u64 start, u64 end,
	return set;
 
 search_again:
-	if (start >= end)
+	if (start > end)
		goto out;
	write_unlock_irqrestore(&tree->lock, flags);
	if (mask & __GFP_WAIT)
@@ -819,6 +819,21 @@ int set_extent_dirty(struct extent_map_tree *tree, u64 start, u64 end,
 }
 EXPORT_SYMBOL(set_extent_dirty);
 
+int set_extent_bits(struct extent_map_tree *tree, u64 start, u64 end,
+		    int bits, gfp_t mask)
+{
+	return set_extent_bit(tree, start, end, bits, 0, NULL,
+			      mask);
+}
+EXPORT_SYMBOL(set_extent_bits);
+
+int clear_extent_bits(struct extent_map_tree *tree, u64 start, u64 end,
+		      int bits, gfp_t mask)
+{
+	return clear_extent_bit(tree, start, end, bits, 0, 0, mask);
+}
+EXPORT_SYMBOL(clear_extent_bits);
+
 int set_extent_delalloc(struct extent_map_tree *tree, u64 start, u64 end,
			gfp_t mask)
 {
@@ -1138,7 +1153,6 @@ int set_state_private(struct extent_map_tree *tree, u64 start, u64 private)
 out:
	write_unlock_irq(&tree->lock);
	return ret;
 }
-
 int get_state_private(struct extent_map_tree *tree, u64 start, u64 *private)
--- a/fs/btrfs/extent_map.h
+++ b/fs/btrfs/extent_map.h
@@ -96,6 +96,10 @@ int extent_read_full_page(struct extent_map_tree *tree, struct page *page,
 void __init extent_map_init(void);
 void __exit extent_map_exit(void);
 int extent_clean_all_trees(struct extent_map_tree *tree);
+int clear_extent_bits(struct extent_map_tree *tree, u64 start, u64 end,
+		      int bits, gfp_t mask);
+int set_extent_bits(struct extent_map_tree *tree, u64 start, u64 end,
+		    int bits, gfp_t mask);
 int set_extent_uptodate(struct extent_map_tree *tree, u64 start, u64 end,
			gfp_t mask);
 int set_extent_new(struct extent_map_tree *tree, u64 start, u64 end,