Commit 176840b3 authored by Filipe Manana's avatar Filipe Manana Committed by Josef Bacik

Btrfs: more efficient btrfs_drop_extent_cache

While droping extent map structures from the extent cache that cover our
target range, we would remove each extent map structure from the red black
tree and then add either 1 or 2 new extent map structures if the former
extent map covered sections outside our target range.

This change simply attempts to replace the existing extent map structure
with a new one that covers the subsection we're not interested in, instead
of doing a red black remove operation followed by an insertion operation.

The number of elements in an inode's extent map tree can get very high for large
files under random writes. For example, while running the following test:

    sysbench --test=fileio --file-num=1 --file-total-size=10G \
        --file-test-mode=rndrw --num-threads=32 --file-block-size=32768 \
        --max-requests=500000 --file-rw-ratio=2 [prepare|run]

I captured the following histogram capturing the number of extent_map items
in the red black tree while that test was running:

    Count: 122462
    Range:  1.000 - 172231.000; Mean: 96415.831; Median: 101855.000; Stddev: 49700.981
    Percentiles:  90th: 160120.000; 95th: 166335.000; 99th: 171070.000
       1.000 -    5.231:   452 |
       5.231 -  187.392:    87 |
     187.392 -  585.911:   206 |
     585.911 - 1827.438:   623 |
    1827.438 - 5695.245:  1962 #
    5695.245 - 17744.861:  6204 ####
   17744.861 - 55283.764: 21115 ############
   55283.764 - 172231.000: 91813 #####################################################

Benchmark:

    sysbench --test=fileio --file-num=1 --file-total-size=10G --file-test-mode=rndwr \
        --num-threads=64 --file-block-size=32768 --max-requests=0 --max-time=60 \
        --file-io-mode=sync --file-fsync-freq=0 [prepare|run]

Before this change: 122.1Mb/sec
After this change:  125.07Mb/sec
(averages of 5 test runs)

Test machine: quad core intel i5-3570K, 32Gb of ram, SSD
Signed-off-by: default avatarFilipe David Borba Manana <fdmanana@gmail.com>
Signed-off-by: default avatarJosef Bacik <jbacik@fb.com>
parent f2071b21
......@@ -318,6 +318,20 @@ void clear_em_logging(struct extent_map_tree *tree, struct extent_map *em)
try_merge_map(tree, em);
}
static inline void setup_extent_mapping(struct extent_map_tree *tree,
struct extent_map *em,
int modified)
{
atomic_inc(&em->refs);
em->mod_start = em->start;
em->mod_len = em->len;
if (modified)
list_move(&em->list, &tree->modified_extents);
else
try_merge_map(tree, em);
}
/**
* add_extent_mapping - add new extent map to the extent tree
* @tree: tree to insert new map in
......@@ -337,15 +351,7 @@ int add_extent_mapping(struct extent_map_tree *tree,
if (ret)
goto out;
atomic_inc(&em->refs);
em->mod_start = em->start;
em->mod_len = em->len;
if (modified)
list_move(&em->list, &tree->modified_extents);
else
try_merge_map(tree, em);
setup_extent_mapping(tree, em, modified);
out:
return ret;
}
......@@ -432,3 +438,18 @@ int remove_extent_mapping(struct extent_map_tree *tree, struct extent_map *em)
RB_CLEAR_NODE(&em->rb_node);
return ret;
}
void replace_extent_mapping(struct extent_map_tree *tree,
struct extent_map *cur,
struct extent_map *new,
int modified)
{
WARN_ON(test_bit(EXTENT_FLAG_PINNED, &cur->flags));
ASSERT(extent_map_in_tree(cur));
if (!test_bit(EXTENT_FLAG_LOGGING, &cur->flags))
list_del_init(&cur->list);
rb_replace_node(&cur->rb_node, &new->rb_node, &tree->map);
RB_CLEAR_NODE(&cur->rb_node);
setup_extent_mapping(tree, new, modified);
}
......@@ -68,6 +68,10 @@ struct extent_map *lookup_extent_mapping(struct extent_map_tree *tree,
int add_extent_mapping(struct extent_map_tree *tree,
struct extent_map *em, int modified);
int remove_extent_mapping(struct extent_map_tree *tree, struct extent_map *em);
void replace_extent_mapping(struct extent_map_tree *tree,
struct extent_map *cur,
struct extent_map *new,
int modified);
struct extent_map *alloc_extent_map(void);
void free_extent_map(struct extent_map *em);
......
......@@ -591,7 +591,6 @@ void btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
clear_bit(EXTENT_FLAG_PINNED, &em->flags);
clear_bit(EXTENT_FLAG_LOGGING, &flags);
modified = !list_empty(&em->list);
remove_extent_mapping(em_tree, em);
if (no_splits)
goto next;
......@@ -622,8 +621,7 @@ void btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
split->bdev = em->bdev;
split->flags = flags;
split->compress_type = em->compress_type;
ret = add_extent_mapping(em_tree, split, modified);
BUG_ON(ret); /* Logic error */
replace_extent_mapping(em_tree, em, split, modified);
free_extent_map(split);
split = split2;
split2 = NULL;
......@@ -661,12 +659,20 @@ void btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
split->orig_block_len = 0;
}
ret = add_extent_mapping(em_tree, split, modified);
BUG_ON(ret); /* Logic error */
if (extent_map_in_tree(em)) {
replace_extent_mapping(em_tree, em, split,
modified);
} else {
ret = add_extent_mapping(em_tree, split,
modified);
ASSERT(ret == 0); /* Logic error */
}
free_extent_map(split);
split = NULL;
}
next:
if (extent_map_in_tree(em))
remove_extent_mapping(em_tree, em);
write_unlock(&em_tree->lock);
/* once for us */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment