Commit 1edbb734 authored by Chris Mason

Btrfs: reduce CPU usage in the extent_state tree

Btrfs is currently mirroring some of the page state bits into
its extent state tree.  The goal behind this was to use it in supporting
blocksizes other than the page size.

But, we don't currently support that, and we're using quite a lot of CPU
on the rb tree and its spin lock.  This commit starts a series of
cleanups to reduce the amount of work done in the extent state tree as
part of each IO.

This commit:

* Adds the ability to lock an extent in the state tree and also set
other bits.  The idea is to do locking and delalloc in one call.

* Removes the EXTENT_WRITEBACK and EXTENT_DIRTY bits.  Btrfs is using
a combination of the page bits and the ordered write code for this
instead.
Signed-off-by: Chris Mason <chris.mason@oracle.com>
parent e48c465b
...@@ -654,25 +654,24 @@ static void set_state_bits(struct extent_io_tree *tree, ...@@ -654,25 +654,24 @@ static void set_state_bits(struct extent_io_tree *tree,
} }
/* /*
* set some bits on a range in the tree. This may require allocations * set some bits on a range in the tree. This may require allocations or
* or sleeping, so the gfp mask is used to indicate what is allowed. * sleeping, so the gfp mask is used to indicate what is allowed.
* *
* If 'exclusive' == 1, this will fail with -EEXIST if some part of the * If any of the exclusive bits are set, this will fail with -EEXIST if some
* range already has the desired bits set. The start of the existing * part of the range already has the desired bits set. The start of the
* range is returned in failed_start in this case. * existing range is returned in failed_start in this case.
* *
* [start, end] is inclusive * [start, end] is inclusive This takes the tree lock.
* This takes the tree lock.
*/ */
static int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, static int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
int bits, int exclusive, u64 *failed_start, int bits, int exclusive_bits, u64 *failed_start,
gfp_t mask) gfp_t mask)
{ {
struct extent_state *state; struct extent_state *state;
struct extent_state *prealloc = NULL; struct extent_state *prealloc = NULL;
struct rb_node *node; struct rb_node *node;
int err = 0; int err = 0;
int set;
u64 last_start; u64 last_start;
u64 last_end; u64 last_end;
again: again:
...@@ -707,8 +706,7 @@ static int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, ...@@ -707,8 +706,7 @@ static int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
*/ */
if (state->start == start && state->end <= end) { if (state->start == start && state->end <= end) {
struct rb_node *next_node; struct rb_node *next_node;
set = state->state & bits; if (state->state & exclusive_bits) {
if (set && exclusive) {
*failed_start = state->start; *failed_start = state->start;
err = -EEXIST; err = -EEXIST;
goto out; goto out;
...@@ -748,8 +746,7 @@ static int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, ...@@ -748,8 +746,7 @@ static int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
* desired bit on it. * desired bit on it.
*/ */
if (state->start < start) { if (state->start < start) {
set = state->state & bits; if (state->state & exclusive_bits) {
if (exclusive && set) {
*failed_start = start; *failed_start = start;
err = -EEXIST; err = -EEXIST;
goto out; goto out;
...@@ -799,8 +796,7 @@ static int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, ...@@ -799,8 +796,7 @@ static int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
* on the first half * on the first half
*/ */
if (state->start <= end && state->end > end) { if (state->start <= end && state->end > end) {
set = state->state & bits; if (state->state & exclusive_bits) {
if (exclusive && set) {
*failed_start = start; *failed_start = start;
err = -EEXIST; err = -EEXIST;
goto out; goto out;
...@@ -906,19 +902,6 @@ static int clear_extent_uptodate(struct extent_io_tree *tree, u64 start, ...@@ -906,19 +902,6 @@ static int clear_extent_uptodate(struct extent_io_tree *tree, u64 start,
return clear_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, 0, mask); return clear_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, 0, mask);
} }
static int set_extent_writeback(struct extent_io_tree *tree, u64 start, u64 end,
gfp_t mask)
{
return set_extent_bit(tree, start, end, EXTENT_WRITEBACK,
0, NULL, mask);
}
static int clear_extent_writeback(struct extent_io_tree *tree, u64 start,
u64 end, gfp_t mask)
{
return clear_extent_bit(tree, start, end, EXTENT_WRITEBACK, 1, 0, mask);
}
int wait_on_extent_writeback(struct extent_io_tree *tree, u64 start, u64 end) int wait_on_extent_writeback(struct extent_io_tree *tree, u64 start, u64 end)
{ {
return wait_extent_bit(tree, start, end, EXTENT_WRITEBACK); return wait_extent_bit(tree, start, end, EXTENT_WRITEBACK);
...@@ -928,13 +911,14 @@ int wait_on_extent_writeback(struct extent_io_tree *tree, u64 start, u64 end) ...@@ -928,13 +911,14 @@ int wait_on_extent_writeback(struct extent_io_tree *tree, u64 start, u64 end)
* either insert or lock state struct between start and end use mask to tell * either insert or lock state struct between start and end use mask to tell
* us if waiting is desired. * us if waiting is desired.
*/ */
int lock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask) int lock_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
int bits, gfp_t mask)
{ {
int err; int err;
u64 failed_start; u64 failed_start;
while (1) { while (1) {
err = set_extent_bit(tree, start, end, EXTENT_LOCKED, 1, err = set_extent_bit(tree, start, end, EXTENT_LOCKED | bits,
&failed_start, mask); EXTENT_LOCKED, &failed_start, mask);
if (err == -EEXIST && (mask & __GFP_WAIT)) { if (err == -EEXIST && (mask & __GFP_WAIT)) {
wait_extent_bit(tree, failed_start, end, EXTENT_LOCKED); wait_extent_bit(tree, failed_start, end, EXTENT_LOCKED);
start = failed_start; start = failed_start;
...@@ -946,6 +930,11 @@ int lock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask) ...@@ -946,6 +930,11 @@ int lock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask)
return err; return err;
} }
int lock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask)
{
return lock_extent_bits(tree, start, end, 0, mask);
}
int try_lock_extent(struct extent_io_tree *tree, u64 start, u64 end, int try_lock_extent(struct extent_io_tree *tree, u64 start, u64 end,
gfp_t mask) gfp_t mask)
{ {
...@@ -985,7 +974,6 @@ int set_range_dirty(struct extent_io_tree *tree, u64 start, u64 end) ...@@ -985,7 +974,6 @@ int set_range_dirty(struct extent_io_tree *tree, u64 start, u64 end)
page_cache_release(page); page_cache_release(page);
index++; index++;
} }
set_extent_dirty(tree, start, end, GFP_NOFS);
return 0; return 0;
} }
...@@ -1005,7 +993,6 @@ static int set_range_writeback(struct extent_io_tree *tree, u64 start, u64 end) ...@@ -1005,7 +993,6 @@ static int set_range_writeback(struct extent_io_tree *tree, u64 start, u64 end)
page_cache_release(page); page_cache_release(page);
index++; index++;
} }
set_extent_writeback(tree, start, end, GFP_NOFS);
return 0; return 0;
} }
...@@ -1563,9 +1550,6 @@ static int check_page_locked(struct extent_io_tree *tree, ...@@ -1563,9 +1550,6 @@ static int check_page_locked(struct extent_io_tree *tree,
static int check_page_writeback(struct extent_io_tree *tree, static int check_page_writeback(struct extent_io_tree *tree,
struct page *page) struct page *page)
{ {
u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
u64 end = start + PAGE_CACHE_SIZE - 1;
if (!test_range_bit(tree, start, end, EXTENT_WRITEBACK, 0))
end_page_writeback(page); end_page_writeback(page);
return 0; return 0;
} }
...@@ -1624,13 +1608,11 @@ static void end_bio_extent_writepage(struct bio *bio, int err) ...@@ -1624,13 +1608,11 @@ static void end_bio_extent_writepage(struct bio *bio, int err)
} }
if (!uptodate) { if (!uptodate) {
clear_extent_uptodate(tree, start, end, GFP_ATOMIC); clear_extent_uptodate(tree, start, end, GFP_NOFS);
ClearPageUptodate(page); ClearPageUptodate(page);
SetPageError(page); SetPageError(page);
} }
clear_extent_writeback(tree, start, end, GFP_ATOMIC);
if (whole_page) if (whole_page)
end_page_writeback(page); end_page_writeback(page);
else else
...@@ -2208,8 +2190,9 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, ...@@ -2208,8 +2190,9 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
printk(KERN_ERR "btrfs delalloc bits after lock_extent\n"); printk(KERN_ERR "btrfs delalloc bits after lock_extent\n");
if (last_byte <= start) { if (last_byte <= start) {
clear_extent_dirty(tree, start, page_end, GFP_NOFS); clear_extent_bit(tree, start, page_end,
unlock_extent(tree, start, page_end, GFP_NOFS); EXTENT_LOCKED | EXTENT_DIRTY,
1, 0, GFP_NOFS);
if (tree->ops && tree->ops->writepage_end_io_hook) if (tree->ops && tree->ops->writepage_end_io_hook)
tree->ops->writepage_end_io_hook(page, start, tree->ops->writepage_end_io_hook(page, start,
page_end, NULL, 1); page_end, NULL, 1);
...@@ -2217,12 +2200,10 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, ...@@ -2217,12 +2200,10 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
goto done; goto done;
} }
set_extent_uptodate(tree, start, page_end, GFP_NOFS);
blocksize = inode->i_sb->s_blocksize; blocksize = inode->i_sb->s_blocksize;
while (cur <= end) { while (cur <= end) {
if (cur >= last_byte) { if (cur >= last_byte) {
clear_extent_dirty(tree, cur, page_end, GFP_NOFS);
unlock_extent(tree, unlock_start, page_end, GFP_NOFS); unlock_extent(tree, unlock_start, page_end, GFP_NOFS);
if (tree->ops && tree->ops->writepage_end_io_hook) if (tree->ops && tree->ops->writepage_end_io_hook)
tree->ops->writepage_end_io_hook(page, cur, tree->ops->writepage_end_io_hook(page, cur,
...@@ -2255,9 +2236,6 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, ...@@ -2255,9 +2236,6 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
*/ */
if (compressed || block_start == EXTENT_MAP_HOLE || if (compressed || block_start == EXTENT_MAP_HOLE ||
block_start == EXTENT_MAP_INLINE) { block_start == EXTENT_MAP_INLINE) {
clear_extent_dirty(tree, cur,
cur + iosize - 1, GFP_NOFS);
unlock_extent(tree, unlock_start, cur + iosize - 1, unlock_extent(tree, unlock_start, cur + iosize - 1,
GFP_NOFS); GFP_NOFS);
...@@ -2291,7 +2269,6 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, ...@@ -2291,7 +2269,6 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
continue; continue;
} }
clear_extent_dirty(tree, cur, cur + iosize - 1, GFP_NOFS);
if (tree->ops && tree->ops->writepage_io_hook) { if (tree->ops && tree->ops->writepage_io_hook) {
ret = tree->ops->writepage_io_hook(page, cur, ret = tree->ops->writepage_io_hook(page, cur,
cur + iosize - 1); cur + iosize - 1);
...@@ -2619,7 +2596,7 @@ int extent_invalidatepage(struct extent_io_tree *tree, ...@@ -2619,7 +2596,7 @@ int extent_invalidatepage(struct extent_io_tree *tree,
return 0; return 0;
lock_extent(tree, start, end, GFP_NOFS); lock_extent(tree, start, end, GFP_NOFS);
wait_on_extent_writeback(tree, start, end); wait_on_page_writeback(page);
clear_extent_bit(tree, start, end, clear_extent_bit(tree, start, end,
EXTENT_LOCKED | EXTENT_DIRTY | EXTENT_DELALLOC, EXTENT_LOCKED | EXTENT_DIRTY | EXTENT_DELALLOC,
1, 1, GFP_NOFS); 1, 1, GFP_NOFS);
......
...@@ -142,6 +142,8 @@ int try_release_extent_state(struct extent_map_tree *map, ...@@ -142,6 +142,8 @@ int try_release_extent_state(struct extent_map_tree *map,
struct extent_io_tree *tree, struct page *page, struct extent_io_tree *tree, struct page *page,
gfp_t mask); gfp_t mask);
int lock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask); int lock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask);
int lock_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
int bits, gfp_t mask);
int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask); int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask);
int try_lock_extent(struct extent_io_tree *tree, u64 start, u64 end, int try_lock_extent(struct extent_io_tree *tree, u64 start, u64 end,
gfp_t mask); gfp_t mask);
......
...@@ -113,8 +113,6 @@ static noinline int dirty_and_release_pages(struct btrfs_trans_handle *trans, ...@@ -113,8 +113,6 @@ static noinline int dirty_and_release_pages(struct btrfs_trans_handle *trans,
int err = 0; int err = 0;
int i; int i;
struct inode *inode = fdentry(file)->d_inode; struct inode *inode = fdentry(file)->d_inode;
struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
u64 hint_byte;
u64 num_bytes; u64 num_bytes;
u64 start_pos; u64 start_pos;
u64 end_of_last_block; u64 end_of_last_block;
...@@ -126,20 +124,6 @@ static noinline int dirty_and_release_pages(struct btrfs_trans_handle *trans, ...@@ -126,20 +124,6 @@ static noinline int dirty_and_release_pages(struct btrfs_trans_handle *trans,
root->sectorsize - 1) & ~((u64)root->sectorsize - 1); root->sectorsize - 1) & ~((u64)root->sectorsize - 1);
end_of_last_block = start_pos + num_bytes - 1; end_of_last_block = start_pos + num_bytes - 1;
lock_extent(io_tree, start_pos, end_of_last_block, GFP_NOFS);
trans = btrfs_join_transaction(root, 1);
if (!trans) {
err = -ENOMEM;
goto out_unlock;
}
btrfs_set_trans_block_group(trans, inode);
hint_byte = 0;
/* check for reserved extents on each page, we don't want
* to reset the delalloc bit on things that already have
* extents reserved.
*/
btrfs_set_extent_delalloc(inode, start_pos, end_of_last_block); btrfs_set_extent_delalloc(inode, start_pos, end_of_last_block);
for (i = 0; i < num_pages; i++) { for (i = 0; i < num_pages; i++) {
struct page *p = pages[i]; struct page *p = pages[i];
...@@ -154,9 +138,6 @@ static noinline int dirty_and_release_pages(struct btrfs_trans_handle *trans, ...@@ -154,9 +138,6 @@ static noinline int dirty_and_release_pages(struct btrfs_trans_handle *trans,
* at this time. * at this time.
*/ */
} }
err = btrfs_end_transaction(trans, root);
out_unlock:
unlock_extent(io_tree, start_pos, end_of_last_block, GFP_NOFS);
return err; return err;
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment