Commit 87c11705 authored by Josef Bacik, committed by David Sterba

btrfs: convert the io_failure_tree to a plain rb_tree

We still have this oddity of stashing the io_failure_record in the
extent state for the io_failure_tree, which is a leftover from when we
used to stuff private pointers in extent_io_trees.

However, this doesn't make a lot of sense for the io failure records;
we can simply use a normal rb_tree for this.  This will allow us to
further simplify the extent_io_tree code by removing the
io_failure_rec pointer from the extent state.

Convert the io_failure_tree to an rb tree + spinlock in the inode, and
then use our rb tree simple helpers to insert and find failed records.
This greatly cleans up this code and makes it easier to separate out the
extent_io_tree code.
Signed-off-by: Josef Bacik <josef@toxicpanda.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
parent a2061748
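
The new scheme is the standard kernel pattern of an rb_root keyed by a u64, guarded by a spinlock. As a rough illustration of what insert_failrec()/get_failrec() in the diff below boil down to, here is a minimal sketch using only the generic <linux/rbtree.h> API; the demo_* names are illustrative and not part of btrfs, which instead goes through the rb_simple_* helpers in misc.h.

/* Sketch of the rb_root + spinlock pattern; not the actual btrfs code. */
#include <linux/errno.h>
#include <linux/rbtree.h>
#include <linux/spinlock.h>
#include <linux/types.h>

struct demo_failrec {
	struct rb_node rb_node;
	u64 bytenr;
};

static struct rb_root demo_tree = RB_ROOT;
static DEFINE_SPINLOCK(demo_lock);

/* Insert @rec keyed by rec->bytenr; return -EEXIST if the key already exists. */
static int demo_insert(struct demo_failrec *rec)
{
	struct rb_node **p, *parent = NULL;
	struct demo_failrec *cur;
	int ret = 0;

	spin_lock(&demo_lock);
	p = &demo_tree.rb_node;
	while (*p) {
		parent = *p;
		cur = rb_entry(parent, struct demo_failrec, rb_node);
		if (rec->bytenr < cur->bytenr) {
			p = &(*p)->rb_left;
		} else if (rec->bytenr > cur->bytenr) {
			p = &(*p)->rb_right;
		} else {
			ret = -EEXIST;
			goto out;
		}
	}
	rb_link_node(&rec->rb_node, parent, p);
	rb_insert_color(&rec->rb_node, &demo_tree);
out:
	spin_unlock(&demo_lock);
	return ret;
}

/* Exact-match lookup by bytenr, mirroring what get_failrec() does below. */
static struct demo_failrec *demo_lookup(u64 bytenr)
{
	struct rb_node *node;
	struct demo_failrec *found = NULL;

	spin_lock(&demo_lock);
	node = demo_tree.rb_node;
	while (node) {
		struct demo_failrec *cur =
			rb_entry(node, struct demo_failrec, rb_node);

		if (bytenr < cur->bytenr)
			node = node->rb_left;
		else if (bytenr > cur->bytenr)
			node = node->rb_right;
		else {
			found = cur;
			break;
		}
	}
	spin_unlock(&demo_lock);
	return found;
}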
@@ -94,7 +94,8 @@ struct btrfs_inode {
 	/* special utility tree used to record which mirrors have already been
 	 * tried when checksums fail for a given block
 	 */
-	struct extent_io_tree io_failure_tree;
+	struct rb_root io_failure_tree;
+	spinlock_t io_failure_lock;
 
 	/*
 	 * Keep track of where the inode has extent items mapped in order to
......
@@ -56,7 +56,6 @@ enum {
 	IO_TREE_FS_EXCLUDED_EXTENTS,
 	IO_TREE_BTREE_INODE_IO,
 	IO_TREE_INODE_IO,
-	IO_TREE_INODE_IO_FAILURE,
 	IO_TREE_RELOC_BLOCKS,
 	IO_TREE_TRANS_DIRTY_PAGES,
 	IO_TREE_ROOT_DIRTY_LOG_PAGES,
@@ -89,8 +88,6 @@ struct extent_state {
 	refcount_t refs;
 	u32 state;
 
-	struct io_failure_record *failrec;
-
 #ifdef CONFIG_BTRFS_DEBUG
 	struct list_head leak_list;
 #endif
......
@@ -326,7 +326,6 @@ static struct extent_state *alloc_extent_state(gfp_t mask)
 	if (!state)
 		return state;
 	state->state = 0;
-	state->failrec = NULL;
 	RB_CLEAR_NODE(&state->rb_node);
 	btrfs_leak_debug_add(&leak_lock, &state->leak_list, &states);
 	refcount_set(&state->refs, 1);
@@ -2159,64 +2158,29 @@ u64 count_range_bits(struct extent_io_tree *tree,
 	return total_bytes;
 }
 
-/*
- * set the private field for a given byte offset in the tree.  If there isn't
- * an extent_state there already, this does nothing.
- */
-static int set_state_failrec(struct extent_io_tree *tree, u64 start,
-			     struct io_failure_record *failrec)
+static int insert_failrec(struct btrfs_inode *inode,
+			  struct io_failure_record *failrec)
 {
-	struct rb_node *node;
-	struct extent_state *state;
-	int ret = 0;
+	struct rb_node *exist;
 
-	spin_lock(&tree->lock);
-	/*
-	 * this search will find all the extents that end after
-	 * our range starts.
-	 */
-	node = tree_search(tree, start);
-	if (!node) {
-		ret = -ENOENT;
-		goto out;
-	}
-	state = rb_entry(node, struct extent_state, rb_node);
-	if (state->start != start) {
-		ret = -ENOENT;
-		goto out;
-	}
-	state->failrec = failrec;
-out:
-	spin_unlock(&tree->lock);
-	return ret;
+	spin_lock(&inode->io_failure_lock);
+	exist = rb_simple_insert(&inode->io_failure_tree, failrec->bytenr,
+				 &failrec->rb_node);
+	spin_unlock(&inode->io_failure_lock);
+
+	return (exist == NULL) ? 0 : -EEXIST;
 }
 
-static struct io_failure_record *get_state_failrec(struct extent_io_tree *tree,
-						    u64 start)
+static struct io_failure_record *get_failrec(struct btrfs_inode *inode, u64 start)
 {
 	struct rb_node *node;
-	struct extent_state *state;
-	struct io_failure_record *failrec;
+	struct io_failure_record *failrec = ERR_PTR(-ENOENT);
 
-	spin_lock(&tree->lock);
-	/*
-	 * this search will find all the extents that end after
-	 * our range starts.
-	 */
-	node = tree_search(tree, start);
-	if (!node) {
-		failrec = ERR_PTR(-ENOENT);
-		goto out;
-	}
-	state = rb_entry(node, struct extent_state, rb_node);
-	if (state->start != start) {
-		failrec = ERR_PTR(-ENOENT);
-		goto out;
-	}
-	failrec = state->failrec;
-out:
-	spin_unlock(&tree->lock);
+	spin_lock(&inode->io_failure_lock);
+	node = rb_simple_search(&inode->io_failure_tree, start);
+	if (node)
+		failrec = rb_entry(node, struct io_failure_record, rb_node);
+	spin_unlock(&inode->io_failure_lock);
 	return failrec;
 }
@@ -2276,28 +2240,20 @@ int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end,
 	return bitset;
 }
 
-static int free_io_failure(struct extent_io_tree *failure_tree,
-			   struct extent_io_tree *io_tree,
+static int free_io_failure(struct btrfs_inode *inode,
 			   struct io_failure_record *rec)
 {
 	int ret;
-	int err = 0;
 
-	set_state_failrec(failure_tree, rec->start, NULL);
-	ret = clear_extent_bits(failure_tree, rec->start,
-				rec->start + rec->len - 1,
-				EXTENT_LOCKED | EXTENT_DIRTY);
-	if (ret)
-		err = ret;
+	spin_lock(&inode->io_failure_lock);
+	rb_erase(&rec->rb_node, &inode->io_failure_tree);
+	spin_unlock(&inode->io_failure_lock);
 
-	ret = clear_extent_bits(io_tree, rec->start,
-				rec->start + rec->len - 1,
+	ret = clear_extent_bits(&inode->io_tree, rec->bytenr,
+				rec->bytenr + rec->len - 1,
 				EXTENT_DAMAGED);
-	if (ret && !err)
-		err = ret;
-
 	kfree(rec);
-	return err;
+	return ret;
 }
 
 /*
@@ -2436,22 +2392,13 @@ int btrfs_clean_io_failure(struct btrfs_inode *inode, u64 start,
 			   struct page *page, unsigned int pg_offset)
 {
 	struct btrfs_fs_info *fs_info = inode->root->fs_info;
-	struct extent_io_tree *failure_tree = &inode->io_failure_tree;
 	struct extent_io_tree *io_tree = &inode->io_tree;
 	u64 ino = btrfs_ino(inode);
-	u64 private;
 	struct io_failure_record *failrec;
 	struct extent_state *state;
 	int mirror;
-	int ret;
 
-	private = 0;
-	ret = count_range_bits(failure_tree, &private, (u64)-1, 1,
-			       EXTENT_DIRTY, 0);
-	if (!ret)
-		return 0;
-
-	failrec = get_state_failrec(failure_tree, start);
+	failrec = get_failrec(inode, start);
 	if (IS_ERR(failrec))
 		return 0;
 
@@ -2462,12 +2409,12 @@ int btrfs_clean_io_failure(struct btrfs_inode *inode, u64 start,
 
 	spin_lock(&io_tree->lock);
 	state = find_first_extent_bit_state(io_tree,
-					    failrec->start,
+					    failrec->bytenr,
 					    EXTENT_LOCKED);
 	spin_unlock(&io_tree->lock);
 
-	if (!state || state->start > failrec->start ||
-	    state->end < failrec->start + failrec->len - 1)
+	if (!state || state->start > failrec->bytenr ||
+	    state->end < failrec->bytenr + failrec->len - 1)
 		goto out;
 
 	mirror = failrec->this_mirror;
@@ -2478,7 +2425,7 @@ int btrfs_clean_io_failure(struct btrfs_inode *inode, u64 start,
 	} while (mirror != failrec->failed_mirror);
 
 out:
-	free_io_failure(failure_tree, io_tree, failrec);
+	free_io_failure(inode, failrec);
 	return 0;
 }
@@ -2490,30 +2437,26 @@ int btrfs_clean_io_failure(struct btrfs_inode *inode, u64 start,
  */
 void btrfs_free_io_failure_record(struct btrfs_inode *inode, u64 start, u64 end)
 {
-	struct extent_io_tree *failure_tree = &inode->io_failure_tree;
 	struct io_failure_record *failrec;
-	struct extent_state *state, *next;
+	struct rb_node *node, *next;
 
-	if (RB_EMPTY_ROOT(&failure_tree->state))
+	if (RB_EMPTY_ROOT(&inode->io_failure_tree))
 		return;
 
-	spin_lock(&failure_tree->lock);
-	state = find_first_extent_bit_state(failure_tree, start, EXTENT_DIRTY);
-	while (state) {
-		if (state->start > end)
+	spin_lock(&inode->io_failure_lock);
+	node = rb_simple_search_first(&inode->io_failure_tree, start);
+	while (node) {
+		failrec = rb_entry(node, struct io_failure_record, rb_node);
+		if (failrec->bytenr > end)
 			break;
 
-		ASSERT(state->end <= end);
-
-		next = next_state(state);
-
-		failrec = state->failrec;
-		free_extent_state(state);
+		next = rb_next(node);
+		rb_erase(&failrec->rb_node, &inode->io_failure_tree);
 		kfree(failrec);
 
-		state = next;
+		node = next;
 	}
-	spin_unlock(&failure_tree->lock);
+	spin_unlock(&inode->io_failure_lock);
 }
 
 static struct io_failure_record *btrfs_get_io_failure_record(struct inode *inode,
@@ -2523,16 +2466,15 @@ static struct io_failure_record *btrfs_get_io_failure_record(struct inode
 	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
 	u64 start = bbio->file_offset + bio_offset;
 	struct io_failure_record *failrec;
-	struct extent_io_tree *failure_tree = &BTRFS_I(inode)->io_failure_tree;
 	struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree;
 	const u32 sectorsize = fs_info->sectorsize;
 	int ret;
 
-	failrec = get_state_failrec(failure_tree, start);
+	failrec = get_failrec(BTRFS_I(inode), start);
 	if (!IS_ERR(failrec)) {
 		btrfs_debug(fs_info,
 			"Get IO Failure Record: (found) logical=%llu, start=%llu, len=%llu",
-			failrec->logical, failrec->start, failrec->len);
+			failrec->logical, failrec->bytenr, failrec->len);
 		/*
 		 * when data can be on disk more than twice, add to failrec here
 		 * (e.g. with a list for failed_mirror) to make
@@ -2547,7 +2489,8 @@ static struct io_failure_record *btrfs_get_io_failure_record(struct inode
 	if (!failrec)
 		return ERR_PTR(-ENOMEM);
 
-	failrec->start = start;
+	RB_CLEAR_NODE(&failrec->rb_node);
+	failrec->bytenr = start;
 	failrec->len = sectorsize;
 	failrec->failed_mirror = bbio->mirror_num;
 	failrec->this_mirror = bbio->mirror_num;
@@ -2572,17 +2515,17 @@ static struct io_failure_record *btrfs_get_io_failure_record(struct inode
 	}
 
 	/* Set the bits in the private failure tree */
-	ret = set_extent_bits(failure_tree, start, start + sectorsize - 1,
-			      EXTENT_LOCKED | EXTENT_DIRTY);
-	if (ret >= 0) {
-		ret = set_state_failrec(failure_tree, start, failrec);
-		/* Set the bits in the inode's tree */
-		ret = set_extent_bits(tree, start, start + sectorsize - 1,
-				      EXTENT_DAMAGED);
-	} else if (ret < 0) {
+	ret = insert_failrec(BTRFS_I(inode), failrec);
+	if (ret) {
 		kfree(failrec);
 		return ERR_PTR(ret);
 	}
+	ret = set_extent_bits(tree, start, start + sectorsize - 1,
+			      EXTENT_DAMAGED);
+	if (ret) {
+		free_io_failure(BTRFS_I(inode), failrec);
+		return ERR_PTR(ret);
+	}
 
 	return failrec;
 }
@@ -2594,8 +2537,6 @@ int btrfs_repair_one_sector(struct inode *inode, struct btrfs_bio *failed_bbio,
 	u64 start = failed_bbio->file_offset + bio_offset;
 	struct io_failure_record *failrec;
 	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
-	struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree;
-	struct extent_io_tree *failure_tree = &BTRFS_I(inode)->io_failure_tree;
 	struct bio *failed_bio = &failed_bbio->bio;
 	const int icsum = bio_offset >> fs_info->sectorsize_bits;
 	struct bio *repair_bio;
@@ -2624,7 +2565,7 @@ int btrfs_repair_one_sector(struct inode *inode, struct btrfs_bio *failed_bbio,
 		btrfs_debug(fs_info,
 			"failed to repair num_copies %d this_mirror %d failed_mirror %d",
 			failrec->num_copies, failrec->this_mirror, failrec->failed_mirror);
-		free_io_failure(failure_tree, tree, failrec);
+		free_io_failure(BTRFS_I(inode), failrec);
 		return -EIO;
 	}
......
@@ -254,8 +254,12 @@ int btrfs_repair_eb_io_failure(const struct extent_buffer *eb, int mirror_num);
  * bio end_io callback is called to indicate things have failed.
  */
 struct io_failure_record {
+	/* Use rb_simple_node for search/insert */
+	struct {
+		struct rb_node rb_node;
+		u64 bytenr;
+	};
 	struct page *page;
-	u64 start;
 	u64 len;
 	u64 logical;
 	int this_mirror;
......
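
The anonymous struct at the top of io_failure_record mirrors struct rb_simple_node in fs/btrfs/misc.h (an rb_node followed by a u64 bytenr), which is what lets the rb_simple_* helpers search and insert failure records directly. A hedged sketch of a compile-time check one could write to pin that layout down follows; it is not part of this commit, and check_failrec_layout is a hypothetical name.

/* Hypothetical layout check (not in the commit): the leading anonymous
 * struct of io_failure_record must line up field-for-field with
 * rb_simple_node for rb_simple_insert()/rb_simple_search() to work on it.
 */
#include <linux/build_bug.h>
#include <linux/stddef.h>

static inline void check_failrec_layout(void)
{
	BUILD_BUG_ON(offsetof(struct io_failure_record, rb_node) !=
		     offsetof(struct rb_simple_node, rb_node));
	BUILD_BUG_ON(offsetof(struct io_failure_record, bytenr) !=
		     offsetof(struct rb_simple_node, bytenr));
}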
@@ -8790,6 +8790,7 @@ struct inode *btrfs_alloc_inode(struct super_block *sb)
 	ei->last_log_commit = 0;
 
 	spin_lock_init(&ei->lock);
+	spin_lock_init(&ei->io_failure_lock);
 	ei->outstanding_extents = 0;
 	if (sb->s_magic != BTRFS_TEST_MAGIC)
 		btrfs_init_metadata_block_rsv(fs_info, &ei->block_rsv,
@@ -8806,12 +8807,10 @@ struct inode *btrfs_alloc_inode(struct super_block *sb)
 	inode = &ei->vfs_inode;
 	extent_map_tree_init(&ei->extent_tree);
 	extent_io_tree_init(fs_info, &ei->io_tree, IO_TREE_INODE_IO, inode);
-	extent_io_tree_init(fs_info, &ei->io_failure_tree,
-			    IO_TREE_INODE_IO_FAILURE, inode);
 	extent_io_tree_init(fs_info, &ei->file_extent_tree,
 			    IO_TREE_INODE_FILE_EXTENT, inode);
+	ei->io_failure_tree = RB_ROOT;
 	ei->io_tree.track_uptodate = true;
-	ei->io_failure_tree.track_uptodate = true;
 	atomic_set(&ei->sync_writers, 0);
 	mutex_init(&ei->log_mutex);
 	btrfs_ordered_inode_tree_init(&ei->ordered_tree);
......
@@ -88,6 +88,41 @@ static inline struct rb_node *rb_simple_search(struct rb_root *root, u64 bytenr)
 	return NULL;
 }
 
+/*
+ * Search @root for an entry that starts at or comes after @bytenr.
+ *
+ * @root:	the root to search.
+ * @bytenr:	bytenr to search from.
+ *
+ * Return the rb_node that starts at or after @bytenr.  If there is no entry
+ * at or after @bytenr, return NULL.
+ */
+static inline struct rb_node *rb_simple_search_first(struct rb_root *root,
+						     u64 bytenr)
+{
+	struct rb_node *node = root->rb_node, *ret = NULL;
+	struct rb_simple_node *entry, *ret_entry = NULL;
+
+	while (node) {
+		entry = rb_entry(node, struct rb_simple_node, rb_node);
+
+		if (bytenr < entry->bytenr) {
+			if (!ret || entry->bytenr < ret_entry->bytenr) {
+				ret = node;
+				ret_entry = entry;
+			}
+
+			node = node->rb_left;
+		} else if (bytenr > entry->bytenr) {
+			node = node->rb_right;
+		} else {
+			return node;
+		}
+	}
+
+	return ret;
+}
+
 static inline struct rb_node *rb_simple_insert(struct rb_root *root, u64 bytenr,
 					       struct rb_node *node)
 {
......
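
For reference, rb_simple_search_first() pairs naturally with rb_next() to visit every entry whose bytenr falls in a range, which is how btrfs_free_io_failure_record() uses it above. A minimal sketch of that pattern follows; the helper name for_each_simple_node_in_range is illustrative and not from the commit.

/* Sketch (not from the commit): walk every rb_simple_node with
 * start <= bytenr <= end, the loop shape used in btrfs_free_io_failure_record().
 */
static inline void for_each_simple_node_in_range(struct rb_root *root,
						 u64 start, u64 end)
{
	struct rb_node *node = rb_simple_search_first(root, start);

	while (node) {
		struct rb_simple_node *entry =
			rb_entry(node, struct rb_simple_node, rb_node);

		if (entry->bytenr > end)
			break;
		/* ... process entry; take rb_next() before any rb_erase() ... */
		node = rb_next(node);
	}
}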
@@ -84,7 +84,6 @@ struct raid56_bio_trace_info;
 	EM( IO_TREE_FS_EXCLUDED_EXTENTS,  "EXCLUDED_EXTENTS")	    \
 	EM( IO_TREE_BTREE_INODE_IO,	  "BTREE_INODE_IO")	    \
 	EM( IO_TREE_INODE_IO,		  "INODE_IO")		    \
-	EM( IO_TREE_INODE_IO_FAILURE,	  "INODE_IO_FAILURE")	    \
 	EM( IO_TREE_RELOC_BLOCKS,	  "RELOC_BLOCKS")	    \
 	EM( IO_TREE_TRANS_DIRTY_PAGES,	  "TRANS_DIRTY_PAGES")	    \
 	EM( IO_TREE_ROOT_DIRTY_LOG_PAGES, "ROOT_DIRTY_LOG_PAGES")   \
......