Commit d3984c90 authored by Josef Bacik's avatar Josef Bacik Committed by David Sterba

btrfs: introduce an evict flushing state

We have this weird space flushing loop inside inode.c for evict where
we'll do the normal LIMIT flush, and then commit the transaction and
hope we get our space.  This is super janky, and in fact there's really
nothing stopping us from using FLUSH_ALL except that we run delayed
iputs, which means we could deadlock.  So introduce a new flush state
for eviction that does the normal priority flushing with all of the
states that are safe for eviction.

The nice side-effect of this is that we'll try harder for evictions.
Previously if (for example generic/269) you had a bunch of other
operations happening on the fs you could race with those reservations
when committing the transaction, and eventually miss getting a
reservation for the evict.  With this code we'll have our ticket in
place through the transaction commit, so any pinned bytes will go to our
pending evictions first.
Signed-off-by: default avatarJosef Bacik <josef@toxicpanda.com>
Signed-off-by: default avatarDavid Sterba <dsterba@suse.com>
parent 9ce2f423
......@@ -2536,6 +2536,7 @@ enum btrfs_reserve_flush_enum {
* case, use FLUSH LIMIT
*/
BTRFS_RESERVE_FLUSH_LIMIT,
BTRFS_RESERVE_FLUSH_EVICT,
BTRFS_RESERVE_FLUSH_ALL,
};
......
......@@ -5336,59 +5336,50 @@ static struct btrfs_trans_handle *evict_refill_and_join(struct btrfs_root *root,
{
struct btrfs_fs_info *fs_info = root->fs_info;
struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
struct btrfs_trans_handle *trans;
u64 delayed_refs_extra = btrfs_calc_trans_metadata_size(fs_info, 1);
int failures = 0;
for (;;) {
struct btrfs_trans_handle *trans;
int ret;
ret = btrfs_block_rsv_refill(root, rsv,
rsv->size + delayed_refs_extra,
BTRFS_RESERVE_FLUSH_LIMIT);
if (ret && ++failures > 2) {
btrfs_warn(fs_info,
"could not allocate space for a delete; will truncate on mount");
return ERR_PTR(-ENOSPC);
}
/*
* Evict can generate a large amount of delayed refs without
* having a way to add space back since we exhaust our temporary
* block rsv. We aren't allowed to do FLUSH_ALL in this case
* because we could deadlock with so many things in the flushing
* code, so we have to try and hold some extra space to
* compensate for our delayed ref generation. If we can't get
* that space then we need see if we can steal our minimum from
* the global reserve. We will be ratelimited by the amount of
* space we have for the delayed refs rsv, so we'll end up
* committing and trying again.
*/
trans = btrfs_join_transaction(root);
if (IS_ERR(trans) || !ret) {
if (!IS_ERR(trans)) {
trans->block_rsv = &fs_info->trans_block_rsv;
trans->bytes_reserved = delayed_refs_extra;
btrfs_block_rsv_migrate(rsv, trans->block_rsv,
delayed_refs_extra, 1);
}
return trans;
}
int ret;
/*
* Eviction should be taking place at some place safe because of our
* delayed iputs. However the normal flushing code will run delayed
* iputs, so we cannot use FLUSH_ALL otherwise we'll deadlock.
*
* We reserve the delayed_refs_extra here again because we can't use
* btrfs_start_transaction(root, 0) for the same deadlocky reason as
* above. We reserve our extra bit here because we generate a ton of
* delayed refs activity by truncating.
*
* If we cannot make our reservation we'll attempt to steal from the
* global reserve, because we really want to be able to free up space.
*/
ret = btrfs_block_rsv_refill(root, rsv, rsv->size + delayed_refs_extra,
BTRFS_RESERVE_FLUSH_EVICT);
if (ret) {
/*
* Try to steal from the global reserve if there is space for
* it.
*/
if (!btrfs_check_space_for_delayed_refs(fs_info) &&
!btrfs_block_rsv_migrate(global_rsv, rsv, rsv->size, 0))
return trans;
if (btrfs_check_space_for_delayed_refs(fs_info) ||
btrfs_block_rsv_migrate(global_rsv, rsv, rsv->size, 0)) {
btrfs_warn(fs_info,
"could not allocate space for delete; will truncate on mount");
return ERR_PTR(-ENOSPC);
}
delayed_refs_extra = 0;
}
/* If not, commit and try again. */
ret = btrfs_commit_transaction(trans);
if (ret)
return ERR_PTR(ret);
trans = btrfs_join_transaction(root);
if (IS_ERR(trans))
return trans;
if (delayed_refs_extra) {
trans->block_rsv = &fs_info->trans_block_rsv;
trans->bytes_reserved = delayed_refs_extra;
btrfs_block_rsv_migrate(rsv, trans->block_rsv,
delayed_refs_extra, 1);
}
return trans;
}
void btrfs_evict_inode(struct inode *inode)
......
......@@ -848,6 +848,17 @@ static const enum btrfs_flush_state priority_flush_states[] = {
ALLOC_CHUNK,
};
static const enum btrfs_flush_state evict_flush_states[] = {
FLUSH_DELAYED_ITEMS_NR,
FLUSH_DELAYED_ITEMS,
FLUSH_DELAYED_REFS_NR,
FLUSH_DELAYED_REFS,
FLUSH_DELALLOC,
FLUSH_DELALLOC_WAIT,
ALLOC_CHUNK,
COMMIT_TRANS,
};
static void priority_reclaim_metadata_space(struct btrfs_fs_info *fs_info,
struct btrfs_space_info *space_info,
struct reserve_ticket *ticket,
......@@ -922,12 +933,24 @@ static int handle_reserve_ticket(struct btrfs_fs_info *fs_info,
u64 reclaim_bytes = 0;
int ret;
if (flush == BTRFS_RESERVE_FLUSH_ALL)
switch (flush) {
case BTRFS_RESERVE_FLUSH_ALL:
wait_reserve_ticket(fs_info, space_info, ticket);
else
break;
case BTRFS_RESERVE_FLUSH_LIMIT:
priority_reclaim_metadata_space(fs_info, space_info, ticket,
priority_flush_states,
ARRAY_SIZE(priority_flush_states));
break;
case BTRFS_RESERVE_FLUSH_EVICT:
priority_reclaim_metadata_space(fs_info, space_info, ticket,
evict_flush_states,
ARRAY_SIZE(evict_flush_states));
break;
default:
ASSERT(0);
break;
}
spin_lock(&space_info->lock);
ret = ticket->error;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment