Commit 5df11363 authored by Josef Bacik's avatar Josef Bacik Committed by David Sterba

btrfs: loop in inode_rsv_refill

With severe fragmentation we can end up with our inode rsv size being
huge during writeout, which would cause us to need to make very large
metadata reservations.

However we may not actually need that much once writeout is complete,
because of the over-reservation for the worst case.

So instead try to make our reservation, and if we couldn't make it,
re-calculate our new reservation size and try again.  If our reservation
size doesn't change between tries then we know we are actually out of
space and can return an error: any flushing that may have been running
in parallel did not free up any space.
Signed-off-by: default avatarJosef Bacik <josef@toxicpanda.com>
[ rename to calc_refill_bytes, update comment and changelog ]
Signed-off-by: default avatarDavid Sterba <dsterba@suse.com>
parent f91587e4
...@@ -5806,6 +5806,21 @@ int btrfs_block_rsv_refill(struct btrfs_root *root, ...@@ -5806,6 +5806,21 @@ int btrfs_block_rsv_refill(struct btrfs_root *root,
return ret; return ret;
} }
/*
 * Snapshot how far the block reservation is behind its target.
 *
 * @block_rsv:      reservation to inspect (its ->lock is taken here)
 * @metadata_bytes: out: metadata still needed to reach ->size, 0 if none
 * @qgroup_bytes:   out: qgroup metadata still needed to reach
 *                  ->qgroup_rsv_size, 0 if none
 *
 * Both deltas are read under block_rsv->lock so they form one consistent
 * snapshot, but they can be stale by the time the caller acts on them.
 */
static void calc_refill_bytes(struct btrfs_block_rsv *block_rsv,
			      u64 *metadata_bytes, u64 *qgroup_bytes)
{
	u64 meta_needed = 0;
	u64 qgroup_needed = 0;

	spin_lock(&block_rsv->lock);
	if (block_rsv->size > block_rsv->reserved)
		meta_needed = block_rsv->size - block_rsv->reserved;
	if (block_rsv->qgroup_rsv_size > block_rsv->qgroup_rsv_reserved)
		qgroup_needed = block_rsv->qgroup_rsv_size -
				block_rsv->qgroup_rsv_reserved;
	spin_unlock(&block_rsv->lock);

	*metadata_bytes = meta_needed;
	*qgroup_bytes = qgroup_needed;
}
/** /**
* btrfs_inode_rsv_refill - refill the inode block rsv. * btrfs_inode_rsv_refill - refill the inode block rsv.
* @inode - the inode we are refilling. * @inode - the inode we are refilling.
...@@ -5821,25 +5836,42 @@ static int btrfs_inode_rsv_refill(struct btrfs_inode *inode, ...@@ -5821,25 +5836,42 @@ static int btrfs_inode_rsv_refill(struct btrfs_inode *inode,
{ {
struct btrfs_root *root = inode->root; struct btrfs_root *root = inode->root;
struct btrfs_block_rsv *block_rsv = &inode->block_rsv; struct btrfs_block_rsv *block_rsv = &inode->block_rsv;
u64 num_bytes = 0; u64 num_bytes, last = 0;
u64 qgroup_num_bytes = 0; u64 qgroup_num_bytes;
int ret = -ENOSPC; int ret = -ENOSPC;
spin_lock(&block_rsv->lock); calc_refill_bytes(block_rsv, &num_bytes, &qgroup_num_bytes);
if (block_rsv->reserved < block_rsv->size)
num_bytes = block_rsv->size - block_rsv->reserved;
if (block_rsv->qgroup_rsv_reserved < block_rsv->qgroup_rsv_size)
qgroup_num_bytes = block_rsv->qgroup_rsv_size -
block_rsv->qgroup_rsv_reserved;
spin_unlock(&block_rsv->lock);
if (num_bytes == 0) if (num_bytes == 0)
return 0; return 0;
ret = btrfs_qgroup_reserve_meta_prealloc(root, qgroup_num_bytes, true); do {
if (ret) ret = btrfs_qgroup_reserve_meta_prealloc(root, qgroup_num_bytes,
return ret; true);
ret = reserve_metadata_bytes(root, block_rsv, num_bytes, flush); if (ret)
return ret;
ret = reserve_metadata_bytes(root, block_rsv, num_bytes, flush);
if (ret) {
btrfs_qgroup_free_meta_prealloc(root, qgroup_num_bytes);
last = num_bytes;
/*
* If we are fragmented we can end up with a lot of
* outstanding extents which will make our size be much
* larger than our reserved amount.
*
* If the reservation happens here, it might be very
* big though not needed in the end, if the delalloc
* flushing happens.
*
* If this is the case try and do the reserve again.
*/
if (flush == BTRFS_RESERVE_FLUSH_ALL)
calc_refill_bytes(block_rsv, &num_bytes,
&qgroup_num_bytes);
if (num_bytes == 0)
return 0;
}
} while (ret && last != num_bytes);
if (!ret) { if (!ret) {
block_rsv_add_bytes(block_rsv, num_bytes, false); block_rsv_add_bytes(block_rsv, num_bytes, false);
trace_btrfs_space_reservation(root->fs_info, "delalloc", trace_btrfs_space_reservation(root->fs_info, "delalloc",
...@@ -5849,8 +5881,7 @@ static int btrfs_inode_rsv_refill(struct btrfs_inode *inode, ...@@ -5849,8 +5881,7 @@ static int btrfs_inode_rsv_refill(struct btrfs_inode *inode,
spin_lock(&block_rsv->lock); spin_lock(&block_rsv->lock);
block_rsv->qgroup_rsv_reserved += qgroup_num_bytes; block_rsv->qgroup_rsv_reserved += qgroup_num_bytes;
spin_unlock(&block_rsv->lock); spin_unlock(&block_rsv->lock);
} else }
btrfs_qgroup_free_meta_prealloc(root, qgroup_num_bytes);
return ret; return ret;
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment