Commit b1de6fc7 authored by Darrick J. Wong

xfs: fix log reservation overflows when allocating large rt extents

Omar Sandoval reported that a 4G fallocate on the realtime device causes
filesystem shutdowns due to a log reservation overflow that happens when
we log the rtbitmap updates.  Factor rtbitmap/rtsummary updates into the
tr_write and tr_itruncate log reservation calculation.

"The following reproducer results in a transaction log overrun warning
for me:

    mkfs.xfs -f -r rtdev=/dev/vdc -d rtinherit=1 -m reflink=0 /dev/vdb
    mount -o rtdev=/dev/vdc /dev/vdb /mnt
    fallocate -l 4G /mnt/foo
Reported-by: Omar Sandoval <osandov@osandov.com>
Tested-by: Omar Sandoval <osandov@osandov.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Brian Foster <bfoster@redhat.com>
parent d0c22041
...@@ -196,6 +196,24 @@ xfs_calc_inode_chunk_res( ...@@ -196,6 +196,24 @@ xfs_calc_inode_chunk_res(
return res; return res;
} }
/*
 * Number of filesystem blocks to reserve in the log for the realtime
 * metadata touched when allocating or freeing realtime extents.
 *
 * For each operation we budget enough rtbitmap blocks to hold one bit for
 * every realtime extent in a MAXEXTLEN-block mapping, plus one additional
 * block for the realtime summary.  The per-operation figure is then scaled
 * by num_ops.
 */
unsigned int
xfs_rtalloc_log_count(
	struct xfs_mount	*mp,
	unsigned int		num_ops)
{
	unsigned int		fsb_bytes = XFS_FSB_TO_B(mp, 1);
	unsigned int		bitmap_bytes;
	unsigned int		blocks_per_op;

	/* rt extents covered by MAXEXTLEN blocks, converted from bits to bytes */
	bitmap_bytes = (MAXEXTLEN / mp->m_sb.sb_rextsize) / NBBY;

	/* bitmap bytes rounded up to whole blocks, plus one summary block */
	blocks_per_op = howmany(bitmap_bytes, fsb_bytes) + 1;

	return blocks_per_op * num_ops;
}
/* /*
* Various log reservation values. * Various log reservation values.
* *
...@@ -218,13 +236,21 @@ xfs_calc_inode_chunk_res( ...@@ -218,13 +236,21 @@ xfs_calc_inode_chunk_res(
/* /*
* In a write transaction we can allocate a maximum of 2 * In a write transaction we can allocate a maximum of 2
* extents. This gives: * extents. This gives (t1):
* the inode getting the new extents: inode size * the inode getting the new extents: inode size
* the inode's bmap btree: max depth * block size * the inode's bmap btree: max depth * block size
* the agfs of the ags from which the extents are allocated: 2 * sector * the agfs of the ags from which the extents are allocated: 2 * sector
* the superblock free block counter: sector size * the superblock free block counter: sector size
* the allocation btrees: 2 exts * 2 trees * (2 * max depth - 1) * block size * the allocation btrees: 2 exts * 2 trees * (2 * max depth - 1) * block size
* And the bmap_finish transaction can free bmap blocks in a join: * Or, if we're writing to a realtime file (t2):
* the inode getting the new extents: inode size
* the inode's bmap btree: max depth * block size
* the agfs of the ags from which the extents are allocated: 2 * sector
* the superblock free block counter: sector size
* the realtime bitmap: ((MAXEXTLEN / rtextsize) / NBBY) bytes
* the realtime summary: 1 block
* the allocation btrees: 2 trees * (2 * max depth - 1) * block size
* And the bmap_finish transaction can free bmap blocks in a join (t3):
* the agfs of the ags containing the blocks: 2 * sector size * the agfs of the ags containing the blocks: 2 * sector size
* the agfls of the ags containing the blocks: 2 * sector size * the agfls of the ags containing the blocks: 2 * sector size
* the super block free block counter: sector size * the super block free block counter: sector size
...@@ -234,40 +260,72 @@ STATIC uint ...@@ -234,40 +260,72 @@ STATIC uint
xfs_calc_write_reservation( xfs_calc_write_reservation(
struct xfs_mount *mp) struct xfs_mount *mp)
{ {
return XFS_DQUOT_LOGRES(mp) + unsigned int t1, t2, t3;
max((xfs_calc_inode_res(mp, 1) + unsigned int blksz = XFS_FSB_TO_B(mp, 1);
t1 = xfs_calc_inode_res(mp, 1) +
xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK), blksz) +
xfs_calc_buf_res(3, mp->m_sb.sb_sectsize) +
xfs_calc_buf_res(xfs_allocfree_log_count(mp, 2), blksz);
if (xfs_sb_version_hasrealtime(&mp->m_sb)) {
t2 = xfs_calc_inode_res(mp, 1) +
xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK), xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK),
XFS_FSB_TO_B(mp, 1)) + blksz) +
xfs_calc_buf_res(3, mp->m_sb.sb_sectsize) + xfs_calc_buf_res(3, mp->m_sb.sb_sectsize) +
xfs_calc_buf_res(xfs_allocfree_log_count(mp, 2), xfs_calc_buf_res(xfs_rtalloc_log_count(mp, 1), blksz) +
XFS_FSB_TO_B(mp, 1))), xfs_calc_buf_res(xfs_allocfree_log_count(mp, 1), blksz);
(xfs_calc_buf_res(5, mp->m_sb.sb_sectsize) + } else {
xfs_calc_buf_res(xfs_allocfree_log_count(mp, 2), t2 = 0;
XFS_FSB_TO_B(mp, 1)))); }
t3 = xfs_calc_buf_res(5, mp->m_sb.sb_sectsize) +
xfs_calc_buf_res(xfs_allocfree_log_count(mp, 2), blksz);
return XFS_DQUOT_LOGRES(mp) + max3(t1, t2, t3);
} }
/* /*
* In truncating a file we free up to two extents at once. We can modify: * In truncating a file we free up to two extents at once. We can modify (t1):
* the inode being truncated: inode size * the inode being truncated: inode size
* the inode's bmap btree: (max depth + 1) * block size * the inode's bmap btree: (max depth + 1) * block size
* And the bmap_finish transaction can free the blocks and bmap blocks: * And the bmap_finish transaction can free the blocks and bmap blocks (t2):
* the agf for each of the ags: 4 * sector size * the agf for each of the ags: 4 * sector size
* the agfl for each of the ags: 4 * sector size * the agfl for each of the ags: 4 * sector size
* the super block to reflect the freed blocks: sector size * the super block to reflect the freed blocks: sector size
* worst case split in allocation btrees per extent assuming 4 extents: * worst case split in allocation btrees per extent assuming 4 extents:
* 4 exts * 2 trees * (2 * max depth - 1) * block size * 4 exts * 2 trees * (2 * max depth - 1) * block size
* Or, if it's a realtime file (t3):
* the agf for each of the ags: 2 * sector size
* the agfl for each of the ags: 2 * sector size
* the super block to reflect the freed blocks: sector size
* the realtime bitmap: 2 exts * ((MAXEXTLEN / rtextsize) / NBBY) bytes
* the realtime summary: 2 exts * 1 block
* worst case split in allocation btrees per extent assuming 2 extents:
* 2 exts * 2 trees * (2 * max depth - 1) * block size
*/ */
STATIC uint STATIC uint
xfs_calc_itruncate_reservation( xfs_calc_itruncate_reservation(
struct xfs_mount *mp) struct xfs_mount *mp)
{ {
return XFS_DQUOT_LOGRES(mp) + unsigned int t1, t2, t3;
max((xfs_calc_inode_res(mp, 1) + unsigned int blksz = XFS_FSB_TO_B(mp, 1);
xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) + 1,
XFS_FSB_TO_B(mp, 1))), t1 = xfs_calc_inode_res(mp, 1) +
(xfs_calc_buf_res(9, mp->m_sb.sb_sectsize) + xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) + 1, blksz);
xfs_calc_buf_res(xfs_allocfree_log_count(mp, 4),
XFS_FSB_TO_B(mp, 1)))); t2 = xfs_calc_buf_res(9, mp->m_sb.sb_sectsize) +
xfs_calc_buf_res(xfs_allocfree_log_count(mp, 4), blksz);
if (xfs_sb_version_hasrealtime(&mp->m_sb)) {
t3 = xfs_calc_buf_res(5, mp->m_sb.sb_sectsize) +
xfs_calc_buf_res(xfs_rtalloc_log_count(mp, 2), blksz) +
xfs_calc_buf_res(xfs_allocfree_log_count(mp, 2), blksz);
} else {
t3 = 0;
}
return XFS_DQUOT_LOGRES(mp) + max3(t1, t2, t3);
} }
/* /*
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment