Commit 6785073b authored by David Chinner, committed by Lachlan McIlroy

[XFS] Use KM_NOFS for incore inode extent tree allocation V2

If we allow incore extent tree allocations to recurse into the
filesystem under memory pressure, new delayed allocations through
xfs_iomap_write_delay() can deadlock on themselves if memory
reclaim tries to write back dirty pages from that inode.

It will deadlock in xfs_iomap_write_allocate() trying to take the
ilock we already hold. This can also show up as complex ABBA deadlocks
when multiple threads trigger memory reclaim while trying to
allocate extents.
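
For illustration only, the toy userspace program below models the
single-thread case: a non-recursive lock held across an operation that
can re-enter the same lock, which is the shape of the ilock self-deadlock
described above. It is not XFS code; fake_ilock, simulated_reclaim_writeback
and the other names are hypothetical stand-ins.

/*
 * Toy model of the self-deadlock: the "allocation" made while the lock
 * is held triggers a "reclaim" path that wants the same lock again.
 * Build with: cc -pthread deadlock_model.c
 */
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t fake_ilock = PTHREAD_MUTEX_INITIALIZER;

/* Stands in for memory reclaim writing back dirty pages of the same inode. */
static void simulated_reclaim_writeback(void)
{
	/* xfs_iomap_write_allocate() would take the ilock here ... */
	if (pthread_mutex_trylock(&fake_ilock) != 0) {
		printf("would deadlock: lock already held by this thread\n");
		return;
	}
	pthread_mutex_unlock(&fake_ilock);
}

/* Stands in for an allocation that is allowed to recurse into FS reclaim. */
static void alloc_with_fs_reclaim_allowed(void)
{
	simulated_reclaim_writeback();
}

int main(void)
{
	pthread_mutex_lock(&fake_ilock);	/* delayed allocation path holds the lock */
	alloc_with_fs_reclaim_allowed();	/* allocation recurses back into "the filesystem" */
	pthread_mutex_unlock(&fake_ilock);
	return 0;
}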

The main cause is that delayed allocation is not done inside a
transaction, so KM_NOFS is not automatically added to the allocations to
prevent this recursion.

Mark all allocations done for the incore inode extent tree as KM_NOFS to
ensure they never recurse back into the filesystem.

Version 2:
o KM_NOFS implies KM_SLEEP, so just use KM_NOFS
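
As a rough sketch of why the V2 simplification is safe, the standalone
program below models how the KM_* flags are expected to map onto allocator
behaviour, loosely following XFS's kmem_flags_convert(); the GFP_* stand-in
values and the km_to_gfp() helper are hypothetical, not the kernel
implementation. The point it shows: KM_NOFS is still a sleeping allocation,
it only masks off filesystem reclaim, so KM_SLEEP|KM_NOFS and plain KM_NOFS
behave the same.

/* Simplified userspace model of the KM_* -> GFP_* mapping (not kernel code). */
#include <stdio.h>

#define KM_SLEEP	0x0001u
#define KM_NOSLEEP	0x0002u
#define KM_NOFS		0x0004u

#define GFP_ATOMIC	0x01u	/* stand-in: may not sleep, no reclaim */
#define GFP_KERNEL	0x06u	/* stand-in: may sleep, reclaim allowed */
#define __GFP_FS	0x04u	/* stand-in: reclaim may call into the FS */

static unsigned int km_to_gfp(unsigned int km_flags)
{
	unsigned int gfp;

	if (km_flags & KM_NOSLEEP) {
		gfp = GFP_ATOMIC;
	} else {
		/* KM_NOFS, like KM_SLEEP, is a sleeping allocation ... */
		gfp = GFP_KERNEL;
		/* ... but it must not recurse into filesystem reclaim. */
		if (km_flags & KM_NOFS)
			gfp &= ~__GFP_FS;
	}
	return gfp;
}

int main(void)
{
	printf("KM_SLEEP         -> %#x (FS reclaim allowed)\n", km_to_gfp(KM_SLEEP));
	printf("KM_NOFS          -> %#x (sleeps, no FS reclaim)\n", km_to_gfp(KM_NOFS));
	printf("KM_SLEEP|KM_NOFS -> %#x (same as KM_NOFS alone)\n",
	       km_to_gfp(KM_SLEEP | KM_NOFS));
	return 0;
}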

SGI-PV: 981498

SGI-Modid: xfs-linux-melb:xfs-kern:31726a
Signed-off-by: David Chinner <david@fromorbit.com>
Signed-off-by: Niv Sardi <xaiki@sgi.com>
Signed-off-by: Christoph Hellwig <hch@infradead.org>
Signed-off-by: Lachlan McIlroy <lachlan@sgi.com>
parent e6064d30
@@ -3707,7 +3707,7 @@ xfs_iext_add_indirect_multi(
 	 * (all extents past */
 	if (nex2) {
 		byte_diff = nex2 * sizeof(xfs_bmbt_rec_t);
-		nex2_ep = (xfs_bmbt_rec_t *) kmem_alloc(byte_diff, KM_SLEEP);
+		nex2_ep = (xfs_bmbt_rec_t *) kmem_alloc(byte_diff, KM_NOFS);
 		memmove(nex2_ep, &erp->er_extbuf[idx], byte_diff);
 		erp->er_extcount -= nex2;
 		xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, -nex2);
@@ -4007,8 +4007,7 @@ xfs_iext_realloc_direct(
 			ifp->if_u1.if_extents =
 				kmem_realloc(ifp->if_u1.if_extents,
 						rnew_size,
-						ifp->if_real_bytes,
-						KM_SLEEP);
+						ifp->if_real_bytes, KM_NOFS);
 		}
 		if (rnew_size > ifp->if_real_bytes) {
 			memset(&ifp->if_u1.if_extents[ifp->if_bytes /
@@ -4067,7 +4066,7 @@ xfs_iext_inline_to_direct(
 	xfs_ifork_t	*ifp,		/* inode fork pointer */
 	int		new_size)	/* number of extents in file */
 {
-	ifp->if_u1.if_extents = kmem_alloc(new_size, KM_SLEEP);
+	ifp->if_u1.if_extents = kmem_alloc(new_size, KM_NOFS);
 	memset(ifp->if_u1.if_extents, 0, new_size);
 	if (ifp->if_bytes) {
 		memcpy(ifp->if_u1.if_extents, ifp->if_u2.if_inline_ext,
@@ -4099,7 +4098,7 @@ xfs_iext_realloc_indirect(
 	} else {
 		ifp->if_u1.if_ext_irec = (xfs_ext_irec_t *)
 			kmem_realloc(ifp->if_u1.if_ext_irec,
-				new_size, size, KM_SLEEP);
+				new_size, size, KM_NOFS);
 	}
 }
 
@@ -4341,11 +4340,10 @@ xfs_iext_irec_init(
 	nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
 	ASSERT(nextents <= XFS_LINEAR_EXTS);
 
-	erp = (xfs_ext_irec_t *)
-		kmem_alloc(sizeof(xfs_ext_irec_t), KM_SLEEP);
+	erp = kmem_alloc(sizeof(xfs_ext_irec_t), KM_NOFS);
 
 	if (nextents == 0) {
-		ifp->if_u1.if_extents = kmem_alloc(XFS_IEXT_BUFSZ, KM_SLEEP);
+		ifp->if_u1.if_extents = kmem_alloc(XFS_IEXT_BUFSZ, KM_NOFS);
 	} else if (!ifp->if_real_bytes) {
 		xfs_iext_inline_to_direct(ifp, XFS_IEXT_BUFSZ);
 	} else if (ifp->if_real_bytes < XFS_IEXT_BUFSZ) {
@@ -4393,7 +4391,7 @@ xfs_iext_irec_new(
 
 	/* Initialize new extent record */
 	erp = ifp->if_u1.if_ext_irec;
-	erp[erp_idx].er_extbuf = kmem_alloc(XFS_IEXT_BUFSZ, KM_SLEEP);
+	erp[erp_idx].er_extbuf = kmem_alloc(XFS_IEXT_BUFSZ, KM_NOFS);
 	ifp->if_real_bytes = nlists * XFS_IEXT_BUFSZ;
 	memset(erp[erp_idx].er_extbuf, 0, XFS_IEXT_BUFSZ);
 	erp[erp_idx].er_extcount = 0;