Commit 8a48088f authored by Christoph Hellwig's avatar Christoph Hellwig Committed by Ben Myers

xfs: don't flush inodes from background inode reclaim

We already flush dirty inodes throug the AIL regularly, there is no reason
to have second thread compete with it and disturb the I/O pattern.  We still
do write inodes when doing a synchronous reclaim from the shrinker or during
unmount for now.
Signed-off-by: default avatarChristoph Hellwig <hch@lst.de>
Reviewed-by: default avatarDave Chinner <dchinner@redhat.com>
Reviewed-by: default avatarMark Tinguely <tinguely@sgi.com>
Signed-off-by: default avatarBen Myers <bpm@sgi.com>
parent 211e4d43
...@@ -631,11 +631,8 @@ xfs_reclaim_inode_grab( ...@@ -631,11 +631,8 @@ xfs_reclaim_inode_grab(
} }
/* /*
* Inodes in different states need to be treated differently, and the return * Inodes in different states need to be treated differently. The following
* value of xfs_iflush is not sufficient to get this right. The following table * table lists the inode states and the reclaim actions necessary:
* lists the inode states and the reclaim actions necessary for non-blocking
* reclaim:
*
* *
* inode state iflush ret required action * inode state iflush ret required action
* --------------- ---------- --------------- * --------------- ---------- ---------------
...@@ -645,9 +642,8 @@ xfs_reclaim_inode_grab( ...@@ -645,9 +642,8 @@ xfs_reclaim_inode_grab(
* stale, unpinned 0 reclaim * stale, unpinned 0 reclaim
* clean, pinned(*) 0 requeue * clean, pinned(*) 0 requeue
* stale, pinned EAGAIN requeue * stale, pinned EAGAIN requeue
* dirty, delwri ok 0 requeue * dirty, async - requeue
* dirty, delwri blocked EAGAIN requeue * dirty, sync 0 reclaim
* dirty, sync flush 0 reclaim
* *
* (*) dgc: I don't think the clean, pinned state is possible but it gets * (*) dgc: I don't think the clean, pinned state is possible but it gets
* handled anyway given the order of checks implemented. * handled anyway given the order of checks implemented.
...@@ -658,26 +654,23 @@ xfs_reclaim_inode_grab( ...@@ -658,26 +654,23 @@ xfs_reclaim_inode_grab(
* *
* Also, because we get the flush lock first, we know that any inode that has * Also, because we get the flush lock first, we know that any inode that has
* been flushed delwri has had the flush completed by the time we check that * been flushed delwri has had the flush completed by the time we check that
* the inode is clean. The clean inode check needs to be done before flushing * the inode is clean.
* the inode delwri otherwise we would loop forever requeuing clean inodes as
* we cannot tell apart a successful delwri flush and a clean inode from the
* return value of xfs_iflush().
* *
* Note that because the inode is flushed delayed write by background * Note that because the inode is flushed delayed write by AIL pushing, the
* writeback, the flush lock may already be held here and waiting on it can * flush lock may already be held here and waiting on it can result in very
* result in very long latencies. Hence for sync reclaims, where we wait on the * long latencies. Hence for sync reclaims, where we wait on the flush lock,
* flush lock, the caller should push out delayed write inodes first before * the caller should push the AIL first before trying to reclaim inodes to
* trying to reclaim them to minimise the amount of time spent waiting. For * minimise the amount of time spent waiting. For background relaim, we only
* background relaim, we just requeue the inode for the next pass. * bother to reclaim clean inodes anyway.
* *
* Hence the order of actions after gaining the locks should be: * Hence the order of actions after gaining the locks should be:
* bad => reclaim * bad => reclaim
* shutdown => unpin and reclaim * shutdown => unpin and reclaim
* pinned, delwri => requeue * pinned, async => requeue
* pinned, sync => unpin * pinned, sync => unpin
* stale => reclaim * stale => reclaim
* clean => reclaim * clean => reclaim
* dirty, delwri => flush and requeue * dirty, async => requeue
* dirty, sync => flush, wait and reclaim * dirty, sync => flush, wait and reclaim
*/ */
STATIC int STATIC int
...@@ -716,10 +709,8 @@ xfs_reclaim_inode( ...@@ -716,10 +709,8 @@ xfs_reclaim_inode(
goto reclaim; goto reclaim;
} }
if (xfs_ipincount(ip)) { if (xfs_ipincount(ip)) {
if (!(sync_mode & SYNC_WAIT)) { if (!(sync_mode & SYNC_WAIT))
xfs_ifunlock(ip); goto out_ifunlock;
goto out;
}
xfs_iunpin_wait(ip); xfs_iunpin_wait(ip);
} }
if (xfs_iflags_test(ip, XFS_ISTALE)) if (xfs_iflags_test(ip, XFS_ISTALE))
...@@ -727,6 +718,13 @@ xfs_reclaim_inode( ...@@ -727,6 +718,13 @@ xfs_reclaim_inode(
if (xfs_inode_clean(ip)) if (xfs_inode_clean(ip))
goto reclaim; goto reclaim;
/*
* Never flush out dirty data during non-blocking reclaim, as it would
* just contend with AIL pushing trying to do the same job.
*/
if (!(sync_mode & SYNC_WAIT))
goto out_ifunlock;
/* /*
* Now we have an inode that needs flushing. * Now we have an inode that needs flushing.
* *
...@@ -745,7 +743,6 @@ xfs_reclaim_inode( ...@@ -745,7 +743,6 @@ xfs_reclaim_inode(
* pass through will see the stale flag set on the inode. * pass through will see the stale flag set on the inode.
*/ */
error = xfs_iflush(ip, SYNC_TRYLOCK | sync_mode); error = xfs_iflush(ip, SYNC_TRYLOCK | sync_mode);
if (sync_mode & SYNC_WAIT) {
if (error == EAGAIN) { if (error == EAGAIN) {
xfs_iunlock(ip, XFS_ILOCK_EXCL); xfs_iunlock(ip, XFS_ILOCK_EXCL);
/* backoff longer than in xfs_ifree_cluster */ /* backoff longer than in xfs_ifree_cluster */
...@@ -753,34 +750,6 @@ xfs_reclaim_inode( ...@@ -753,34 +750,6 @@ xfs_reclaim_inode(
goto restart; goto restart;
} }
xfs_iflock(ip); xfs_iflock(ip);
goto reclaim;
}
/*
* When we have to flush an inode but don't have SYNC_WAIT set, we
* flush the inode out using a delwri buffer and wait for the next
* call into reclaim to find it in a clean state instead of waiting for
* it now. We also don't return errors here - if the error is transient
* then the next reclaim pass will flush the inode, and if the error
* is permanent then the next sync reclaim will reclaim the inode and
* pass on the error.
*/
if (error && error != EAGAIN && !XFS_FORCED_SHUTDOWN(ip->i_mount)) {
xfs_warn(ip->i_mount,
"inode 0x%llx background reclaim flush failed with %d",
(long long)ip->i_ino, error);
}
out:
xfs_iflags_clear(ip, XFS_IRECLAIM);
xfs_iunlock(ip, XFS_ILOCK_EXCL);
/*
* We could return EAGAIN here to make reclaim rescan the inode tree in
* a short while. However, this just burns CPU time scanning the tree
* waiting for IO to complete and xfssyncd never goes back to the idle
* state. Instead, return 0 to let the next scheduled background reclaim
* attempt to reclaim the inode again.
*/
return 0;
reclaim: reclaim:
xfs_ifunlock(ip); xfs_ifunlock(ip);
...@@ -814,8 +783,21 @@ xfs_reclaim_inode( ...@@ -814,8 +783,21 @@ xfs_reclaim_inode(
xfs_iunlock(ip, XFS_ILOCK_EXCL); xfs_iunlock(ip, XFS_ILOCK_EXCL);
xfs_inode_free(ip); xfs_inode_free(ip);
return error; return error;
out_ifunlock:
xfs_ifunlock(ip);
out:
xfs_iflags_clear(ip, XFS_IRECLAIM);
xfs_iunlock(ip, XFS_ILOCK_EXCL);
/*
* We could return EAGAIN here to make reclaim rescan the inode tree in
* a short while. However, this just burns CPU time scanning the tree
* waiting for IO to complete and xfssyncd never goes back to the idle
* state. Instead, return 0 to let the next scheduled background reclaim
* attempt to reclaim the inode again.
*/
return 0;
} }
/* /*
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment