Commit 718ecc50 authored by Dave Chinner, committed by Darrick J. Wong

xfs: xfs_iflock is no longer a completion

With the recent rework of the inode cluster flushing, we no longer
ever wait on the inode flush "lock". It was never a lock in the
first place, just a completion to allow callers to wait for inode IO
to complete. We now never wait for flush completion as all inode
flushing is non-blocking. Hence we can get rid of all the iflock
infrastructure and instead just set and check a state flag.

Rename the XFS_IFLOCK flag to XFS_IFLUSHING, convert all the
xfs_iflock_nowait() calls to test-and-set operations on that flag,
and replace all the xfs_ifunlock() calls with clear operations.

Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
parent 771915c4
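
The shape of the conversion is easiest to see as a before/after fragment. The following is an illustrative sketch distilled from the xfs_reclaim_inode() hunks below, not a verbatim quote of the patch:

	/* Before: flush "lock" built on a wait bit */
	if (!xfs_iflock_nowait(ip))		/* test-and-set XFS_IFLOCK */
		goto out_iunlock;
	/* ... write the inode back ... */
	xfs_ifunlock(ip);			/* clear the bit, wake any waiters */

	/* After: plain state flag, nobody ever sleeps on it */
	if (xfs_iflags_test_and_set(ip, XFS_IFLUSHING))
		goto out_iunlock;
	/* ... write the inode back ... */
	xfs_iflags_clear(ip, XFS_IFLUSHING);

Because nothing sleeps on the flag any more, the clearing side no longer needs the smp_mb()/wake_up_bit() pair that xfs_ifunlock() carried, and the __xfs_iflock() waiter is deleted outright.
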
@@ -52,7 +52,6 @@ xfs_inode_alloc(
 	XFS_STATS_INC(mp, vn_active);
 	ASSERT(atomic_read(&ip->i_pincount) == 0);
-	ASSERT(!xfs_isiflocked(ip));
 	ASSERT(ip->i_ino == 0);
 
 	/* initialise the xfs inode */
@@ -123,7 +122,7 @@ void
 xfs_inode_free(
 	struct xfs_inode	*ip)
 {
-	ASSERT(!xfs_isiflocked(ip));
+	ASSERT(!xfs_iflags_test(ip, XFS_IFLUSHING));
 
 	/*
 	 * Because we use RCU freeing we need to ensure the inode always
@@ -1035,23 +1034,21 @@ xfs_reclaim_inode(
 	if (!xfs_ilock_nowait(ip, XFS_ILOCK_EXCL))
 		goto out;
-	if (!xfs_iflock_nowait(ip))
+	if (xfs_iflags_test_and_set(ip, XFS_IFLUSHING))
 		goto out_iunlock;
 
 	if (XFS_FORCED_SHUTDOWN(ip->i_mount)) {
 		xfs_iunpin_wait(ip);
-		/* xfs_iflush_abort() drops the flush lock */
 		xfs_iflush_abort(ip);
 		goto reclaim;
 	}
 	if (xfs_ipincount(ip))
-		goto out_ifunlock;
+		goto out_clear_flush;
 	if (!xfs_inode_clean(ip))
-		goto out_ifunlock;
+		goto out_clear_flush;
 
-	xfs_ifunlock(ip);
+	xfs_iflags_clear(ip, XFS_IFLUSHING);
 reclaim:
-	ASSERT(!xfs_isiflocked(ip));
 
 	/*
 	 * Because we use RCU freeing we need to ensure the inode always appears
@@ -1101,8 +1098,8 @@ xfs_reclaim_inode(
 	__xfs_inode_free(ip);
 	return;
 
-out_ifunlock:
-	xfs_ifunlock(ip);
+out_clear_flush:
+	xfs_iflags_clear(ip, XFS_IFLUSHING);
 out_iunlock:
 	xfs_iunlock(ip, XFS_ILOCK_EXCL);
 out:
...
@@ -598,22 +598,6 @@ xfs_lock_two_inodes(
 	}
 }
 
-void
-__xfs_iflock(
-	struct xfs_inode	*ip)
-{
-	wait_queue_head_t	*wq = bit_waitqueue(&ip->i_flags, __XFS_IFLOCK_BIT);
-	DEFINE_WAIT_BIT(wait, &ip->i_flags, __XFS_IFLOCK_BIT);
-
-	do {
-		prepare_to_wait_exclusive(wq, &wait.wq_entry, TASK_UNINTERRUPTIBLE);
-		if (xfs_isiflocked(ip))
-			io_schedule();
-	} while (!xfs_iflock_nowait(ip));
-
-	finish_wait(wq, &wait.wq_entry);
-}
-
 STATIC uint
 _xfs_dic2xflags(
 	uint16_t		di_flags,
@@ -2531,11 +2515,8 @@ xfs_ifree_mark_inode_stale(
 	 * valid, the wrong inode or stale.
 	 */
 	spin_lock(&ip->i_flags_lock);
-	if (ip->i_ino != inum || __xfs_iflags_test(ip, XFS_ISTALE)) {
-		spin_unlock(&ip->i_flags_lock);
-		rcu_read_unlock();
-		return;
-	}
+	if (ip->i_ino != inum || __xfs_iflags_test(ip, XFS_ISTALE))
+		goto out_iflags_unlock;
 
 	/*
 	 * Don't try to lock/unlock the current inode, but we _cannot_ skip the
@@ -2552,16 +2533,14 @@ xfs_ifree_mark_inode_stale(
 		}
 	}
 	ip->i_flags |= XFS_ISTALE;
-	spin_unlock(&ip->i_flags_lock);
-	rcu_read_unlock();
 
 	/*
-	 * If we can't get the flush lock, the inode is already attached. All
+	 * If the inode is flushing, it is already attached to the buffer. All
 	 * we needed to do here is mark the inode stale so buffer IO completion
 	 * will remove it from the AIL.
 	 */
 	iip = ip->i_itemp;
-	if (!xfs_iflock_nowait(ip)) {
+	if (__xfs_iflags_test(ip, XFS_IFLUSHING)) {
 		ASSERT(!list_empty(&iip->ili_item.li_bio_list));
 		ASSERT(iip->ili_last_fields);
 		goto out_iunlock;
@@ -2573,10 +2552,12 @@ xfs_ifree_mark_inode_stale(
 	 * commit as the flock synchronises removal of the inode from the
 	 * cluster buffer against inode reclaim.
 	 */
-	if (!iip || list_empty(&iip->ili_item.li_bio_list)) {
-		xfs_ifunlock(ip);
+	if (!iip || list_empty(&iip->ili_item.li_bio_list))
 		goto out_iunlock;
-	}
+
+	__xfs_iflags_set(ip, XFS_IFLUSHING);
+	spin_unlock(&ip->i_flags_lock);
+	rcu_read_unlock();
 
 	/* we have a dirty inode in memory that has not yet been flushed. */
 	spin_lock(&iip->ili_lock);
@@ -2586,9 +2567,16 @@ xfs_ifree_mark_inode_stale(
 	spin_unlock(&iip->ili_lock);
 	ASSERT(iip->ili_last_fields);
 
+	if (ip != free_ip)
+		xfs_iunlock(ip, XFS_ILOCK_EXCL);
+	return;
+
 out_iunlock:
 	if (ip != free_ip)
 		xfs_iunlock(ip, XFS_ILOCK_EXCL);
+out_iflags_unlock:
+	spin_unlock(&ip->i_flags_lock);
+	rcu_read_unlock();
 }
 
 /*
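
Taken together, the xfs_ifree_mark_inode_stale() hunks above restructure the locking so that the i_flags_lock (and the RCU read lock) stay held until the inode is either skipped or marked as flushing. A condensed sketch of the resulting flow, assembled from the hunks rather than quoted from the final source (the ILOCK handling for non-current inodes is elided):

	rcu_read_lock();
	spin_lock(&ip->i_flags_lock);
	if (ip->i_ino != inum || __xfs_iflags_test(ip, XFS_ISTALE))
		goto out_iflags_unlock;		/* wrong or already-stale inode */

	ip->i_flags |= XFS_ISTALE;

	if (__xfs_iflags_test(ip, XFS_IFLUSHING))
		goto out_iunlock;		/* already attached to the buffer */
	if (!iip || list_empty(&iip->ili_item.li_bio_list))
		goto out_iunlock;		/* clean: nothing more to do */

	__xfs_iflags_set(ip, XFS_IFLUSHING);	/* dirty: claim the flush... */
	spin_unlock(&ip->i_flags_lock);		/* ...before dropping the lock */
	rcu_read_unlock();

Setting XFS_IFLUSHING while still under the i_flags_lock is what closes the race with xfs_reclaim_inode(), whose xfs_iflags_test_and_set() does its own test-and-set of the same flag under the same spinlock.
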
@@ -2631,8 +2619,9 @@ xfs_ifree_cluster(
 	/*
 	 * We obtain and lock the backing buffer first in the process
-	 * here, as we have to ensure that any dirty inode that we
-	 * can't get the flush lock on is attached to the buffer.
+	 * here to ensure dirty inodes attached to the buffer remain in
+	 * the flushing state while we mark them stale.
+	 *
 	 * If we scan the in-memory inodes first, then buffer IO can
 	 * complete before we get a lock on it, and hence we may fail
 	 * to mark all the active inodes on the buffer stale.
@@ -3443,7 +3432,7 @@ xfs_iflush(
 	int			error;
 
 	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED));
-	ASSERT(xfs_isiflocked(ip));
+	ASSERT(xfs_iflags_test(ip, XFS_IFLUSHING));
 	ASSERT(ip->i_df.if_format != XFS_DINODE_FMT_BTREE ||
 	       ip->i_df.if_nextents > XFS_IFORK_MAXEXT(ip, XFS_DATA_FORK));
 	ASSERT(iip->ili_item.li_buf == bp);
@@ -3613,7 +3602,7 @@ xfs_iflush_cluster(
 		/*
 		 * Quick and dirty check to avoid locks if possible.
 		 */
-		if (__xfs_iflags_test(ip, XFS_IRECLAIM | XFS_IFLOCK))
+		if (__xfs_iflags_test(ip, XFS_IRECLAIM | XFS_IFLUSHING))
 			continue;
 		if (xfs_ipincount(ip))
 			continue;
@@ -3627,7 +3616,7 @@ xfs_iflush_cluster(
 		 */
 		spin_lock(&ip->i_flags_lock);
 		ASSERT(!__xfs_iflags_test(ip, XFS_ISTALE));
-		if (__xfs_iflags_test(ip, XFS_IRECLAIM | XFS_IFLOCK)) {
+		if (__xfs_iflags_test(ip, XFS_IRECLAIM | XFS_IFLUSHING)) {
 			spin_unlock(&ip->i_flags_lock);
 			continue;
 		}
@@ -3635,23 +3624,16 @@ xfs_iflush_cluster(
 		/*
 		 * ILOCK will pin the inode against reclaim and prevent
 		 * concurrent transactions modifying the inode while we are
-		 * flushing the inode.
+		 * flushing the inode. If we get the lock, set the flushing
+		 * state before we drop the i_flags_lock.
 		 */
 		if (!xfs_ilock_nowait(ip, XFS_ILOCK_SHARED)) {
 			spin_unlock(&ip->i_flags_lock);
 			continue;
 		}
+		__xfs_iflags_set(ip, XFS_IFLUSHING);
 		spin_unlock(&ip->i_flags_lock);
 
-		/*
-		 * Skip inodes that are already flush locked as they have
-		 * already been written to the buffer.
-		 */
-		if (!xfs_iflock_nowait(ip)) {
-			xfs_iunlock(ip, XFS_ILOCK_SHARED);
-			continue;
-		}
-
 		/*
 		 * Abort flushing this inode if we are shut down because the
 		 * inode may not currently be in the AIL. This can occur when
@@ -3661,7 +3643,6 @@ xfs_iflush_cluster(
 		 */
 		if (XFS_FORCED_SHUTDOWN(mp)) {
 			xfs_iunpin_wait(ip);
-			/* xfs_iflush_abort() drops the flush lock */
 			xfs_iflush_abort(ip);
 			xfs_iunlock(ip, XFS_ILOCK_SHARED);
 			error = -EIO;
@@ -3670,7 +3651,7 @@ xfs_iflush_cluster(
 		/* don't block waiting on a log force to unpin dirty inodes */
 		if (xfs_ipincount(ip)) {
-			xfs_ifunlock(ip);
+			xfs_iflags_clear(ip, XFS_IFLUSHING);
 			xfs_iunlock(ip, XFS_ILOCK_SHARED);
 			continue;
 		}
@@ -3678,7 +3659,7 @@ xfs_iflush_cluster(
 		if (!xfs_inode_clean(ip))
 			error = xfs_iflush(ip, bp);
 		else
-			xfs_ifunlock(ip);
+			xfs_iflags_clear(ip, XFS_IFLUSHING);
 		xfs_iunlock(ip, XFS_ILOCK_SHARED);
 		if (error)
 			break;
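
The xfs_iflush_cluster() change follows the same pattern: the flushing state is now claimed while the i_flags_lock is still held, immediately after the ILOCK trylock succeeds, rather than via a separate xfs_iflock_nowait() step taken after the spinlock was dropped. A condensed view of the new sequence, assembled from the hunks above:

	spin_lock(&ip->i_flags_lock);
	if (__xfs_iflags_test(ip, XFS_IRECLAIM | XFS_IFLUSHING)) {
		spin_unlock(&ip->i_flags_lock);
		continue;			/* someone else owns this inode */
	}
	if (!xfs_ilock_nowait(ip, XFS_ILOCK_SHARED)) {
		spin_unlock(&ip->i_flags_lock);
		continue;
	}
	__xfs_iflags_set(ip, XFS_IFLUSHING);	/* claim the flush under the lock */
	spin_unlock(&ip->i_flags_lock);

Folding the "already flushing" check into the earlier __xfs_iflags_test() under the i_flags_lock is what allows the old "Skip inodes that are already flush locked" block to be deleted outright.
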
...
@@ -211,8 +211,7 @@ static inline bool xfs_inode_has_cow_data(struct xfs_inode *ip)
 #define XFS_INEW		(1 << __XFS_INEW_BIT)
 #define XFS_ITRUNCATED		(1 << 5)	/* truncated down so flush-on-close */
 #define XFS_IDIRTY_RELEASE	(1 << 6)	/* dirty release already seen */
-#define __XFS_IFLOCK_BIT	7	/* inode is being flushed right now */
-#define XFS_IFLOCK		(1 << __XFS_IFLOCK_BIT)
+#define XFS_IFLUSHING		(1 << 7)	/* inode is being flushed */
 #define __XFS_IPINNED_BIT	8	/* wakeup key for zero pin count */
 #define XFS_IPINNED		(1 << __XFS_IPINNED_BIT)
 #define XFS_IEOFBLOCKS		(1 << 9)	/* has the preallocblocks tag set */
@@ -233,36 +232,6 @@ static inline bool xfs_inode_has_cow_data(struct xfs_inode *ip)
 	(XFS_IRECLAIMABLE | XFS_IRECLAIM | \
 	 XFS_IDIRTY_RELEASE | XFS_ITRUNCATED)
 
-/*
- * Synchronize processes attempting to flush the in-core inode back to disk.
- */
-static inline int xfs_isiflocked(struct xfs_inode *ip)
-{
-	return xfs_iflags_test(ip, XFS_IFLOCK);
-}
-
-extern void __xfs_iflock(struct xfs_inode *ip);
-
-static inline int xfs_iflock_nowait(struct xfs_inode *ip)
-{
-	return !xfs_iflags_test_and_set(ip, XFS_IFLOCK);
-}
-
-static inline void xfs_iflock(struct xfs_inode *ip)
-{
-	if (!xfs_iflock_nowait(ip))
-		__xfs_iflock(ip);
-}
-
-static inline void xfs_ifunlock(struct xfs_inode *ip)
-{
-	ASSERT(xfs_isiflocked(ip));
-	xfs_iflags_clear(ip, XFS_IFLOCK);
-	smp_mb();
-	wake_up_bit(&ip->i_flags, __XFS_IFLOCK_BIT);
-}
-
 /*
  * Flags for inode locking.
  * Bit ranges:	1<<1 - 1<<16-1 -- iolock/ilock modes (bitfield)
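
The wrappers removed above were all thin veneers over the generic xfs_iflags_*() helpers, which serialise i_flags updates with ip->i_flags_lock; that is what makes the bare test-and-set and clear conversions in this patch safe. For reference, the test-and-set helper looks roughly like this (paraphrased from fs/xfs/xfs_inode.h; the exact spelling may differ between kernel versions):

	static inline int
	xfs_iflags_test_and_set(struct xfs_inode *ip, unsigned short flags)
	{
		int ret;

		spin_lock(&ip->i_flags_lock);
		ret = ip->i_flags & flags;	/* report whether already set */
		if (!ret)
			ip->i_flags |= flags;	/* claim it under the lock */
		spin_unlock(&ip->i_flags_lock);
		return ret;
	}

Note the inverted sense: xfs_iflock_nowait() returned true on success, while xfs_iflags_test_and_set() returns non-zero on failure, which is why callers such as xfs_reclaim_inode() flip their conditions in this patch.
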
...
@@ -491,8 +491,7 @@ xfs_inode_item_push(
 	    (ip->i_flags & XFS_ISTALE))
 		return XFS_ITEM_PINNED;
 
-	/* If the inode is already flush locked, we're already flushing. */
-	if (xfs_isiflocked(ip))
+	if (xfs_iflags_test(ip, XFS_IFLUSHING))
 		return XFS_ITEM_FLUSHING;
 
 	if (!xfs_buf_trylock(bp))
@@ -703,7 +702,7 @@ xfs_iflush_finish(
 		iip->ili_last_fields = 0;
 		iip->ili_flush_lsn = 0;
 		spin_unlock(&iip->ili_lock);
-		xfs_ifunlock(iip->ili_inode);
+		xfs_iflags_clear(iip->ili_inode, XFS_IFLUSHING);
 		if (drop_buffer)
 			xfs_buf_rele(bp);
 	}
@@ -711,8 +710,8 @@
 /*
  * Inode buffer IO completion routine. It is responsible for removing inodes
- * attached to the buffer from the AIL if they have not been re-logged, as well
- * as completing the flush and unlocking the inode.
+ * attached to the buffer from the AIL if they have not been re-logged and
+ * completing the inode flush.
  */
 void
 xfs_iflush_done(
@@ -755,10 +754,10 @@ xfs_iflush_done(
 }
 
 /*
- * This is the inode flushing abort routine. It is called from xfs_iflush when
+ * This is the inode flushing abort routine. It is called when
  * the filesystem is shutting down to clean up the inode state. It is
  * responsible for removing the inode item from the AIL if it has not been
- * re-logged, and unlocking the inode's flush lock.
+ * re-logged and clearing the inode's flush state.
  */
 void
 xfs_iflush_abort(
@@ -790,7 +789,7 @@ xfs_iflush_abort(
 		list_del_init(&iip->ili_item.li_bio_list);
 		spin_unlock(&iip->ili_lock);
 	}
-	xfs_ifunlock(ip);
+	xfs_iflags_clear(ip, XFS_IFLUSHING);
 	if (bp)
 		xfs_buf_rele(bp);
 }
...
@@ -25,8 +25,8 @@ struct xfs_inode_log_item {
 	 *
 	 * We need atomic changes between inode dirtying, inode flushing and
 	 * inode completion, but these all hold different combinations of
-	 * ILOCK and iflock and hence we need some other method of serialising
-	 * updates to the flush state.
+	 * ILOCK and IFLUSHING and hence we need some other method of
+	 * serialising updates to the flush state.
 	 */
 	spinlock_t		ili_lock;	/* flush state lock */
 	unsigned int		ili_last_fields; /* fields when flushed */
...
@@ -1059,11 +1059,12 @@ xfs_unmountfs(
 	 * We can potentially deadlock here if we have an inode cluster
 	 * that has been freed and still has its buffer pinned in memory
 	 * because the transaction is still sitting in an iclog. The stale inodes
-	 * on that buffer will have their flush locks held until the
-	 * transaction hits the disk and the callbacks run. the inode
-	 * flush takes the flush lock unconditionally and with nothing to
-	 * push out the iclog we will never get that unlocked. hence we
-	 * need to force the log first.
+	 * on that buffer will be pinned to the buffer until the
+	 * transaction hits the disk and the callbacks run. Pushing the AIL will
+	 * skip the stale inodes and may never see the pinned buffer, so
+	 * nothing will push out the iclog and unpin the buffer. Hence we
+	 * need to force the log here to ensure all items are flushed into the
+	 * AIL before we go any further.
 	 */
 	xfs_log_force(mp, XFS_LOG_SYNC);
...
@@ -654,11 +654,11 @@ xfs_fs_destroy_inode(
 	ASSERT_ALWAYS(!xfs_iflags_test(ip, XFS_IRECLAIM));
 
 	/*
-	 * We always use background reclaim here because even if the
-	 * inode is clean, it still may be under IO and hence we have
-	 * to take the flush lock. The background reclaim path handles
-	 * this more efficiently than we can here, so simply let background
-	 * reclaim tear down all inodes.
+	 * We always use background reclaim here because even if the inode is
+	 * clean, it still may be under IO and hence we have to wait for IO
+	 * completion to occur before we can reclaim the inode. The background
+	 * reclaim path handles this more efficiently than we can here, so
+	 * simply let background reclaim tear down all inodes.
 	 */
 	xfs_inode_set_reclaim_tag(ip);
 }