Commit 474fce06 authored by Christoph Hellwig's avatar Christoph Hellwig Committed by Ben Myers

xfs: replace i_flock with a sleeping bitlock

We almost never block on i_flock, the exception is synchronous inode
flushing.  Instead of bloating the inode with a 16/24-byte completion
that we abuse as a semaphore just implement it as a bitlock that uses
a bit waitqueue for the rare sleeping path.  This primarily is a
tradeoff between a much smaller inode and a faster non-blocking
path vs faster wakeups, and we are much better off with the former.

A small downside is that we will lose lockdep checking for i_flock, but
given that it's always taken inside the ilock that should be acceptable.

Note that for example the inode writeback locking is implemented in a
very similar way.
Signed-off-by: default avatarChristoph Hellwig <hch@lst.de>
Reviewed-by: default avatarAlex Elder <aelder@sgi.com>
Signed-off-by: default avatarBen Myers <bpm@sgi.com>
parent 49e4c70e
...@@ -77,7 +77,7 @@ xfs_inode_alloc( ...@@ -77,7 +77,7 @@ xfs_inode_alloc(
ASSERT(atomic_read(&ip->i_pincount) == 0); ASSERT(atomic_read(&ip->i_pincount) == 0);
ASSERT(!spin_is_locked(&ip->i_flags_lock)); ASSERT(!spin_is_locked(&ip->i_flags_lock));
ASSERT(completion_done(&ip->i_flush)); ASSERT(!xfs_isiflocked(ip));
ASSERT(ip->i_ino == 0); ASSERT(ip->i_ino == 0);
mrlock_init(&ip->i_iolock, MRLOCK_BARRIER, "xfsio", ip->i_ino); mrlock_init(&ip->i_iolock, MRLOCK_BARRIER, "xfsio", ip->i_ino);
...@@ -150,7 +150,7 @@ xfs_inode_free( ...@@ -150,7 +150,7 @@ xfs_inode_free(
/* asserts to verify all state is correct here */ /* asserts to verify all state is correct here */
ASSERT(atomic_read(&ip->i_pincount) == 0); ASSERT(atomic_read(&ip->i_pincount) == 0);
ASSERT(!spin_is_locked(&ip->i_flags_lock)); ASSERT(!spin_is_locked(&ip->i_flags_lock));
ASSERT(completion_done(&ip->i_flush)); ASSERT(!xfs_isiflocked(ip));
/* /*
* Because we use RCU freeing we need to ensure the inode always * Because we use RCU freeing we need to ensure the inode always
...@@ -713,3 +713,19 @@ xfs_isilocked( ...@@ -713,3 +713,19 @@ xfs_isilocked(
return 0; return 0;
} }
#endif #endif
void
__xfs_iflock(
struct xfs_inode *ip)
{
wait_queue_head_t *wq = bit_waitqueue(&ip->i_flags, __XFS_IFLOCK_BIT);
DEFINE_WAIT_BIT(wait, &ip->i_flags, __XFS_IFLOCK_BIT);
do {
prepare_to_wait_exclusive(wq, &wait.wait, TASK_UNINTERRUPTIBLE);
if (xfs_isiflocked(ip))
io_schedule();
} while (!xfs_iflock_nowait(ip));
finish_wait(wq, &wait.wait);
}
...@@ -2396,7 +2396,7 @@ xfs_iflush( ...@@ -2396,7 +2396,7 @@ xfs_iflush(
XFS_STATS_INC(xs_iflush_count); XFS_STATS_INC(xs_iflush_count);
ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED)); ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED));
ASSERT(!completion_done(&ip->i_flush)); ASSERT(xfs_isiflocked(ip));
ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE || ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE ||
ip->i_d.di_nextents > XFS_IFORK_MAXEXT(ip, XFS_DATA_FORK)); ip->i_d.di_nextents > XFS_IFORK_MAXEXT(ip, XFS_DATA_FORK));
...@@ -2512,7 +2512,7 @@ xfs_iflush_int( ...@@ -2512,7 +2512,7 @@ xfs_iflush_int(
#endif #endif
ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED)); ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED));
ASSERT(!completion_done(&ip->i_flush)); ASSERT(xfs_isiflocked(ip));
ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE || ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE ||
ip->i_d.di_nextents > XFS_IFORK_MAXEXT(ip, XFS_DATA_FORK)); ip->i_d.di_nextents > XFS_IFORK_MAXEXT(ip, XFS_DATA_FORK));
......
...@@ -237,7 +237,6 @@ typedef struct xfs_inode { ...@@ -237,7 +237,6 @@ typedef struct xfs_inode {
struct xfs_inode_log_item *i_itemp; /* logging information */ struct xfs_inode_log_item *i_itemp; /* logging information */
mrlock_t i_lock; /* inode lock */ mrlock_t i_lock; /* inode lock */
mrlock_t i_iolock; /* inode IO lock */ mrlock_t i_iolock; /* inode IO lock */
struct completion i_flush; /* inode flush completion q */
atomic_t i_pincount; /* inode pin count */ atomic_t i_pincount; /* inode pin count */
wait_queue_head_t i_ipin_wait; /* inode pinning wait queue */ wait_queue_head_t i_ipin_wait; /* inode pinning wait queue */
spinlock_t i_flags_lock; /* inode i_flags lock */ spinlock_t i_flags_lock; /* inode i_flags lock */
...@@ -324,6 +323,19 @@ xfs_iflags_test_and_clear(xfs_inode_t *ip, unsigned short flags) ...@@ -324,6 +323,19 @@ xfs_iflags_test_and_clear(xfs_inode_t *ip, unsigned short flags)
return ret; return ret;
} }
static inline int
xfs_iflags_test_and_set(xfs_inode_t *ip, unsigned short flags)
{
int ret;
spin_lock(&ip->i_flags_lock);
ret = ip->i_flags & flags;
if (!ret)
ip->i_flags |= flags;
spin_unlock(&ip->i_flags_lock);
return ret;
}
/* /*
* Project quota id helpers (previously projid was 16bit only * Project quota id helpers (previously projid was 16bit only
* and using two 16bit values to hold new 32bit projid was chosen * and using two 16bit values to hold new 32bit projid was chosen
...@@ -343,36 +355,18 @@ xfs_set_projid(struct xfs_inode *ip, ...@@ -343,36 +355,18 @@ xfs_set_projid(struct xfs_inode *ip,
ip->i_d.di_projid_lo = (__uint16_t) (projid & 0xffff); ip->i_d.di_projid_lo = (__uint16_t) (projid & 0xffff);
} }
/*
* Manage the i_flush queue embedded in the inode. This completion
* queue synchronizes processes attempting to flush the in-core
* inode back to disk.
*/
static inline void xfs_iflock(xfs_inode_t *ip)
{
wait_for_completion(&ip->i_flush);
}
static inline int xfs_iflock_nowait(xfs_inode_t *ip)
{
return try_wait_for_completion(&ip->i_flush);
}
static inline void xfs_ifunlock(xfs_inode_t *ip)
{
complete(&ip->i_flush);
}
/* /*
* In-core inode flags. * In-core inode flags.
*/ */
#define XFS_IRECLAIM 0x0001 /* started reclaiming this inode */ #define XFS_IRECLAIM (1 << 0) /* started reclaiming this inode */
#define XFS_ISTALE 0x0002 /* inode has been staled */ #define XFS_ISTALE (1 << 1) /* inode has been staled */
#define XFS_IRECLAIMABLE 0x0004 /* inode can be reclaimed */ #define XFS_IRECLAIMABLE (1 << 2) /* inode can be reclaimed */
#define XFS_INEW 0x0008 /* inode has just been allocated */ #define XFS_INEW (1 << 3) /* inode has just been allocated */
#define XFS_IFILESTREAM 0x0010 /* inode is in a filestream directory */ #define XFS_IFILESTREAM (1 << 4) /* inode is in a filestream dir. */
#define XFS_ITRUNCATED 0x0020 /* truncated down so flush-on-close */ #define XFS_ITRUNCATED (1 << 5) /* truncated down so flush-on-close */
#define XFS_IDIRTY_RELEASE 0x0040 /* dirty release already seen */ #define XFS_IDIRTY_RELEASE (1 << 6) /* dirty release already seen */
#define __XFS_IFLOCK_BIT 7 /* inode is being flushed right now */
#define XFS_IFLOCK (1 << __XFS_IFLOCK_BIT)
/* /*
* Per-lifetime flags need to be reset when re-using a reclaimable inode during * Per-lifetime flags need to be reset when re-using a reclaimable inode during
...@@ -384,6 +378,34 @@ static inline void xfs_ifunlock(xfs_inode_t *ip) ...@@ -384,6 +378,34 @@ static inline void xfs_ifunlock(xfs_inode_t *ip)
XFS_IDIRTY_RELEASE | XFS_ITRUNCATED | \ XFS_IDIRTY_RELEASE | XFS_ITRUNCATED | \
XFS_IFILESTREAM); XFS_IFILESTREAM);
/*
* Synchronize processes attempting to flush the in-core inode back to disk.
*/
extern void __xfs_iflock(struct xfs_inode *ip);
static inline int xfs_iflock_nowait(struct xfs_inode *ip)
{
return !xfs_iflags_test_and_set(ip, XFS_IFLOCK);
}
static inline void xfs_iflock(struct xfs_inode *ip)
{
if (!xfs_iflock_nowait(ip))
__xfs_iflock(ip);
}
static inline void xfs_ifunlock(struct xfs_inode *ip)
{
xfs_iflags_clear(ip, XFS_IFLOCK);
wake_up_bit(&ip->i_flags, __XFS_IFLOCK_BIT);
}
static inline int xfs_isiflocked(struct xfs_inode *ip)
{
return xfs_iflags_test(ip, XFS_IFLOCK);
}
/* /*
* Flags for inode locking. * Flags for inode locking.
* Bit ranges: 1<<1 - 1<<16-1 -- iolock/ilock modes (bitfield) * Bit ranges: 1<<1 - 1<<16-1 -- iolock/ilock modes (bitfield)
......
...@@ -717,7 +717,7 @@ xfs_inode_item_pushbuf( ...@@ -717,7 +717,7 @@ xfs_inode_item_pushbuf(
* If a flush is not in progress anymore, chances are that the * If a flush is not in progress anymore, chances are that the
* inode was taken off the AIL. So, just get out. * inode was taken off the AIL. So, just get out.
*/ */
if (completion_done(&ip->i_flush) || if (!xfs_isiflocked(ip) ||
!(lip->li_flags & XFS_LI_IN_AIL)) { !(lip->li_flags & XFS_LI_IN_AIL)) {
xfs_iunlock(ip, XFS_ILOCK_SHARED); xfs_iunlock(ip, XFS_ILOCK_SHARED);
return true; return true;
...@@ -750,7 +750,7 @@ xfs_inode_item_push( ...@@ -750,7 +750,7 @@ xfs_inode_item_push(
struct xfs_inode *ip = iip->ili_inode; struct xfs_inode *ip = iip->ili_inode;
ASSERT(xfs_isilocked(ip, XFS_ILOCK_SHARED)); ASSERT(xfs_isilocked(ip, XFS_ILOCK_SHARED));
ASSERT(!completion_done(&ip->i_flush)); ASSERT(xfs_isiflocked(ip));
/* /*
* Since we were able to lock the inode's flush lock and * Since we were able to lock the inode's flush lock and
......
...@@ -829,13 +829,6 @@ xfs_fs_inode_init_once( ...@@ -829,13 +829,6 @@ xfs_fs_inode_init_once(
atomic_set(&ip->i_pincount, 0); atomic_set(&ip->i_pincount, 0);
spin_lock_init(&ip->i_flags_lock); spin_lock_init(&ip->i_flags_lock);
init_waitqueue_head(&ip->i_ipin_wait); init_waitqueue_head(&ip->i_ipin_wait);
/*
* Because we want to use a counting completion, complete
* the flush completion once to allow a single access to
* the flush completion without blocking.
*/
init_completion(&ip->i_flush);
complete(&ip->i_flush);
mrlock_init(&ip->i_lock, MRLOCK_ALLOW_EQUAL_PRI|MRLOCK_BARRIER, mrlock_init(&ip->i_lock, MRLOCK_ALLOW_EQUAL_PRI|MRLOCK_BARRIER,
"xfsino", ip->i_ino); "xfsino", ip->i_ino);
......
...@@ -707,14 +707,13 @@ xfs_reclaim_inode_grab( ...@@ -707,14 +707,13 @@ xfs_reclaim_inode_grab(
return 1; return 1;
/* /*
* do some unlocked checks first to avoid unnecessary lock traffic. * If we are asked for non-blocking operation, do unlocked checks to
* The first is a flush lock check, the second is a already in reclaim * see if the inode already is being flushed or in reclaim to avoid
* check. Only do these checks if we are not going to block on locks. * lock traffic.
*/ */
if ((flags & SYNC_TRYLOCK) && if ((flags & SYNC_TRYLOCK) &&
(!ip->i_flush.done || __xfs_iflags_test(ip, XFS_IRECLAIM))) { __xfs_iflags_test(ip, XFS_IFLOCK | XFS_IRECLAIM))
return 1; return 1;
}
/* /*
* The radix tree lock here protects a thread in xfs_iget from racing * The radix tree lock here protects a thread in xfs_iget from racing
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment