Commit 9e8990de authored by Andreas Gruenbacher

gfs2: Smarter iopen glock waiting

When trying to upgrade the iopen glock from a shared to an exclusive lock in
gfs2_evict_inode, abort the wait if there is contention on the corresponding
inode glock: in that case, the inode must still be in active use on another
node, and we're not guaranteed to get the iopen glock anytime soon.

To make this work even better, when we notice contention on the iopen glock and
we can't evict the corresponding inode and release the iopen glock immediately,
poke the inode glock.  The other node(s) trying to acquire the lock can then
abort instead of timing out.

Thanks to Heinz Mauelshagen for pointing out a locking bug in a previous
version of this patch.
Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
parent 35b6f8fb
...@@ -783,6 +783,17 @@ bool gfs2_inode_already_deleted(struct gfs2_glock *gl, u64 generation) ...@@ -783,6 +783,17 @@ bool gfs2_inode_already_deleted(struct gfs2_glock *gl, u64 generation)
return generation <= be64_to_cpu(ri->ri_generation_deleted); return generation <= be64_to_cpu(ri->ri_generation_deleted);
} }
/**
 * gfs2_glock_poke - briefly try to take and drop a shared hold on a glock
 * @gl: the glock to poke
 *
 * Issues a single try-lock request for @gl in the shared state and, if it
 * is granted, drops it again right away.  The request only serves to create
 * activity on the glock: a remote node that is waiting for our iopen glock
 * can notice that activity and give up waiting instead of running into its
 * timeout.
 *
 * A failed request is ignored on purpose; this is a best-effort hint and
 * there is nothing to report to the caller.
 */
static void gfs2_glock_poke(struct gfs2_glock *gl)
{
	/*
	 * NOTE(review): presumably TRY_1CB makes this a non-blocking attempt
	 * that still notifies the current holders, and GL_SKIP avoids
	 * reading the inode in -- confirm against glock.h.
	 */
	int flags = LM_FLAG_TRY_1CB | LM_FLAG_ANY | GL_SKIP;
	struct gfs2_holder gh;
	int error;

	error = gfs2_glock_nq_init(gl, LM_ST_SHARED, flags, &gh);
	/*
	 * On failure, gfs2_glock_nq_init() has already uninitialized the
	 * holder.  On success we must both dequeue AND uninitialize it:
	 * a bare gfs2_glock_dq() would leave the references taken when the
	 * holder was initialized (glock and pid) behind on every poke.
	 */
	if (!error)
		gfs2_glock_dq_uninit(&gh);
}
static bool gfs2_try_evict(struct gfs2_glock *gl) static bool gfs2_try_evict(struct gfs2_glock *gl)
{ {
struct gfs2_inode *ip; struct gfs2_inode *ip;
...@@ -804,6 +815,8 @@ static bool gfs2_try_evict(struct gfs2_glock *gl) ...@@ -804,6 +815,8 @@ static bool gfs2_try_evict(struct gfs2_glock *gl)
ip = NULL; ip = NULL;
spin_unlock(&gl->gl_lockref.lock); spin_unlock(&gl->gl_lockref.lock);
if (ip) { if (ip) {
struct gfs2_glock *inode_gl = NULL;
gl->gl_no_formal_ino = ip->i_no_formal_ino; gl->gl_no_formal_ino = ip->i_no_formal_ino;
set_bit(GIF_DEFERRED_DELETE, &ip->i_flags); set_bit(GIF_DEFERRED_DELETE, &ip->i_flags);
d_prune_aliases(&ip->i_inode); d_prune_aliases(&ip->i_inode);
...@@ -812,9 +825,16 @@ static bool gfs2_try_evict(struct gfs2_glock *gl) ...@@ -812,9 +825,16 @@ static bool gfs2_try_evict(struct gfs2_glock *gl)
/* If the inode was evicted, gl->gl_object will now be NULL. */ /* If the inode was evicted, gl->gl_object will now be NULL. */
spin_lock(&gl->gl_lockref.lock); spin_lock(&gl->gl_lockref.lock);
ip = gl->gl_object; ip = gl->gl_object;
if (ip) if (ip) {
inode_gl = ip->i_gl;
lockref_get(&inode_gl->gl_lockref);
clear_bit(GIF_DEFERRED_DELETE, &ip->i_flags); clear_bit(GIF_DEFERRED_DELETE, &ip->i_flags);
}
spin_unlock(&gl->gl_lockref.lock); spin_unlock(&gl->gl_lockref.lock);
if (inode_gl) {
gfs2_glock_poke(inode_gl);
gfs2_glock_put(inode_gl);
}
evicted = !ip; evicted = !ip;
} }
return evicted; return evicted;
...@@ -845,12 +865,22 @@ static void delete_work_func(struct work_struct *work) ...@@ -845,12 +865,22 @@ static void delete_work_func(struct work_struct *work)
* has happened. Otherwise, if we cause contention on the inode glock * has happened. Otherwise, if we cause contention on the inode glock
* immediately, the remote node will think that we still have * immediately, the remote node will think that we still have
* the inode in use, and so it will give up waiting. * the inode in use, and so it will give up waiting.
*
* If we can't evict the inode, signal to the remote node that
* the inode is still in use. We'll later try to delete the
* inode locally in gfs2_evict_inode.
*
* FIXME: We only need to verify that the remote node has
* deleted the inode because nodes before this remote delete
* rework won't cooperate. At a later time, when we no longer
* care about compatibility with such nodes, we can skip this
* step entirely.
*/ */
if (gfs2_try_evict(gl)) { if (gfs2_try_evict(gl)) {
if (gfs2_queue_delete_work(gl, 5 * HZ)) if (gfs2_queue_delete_work(gl, 5 * HZ))
return; return;
goto out;
} }
goto out;
} }
inode = gfs2_lookup_by_inum(sdp, no_addr, gl->gl_no_formal_ino, inode = gfs2_lookup_by_inum(sdp, no_addr, gl->gl_no_formal_ino,
......
...@@ -1273,8 +1273,12 @@ static bool gfs2_upgrade_iopen_glock(struct inode *inode) ...@@ -1273,8 +1273,12 @@ static bool gfs2_upgrade_iopen_glock(struct inode *inode)
* If there are no other lock holders, we'll get the lock immediately. * If there are no other lock holders, we'll get the lock immediately.
* Otherwise, the other nodes holding the lock will be notified about * Otherwise, the other nodes holding the lock will be notified about
* our locking request. If they don't have the inode open, they'll * our locking request. If they don't have the inode open, they'll
* evict the cached inode and release the lock. As a last resort, * evict the cached inode and release the lock. Otherwise, if they
* we'll eventually time out. * poke the inode glock, we'll take this as an indication that they
* still need the iopen glock and that they'll take care of deleting
* the inode when they're done. As a last resort, if another node
* keeps holding the iopen glock without showing any activity on the
* inode glock, we'll eventually time out.
* *
* Note that we're passing the LM_FLAG_TRY_1CB flag to the first * Note that we're passing the LM_FLAG_TRY_1CB flag to the first
* locking request as an optimization to notify lock holders as soon as * locking request as an optimization to notify lock holders as soon as
...@@ -1293,7 +1297,8 @@ static bool gfs2_upgrade_iopen_glock(struct inode *inode) ...@@ -1293,7 +1297,8 @@ static bool gfs2_upgrade_iopen_glock(struct inode *inode)
return false; return false;
timeout = wait_event_interruptible_timeout(sdp->sd_async_glock_wait, timeout = wait_event_interruptible_timeout(sdp->sd_async_glock_wait,
!test_bit(HIF_WAIT, &gh->gh_iflags), !test_bit(HIF_WAIT, &gh->gh_iflags) ||
test_bit(GLF_DEMOTE, &ip->i_gl->gl_flags),
timeout); timeout);
if (!test_bit(HIF_HOLDER, &gh->gh_iflags)) { if (!test_bit(HIF_HOLDER, &gh->gh_iflags)) {
gfs2_glock_dq(gh); gfs2_glock_dq(gh);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment