Commit 0dd82141 authored by Sunil Mushran, committed by Mark Fasheh

ocfs2_dlm: Add timeout to dlm join domain

Currently the ocfs2 dlm has no timeout during dlm join domain. While this is
not a problem in normal operation, this does become an issue if, say, the
other node is refusing to let the node join the domain because of a stuck
recovery. This patch adds a 90 sec timeout.
Signed-off-by: Sunil Mushran <sunil.mushran@oracle.com>
Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
parent e4968476
...@@ -1264,6 +1264,8 @@ static int dlm_register_domain_handlers(struct dlm_ctxt *dlm) ...@@ -1264,6 +1264,8 @@ static int dlm_register_domain_handlers(struct dlm_ctxt *dlm)
static int dlm_join_domain(struct dlm_ctxt *dlm) static int dlm_join_domain(struct dlm_ctxt *dlm)
{ {
int status; int status;
unsigned int backoff;
unsigned int total_backoff = 0;
BUG_ON(!dlm); BUG_ON(!dlm);
...@@ -1295,18 +1297,27 @@ static int dlm_join_domain(struct dlm_ctxt *dlm) ...@@ -1295,18 +1297,27 @@ static int dlm_join_domain(struct dlm_ctxt *dlm)
} }
do { do {
unsigned int backoff;
status = dlm_try_to_join_domain(dlm); status = dlm_try_to_join_domain(dlm);
/* If we're racing another node to the join, then we /* If we're racing another node to the join, then we
* need to back off temporarily and let them * need to back off temporarily and let them
* complete. */ * complete. */
#define DLM_JOIN_TIMEOUT_MSECS 90000
if (status == -EAGAIN) { if (status == -EAGAIN) {
if (signal_pending(current)) { if (signal_pending(current)) {
status = -ERESTARTSYS; status = -ERESTARTSYS;
goto bail; goto bail;
} }
if (total_backoff >
msecs_to_jiffies(DLM_JOIN_TIMEOUT_MSECS)) {
status = -ERESTARTSYS;
mlog(ML_NOTICE, "Timed out joining dlm domain "
"%s after %u msecs\n", dlm->name,
jiffies_to_msecs(total_backoff));
goto bail;
}
/* /*
* <chip> After you! * <chip> After you!
* <dale> No, after you! * <dale> No, after you!
...@@ -1316,6 +1327,7 @@ static int dlm_join_domain(struct dlm_ctxt *dlm) ...@@ -1316,6 +1327,7 @@ static int dlm_join_domain(struct dlm_ctxt *dlm)
*/ */
backoff = (unsigned int)(jiffies & 0x3); backoff = (unsigned int)(jiffies & 0x3);
backoff *= DLM_DOMAIN_BACKOFF_MS; backoff *= DLM_DOMAIN_BACKOFF_MS;
total_backoff += backoff;
mlog(0, "backoff %d\n", backoff); mlog(0, "backoff %d\n", backoff);
msleep(backoff); msleep(backoff);
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment