Commit 2bd5ed5d authored by Philipp Reisner's avatar Philipp Reisner Committed by Jens Axboe

drbd: Fix disconnect to keep the peer disk state if connection breaks during operation

The issue was that if the connection broke while we did the
gracefull state change to C_DISCONNECTING (C_TEARDOWN), then
we returned a success code from the state engine. (SS_CW_NO_NEED)

The result of that is that we missed to call the fence-peer
script in such a case.

Fixed that by introducing a new error code (SS_OUTDATE_WO_CONN).
This one should never reach back into user space.
Signed-off-by: default avatarPhilipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: default avatarLars Ellenberg <lars.ellenberg@linbit.com>
Signed-off-by: default avatarJens Axboe <axboe@kernel.dk>
parent bb45185d
...@@ -2198,8 +2198,11 @@ static enum drbd_state_rv conn_try_disconnect(struct drbd_tconn *tconn, bool for ...@@ -2198,8 +2198,11 @@ static enum drbd_state_rv conn_try_disconnect(struct drbd_tconn *tconn, bool for
return SS_SUCCESS; return SS_SUCCESS;
case SS_PRIMARY_NOP: case SS_PRIMARY_NOP:
/* Our state checking code wants to see the peer outdated. */ /* Our state checking code wants to see the peer outdated. */
rv = conn_request_state(tconn, NS2(conn, C_DISCONNECTING, rv = conn_request_state(tconn, NS2(conn, C_DISCONNECTING, pdsk, D_OUTDATED), 0);
pdsk, D_OUTDATED), CS_VERBOSE);
if (rv == SS_OUTDATE_WO_CONN) /* lost connection before graceful disconnect succeeded */
rv = conn_request_state(tconn, NS(conn, C_DISCONNECTING), CS_VERBOSE);
break; break;
case SS_CW_FAILED_BY_PEER: case SS_CW_FAILED_BY_PEER:
/* The peer probably wants to see us outdated. */ /* The peer probably wants to see us outdated. */
......
...@@ -642,6 +642,10 @@ is_valid_soft_transition(union drbd_state os, union drbd_state ns, struct drbd_t ...@@ -642,6 +642,10 @@ is_valid_soft_transition(union drbd_state os, union drbd_state ns, struct drbd_t
&& os.conn < C_WF_REPORT_PARAMS) && os.conn < C_WF_REPORT_PARAMS)
rv = SS_NEED_CONNECTION; /* No NetworkFailure -> SyncTarget etc... */ rv = SS_NEED_CONNECTION; /* No NetworkFailure -> SyncTarget etc... */
if (ns.conn == C_DISCONNECTING && ns.pdsk == D_OUTDATED &&
os.conn < C_CONNECTED && os.pdsk > D_OUTDATED)
rv = SS_OUTDATE_WO_CONN;
return rv; return rv;
} }
...@@ -1748,13 +1752,9 @@ _conn_rq_cond(struct drbd_tconn *tconn, union drbd_state mask, union drbd_state ...@@ -1748,13 +1752,9 @@ _conn_rq_cond(struct drbd_tconn *tconn, union drbd_state mask, union drbd_state
if (test_and_clear_bit(CONN_WD_ST_CHG_FAIL, &tconn->flags)) if (test_and_clear_bit(CONN_WD_ST_CHG_FAIL, &tconn->flags))
return SS_CW_FAILED_BY_PEER; return SS_CW_FAILED_BY_PEER;
rv = tconn->cstate != C_WF_REPORT_PARAMS ? SS_CW_NO_NEED : SS_UNKNOWN_ERROR; rv = conn_is_valid_transition(tconn, mask, val, 0);
if (rv == SS_SUCCESS && tconn->cstate == C_WF_REPORT_PARAMS)
if (rv == SS_UNKNOWN_ERROR) rv = SS_UNKNOWN_ERROR; /* continue waiting */
rv = conn_is_valid_transition(tconn, mask, val, 0);
if (rv == SS_SUCCESS)
rv = SS_UNKNOWN_ERROR; /* cont waiting, otherwise fail. */
return rv; return rv;
} }
......
...@@ -89,6 +89,7 @@ static const char *drbd_state_sw_errors[] = { ...@@ -89,6 +89,7 @@ static const char *drbd_state_sw_errors[] = {
[-SS_LOWER_THAN_OUTDATED] = "Disk state is lower than outdated", [-SS_LOWER_THAN_OUTDATED] = "Disk state is lower than outdated",
[-SS_IN_TRANSIENT_STATE] = "In transient state, retry after next state change", [-SS_IN_TRANSIENT_STATE] = "In transient state, retry after next state change",
[-SS_CONCURRENT_ST_CHG] = "Concurrent state changes detected and aborted", [-SS_CONCURRENT_ST_CHG] = "Concurrent state changes detected and aborted",
[-SS_OUTDATE_WO_CONN] = "Need a connection for a graceful disconnect/outdate peer",
[-SS_O_VOL_PEER_PRI] = "Other vol primary on peer not allowed by config", [-SS_O_VOL_PEER_PRI] = "Other vol primary on peer not allowed by config",
}; };
......
...@@ -319,7 +319,8 @@ enum drbd_state_rv { ...@@ -319,7 +319,8 @@ enum drbd_state_rv {
SS_IN_TRANSIENT_STATE = -18, /* Retry after the next state change */ SS_IN_TRANSIENT_STATE = -18, /* Retry after the next state change */
SS_CONCURRENT_ST_CHG = -19, /* Concurrent cluster side state change! */ SS_CONCURRENT_ST_CHG = -19, /* Concurrent cluster side state change! */
SS_O_VOL_PEER_PRI = -20, SS_O_VOL_PEER_PRI = -20,
SS_AFTER_LAST_ERROR = -21, /* Keep this at bottom */ SS_OUTDATE_WO_CONN = -21,
SS_AFTER_LAST_ERROR = -22, /* Keep this at bottom */
}; };
/* from drbd_strings.c */ /* from drbd_strings.c */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment