Commit 98e1b60e authored by Mike Christie, committed by David Teigland

dlm: try other IPs when sctp init assoc fails

Currently, if we cannot create an association to the first IP addr
that is added to DLM, the SCTP init assoc code will just retry
the same IP. This patch adds a simple failover scheme where we
will try one of the other addresses that were passed into DLM.
Signed-off-by: Mike Christie <michaelc@cs.wisc.edu>
Signed-off-by: David Teigland <teigland@redhat.com>
parent b390ca38
...@@ -126,6 +126,7 @@ struct connection { ...@@ -126,6 +126,7 @@ struct connection {
struct connection *othercon; struct connection *othercon;
struct work_struct rwork; /* Receive workqueue */ struct work_struct rwork; /* Receive workqueue */
struct work_struct swork; /* Send workqueue */ struct work_struct swork; /* Send workqueue */
bool try_new_addr;
}; };
#define sock2con(x) ((struct connection *)(x)->sk_user_data) #define sock2con(x) ((struct connection *)(x)->sk_user_data)
...@@ -144,6 +145,7 @@ struct dlm_node_addr { ...@@ -144,6 +145,7 @@ struct dlm_node_addr {
struct list_head list; struct list_head list;
int nodeid; int nodeid;
int addr_count; int addr_count;
int curr_addr_index;
struct sockaddr_storage *addr[DLM_MAX_ADDR_COUNT]; struct sockaddr_storage *addr[DLM_MAX_ADDR_COUNT];
}; };
...@@ -310,7 +312,7 @@ static int addr_compare(struct sockaddr_storage *x, struct sockaddr_storage *y) ...@@ -310,7 +312,7 @@ static int addr_compare(struct sockaddr_storage *x, struct sockaddr_storage *y)
} }
static int nodeid_to_addr(int nodeid, struct sockaddr_storage *sas_out, static int nodeid_to_addr(int nodeid, struct sockaddr_storage *sas_out,
struct sockaddr *sa_out) struct sockaddr *sa_out, bool try_new_addr)
{ {
struct sockaddr_storage sas; struct sockaddr_storage sas;
struct dlm_node_addr *na; struct dlm_node_addr *na;
...@@ -320,8 +322,16 @@ static int nodeid_to_addr(int nodeid, struct sockaddr_storage *sas_out, ...@@ -320,8 +322,16 @@ static int nodeid_to_addr(int nodeid, struct sockaddr_storage *sas_out,
spin_lock(&dlm_node_addrs_spin); spin_lock(&dlm_node_addrs_spin);
na = find_node_addr(nodeid); na = find_node_addr(nodeid);
if (na && na->addr_count) if (na && na->addr_count) {
memcpy(&sas, na->addr[0], sizeof(struct sockaddr_storage)); if (try_new_addr) {
na->curr_addr_index++;
if (na->curr_addr_index == na->addr_count)
na->curr_addr_index = 0;
}
memcpy(&sas, na->addr[na->curr_addr_index ],
sizeof(struct sockaddr_storage));
}
spin_unlock(&dlm_node_addrs_spin); spin_unlock(&dlm_node_addrs_spin);
if (!na) if (!na)
...@@ -353,19 +363,22 @@ static int addr_to_nodeid(struct sockaddr_storage *addr, int *nodeid) ...@@ -353,19 +363,22 @@ static int addr_to_nodeid(struct sockaddr_storage *addr, int *nodeid)
{ {
struct dlm_node_addr *na; struct dlm_node_addr *na;
int rv = -EEXIST; int rv = -EEXIST;
int addr_i;
spin_lock(&dlm_node_addrs_spin); spin_lock(&dlm_node_addrs_spin);
list_for_each_entry(na, &dlm_node_addrs, list) { list_for_each_entry(na, &dlm_node_addrs, list) {
if (!na->addr_count) if (!na->addr_count)
continue; continue;
if (!addr_compare(na->addr[0], addr)) for (addr_i = 0; addr_i < na->addr_count; addr_i++) {
continue; if (addr_compare(na->addr[addr_i], addr)) {
*nodeid = na->nodeid; *nodeid = na->nodeid;
rv = 0; rv = 0;
break; goto unlock;
}
} }
}
unlock:
spin_unlock(&dlm_node_addrs_spin); spin_unlock(&dlm_node_addrs_spin);
return rv; return rv;
} }
...@@ -561,6 +574,21 @@ static void sctp_send_shutdown(sctp_assoc_t associd) ...@@ -561,6 +574,21 @@ static void sctp_send_shutdown(sctp_assoc_t associd)
static void sctp_init_failed_foreach(struct connection *con) static void sctp_init_failed_foreach(struct connection *con)
{ {
/*
* Don't try to recover the base con, and handle the race where the
* other node's assoc init creates an assoc and we get that
* notification, then we get a notification that our own attempt
* failed. This happens when we are still trying the primary
* address, but the other node has already tried secondary addrs
* and found one that worked.
*/
if (!con->nodeid || con->sctp_assoc)
return;
log_print("Retrying SCTP association init for node %d\n", con->nodeid);
con->try_new_addr = true;
con->sctp_assoc = 0; con->sctp_assoc = 0;
if (test_and_clear_bit(CF_INIT_PENDING, &con->flags)) { if (test_and_clear_bit(CF_INIT_PENDING, &con->flags)) {
if (!test_and_set_bit(CF_WRITE_PENDING, &con->flags)) if (!test_and_set_bit(CF_WRITE_PENDING, &con->flags))
...@@ -663,6 +691,7 @@ static void process_sctp_notification(struct connection *con, ...@@ -663,6 +691,7 @@ static void process_sctp_notification(struct connection *con,
nodeid, (int)sn->sn_assoc_change.sac_assoc_id); nodeid, (int)sn->sn_assoc_change.sac_assoc_id);
new_con->sctp_assoc = sn->sn_assoc_change.sac_assoc_id; new_con->sctp_assoc = sn->sn_assoc_change.sac_assoc_id;
new_con->try_new_addr = false;
/* Send any pending writes */ /* Send any pending writes */
clear_bit(CF_CONNECT_PENDING, &new_con->flags); clear_bit(CF_CONNECT_PENDING, &new_con->flags);
clear_bit(CF_INIT_PENDING, &new_con->flags); clear_bit(CF_INIT_PENDING, &new_con->flags);
...@@ -984,7 +1013,8 @@ static void sctp_init_assoc(struct connection *con) ...@@ -984,7 +1013,8 @@ static void sctp_init_assoc(struct connection *con)
if (con->retries++ > MAX_CONNECT_RETRIES) if (con->retries++ > MAX_CONNECT_RETRIES)
return; return;
if (nodeid_to_addr(con->nodeid, NULL, (struct sockaddr *)&rem_addr)) { if (nodeid_to_addr(con->nodeid, NULL, (struct sockaddr *)&rem_addr,
con->try_new_addr)) {
log_print("no address for nodeid %d", con->nodeid); log_print("no address for nodeid %d", con->nodeid);
return; return;
} }
...@@ -1016,6 +1046,14 @@ static void sctp_init_assoc(struct connection *con) ...@@ -1016,6 +1046,14 @@ static void sctp_init_assoc(struct connection *con)
iov[0].iov_base = page_address(e->page)+offset; iov[0].iov_base = page_address(e->page)+offset;
iov[0].iov_len = len; iov[0].iov_len = len;
if (rem_addr.ss_family == AF_INET) {
struct sockaddr_in *sin = (struct sockaddr_in *)&rem_addr;
log_print("Trying to connect to %pI4", &sin->sin_addr.s_addr);
} else {
struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&rem_addr;
log_print("Trying to connect to %pI6", &sin6->sin6_addr);
}
cmsg = CMSG_FIRSTHDR(&outmessage); cmsg = CMSG_FIRSTHDR(&outmessage);
cmsg->cmsg_level = IPPROTO_SCTP; cmsg->cmsg_level = IPPROTO_SCTP;
cmsg->cmsg_type = SCTP_SNDRCV; cmsg->cmsg_type = SCTP_SNDRCV;
...@@ -1024,6 +1062,7 @@ static void sctp_init_assoc(struct connection *con) ...@@ -1024,6 +1062,7 @@ static void sctp_init_assoc(struct connection *con)
memset(sinfo, 0x00, sizeof(struct sctp_sndrcvinfo)); memset(sinfo, 0x00, sizeof(struct sctp_sndrcvinfo));
sinfo->sinfo_ppid = cpu_to_le32(dlm_our_nodeid()); sinfo->sinfo_ppid = cpu_to_le32(dlm_our_nodeid());
outmessage.msg_controllen = cmsg->cmsg_len; outmessage.msg_controllen = cmsg->cmsg_len;
sinfo->sinfo_flags |= SCTP_ADDR_OVER;
ret = kernel_sendmsg(base_con->sock, &outmessage, iov, 1, len); ret = kernel_sendmsg(base_con->sock, &outmessage, iov, 1, len);
if (ret < 0) { if (ret < 0) {
...@@ -1076,7 +1115,7 @@ static void tcp_connect_to_sock(struct connection *con) ...@@ -1076,7 +1115,7 @@ static void tcp_connect_to_sock(struct connection *con)
goto out_err; goto out_err;
memset(&saddr, 0, sizeof(saddr)); memset(&saddr, 0, sizeof(saddr));
result = nodeid_to_addr(con->nodeid, &saddr, NULL); result = nodeid_to_addr(con->nodeid, &saddr, NULL, false);
if (result < 0) { if (result < 0) {
log_print("no address for nodeid %d", con->nodeid); log_print("no address for nodeid %d", con->nodeid);
goto out_err; goto out_err;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment