Commit 4e6e167b authored by David S. Miller

Merge branch 'smc-rv23'

Karsten Graul says:

====================
net/smc: introduce SMC-Rv2 support

Please apply the following patch series for smc to netdev's net-next tree.

SMC-Rv2 support (see https://www.ibm.com/support/pages/node/6326337)
provides routable RoCE support for SMC-R, eliminating the current
same-subnet restriction, by exploiting the UDP encapsulation feature
of the RoCE adapter hardware.

v2: resend of the v1 patch series, and CC linux-rdma this time
v3: rebase after net tree was merged into net-next
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents 24bcbe1c 29397e34
...@@ -84,17 +84,28 @@ enum { ...@@ -84,17 +84,28 @@ enum {
SMC_NLA_SYS_IS_ISM_V2, /* u8 */ SMC_NLA_SYS_IS_ISM_V2, /* u8 */
SMC_NLA_SYS_LOCAL_HOST, /* string */ SMC_NLA_SYS_LOCAL_HOST, /* string */
SMC_NLA_SYS_SEID, /* string */ SMC_NLA_SYS_SEID, /* string */
SMC_NLA_SYS_IS_SMCR_V2, /* u8 */
__SMC_NLA_SYS_MAX, __SMC_NLA_SYS_MAX,
SMC_NLA_SYS_MAX = __SMC_NLA_SYS_MAX - 1 SMC_NLA_SYS_MAX = __SMC_NLA_SYS_MAX - 1
}; };
/* SMC_NLA_LGR_V2 nested attributes */ /* SMC_NLA_LGR_D_V2_COMMON and SMC_NLA_LGR_R_V2_COMMON nested attributes */
enum { enum {
SMC_NLA_LGR_V2_VER, /* u8 */ SMC_NLA_LGR_V2_VER, /* u8 */
SMC_NLA_LGR_V2_REL, /* u8 */ SMC_NLA_LGR_V2_REL, /* u8 */
SMC_NLA_LGR_V2_OS, /* u8 */ SMC_NLA_LGR_V2_OS, /* u8 */
SMC_NLA_LGR_V2_NEG_EID, /* string */ SMC_NLA_LGR_V2_NEG_EID, /* string */
SMC_NLA_LGR_V2_PEER_HOST, /* string */ SMC_NLA_LGR_V2_PEER_HOST, /* string */
__SMC_NLA_LGR_V2_MAX,
SMC_NLA_LGR_V2_MAX = __SMC_NLA_LGR_V2_MAX - 1
};
/* SMC_NLA_LGR_R_V2 nested attributes */
enum {
SMC_NLA_LGR_R_V2_UNSPEC,
SMC_NLA_LGR_R_V2_DIRECT, /* u8 */
__SMC_NLA_LGR_R_V2_MAX,
SMC_NLA_LGR_R_V2_MAX = __SMC_NLA_LGR_R_V2_MAX - 1
}; };
/* SMC_GEN_LGR_SMCR attributes */ /* SMC_GEN_LGR_SMCR attributes */
...@@ -106,6 +117,8 @@ enum { ...@@ -106,6 +117,8 @@ enum {
SMC_NLA_LGR_R_PNETID, /* string */ SMC_NLA_LGR_R_PNETID, /* string */
SMC_NLA_LGR_R_VLAN_ID, /* u8 */ SMC_NLA_LGR_R_VLAN_ID, /* u8 */
SMC_NLA_LGR_R_CONNS_NUM, /* u32 */ SMC_NLA_LGR_R_CONNS_NUM, /* u32 */
SMC_NLA_LGR_R_V2_COMMON, /* nest */
SMC_NLA_LGR_R_V2, /* nest */
__SMC_NLA_LGR_R_MAX, __SMC_NLA_LGR_R_MAX,
SMC_NLA_LGR_R_MAX = __SMC_NLA_LGR_R_MAX - 1 SMC_NLA_LGR_R_MAX = __SMC_NLA_LGR_R_MAX - 1
}; };
...@@ -138,7 +151,7 @@ enum { ...@@ -138,7 +151,7 @@ enum {
SMC_NLA_LGR_D_PNETID, /* string */ SMC_NLA_LGR_D_PNETID, /* string */
SMC_NLA_LGR_D_CHID, /* u16 */ SMC_NLA_LGR_D_CHID, /* u16 */
SMC_NLA_LGR_D_PAD, /* flag */ SMC_NLA_LGR_D_PAD, /* flag */
SMC_NLA_LGR_V2, /* nest */ SMC_NLA_LGR_D_V2_COMMON, /* nest */
__SMC_NLA_LGR_D_MAX, __SMC_NLA_LGR_D_MAX,
SMC_NLA_LGR_D_MAX = __SMC_NLA_LGR_D_MAX - 1 SMC_NLA_LGR_D_MAX = __SMC_NLA_LGR_D_MAX - 1
}; };
......
...@@ -439,6 +439,47 @@ static int smcr_clnt_conf_first_link(struct smc_sock *smc) ...@@ -439,6 +439,47 @@ static int smcr_clnt_conf_first_link(struct smc_sock *smc)
return 0; return 0;
} }
/* Report whether the fixed-length hostname field holds only ASCII bytes.
 *
 * Every one of the SMC_MAX_HOSTNAME_LEN bytes is inspected; the scan does
 * not stop at a NUL byte.
 *
 * Returns true if all bytes pass isascii(), false on the first that fails.
 */
static bool smc_isascii(char *hostname)
{
	char *end = hostname + SMC_MAX_HOSTNAME_LEN;
	char *pos;

	for (pos = hostname; pos < end; pos++) {
		if (!isascii(*pos))
			return false;
	}
	return true;
}
static void smc_conn_save_peer_info_fce(struct smc_sock *smc,
struct smc_clc_msg_accept_confirm *clc)
{
struct smc_clc_msg_accept_confirm_v2 *clc_v2 =
(struct smc_clc_msg_accept_confirm_v2 *)clc;
struct smc_clc_first_contact_ext *fce;
int clc_v2_len;
if (clc->hdr.version == SMC_V1 ||
!(clc->hdr.typev2 & SMC_FIRST_CONTACT_MASK))
return;
if (smc->conn.lgr->is_smcd) {
memcpy(smc->conn.lgr->negotiated_eid, clc_v2->d1.eid,
SMC_MAX_EID_LEN);
clc_v2_len = offsetofend(struct smc_clc_msg_accept_confirm_v2,
d1);
} else {
memcpy(smc->conn.lgr->negotiated_eid, clc_v2->r1.eid,
SMC_MAX_EID_LEN);
clc_v2_len = offsetofend(struct smc_clc_msg_accept_confirm_v2,
r1);
}
fce = (struct smc_clc_first_contact_ext *)(((u8 *)clc_v2) + clc_v2_len);
smc->conn.lgr->peer_os = fce->os_type;
smc->conn.lgr->peer_smc_release = fce->release;
if (smc_isascii(fce->hostname))
memcpy(smc->conn.lgr->peer_hostname, fce->hostname,
SMC_MAX_HOSTNAME_LEN);
}
static void smcr_conn_save_peer_info(struct smc_sock *smc, static void smcr_conn_save_peer_info(struct smc_sock *smc,
struct smc_clc_msg_accept_confirm *clc) struct smc_clc_msg_accept_confirm *clc)
{ {
...@@ -451,16 +492,6 @@ static void smcr_conn_save_peer_info(struct smc_sock *smc, ...@@ -451,16 +492,6 @@ static void smcr_conn_save_peer_info(struct smc_sock *smc,
smc->conn.tx_off = bufsize * (smc->conn.peer_rmbe_idx - 1); smc->conn.tx_off = bufsize * (smc->conn.peer_rmbe_idx - 1);
} }
/* Check whether every byte of the fixed-length hostname field is ASCII.
 * All SMC_MAX_HOSTNAME_LEN bytes are tested; the scan does not stop at NUL.
 * Returns false as soon as one byte fails isascii(), true otherwise.
 */
static bool smc_isascii(char *hostname)
{
int i;
for (i = 0; i < SMC_MAX_HOSTNAME_LEN; i++)
if (!isascii(hostname[i]))
return false;
return true;
}
static void smcd_conn_save_peer_info(struct smc_sock *smc, static void smcd_conn_save_peer_info(struct smc_sock *smc,
struct smc_clc_msg_accept_confirm *clc) struct smc_clc_msg_accept_confirm *clc)
{ {
...@@ -472,22 +503,6 @@ static void smcd_conn_save_peer_info(struct smc_sock *smc, ...@@ -472,22 +503,6 @@ static void smcd_conn_save_peer_info(struct smc_sock *smc,
smc->conn.peer_rmbe_size = bufsize - sizeof(struct smcd_cdc_msg); smc->conn.peer_rmbe_size = bufsize - sizeof(struct smcd_cdc_msg);
atomic_set(&smc->conn.peer_rmbe_space, smc->conn.peer_rmbe_size); atomic_set(&smc->conn.peer_rmbe_space, smc->conn.peer_rmbe_size);
smc->conn.tx_off = bufsize * smc->conn.peer_rmbe_idx; smc->conn.tx_off = bufsize * smc->conn.peer_rmbe_idx;
if (clc->hdr.version > SMC_V1 &&
(clc->hdr.typev2 & SMC_FIRST_CONTACT_MASK)) {
struct smc_clc_msg_accept_confirm_v2 *clc_v2 =
(struct smc_clc_msg_accept_confirm_v2 *)clc;
struct smc_clc_first_contact_ext *fce =
(struct smc_clc_first_contact_ext *)
(((u8 *)clc_v2) + sizeof(*clc_v2));
memcpy(smc->conn.lgr->negotiated_eid, clc_v2->eid,
SMC_MAX_EID_LEN);
smc->conn.lgr->peer_os = fce->os_type;
smc->conn.lgr->peer_smc_release = fce->release;
if (smc_isascii(fce->hostname))
memcpy(smc->conn.lgr->peer_hostname, fce->hostname,
SMC_MAX_HOSTNAME_LEN);
}
} }
static void smc_conn_save_peer_info(struct smc_sock *smc, static void smc_conn_save_peer_info(struct smc_sock *smc,
...@@ -497,14 +512,16 @@ static void smc_conn_save_peer_info(struct smc_sock *smc, ...@@ -497,14 +512,16 @@ static void smc_conn_save_peer_info(struct smc_sock *smc,
smcd_conn_save_peer_info(smc, clc); smcd_conn_save_peer_info(smc, clc);
else else
smcr_conn_save_peer_info(smc, clc); smcr_conn_save_peer_info(smc, clc);
smc_conn_save_peer_info_fce(smc, clc);
} }
static void smc_link_save_peer_info(struct smc_link *link, static void smc_link_save_peer_info(struct smc_link *link,
struct smc_clc_msg_accept_confirm *clc) struct smc_clc_msg_accept_confirm *clc,
struct smc_init_info *ini)
{ {
link->peer_qpn = ntoh24(clc->r0.qpn); link->peer_qpn = ntoh24(clc->r0.qpn);
memcpy(link->peer_gid, clc->r0.lcl.gid, SMC_GID_SIZE); memcpy(link->peer_gid, ini->peer_gid, SMC_GID_SIZE);
memcpy(link->peer_mac, clc->r0.lcl.mac, sizeof(link->peer_mac)); memcpy(link->peer_mac, ini->peer_mac, sizeof(link->peer_mac));
link->peer_psn = ntoh24(clc->r0.psn); link->peer_psn = ntoh24(clc->r0.psn);
link->peer_mtu = clc->r0.qp_mtu; link->peer_mtu = clc->r0.qp_mtu;
} }
...@@ -608,7 +625,9 @@ static int smc_find_rdma_device(struct smc_sock *smc, struct smc_init_info *ini) ...@@ -608,7 +625,9 @@ static int smc_find_rdma_device(struct smc_sock *smc, struct smc_init_info *ini)
* used for the internal TCP socket * used for the internal TCP socket
*/ */
smc_pnet_find_roce_resource(smc->clcsock->sk, ini); smc_pnet_find_roce_resource(smc->clcsock->sk, ini);
if (!ini->ib_dev) if (!ini->check_smcrv2 && !ini->ib_dev)
return SMC_CLC_DECL_NOSMCRDEV;
if (ini->check_smcrv2 && !ini->smcrv2.ib_dev_v2)
return SMC_CLC_DECL_NOSMCRDEV; return SMC_CLC_DECL_NOSMCRDEV;
return 0; return 0;
} }
...@@ -692,27 +711,42 @@ static int smc_find_proposal_devices(struct smc_sock *smc, ...@@ -692,27 +711,42 @@ static int smc_find_proposal_devices(struct smc_sock *smc,
int rc = 0; int rc = 0;
/* check if there is an ism device available */ /* check if there is an ism device available */
if (ini->smcd_version & SMC_V1) { if (!(ini->smcd_version & SMC_V1) ||
if (smc_find_ism_device(smc, ini) || smc_find_ism_device(smc, ini) ||
smc_connect_ism_vlan_setup(smc, ini)) { smc_connect_ism_vlan_setup(smc, ini))
if (ini->smc_type_v1 == SMC_TYPE_B) ini->smcd_version &= ~SMC_V1;
ini->smc_type_v1 = SMC_TYPE_R; /* else ISM V1 is supported for this connection */
else
ini->smc_type_v1 = SMC_TYPE_N; /* check if there is an rdma device available */
} /* else ISM V1 is supported for this connection */ if (!(ini->smcr_version & SMC_V1) ||
if (smc_find_rdma_device(smc, ini)) { smc_find_rdma_device(smc, ini))
if (ini->smc_type_v1 == SMC_TYPE_B) ini->smcr_version &= ~SMC_V1;
ini->smc_type_v1 = SMC_TYPE_D; /* else RDMA is supported for this connection */
else
ini->smc_type_v1 = SMC_TYPE_N; ini->smc_type_v1 = smc_indicated_type(ini->smcd_version & SMC_V1,
} /* else RDMA is supported for this connection */ ini->smcr_version & SMC_V1);
}
if (smc_ism_is_v2_capable() && smc_find_ism_v2_device_clnt(smc, ini)) /* check if there is an ism v2 device available */
ini->smc_type_v2 = SMC_TYPE_N; if (!(ini->smcd_version & SMC_V2) ||
!smc_ism_is_v2_capable() ||
smc_find_ism_v2_device_clnt(smc, ini))
ini->smcd_version &= ~SMC_V2;
/* check if there is an rdma v2 device available */
ini->check_smcrv2 = true;
ini->smcrv2.saddr = smc->clcsock->sk->sk_rcv_saddr;
if (!(ini->smcr_version & SMC_V2) ||
smc->clcsock->sk->sk_family != AF_INET ||
!smc_clc_ueid_count() ||
smc_find_rdma_device(smc, ini))
ini->smcr_version &= ~SMC_V2;
ini->check_smcrv2 = false;
ini->smc_type_v2 = smc_indicated_type(ini->smcd_version & SMC_V2,
ini->smcr_version & SMC_V2);
/* if neither ISM nor RDMA are supported, fallback */ /* if neither ISM nor RDMA are supported, fallback */
if (!smcr_indicated(ini->smc_type_v1) && if (ini->smc_type_v1 == SMC_TYPE_N && ini->smc_type_v2 == SMC_TYPE_N)
ini->smc_type_v1 == SMC_TYPE_N && ini->smc_type_v2 == SMC_TYPE_N)
rc = SMC_CLC_DECL_NOSMCDEV; rc = SMC_CLC_DECL_NOSMCDEV;
return rc; return rc;
...@@ -752,6 +786,64 @@ static int smc_connect_clc(struct smc_sock *smc, ...@@ -752,6 +786,64 @@ static int smc_connect_clc(struct smc_sock *smc,
SMC_CLC_ACCEPT, CLC_WAIT_TIME); SMC_CLC_ACCEPT, CLC_WAIT_TIME);
} }
/* Build the GID list for a link group.
 *
 * @lgr:       link group providing vlan_id and saddr for the lookup
 * @gidlist:   output list; zeroed first, then filled
 * @known_dev: device passed to smc_pnet_find_alt_roce() as the known device
 * @known_gid: GID that always becomes the first list entry
 *
 * The known GID is always entered. A second entry is appended only if
 * smc_pnet_find_alt_roce() yields an SMC-Rv2 device; if the temporary
 * init info cannot be allocated the list keeps just the known GID.
 */
void smc_fill_gid_list(struct smc_link_group *lgr,
		       struct smc_gidlist *gidlist,
		       struct smc_ib_device *known_dev, u8 *known_gid)
{
	struct smc_init_info *lookup;

	memset(gidlist, 0, sizeof(*gidlist));
	memcpy(gidlist->list[gidlist->len++], known_gid, SMC_GID_SIZE);

	lookup = kzalloc(sizeof(*lookup), GFP_KERNEL);
	if (!lookup)
		return;

	/* search for an alternate RoCE device using the v2 criteria */
	lookup->vlan_id = lgr->vlan_id;
	lookup->check_smcrv2 = true;
	lookup->smcrv2.saddr = lgr->saddr;
	smc_pnet_find_alt_roce(lgr, lookup, known_dev);

	if (lookup->smcrv2.ib_dev_v2)
		memcpy(gidlist->list[gidlist->len++],
		       lookup->smcrv2.ib_gid_v2, SMC_GID_SIZE);

	kfree(lookup);
}
/* Client side: determine the next hop for an SMC-Rv2 first contact.
 *
 * Returns 0 without doing anything unless the peer flagged first contact
 * and the accept message is newer than V1. If the peer's first contact
 * extension reports a direct connection, the peer MAC from the accept
 * message is used as next hop MAC. Otherwise a route lookup is done from
 * the local CLC socket address to the IPv4 address taken from the peer GID.
 *
 * Returns 0 on success, SMC_CLC_DECL_NOROUTE if the route lookup fails, or
 * SMC_CLC_DECL_NOINDIRECT if the lookup reports that no gateway is used
 * although the peer did not indicate a direct connection.
 */
static int smc_connect_rdma_v2_prepare(struct smc_sock *smc,
				       struct smc_clc_msg_accept_confirm *aclc,
				       struct smc_init_info *ini)
{
	struct smc_clc_msg_accept_confirm_v2 *aclc_v2;
	struct smc_clc_first_contact_ext *fce;

	if (!ini->first_contact_peer || aclc->hdr.version == SMC_V1)
		return 0;

	/* the first contact extension follows the v2 accept message */
	aclc_v2 = (struct smc_clc_msg_accept_confirm_v2 *)aclc;
	fce = (struct smc_clc_first_contact_ext *)
		(((u8 *)aclc_v2) + sizeof(*aclc_v2));

	if (fce->v2_direct) {
		memcpy(ini->smcrv2.nexthop_mac, &aclc->r0.lcl.mac, ETH_ALEN);
		ini->smcrv2.uses_gateway = false;
		return 0;
	}

	if (smc_ib_find_route(smc->clcsock->sk->sk_rcv_saddr,
			      smc_ib_gid_to_ipv4(aclc->r0.lcl.gid),
			      ini->smcrv2.nexthop_mac,
			      &ini->smcrv2.uses_gateway))
		return SMC_CLC_DECL_NOROUTE;

	/* mismatch: peer claims indirect, but the route is direct */
	if (!ini->smcrv2.uses_gateway)
		return SMC_CLC_DECL_NOINDIRECT;

	return 0;
}
/* setup for RDMA connection of client */ /* setup for RDMA connection of client */
static int smc_connect_rdma(struct smc_sock *smc, static int smc_connect_rdma(struct smc_sock *smc,
struct smc_clc_msg_accept_confirm *aclc, struct smc_clc_msg_accept_confirm *aclc,
...@@ -759,11 +851,18 @@ static int smc_connect_rdma(struct smc_sock *smc, ...@@ -759,11 +851,18 @@ static int smc_connect_rdma(struct smc_sock *smc,
{ {
int i, reason_code = 0; int i, reason_code = 0;
struct smc_link *link; struct smc_link *link;
u8 *eid = NULL;
ini->is_smcd = false; ini->is_smcd = false;
ini->ib_lcl = &aclc->r0.lcl;
ini->ib_clcqpn = ntoh24(aclc->r0.qpn); ini->ib_clcqpn = ntoh24(aclc->r0.qpn);
ini->first_contact_peer = aclc->hdr.typev2 & SMC_FIRST_CONTACT_MASK; ini->first_contact_peer = aclc->hdr.typev2 & SMC_FIRST_CONTACT_MASK;
memcpy(ini->peer_systemid, aclc->r0.lcl.id_for_peer, SMC_SYSTEMID_LEN);
memcpy(ini->peer_gid, aclc->r0.lcl.gid, SMC_GID_SIZE);
memcpy(ini->peer_mac, aclc->r0.lcl.mac, ETH_ALEN);
reason_code = smc_connect_rdma_v2_prepare(smc, aclc, ini);
if (reason_code)
return reason_code;
mutex_lock(&smc_client_lgr_pending); mutex_lock(&smc_client_lgr_pending);
reason_code = smc_conn_create(smc, ini); reason_code = smc_conn_create(smc, ini);
...@@ -785,8 +884,9 @@ static int smc_connect_rdma(struct smc_sock *smc, ...@@ -785,8 +884,9 @@ static int smc_connect_rdma(struct smc_sock *smc,
if (l->peer_qpn == ntoh24(aclc->r0.qpn) && if (l->peer_qpn == ntoh24(aclc->r0.qpn) &&
!memcmp(l->peer_gid, &aclc->r0.lcl.gid, !memcmp(l->peer_gid, &aclc->r0.lcl.gid,
SMC_GID_SIZE) && SMC_GID_SIZE) &&
(aclc->hdr.version > SMC_V1 ||
!memcmp(l->peer_mac, &aclc->r0.lcl.mac, !memcmp(l->peer_mac, &aclc->r0.lcl.mac,
sizeof(l->peer_mac))) { sizeof(l->peer_mac)))) {
link = l; link = l;
break; break;
} }
...@@ -805,7 +905,7 @@ static int smc_connect_rdma(struct smc_sock *smc, ...@@ -805,7 +905,7 @@ static int smc_connect_rdma(struct smc_sock *smc,
} }
if (ini->first_contact_local) if (ini->first_contact_local)
smc_link_save_peer_info(link, aclc); smc_link_save_peer_info(link, aclc, ini);
if (smc_rmb_rtoken_handling(&smc->conn, link, aclc)) { if (smc_rmb_rtoken_handling(&smc->conn, link, aclc)) {
reason_code = SMC_CLC_DECL_ERR_RTOK; reason_code = SMC_CLC_DECL_ERR_RTOK;
...@@ -828,8 +928,18 @@ static int smc_connect_rdma(struct smc_sock *smc, ...@@ -828,8 +928,18 @@ static int smc_connect_rdma(struct smc_sock *smc,
} }
smc_rmb_sync_sg_for_device(&smc->conn); smc_rmb_sync_sg_for_device(&smc->conn);
if (aclc->hdr.version > SMC_V1) {
struct smc_clc_msg_accept_confirm_v2 *clc_v2 =
(struct smc_clc_msg_accept_confirm_v2 *)aclc;
eid = clc_v2->r1.eid;
if (ini->first_contact_local)
smc_fill_gid_list(link->lgr, &ini->smcrv2.gidlist,
link->smcibdev, link->gid);
}
reason_code = smc_clc_send_confirm(smc, ini->first_contact_local, reason_code = smc_clc_send_confirm(smc, ini->first_contact_local,
SMC_V1, NULL); aclc->hdr.version, eid, ini);
if (reason_code) if (reason_code)
goto connect_abort; goto connect_abort;
...@@ -869,7 +979,7 @@ smc_v2_determine_accepted_chid(struct smc_clc_msg_accept_confirm_v2 *aclc, ...@@ -869,7 +979,7 @@ smc_v2_determine_accepted_chid(struct smc_clc_msg_accept_confirm_v2 *aclc,
int i; int i;
for (i = 0; i < ini->ism_offered_cnt + 1; i++) { for (i = 0; i < ini->ism_offered_cnt + 1; i++) {
if (ini->ism_chid[i] == ntohs(aclc->chid)) { if (ini->ism_chid[i] == ntohs(aclc->d1.chid)) {
ini->ism_selected = i; ini->ism_selected = i;
return 0; return 0;
} }
...@@ -923,11 +1033,11 @@ static int smc_connect_ism(struct smc_sock *smc, ...@@ -923,11 +1033,11 @@ static int smc_connect_ism(struct smc_sock *smc,
struct smc_clc_msg_accept_confirm_v2 *clc_v2 = struct smc_clc_msg_accept_confirm_v2 *clc_v2 =
(struct smc_clc_msg_accept_confirm_v2 *)aclc; (struct smc_clc_msg_accept_confirm_v2 *)aclc;
eid = clc_v2->eid; eid = clc_v2->d1.eid;
} }
rc = smc_clc_send_confirm(smc, ini->first_contact_local, rc = smc_clc_send_confirm(smc, ini->first_contact_local,
aclc->hdr.version, eid); aclc->hdr.version, eid, NULL);
if (rc) if (rc)
goto connect_abort; goto connect_abort;
mutex_unlock(&smc_server_lgr_pending); mutex_unlock(&smc_server_lgr_pending);
...@@ -950,16 +1060,23 @@ static int smc_connect_ism(struct smc_sock *smc, ...@@ -950,16 +1060,23 @@ static int smc_connect_ism(struct smc_sock *smc,
static int smc_connect_check_aclc(struct smc_init_info *ini, static int smc_connect_check_aclc(struct smc_init_info *ini,
struct smc_clc_msg_accept_confirm *aclc) struct smc_clc_msg_accept_confirm *aclc)
{ {
if (aclc->hdr.typev1 != SMC_TYPE_R &&
aclc->hdr.typev1 != SMC_TYPE_D)
return SMC_CLC_DECL_MODEUNSUPP;
if (aclc->hdr.version >= SMC_V2) {
if ((aclc->hdr.typev1 == SMC_TYPE_R &&
!smcr_indicated(ini->smc_type_v2)) ||
(aclc->hdr.typev1 == SMC_TYPE_D &&
!smcd_indicated(ini->smc_type_v2)))
return SMC_CLC_DECL_MODEUNSUPP;
} else {
if ((aclc->hdr.typev1 == SMC_TYPE_R && if ((aclc->hdr.typev1 == SMC_TYPE_R &&
!smcr_indicated(ini->smc_type_v1)) || !smcr_indicated(ini->smc_type_v1)) ||
(aclc->hdr.typev1 == SMC_TYPE_D && (aclc->hdr.typev1 == SMC_TYPE_D &&
((!smcd_indicated(ini->smc_type_v1) && !smcd_indicated(ini->smc_type_v1)))
!smcd_indicated(ini->smc_type_v2)) ||
(aclc->hdr.version == SMC_V1 &&
!smcd_indicated(ini->smc_type_v1)) ||
(aclc->hdr.version == SMC_V2 &&
!smcd_indicated(ini->smc_type_v2)))))
return SMC_CLC_DECL_MODEUNSUPP; return SMC_CLC_DECL_MODEUNSUPP;
}
return 0; return 0;
} }
...@@ -991,14 +1108,15 @@ static int __smc_connect(struct smc_sock *smc) ...@@ -991,14 +1108,15 @@ static int __smc_connect(struct smc_sock *smc)
return smc_connect_decline_fallback(smc, SMC_CLC_DECL_MEM, return smc_connect_decline_fallback(smc, SMC_CLC_DECL_MEM,
version); version);
ini->smcd_version = SMC_V1; ini->smcd_version = SMC_V1 | SMC_V2;
ini->smcd_version |= smc_ism_is_v2_capable() ? SMC_V2 : 0; ini->smcr_version = SMC_V1 | SMC_V2;
ini->smc_type_v1 = SMC_TYPE_B; ini->smc_type_v1 = SMC_TYPE_B;
ini->smc_type_v2 = smc_ism_is_v2_capable() ? SMC_TYPE_D : SMC_TYPE_N; ini->smc_type_v2 = SMC_TYPE_B;
/* get vlan id from IP device */ /* get vlan id from IP device */
if (smc_vlan_by_tcpsk(smc->clcsock, ini)) { if (smc_vlan_by_tcpsk(smc->clcsock, ini)) {
ini->smcd_version &= ~SMC_V1; ini->smcd_version &= ~SMC_V1;
ini->smcr_version = 0;
ini->smc_type_v1 = SMC_TYPE_N; ini->smc_type_v1 = SMC_TYPE_N;
if (!ini->smcd_version) { if (!ini->smcd_version) {
rc = SMC_CLC_DECL_GETVLANERR; rc = SMC_CLC_DECL_GETVLANERR;
...@@ -1026,15 +1144,17 @@ static int __smc_connect(struct smc_sock *smc) ...@@ -1026,15 +1144,17 @@ static int __smc_connect(struct smc_sock *smc)
/* check if smc modes and versions of CLC proposal and accept match */ /* check if smc modes and versions of CLC proposal and accept match */
rc = smc_connect_check_aclc(ini, aclc); rc = smc_connect_check_aclc(ini, aclc);
version = aclc->hdr.version == SMC_V1 ? SMC_V1 : SMC_V2; version = aclc->hdr.version == SMC_V1 ? SMC_V1 : SMC_V2;
ini->smcd_version = version;
if (rc) if (rc)
goto vlan_cleanup; goto vlan_cleanup;
/* depending on previous steps, connect using rdma or ism */ /* depending on previous steps, connect using rdma or ism */
if (aclc->hdr.typev1 == SMC_TYPE_R) if (aclc->hdr.typev1 == SMC_TYPE_R) {
ini->smcr_version = version;
rc = smc_connect_rdma(smc, aclc, ini); rc = smc_connect_rdma(smc, aclc, ini);
else if (aclc->hdr.typev1 == SMC_TYPE_D) } else if (aclc->hdr.typev1 == SMC_TYPE_D) {
ini->smcd_version = version;
rc = smc_connect_ism(smc, aclc, ini); rc = smc_connect_ism(smc, aclc, ini);
}
if (rc) if (rc)
goto vlan_cleanup; goto vlan_cleanup;
...@@ -1315,7 +1435,7 @@ static int smcr_serv_conf_first_link(struct smc_sock *smc) ...@@ -1315,7 +1435,7 @@ static int smcr_serv_conf_first_link(struct smc_sock *smc)
smcr_lgr_set_type(link->lgr, SMC_LGR_SINGLE); smcr_lgr_set_type(link->lgr, SMC_LGR_SINGLE);
/* initial contact - try to establish second link */ /* initial contact - try to establish second link */
smc_llc_srv_add_link(link); smc_llc_srv_add_link(link, NULL);
return 0; return 0;
} }
...@@ -1395,33 +1515,48 @@ static int smc_listen_v2_check(struct smc_sock *new_smc, ...@@ -1395,33 +1515,48 @@ static int smc_listen_v2_check(struct smc_sock *new_smc,
ini->smc_type_v1 = pclc->hdr.typev1; ini->smc_type_v1 = pclc->hdr.typev1;
ini->smc_type_v2 = pclc->hdr.typev2; ini->smc_type_v2 = pclc->hdr.typev2;
ini->smcd_version = ini->smc_type_v1 != SMC_TYPE_N ? SMC_V1 : 0; ini->smcd_version = smcd_indicated(ini->smc_type_v1) ? SMC_V1 : 0;
if (pclc->hdr.version > SMC_V1) ini->smcr_version = smcr_indicated(ini->smc_type_v1) ? SMC_V1 : 0;
ini->smcd_version |= if (pclc->hdr.version > SMC_V1) {
ini->smc_type_v2 != SMC_TYPE_N ? SMC_V2 : 0; if (smcd_indicated(ini->smc_type_v2))
if (!(ini->smcd_version & SMC_V2)) { ini->smcd_version |= SMC_V2;
if (smcr_indicated(ini->smc_type_v2))
ini->smcr_version |= SMC_V2;
}
if (!(ini->smcd_version & SMC_V2) && !(ini->smcr_version & SMC_V2)) {
rc = SMC_CLC_DECL_PEERNOSMC; rc = SMC_CLC_DECL_PEERNOSMC;
goto out; goto out;
} }
if (!smc_ism_is_v2_capable()) {
ini->smcd_version &= ~SMC_V2;
rc = SMC_CLC_DECL_NOISM2SUPP;
goto out;
}
pclc_v2_ext = smc_get_clc_v2_ext(pclc); pclc_v2_ext = smc_get_clc_v2_ext(pclc);
if (!pclc_v2_ext) { if (!pclc_v2_ext) {
ini->smcd_version &= ~SMC_V2; ini->smcd_version &= ~SMC_V2;
ini->smcr_version &= ~SMC_V2;
rc = SMC_CLC_DECL_NOV2EXT; rc = SMC_CLC_DECL_NOV2EXT;
goto out; goto out;
} }
pclc_smcd_v2_ext = smc_get_clc_smcd_v2_ext(pclc_v2_ext); pclc_smcd_v2_ext = smc_get_clc_smcd_v2_ext(pclc_v2_ext);
if (!pclc_smcd_v2_ext) { if (ini->smcd_version & SMC_V2) {
if (!smc_ism_is_v2_capable()) {
ini->smcd_version &= ~SMC_V2;
rc = SMC_CLC_DECL_NOISM2SUPP;
} else if (!pclc_smcd_v2_ext) {
ini->smcd_version &= ~SMC_V2; ini->smcd_version &= ~SMC_V2;
rc = SMC_CLC_DECL_NOV2DEXT; rc = SMC_CLC_DECL_NOV2DEXT;
} else if (!pclc_v2_ext->hdr.eid_cnt &&
!pclc_v2_ext->hdr.flag.seid) {
ini->smcd_version &= ~SMC_V2;
rc = SMC_CLC_DECL_NOUEID;
}
}
if (ini->smcr_version & SMC_V2) {
if (!pclc_v2_ext->hdr.eid_cnt) {
ini->smcr_version &= ~SMC_V2;
rc = SMC_CLC_DECL_NOUEID;
}
} }
out: out:
if (!ini->smcd_version) if (!ini->smcd_version && !ini->smcr_version)
return rc; return rc;
return 0; return 0;
...@@ -1541,10 +1676,6 @@ static void smc_find_ism_v2_device_serv(struct smc_sock *new_smc, ...@@ -1541,10 +1676,6 @@ static void smc_find_ism_v2_device_serv(struct smc_sock *new_smc,
pclc_smcd = smc_get_clc_msg_smcd(pclc); pclc_smcd = smc_get_clc_msg_smcd(pclc);
smc_v2_ext = smc_get_clc_v2_ext(pclc); smc_v2_ext = smc_get_clc_v2_ext(pclc);
smcd_v2_ext = smc_get_clc_smcd_v2_ext(smc_v2_ext); smcd_v2_ext = smc_get_clc_smcd_v2_ext(smc_v2_ext);
if (!smcd_v2_ext) {
smc_find_ism_store_rc(SMC_CLC_DECL_NOV2DEXT, ini);
goto not_found;
}
mutex_lock(&smcd_dev_list.mutex); mutex_lock(&smcd_dev_list.mutex);
if (pclc_smcd->ism.chid) if (pclc_smcd->ism.chid)
...@@ -1562,8 +1693,10 @@ static void smc_find_ism_v2_device_serv(struct smc_sock *new_smc, ...@@ -1562,8 +1693,10 @@ static void smc_find_ism_v2_device_serv(struct smc_sock *new_smc,
} }
mutex_unlock(&smcd_dev_list.mutex); mutex_unlock(&smcd_dev_list.mutex);
if (!ini->ism_dev[0]) if (!ini->ism_dev[0]) {
smc_find_ism_store_rc(SMC_CLC_DECL_NOSMCD2DEV, ini);
goto not_found; goto not_found;
}
smc_ism_get_system_eid(&eid); smc_ism_get_system_eid(&eid);
if (!smc_clc_match_eid(ini->negotiated_eid, smc_v2_ext, if (!smc_clc_match_eid(ini->negotiated_eid, smc_v2_ext,
...@@ -1616,6 +1749,7 @@ static void smc_find_ism_v1_device_serv(struct smc_sock *new_smc, ...@@ -1616,6 +1749,7 @@ static void smc_find_ism_v1_device_serv(struct smc_sock *new_smc,
not_found: not_found:
smc_find_ism_store_rc(rc, ini); smc_find_ism_store_rc(rc, ini);
ini->smcd_version &= ~SMC_V1;
ini->ism_dev[0] = NULL; ini->ism_dev[0] = NULL;
ini->is_smcd = false; ini->is_smcd = false;
} }
...@@ -1634,24 +1768,69 @@ static int smc_listen_rdma_reg(struct smc_sock *new_smc, bool local_first) ...@@ -1634,24 +1768,69 @@ static int smc_listen_rdma_reg(struct smc_sock *new_smc, bool local_first)
return 0; return 0;
} }
/* Server side: try to set up an SMC-Rv2 connection for a received proposal.
 *
 * @new_smc: socket of the new connection
 * @pclc:    CLC proposal message received from the peer
 * @ini:     init info; updated with peer data and the lookup result
 *
 * The attempt is skipped when SMC-Rv2 is not enabled in ini->smcr_version,
 * not indicated in ini->smc_type_v2, or when no EID of the proposal matches.
 * On success the function returns with ini->smcrv2.ib_dev_v2 set and
 * ini->smcr_version == SMC_V2. On any failure the SMC_V2 bit is removed from
 * ini->smcr_version, check_smcrv2 is reset and ini->smcrv2.ib_dev_v2 is
 * cleared, because the caller uses ib_dev_v2 to tell whether this attempt
 * succeeded. Failure reason codes are recorded via smc_find_ism_store_rc().
 */
static void smc_find_rdma_v2_device_serv(struct smc_sock *new_smc,
					 struct smc_clc_msg_proposal *pclc,
					 struct smc_init_info *ini)
{
	struct smc_clc_v2_extension *smc_v2_ext;
	u8 smcr_version;
	int rc;

	if (!(ini->smcr_version & SMC_V2) || !smcr_indicated(ini->smc_type_v2))
		goto not_found;

	smc_v2_ext = smc_get_clc_v2_ext(pclc);
	if (!smc_clc_match_eid(ini->negotiated_eid, smc_v2_ext, NULL, NULL))
		goto not_found;

	/* prepare RDMA check */
	memcpy(ini->peer_systemid, pclc->lcl.id_for_peer, SMC_SYSTEMID_LEN);
	memcpy(ini->peer_gid, smc_v2_ext->roce, SMC_GID_SIZE);
	memcpy(ini->peer_mac, pclc->lcl.mac, ETH_ALEN);
	ini->check_smcrv2 = true;
	ini->smcrv2.clc_sk = new_smc->clcsock->sk;
	ini->smcrv2.saddr = new_smc->clcsock->sk->sk_rcv_saddr;
	ini->smcrv2.daddr = smc_ib_gid_to_ipv4(smc_v2_ext->roce);
	rc = smc_find_rdma_device(new_smc, ini);
	if (rc) {
		smc_find_ism_store_rc(rc, ini);
		goto not_found;
	}
	/* without a gateway the peer itself is the next hop */
	if (!ini->smcrv2.uses_gateway)
		memcpy(ini->smcrv2.nexthop_mac, pclc->lcl.mac, ETH_ALEN);

	/* run the RDMA setup as pure V2; restore the versions on failure */
	smcr_version = ini->smcr_version;
	ini->smcr_version = SMC_V2;
	rc = smc_listen_rdma_init(new_smc, ini);
	if (!rc)
		rc = smc_listen_rdma_reg(new_smc, ini->first_contact_local);
	if (!rc)
		return;
	ini->smcr_version = smcr_version;
	smc_find_ism_store_rc(rc, ini);

not_found:
	ini->smcr_version &= ~SMC_V2;
	/* A device may have been found before a later step failed; drop it
	 * so that the caller's ib_dev_v2 check does not report success.
	 */
	ini->smcrv2.ib_dev_v2 = NULL;
	ini->check_smcrv2 = false;
}
static int smc_find_rdma_v1_device_serv(struct smc_sock *new_smc, static int smc_find_rdma_v1_device_serv(struct smc_sock *new_smc,
struct smc_clc_msg_proposal *pclc, struct smc_clc_msg_proposal *pclc,
struct smc_init_info *ini) struct smc_init_info *ini)
{ {
int rc; int rc;
if (!smcr_indicated(ini->smc_type_v1)) if (!(ini->smcr_version & SMC_V1) || !smcr_indicated(ini->smc_type_v1))
return SMC_CLC_DECL_NOSMCDEV; return SMC_CLC_DECL_NOSMCDEV;
/* prepare RDMA check */ /* prepare RDMA check */
ini->ib_lcl = &pclc->lcl; memcpy(ini->peer_systemid, pclc->lcl.id_for_peer, SMC_SYSTEMID_LEN);
memcpy(ini->peer_gid, pclc->lcl.gid, SMC_GID_SIZE);
memcpy(ini->peer_mac, pclc->lcl.mac, ETH_ALEN);
rc = smc_find_rdma_device(new_smc, ini); rc = smc_find_rdma_device(new_smc, ini);
if (rc) { if (rc) {
/* no RDMA device found */ /* no RDMA device found */
if (ini->smc_type_v1 == SMC_TYPE_B) return SMC_CLC_DECL_NOSMCDEV;
/* neither ISM nor RDMA device found */
rc = SMC_CLC_DECL_NOSMCDEV;
return rc;
} }
rc = smc_listen_rdma_init(new_smc, ini); rc = smc_listen_rdma_init(new_smc, ini);
if (rc) if (rc)
...@@ -1664,51 +1843,60 @@ static int smc_listen_find_device(struct smc_sock *new_smc, ...@@ -1664,51 +1843,60 @@ static int smc_listen_find_device(struct smc_sock *new_smc,
struct smc_clc_msg_proposal *pclc, struct smc_clc_msg_proposal *pclc,
struct smc_init_info *ini) struct smc_init_info *ini)
{ {
int rc; int prfx_rc;
/* check for ISM device matching V2 proposed device */ /* check for ISM device matching V2 proposed device */
smc_find_ism_v2_device_serv(new_smc, pclc, ini); smc_find_ism_v2_device_serv(new_smc, pclc, ini);
if (ini->ism_dev[0]) if (ini->ism_dev[0])
return 0; return 0;
if (!(ini->smcd_version & SMC_V1)) /* check for matching IP prefix and subnet length (V1) */
return ini->rc ?: SMC_CLC_DECL_NOSMCD2DEV; prfx_rc = smc_listen_prfx_check(new_smc, pclc);
if (prfx_rc)
/* check for matching IP prefix and subnet length */ smc_find_ism_store_rc(prfx_rc, ini);
rc = smc_listen_prfx_check(new_smc, pclc);
if (rc)
return ini->rc ?: rc;
/* get vlan id from IP device */ /* get vlan id from IP device */
if (smc_vlan_by_tcpsk(new_smc->clcsock, ini)) if (smc_vlan_by_tcpsk(new_smc->clcsock, ini))
return ini->rc ?: SMC_CLC_DECL_GETVLANERR; return ini->rc ?: SMC_CLC_DECL_GETVLANERR;
/* check for ISM device matching V1 proposed device */ /* check for ISM device matching V1 proposed device */
if (!prfx_rc)
smc_find_ism_v1_device_serv(new_smc, pclc, ini); smc_find_ism_v1_device_serv(new_smc, pclc, ini);
if (ini->ism_dev[0]) if (ini->ism_dev[0])
return 0; return 0;
if (pclc->hdr.typev1 == SMC_TYPE_D) if (!smcr_indicated(pclc->hdr.typev1) &&
!smcr_indicated(pclc->hdr.typev2))
/* skip RDMA and decline */ /* skip RDMA and decline */
return ini->rc ?: SMC_CLC_DECL_NOSMCDDEV; return ini->rc ?: SMC_CLC_DECL_NOSMCDDEV;
/* check if RDMA is available */ /* check if RDMA V2 is available */
smc_find_rdma_v2_device_serv(new_smc, pclc, ini);
if (ini->smcrv2.ib_dev_v2)
return 0;
/* check if RDMA V1 is available */
if (!prfx_rc) {
int rc;
rc = smc_find_rdma_v1_device_serv(new_smc, pclc, ini); rc = smc_find_rdma_v1_device_serv(new_smc, pclc, ini);
smc_find_ism_store_rc(rc, ini); smc_find_ism_store_rc(rc, ini);
return (!rc) ? 0 : ini->rc; return (!rc) ? 0 : ini->rc;
}
return SMC_CLC_DECL_NOSMCDEV;
} }
/* listen worker: finish RDMA setup */ /* listen worker: finish RDMA setup */
static int smc_listen_rdma_finish(struct smc_sock *new_smc, static int smc_listen_rdma_finish(struct smc_sock *new_smc,
struct smc_clc_msg_accept_confirm *cclc, struct smc_clc_msg_accept_confirm *cclc,
bool local_first) bool local_first,
struct smc_init_info *ini)
{ {
struct smc_link *link = new_smc->conn.lnk; struct smc_link *link = new_smc->conn.lnk;
int reason_code = 0; int reason_code = 0;
if (local_first) if (local_first)
smc_link_save_peer_info(link, cclc); smc_link_save_peer_info(link, cclc, ini);
if (smc_rmb_rtoken_handling(&new_smc->conn, link, cclc)) if (smc_rmb_rtoken_handling(&new_smc->conn, link, cclc))
return SMC_CLC_DECL_ERR_RTOK; return SMC_CLC_DECL_ERR_RTOK;
...@@ -1729,12 +1917,13 @@ static void smc_listen_work(struct work_struct *work) ...@@ -1729,12 +1917,13 @@ static void smc_listen_work(struct work_struct *work)
{ {
struct smc_sock *new_smc = container_of(work, struct smc_sock, struct smc_sock *new_smc = container_of(work, struct smc_sock,
smc_listen_work); smc_listen_work);
u8 version = smc_ism_is_v2_capable() ? SMC_V2 : SMC_V1;
struct socket *newclcsock = new_smc->clcsock; struct socket *newclcsock = new_smc->clcsock;
struct smc_clc_msg_accept_confirm *cclc; struct smc_clc_msg_accept_confirm *cclc;
struct smc_clc_msg_proposal_area *buf; struct smc_clc_msg_proposal_area *buf;
struct smc_clc_msg_proposal *pclc; struct smc_clc_msg_proposal *pclc;
struct smc_init_info *ini = NULL; struct smc_init_info *ini = NULL;
u8 proposal_version = SMC_V1;
u8 accept_version;
int rc = 0; int rc = 0;
if (new_smc->listen_smc->sk.sk_state != SMC_LISTEN) if (new_smc->listen_smc->sk.sk_state != SMC_LISTEN)
...@@ -1765,7 +1954,9 @@ static void smc_listen_work(struct work_struct *work) ...@@ -1765,7 +1954,9 @@ static void smc_listen_work(struct work_struct *work)
SMC_CLC_PROPOSAL, CLC_WAIT_TIME); SMC_CLC_PROPOSAL, CLC_WAIT_TIME);
if (rc) if (rc)
goto out_decl; goto out_decl;
version = pclc->hdr.version == SMC_V1 ? SMC_V1 : version;
if (pclc->hdr.version > SMC_V1)
proposal_version = SMC_V2;
/* IPSec connections opt out of SMC optimizations */ /* IPSec connections opt out of SMC optimizations */
if (using_ipsec(new_smc)) { if (using_ipsec(new_smc)) {
...@@ -1795,9 +1986,9 @@ static void smc_listen_work(struct work_struct *work) ...@@ -1795,9 +1986,9 @@ static void smc_listen_work(struct work_struct *work)
goto out_unlock; goto out_unlock;
/* send SMC Accept CLC message */ /* send SMC Accept CLC message */
accept_version = ini->is_smcd ? ini->smcd_version : ini->smcr_version;
rc = smc_clc_send_accept(new_smc, ini->first_contact_local, rc = smc_clc_send_accept(new_smc, ini->first_contact_local,
ini->smcd_version == SMC_V2 ? SMC_V2 : SMC_V1, accept_version, ini->negotiated_eid);
ini->negotiated_eid);
if (rc) if (rc)
goto out_unlock; goto out_unlock;
...@@ -1819,7 +2010,7 @@ static void smc_listen_work(struct work_struct *work) ...@@ -1819,7 +2010,7 @@ static void smc_listen_work(struct work_struct *work)
/* finish worker */ /* finish worker */
if (!ini->is_smcd) { if (!ini->is_smcd) {
rc = smc_listen_rdma_finish(new_smc, cclc, rc = smc_listen_rdma_finish(new_smc, cclc,
ini->first_contact_local); ini->first_contact_local, ini);
if (rc) if (rc)
goto out_unlock; goto out_unlock;
mutex_unlock(&smc_server_lgr_pending); mutex_unlock(&smc_server_lgr_pending);
...@@ -1833,7 +2024,7 @@ static void smc_listen_work(struct work_struct *work) ...@@ -1833,7 +2024,7 @@ static void smc_listen_work(struct work_struct *work)
mutex_unlock(&smc_server_lgr_pending); mutex_unlock(&smc_server_lgr_pending);
out_decl: out_decl:
smc_listen_decline(new_smc, rc, ini ? ini->first_contact_local : 0, smc_listen_decline(new_smc, rc, ini ? ini->first_contact_local : 0,
version); proposal_version);
out_free: out_free:
kfree(ini); kfree(ini);
kfree(buf); kfree(buf);
......
...@@ -56,7 +56,20 @@ enum smc_state { /* possible states of an SMC socket */ ...@@ -56,7 +56,20 @@ enum smc_state { /* possible states of an SMC socket */
struct smc_link_group; struct smc_link_group;
/* Common prefix part of LLC and CDC messages, used to demultiplex them.
 *
 * The single header byte can be read either as a whole (@type) or as two
 * 4-bit fields (@llc_version / @llc_type) that overlay the same storage.
 * The field order is swapped between the two bitfield-endianness variants;
 * this is presumably meant to keep each field in the same nibble of the
 * byte regardless of how the compiler allocates bitfields - confirm against
 * the protocol description before relying on a specific nibble.
 */
struct smc_wr_rx_hdr {
	union {
		u8 type;		/* whole header byte */
#if defined(__BIG_ENDIAN_BITFIELD)
		struct {
			u8 llc_version:4,
			   llc_type:4;
		};
#elif defined(__LITTLE_ENDIAN_BITFIELD)
		struct {
			u8 llc_type:4,
			   llc_version:4;
		};
#endif
	};
} __aligned(1);
struct smc_cdc_conn_state_flags { struct smc_cdc_conn_state_flags {
...@@ -286,7 +299,12 @@ static inline bool using_ipsec(struct smc_sock *smc) ...@@ -286,7 +299,12 @@ static inline bool using_ipsec(struct smc_sock *smc)
} }
#endif #endif
struct smc_gidlist;
struct sock *smc_accept_dequeue(struct sock *parent, struct socket *new_sock); struct sock *smc_accept_dequeue(struct sock *parent, struct socket *new_sock);
void smc_close_non_accepted(struct sock *sk); void smc_close_non_accepted(struct sock *sk);
void smc_fill_gid_list(struct smc_link_group *lgr,
struct smc_gidlist *gidlist,
struct smc_ib_device *known_dev, u8 *known_gid);
#endif /* __SMC_H */ #endif /* __SMC_H */
...@@ -31,6 +31,7 @@ ...@@ -31,6 +31,7 @@
#define SMCR_CLC_ACCEPT_CONFIRM_LEN 68 #define SMCR_CLC_ACCEPT_CONFIRM_LEN 68
#define SMCD_CLC_ACCEPT_CONFIRM_LEN 48 #define SMCD_CLC_ACCEPT_CONFIRM_LEN 48
#define SMCD_CLC_ACCEPT_CONFIRM_LEN_V2 78 #define SMCD_CLC_ACCEPT_CONFIRM_LEN_V2 78
#define SMCR_CLC_ACCEPT_CONFIRM_LEN_V2 108
#define SMC_CLC_RECV_BUF_LEN 100 #define SMC_CLC_RECV_BUF_LEN 100
/* eye catcher "SMCR" EBCDIC for CLC messages */ /* eye catcher "SMCR" EBCDIC for CLC messages */
...@@ -114,6 +115,17 @@ static int smc_clc_ueid_add(char *ueid) ...@@ -114,6 +115,17 @@ static int smc_clc_ueid_add(char *ueid)
return rc; return rc;
} }
/* Return the number of UEID entries currently recorded in
 * smc_clc_eid_table. The counter is sampled while holding the table's
 * read lock.
 */
int smc_clc_ueid_count(void)
{
	int ueid_cnt;

	read_lock(&smc_clc_eid_table.lock);
	ueid_cnt = smc_clc_eid_table.ueid_cnt;
	read_unlock(&smc_clc_eid_table.lock);

	return ueid_cnt;
}
int smc_nl_add_ueid(struct sk_buff *skb, struct genl_info *info) int smc_nl_add_ueid(struct sk_buff *skb, struct genl_info *info)
{ {
struct nlattr *nla_ueid = info->attrs[SMC_NLA_EID_TABLE_ENTRY]; struct nlattr *nla_ueid = info->attrs[SMC_NLA_EID_TABLE_ENTRY];
...@@ -298,7 +310,8 @@ bool smc_clc_match_eid(u8 *negotiated_eid, ...@@ -298,7 +310,8 @@ bool smc_clc_match_eid(u8 *negotiated_eid,
negotiated_eid[0] = 0; negotiated_eid[0] = 0;
read_lock(&smc_clc_eid_table.lock); read_lock(&smc_clc_eid_table.lock);
if (smc_clc_eid_table.seid_enabled && if (peer_eid && local_eid &&
smc_clc_eid_table.seid_enabled &&
smc_v2_ext->hdr.flag.seid && smc_v2_ext->hdr.flag.seid &&
!memcmp(peer_eid, local_eid, SMC_MAX_EID_LEN)) { !memcmp(peer_eid, local_eid, SMC_MAX_EID_LEN)) {
memcpy(negotiated_eid, peer_eid, SMC_MAX_EID_LEN); memcpy(negotiated_eid, peer_eid, SMC_MAX_EID_LEN);
...@@ -380,6 +393,27 @@ smc_clc_msg_acc_conf_valid(struct smc_clc_msg_accept_confirm_v2 *clc_v2) ...@@ -380,6 +393,27 @@ smc_clc_msg_acc_conf_valid(struct smc_clc_msg_accept_confirm_v2 *clc_v2)
(ntohs(hdr->length) != SMCD_CLC_ACCEPT_CONFIRM_LEN_V2 + (ntohs(hdr->length) != SMCD_CLC_ACCEPT_CONFIRM_LEN_V2 +
sizeof(struct smc_clc_first_contact_ext))) sizeof(struct smc_clc_first_contact_ext)))
return false; return false;
if (hdr->typev1 == SMC_TYPE_R &&
ntohs(hdr->length) < SMCR_CLC_ACCEPT_CONFIRM_LEN_V2)
return false;
}
return true;
}
/* check arriving CLC decline
 *
 * @dclc: received decline message, starting with the common CLC header
 *
 * The message is accepted only if the header's typev1 is SMC_TYPE_R or
 * SMC_TYPE_D and the length field (network byte order) matches exactly the
 * size of the decline layout selected by the header version:
 * struct smc_clc_msg_decline for SMC_V1, struct smc_clc_msg_decline_v2 for
 * any other version.
 *
 * Returns true if the decline message is valid, false otherwise.
 */
static bool
smc_clc_msg_decl_valid(struct smc_clc_msg_decline *dclc)
{
	struct smc_clc_msg_hdr *hdr = &dclc->hdr;

	if (hdr->typev1 != SMC_TYPE_R && hdr->typev1 != SMC_TYPE_D)
		return false;
	if (hdr->version == SMC_V1) {
		if (ntohs(hdr->length) != sizeof(struct smc_clc_msg_decline))
			return false;
	} else {
		if (ntohs(hdr->length) != sizeof(struct smc_clc_msg_decline_v2))
			return false;
	}
	return true;
}
...@@ -425,9 +459,9 @@ static bool smc_clc_msg_hdr_valid(struct smc_clc_msg_hdr *clcm, bool check_trl) ...@@ -425,9 +459,9 @@ static bool smc_clc_msg_hdr_valid(struct smc_clc_msg_hdr *clcm, bool check_trl)
break; break;
case SMC_CLC_DECLINE: case SMC_CLC_DECLINE:
dclc = (struct smc_clc_msg_decline *)clcm; dclc = (struct smc_clc_msg_decline *)clcm;
if (ntohs(dclc->hdr.length) != sizeof(*dclc)) if (!smc_clc_msg_decl_valid(dclc))
return false; return false;
trl = &dclc->trl; check_trl = false;
break; break;
default: default:
return false; return false;
...@@ -726,15 +760,16 @@ int smc_clc_wait_msg(struct smc_sock *smc, void *buf, int buflen, ...@@ -726,15 +760,16 @@ int smc_clc_wait_msg(struct smc_sock *smc, void *buf, int buflen,
/* send CLC DECLINE message across internal TCP socket */ /* send CLC DECLINE message across internal TCP socket */
int smc_clc_send_decline(struct smc_sock *smc, u32 peer_diag_info, u8 version) int smc_clc_send_decline(struct smc_sock *smc, u32 peer_diag_info, u8 version)
{ {
struct smc_clc_msg_decline dclc; struct smc_clc_msg_decline *dclc_v1;
struct smc_clc_msg_decline_v2 dclc;
struct msghdr msg; struct msghdr msg;
int len, send_len;
struct kvec vec; struct kvec vec;
int len;
dclc_v1 = (struct smc_clc_msg_decline *)&dclc;
memset(&dclc, 0, sizeof(dclc)); memset(&dclc, 0, sizeof(dclc));
memcpy(dclc.hdr.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)); memcpy(dclc.hdr.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER));
dclc.hdr.type = SMC_CLC_DECLINE; dclc.hdr.type = SMC_CLC_DECLINE;
dclc.hdr.length = htons(sizeof(struct smc_clc_msg_decline));
dclc.hdr.version = version; dclc.hdr.version = version;
dclc.os_type = version == SMC_V1 ? 0 : SMC_CLC_OS_LINUX; dclc.os_type = version == SMC_V1 ? 0 : SMC_CLC_OS_LINUX;
dclc.hdr.typev2 = (peer_diag_info == SMC_CLC_DECL_SYNCERR) ? dclc.hdr.typev2 = (peer_diag_info == SMC_CLC_DECL_SYNCERR) ?
...@@ -744,14 +779,22 @@ int smc_clc_send_decline(struct smc_sock *smc, u32 peer_diag_info, u8 version) ...@@ -744,14 +779,22 @@ int smc_clc_send_decline(struct smc_sock *smc, u32 peer_diag_info, u8 version)
memcpy(dclc.id_for_peer, local_systemid, memcpy(dclc.id_for_peer, local_systemid,
sizeof(local_systemid)); sizeof(local_systemid));
dclc.peer_diagnosis = htonl(peer_diag_info); dclc.peer_diagnosis = htonl(peer_diag_info);
memcpy(dclc.trl.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)); if (version == SMC_V1) {
memcpy(dclc_v1->trl.eyecatcher, SMC_EYECATCHER,
sizeof(SMC_EYECATCHER));
send_len = sizeof(*dclc_v1);
} else {
memcpy(dclc.trl.eyecatcher, SMC_EYECATCHER,
sizeof(SMC_EYECATCHER));
send_len = sizeof(dclc);
}
dclc.hdr.length = htons(send_len);
memset(&msg, 0, sizeof(msg)); memset(&msg, 0, sizeof(msg));
vec.iov_base = &dclc; vec.iov_base = &dclc;
vec.iov_len = sizeof(struct smc_clc_msg_decline); vec.iov_len = send_len;
len = kernel_sendmsg(smc->clcsock, &msg, &vec, 1, len = kernel_sendmsg(smc->clcsock, &msg, &vec, 1, send_len);
sizeof(struct smc_clc_msg_decline)); if (len < 0 || len < send_len)
if (len < 0 || len < sizeof(struct smc_clc_msg_decline))
len = -EPROTO; len = -EPROTO;
return len > 0 ? 0 : len; return len > 0 ? 0 : len;
} }
...@@ -833,8 +876,8 @@ int smc_clc_send_proposal(struct smc_sock *smc, struct smc_init_info *ini) ...@@ -833,8 +876,8 @@ int smc_clc_send_proposal(struct smc_sock *smc, struct smc_init_info *ini)
} else { } else {
struct smc_clc_eid_entry *ueident; struct smc_clc_eid_entry *ueident;
u16 v2_ext_offset; u16 v2_ext_offset;
u8 *eid = NULL;
v2_ext->hdr.flag.release = SMC_RELEASE;
v2_ext_offset = sizeof(*pclc_smcd) - v2_ext_offset = sizeof(*pclc_smcd) -
offsetofend(struct smc_clc_msg_smcd, v2_ext_offset); offsetofend(struct smc_clc_msg_smcd, v2_ext_offset);
if (ini->smc_type_v1 != SMC_TYPE_N) if (ini->smc_type_v1 != SMC_TYPE_N)
...@@ -842,6 +885,7 @@ int smc_clc_send_proposal(struct smc_sock *smc, struct smc_init_info *ini) ...@@ -842,6 +885,7 @@ int smc_clc_send_proposal(struct smc_sock *smc, struct smc_init_info *ini)
pclc_prfx->ipv6_prefixes_cnt * pclc_prfx->ipv6_prefixes_cnt *
sizeof(ipv6_prfx[0]); sizeof(ipv6_prfx[0]);
pclc_smcd->v2_ext_offset = htons(v2_ext_offset); pclc_smcd->v2_ext_offset = htons(v2_ext_offset);
plen += sizeof(*v2_ext);
read_lock(&smc_clc_eid_table.lock); read_lock(&smc_clc_eid_table.lock);
v2_ext->hdr.eid_cnt = smc_clc_eid_table.ueid_cnt; v2_ext->hdr.eid_cnt = smc_clc_eid_table.ueid_cnt;
...@@ -851,10 +895,13 @@ int smc_clc_send_proposal(struct smc_sock *smc, struct smc_init_info *ini) ...@@ -851,10 +895,13 @@ int smc_clc_send_proposal(struct smc_sock *smc, struct smc_init_info *ini)
memcpy(v2_ext->user_eids[i++], ueident->eid, memcpy(v2_ext->user_eids[i++], ueident->eid,
sizeof(ueident->eid)); sizeof(ueident->eid));
} }
v2_ext->hdr.flag.seid = smc_clc_eid_table.seid_enabled;
read_unlock(&smc_clc_eid_table.lock); read_unlock(&smc_clc_eid_table.lock);
}
if (smcd_indicated(ini->smc_type_v2)) {
u8 *eid = NULL;
v2_ext->hdr.flag.seid = smc_clc_eid_table.seid_enabled;
v2_ext->hdr.ism_gid_cnt = ini->ism_offered_cnt; v2_ext->hdr.ism_gid_cnt = ini->ism_offered_cnt;
v2_ext->hdr.flag.release = SMC_RELEASE;
v2_ext->hdr.smcd_v2_ext_offset = htons(sizeof(*v2_ext) - v2_ext->hdr.smcd_v2_ext_offset = htons(sizeof(*v2_ext) -
offsetofend(struct smc_clnt_opts_area_hdr, offsetofend(struct smc_clnt_opts_area_hdr,
smcd_v2_ext_offset) + smcd_v2_ext_offset) +
...@@ -862,7 +909,7 @@ int smc_clc_send_proposal(struct smc_sock *smc, struct smc_init_info *ini) ...@@ -862,7 +909,7 @@ int smc_clc_send_proposal(struct smc_sock *smc, struct smc_init_info *ini)
smc_ism_get_system_eid(&eid); smc_ism_get_system_eid(&eid);
if (eid && v2_ext->hdr.flag.seid) if (eid && v2_ext->hdr.flag.seid)
memcpy(smcd_v2_ext->system_eid, eid, SMC_MAX_EID_LEN); memcpy(smcd_v2_ext->system_eid, eid, SMC_MAX_EID_LEN);
plen += sizeof(*v2_ext) + sizeof(*smcd_v2_ext); plen += sizeof(*smcd_v2_ext);
if (ini->ism_offered_cnt) { if (ini->ism_offered_cnt) {
for (i = 1; i <= ini->ism_offered_cnt; i++) { for (i = 1; i <= ini->ism_offered_cnt; i++) {
gidchids[i - 1].gid = gidchids[i - 1].gid =
...@@ -874,6 +921,9 @@ int smc_clc_send_proposal(struct smc_sock *smc, struct smc_init_info *ini) ...@@ -874,6 +921,9 @@ int smc_clc_send_proposal(struct smc_sock *smc, struct smc_init_info *ini)
sizeof(struct smc_clc_smcd_gid_chid); sizeof(struct smc_clc_smcd_gid_chid);
} }
} }
if (smcr_indicated(ini->smc_type_v2))
memcpy(v2_ext->roce, ini->smcrv2.ib_gid_v2, SMC_GID_SIZE);
pclc_base->hdr.length = htons(plen); pclc_base->hdr.length = htons(plen);
memcpy(trl->eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)); memcpy(trl->eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER));
...@@ -897,6 +947,7 @@ int smc_clc_send_proposal(struct smc_sock *smc, struct smc_init_info *ini) ...@@ -897,6 +947,7 @@ int smc_clc_send_proposal(struct smc_sock *smc, struct smc_init_info *ini)
vec[i].iov_base = v2_ext; vec[i].iov_base = v2_ext;
vec[i++].iov_len = sizeof(*v2_ext) + vec[i++].iov_len = sizeof(*v2_ext) +
(v2_ext->hdr.eid_cnt * SMC_MAX_EID_LEN); (v2_ext->hdr.eid_cnt * SMC_MAX_EID_LEN);
if (smcd_indicated(ini->smc_type_v2)) {
vec[i].iov_base = smcd_v2_ext; vec[i].iov_base = smcd_v2_ext;
vec[i++].iov_len = sizeof(*smcd_v2_ext); vec[i++].iov_len = sizeof(*smcd_v2_ext);
if (ini->ism_offered_cnt) { if (ini->ism_offered_cnt) {
...@@ -905,6 +956,7 @@ int smc_clc_send_proposal(struct smc_sock *smc, struct smc_init_info *ini) ...@@ -905,6 +956,7 @@ int smc_clc_send_proposal(struct smc_sock *smc, struct smc_init_info *ini)
sizeof(struct smc_clc_smcd_gid_chid); sizeof(struct smc_clc_smcd_gid_chid);
} }
} }
}
vec[i].iov_base = trl; vec[i].iov_base = trl;
vec[i++].iov_len = sizeof(*trl); vec[i++].iov_len = sizeof(*trl);
/* due to the few bytes needed for clc-handshake this cannot block */ /* due to the few bytes needed for clc-handshake this cannot block */
...@@ -925,13 +977,14 @@ int smc_clc_send_proposal(struct smc_sock *smc, struct smc_init_info *ini) ...@@ -925,13 +977,14 @@ int smc_clc_send_proposal(struct smc_sock *smc, struct smc_init_info *ini)
static int smc_clc_send_confirm_accept(struct smc_sock *smc, static int smc_clc_send_confirm_accept(struct smc_sock *smc,
struct smc_clc_msg_accept_confirm_v2 *clc_v2, struct smc_clc_msg_accept_confirm_v2 *clc_v2,
int first_contact, u8 version, int first_contact, u8 version,
u8 *eid) u8 *eid, struct smc_init_info *ini)
{ {
struct smc_connection *conn = &smc->conn; struct smc_connection *conn = &smc->conn;
struct smc_clc_msg_accept_confirm *clc; struct smc_clc_msg_accept_confirm *clc;
struct smc_clc_first_contact_ext fce; struct smc_clc_first_contact_ext fce;
struct smc_clc_fce_gid_ext gle;
struct smc_clc_msg_trail trl; struct smc_clc_msg_trail trl;
struct kvec vec[3]; struct kvec vec[5];
struct msghdr msg; struct msghdr msg;
int i, len; int i, len;
...@@ -953,9 +1006,10 @@ static int smc_clc_send_confirm_accept(struct smc_sock *smc, ...@@ -953,9 +1006,10 @@ static int smc_clc_send_confirm_accept(struct smc_sock *smc,
if (version == SMC_V1) { if (version == SMC_V1) {
clc->hdr.length = htons(SMCD_CLC_ACCEPT_CONFIRM_LEN); clc->hdr.length = htons(SMCD_CLC_ACCEPT_CONFIRM_LEN);
} else { } else {
clc_v2->chid = htons(smc_ism_get_chid(conn->lgr->smcd)); clc_v2->d1.chid =
if (eid[0]) htons(smc_ism_get_chid(conn->lgr->smcd));
memcpy(clc_v2->eid, eid, SMC_MAX_EID_LEN); if (eid && eid[0])
memcpy(clc_v2->d1.eid, eid, SMC_MAX_EID_LEN);
len = SMCD_CLC_ACCEPT_CONFIRM_LEN_V2; len = SMCD_CLC_ACCEPT_CONFIRM_LEN_V2;
if (first_contact) if (first_contact)
smc_clc_fill_fce(&fce, &len); smc_clc_fill_fce(&fce, &len);
...@@ -994,6 +1048,26 @@ static int smc_clc_send_confirm_accept(struct smc_sock *smc, ...@@ -994,6 +1048,26 @@ static int smc_clc_send_confirm_accept(struct smc_sock *smc,
clc->r0.rmb_dma_addr = cpu_to_be64((u64)sg_dma_address clc->r0.rmb_dma_addr = cpu_to_be64((u64)sg_dma_address
(conn->rmb_desc->sgt[link->link_idx].sgl)); (conn->rmb_desc->sgt[link->link_idx].sgl));
hton24(clc->r0.psn, link->psn_initial); hton24(clc->r0.psn, link->psn_initial);
if (version == SMC_V1) {
clc->hdr.length = htons(SMCR_CLC_ACCEPT_CONFIRM_LEN);
} else {
if (eid && eid[0])
memcpy(clc_v2->r1.eid, eid, SMC_MAX_EID_LEN);
len = SMCR_CLC_ACCEPT_CONFIRM_LEN_V2;
if (first_contact) {
smc_clc_fill_fce(&fce, &len);
fce.v2_direct = !link->lgr->uses_gateway;
memset(&gle, 0, sizeof(gle));
if (ini && clc->hdr.type == SMC_CLC_CONFIRM) {
gle.gid_cnt = ini->smcrv2.gidlist.len;
len += sizeof(gle);
len += gle.gid_cnt * sizeof(gle.gid[0]);
} else {
len += sizeof(gle.reserved);
}
}
clc_v2->hdr.length = htons(len);
}
memcpy(trl.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)); memcpy(trl.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER));
} }
...@@ -1001,7 +1075,10 @@ static int smc_clc_send_confirm_accept(struct smc_sock *smc, ...@@ -1001,7 +1075,10 @@ static int smc_clc_send_confirm_accept(struct smc_sock *smc,
i = 0; i = 0;
vec[i].iov_base = clc_v2; vec[i].iov_base = clc_v2;
if (version > SMC_V1) if (version > SMC_V1)
vec[i++].iov_len = SMCD_CLC_ACCEPT_CONFIRM_LEN_V2 - sizeof(trl); vec[i++].iov_len = (clc->hdr.typev1 == SMC_TYPE_D ?
SMCD_CLC_ACCEPT_CONFIRM_LEN_V2 :
SMCR_CLC_ACCEPT_CONFIRM_LEN_V2) -
sizeof(trl);
else else
vec[i++].iov_len = (clc->hdr.typev1 == SMC_TYPE_D ? vec[i++].iov_len = (clc->hdr.typev1 == SMC_TYPE_D ?
SMCD_CLC_ACCEPT_CONFIRM_LEN : SMCD_CLC_ACCEPT_CONFIRM_LEN :
...@@ -1010,6 +1087,18 @@ static int smc_clc_send_confirm_accept(struct smc_sock *smc, ...@@ -1010,6 +1087,18 @@ static int smc_clc_send_confirm_accept(struct smc_sock *smc,
if (version > SMC_V1 && first_contact) { if (version > SMC_V1 && first_contact) {
vec[i].iov_base = &fce; vec[i].iov_base = &fce;
vec[i++].iov_len = sizeof(fce); vec[i++].iov_len = sizeof(fce);
if (!conn->lgr->is_smcd) {
if (clc->hdr.type == SMC_CLC_CONFIRM) {
vec[i].iov_base = &gle;
vec[i++].iov_len = sizeof(gle);
vec[i].iov_base = &ini->smcrv2.gidlist.list;
vec[i++].iov_len = gle.gid_cnt *
sizeof(gle.gid[0]);
} else {
vec[i].iov_base = &gle.reserved;
vec[i++].iov_len = sizeof(gle.reserved);
}
}
} }
vec[i].iov_base = &trl; vec[i].iov_base = &trl;
vec[i++].iov_len = sizeof(trl); vec[i++].iov_len = sizeof(trl);
...@@ -1019,7 +1108,7 @@ static int smc_clc_send_confirm_accept(struct smc_sock *smc, ...@@ -1019,7 +1108,7 @@ static int smc_clc_send_confirm_accept(struct smc_sock *smc,
/* send CLC CONFIRM message across internal TCP socket */ /* send CLC CONFIRM message across internal TCP socket */
int smc_clc_send_confirm(struct smc_sock *smc, bool clnt_first_contact, int smc_clc_send_confirm(struct smc_sock *smc, bool clnt_first_contact,
u8 version, u8 *eid) u8 version, u8 *eid, struct smc_init_info *ini)
{ {
struct smc_clc_msg_accept_confirm_v2 cclc_v2; struct smc_clc_msg_accept_confirm_v2 cclc_v2;
int reason_code = 0; int reason_code = 0;
...@@ -1029,7 +1118,7 @@ int smc_clc_send_confirm(struct smc_sock *smc, bool clnt_first_contact, ...@@ -1029,7 +1118,7 @@ int smc_clc_send_confirm(struct smc_sock *smc, bool clnt_first_contact,
memset(&cclc_v2, 0, sizeof(cclc_v2)); memset(&cclc_v2, 0, sizeof(cclc_v2));
cclc_v2.hdr.type = SMC_CLC_CONFIRM; cclc_v2.hdr.type = SMC_CLC_CONFIRM;
len = smc_clc_send_confirm_accept(smc, &cclc_v2, clnt_first_contact, len = smc_clc_send_confirm_accept(smc, &cclc_v2, clnt_first_contact,
version, eid); version, eid, ini);
if (len < ntohs(cclc_v2.hdr.length)) { if (len < ntohs(cclc_v2.hdr.length)) {
if (len >= 0) { if (len >= 0) {
reason_code = -ENETUNREACH; reason_code = -ENETUNREACH;
...@@ -1052,7 +1141,7 @@ int smc_clc_send_accept(struct smc_sock *new_smc, bool srv_first_contact, ...@@ -1052,7 +1141,7 @@ int smc_clc_send_accept(struct smc_sock *new_smc, bool srv_first_contact,
memset(&aclc_v2, 0, sizeof(aclc_v2)); memset(&aclc_v2, 0, sizeof(aclc_v2));
aclc_v2.hdr.type = SMC_CLC_ACCEPT; aclc_v2.hdr.type = SMC_CLC_ACCEPT;
len = smc_clc_send_confirm_accept(new_smc, &aclc_v2, srv_first_contact, len = smc_clc_send_confirm_accept(new_smc, &aclc_v2, srv_first_contact,
version, negotiated_eid); version, negotiated_eid, NULL);
if (len < ntohs(aclc_v2.hdr.length)) if (len < ntohs(aclc_v2.hdr.length))
len = len >= 0 ? -EPROTO : -new_smc->clcsock->sk->sk_err; len = len >= 0 ? -EPROTO : -new_smc->clcsock->sk->sk_err;
......
...@@ -44,6 +44,7 @@ ...@@ -44,6 +44,7 @@
#define SMC_CLC_DECL_NOV2DEXT 0x03030005 /* peer sent no clc SMC-Dv2 ext. */ #define SMC_CLC_DECL_NOV2DEXT 0x03030005 /* peer sent no clc SMC-Dv2 ext. */
#define SMC_CLC_DECL_NOSEID 0x03030006 /* peer sent no SEID */ #define SMC_CLC_DECL_NOSEID 0x03030006 /* peer sent no SEID */
#define SMC_CLC_DECL_NOSMCD2DEV 0x03030007 /* no SMC-Dv2 device found */ #define SMC_CLC_DECL_NOSMCD2DEV 0x03030007 /* no SMC-Dv2 device found */
#define SMC_CLC_DECL_NOUEID 0x03030008 /* peer sent no UEID */
#define SMC_CLC_DECL_MODEUNSUPP 0x03040000 /* smc modes do not match (R or D)*/ #define SMC_CLC_DECL_MODEUNSUPP 0x03040000 /* smc modes do not match (R or D)*/
#define SMC_CLC_DECL_RMBE_EC 0x03050000 /* peer has eyecatcher in RMBE */ #define SMC_CLC_DECL_RMBE_EC 0x03050000 /* peer has eyecatcher in RMBE */
#define SMC_CLC_DECL_OPTUNSUPP 0x03060000 /* fastopen sockopt not supported */ #define SMC_CLC_DECL_OPTUNSUPP 0x03060000 /* fastopen sockopt not supported */
...@@ -54,6 +55,8 @@ ...@@ -54,6 +55,8 @@
#define SMC_CLC_DECL_NOSRVLINK 0x030b0000 /* SMC-R link from srv not found */ #define SMC_CLC_DECL_NOSRVLINK 0x030b0000 /* SMC-R link from srv not found */
#define SMC_CLC_DECL_VERSMISMAT 0x030c0000 /* SMC version mismatch */ #define SMC_CLC_DECL_VERSMISMAT 0x030c0000 /* SMC version mismatch */
#define SMC_CLC_DECL_MAX_DMB 0x030d0000 /* SMC-D DMB limit exceeded */ #define SMC_CLC_DECL_MAX_DMB 0x030d0000 /* SMC-D DMB limit exceeded */
#define SMC_CLC_DECL_NOROUTE 0x030e0000 /* SMC-Rv2 conn. no route to peer */
#define SMC_CLC_DECL_NOINDIRECT 0x030f0000 /* SMC-Rv2 conn. indirect mismatch*/
#define SMC_CLC_DECL_SYNCERR 0x04000000 /* synchronization error */ #define SMC_CLC_DECL_SYNCERR 0x04000000 /* synchronization error */
#define SMC_CLC_DECL_PEERDECL 0x05000000 /* peer declined during handshake */ #define SMC_CLC_DECL_PEERDECL 0x05000000 /* peer declined during handshake */
#define SMC_CLC_DECL_INTERR 0x09990000 /* internal error */ #define SMC_CLC_DECL_INTERR 0x09990000 /* internal error */
...@@ -213,11 +216,14 @@ struct smcd_clc_msg_accept_confirm_common { /* SMCD accept/confirm */ ...@@ -213,11 +216,14 @@ struct smcd_clc_msg_accept_confirm_common { /* SMCD accept/confirm */
#define SMC_CLC_OS_AIX 3 #define SMC_CLC_OS_AIX 3
struct smc_clc_first_contact_ext { struct smc_clc_first_contact_ext {
u8 reserved1;
#if defined(__BIG_ENDIAN_BITFIELD) #if defined(__BIG_ENDIAN_BITFIELD)
u8 v2_direct : 1,
reserved : 7;
u8 os_type : 4, u8 os_type : 4,
release : 4; release : 4;
#elif defined(__LITTLE_ENDIAN_BITFIELD) #elif defined(__LITTLE_ENDIAN_BITFIELD)
u8 reserved : 7,
v2_direct : 1;
u8 release : 4, u8 release : 4,
os_type : 4; os_type : 4;
#endif #endif
...@@ -225,6 +231,13 @@ struct smc_clc_first_contact_ext { ...@@ -225,6 +231,13 @@ struct smc_clc_first_contact_ext {
u8 hostname[SMC_MAX_HOSTNAME_LEN]; u8 hostname[SMC_MAX_HOSTNAME_LEN];
}; };
/* GID list extension placed after the first contact extension by
 * smc_clc_send_confirm_accept() for SMC-R v2 first contact messages:
 * a CONFIRM carries the fixed part below followed by gid_cnt GIDs,
 * any other message type carries only the 16 reserved bytes.
 */
struct smc_clc_fce_gid_ext {
u8 reserved[16]; /* the only part sent when the message is not a CONFIRM */
u8 gid_cnt; /* number of gid[] entries that follow */
u8 reserved2[3];
u8 gid[][SMC_GID_SIZE]; /* flexible array, one SMC_GID_SIZE-byte entry per GID */
};
struct smc_clc_msg_accept_confirm { /* clc accept / confirm message */ struct smc_clc_msg_accept_confirm { /* clc accept / confirm message */
struct smc_clc_msg_hdr hdr; struct smc_clc_msg_hdr hdr;
union { union {
...@@ -239,13 +252,17 @@ struct smc_clc_msg_accept_confirm { /* clc accept / confirm message */ ...@@ -239,13 +252,17 @@ struct smc_clc_msg_accept_confirm { /* clc accept / confirm message */
struct smc_clc_msg_accept_confirm_v2 { /* clc accept / confirm message */ struct smc_clc_msg_accept_confirm_v2 { /* clc accept / confirm message */
struct smc_clc_msg_hdr hdr; struct smc_clc_msg_hdr hdr;
union { union {
struct smcr_clc_msg_accept_confirm r0; /* SMC-R */ struct { /* SMC-R */
struct smcr_clc_msg_accept_confirm r0;
u8 eid[SMC_MAX_EID_LEN];
u8 reserved6[8];
} r1;
struct { /* SMC-D */ struct { /* SMC-D */
struct smcd_clc_msg_accept_confirm_common d0; struct smcd_clc_msg_accept_confirm_common d0;
__be16 chid; __be16 chid;
u8 eid[SMC_MAX_EID_LEN]; u8 eid[SMC_MAX_EID_LEN];
u8 reserved5[8]; u8 reserved5[8];
}; } d1;
}; };
}; };
...@@ -264,6 +281,24 @@ struct smc_clc_msg_decline { /* clc decline message */ ...@@ -264,6 +281,24 @@ struct smc_clc_msg_decline { /* clc decline message */
struct smc_clc_msg_trail trl; /* eye catcher "SMCD" or "SMCR" EBCDIC */ struct smc_clc_msg_trail trl; /* eye catcher "SMCD" or "SMCR" EBCDIC */
} __aligned(4); } __aligned(4);
#define SMC_DECL_DIAG_COUNT_V2 4 /* no. of additional peer diagnosis codes */
/* CLC decline message in its version 2 layout.
 * smc_clc_send_decline() builds every decline in this structure and, for
 * SMC_V1, transmits only the leading struct smc_clc_msg_decline part of it;
 * smc_clc_msg_decl_valid() requires a non-V1 decline to be exactly
 * sizeof(struct smc_clc_msg_decline_v2) bytes long.
 */
struct smc_clc_msg_decline_v2 { /* clc decline message */
struct smc_clc_msg_hdr hdr;
u8 id_for_peer[SMC_SYSTEMID_LEN]; /* sender peer_id */
__be32 peer_diagnosis; /* diagnosis information */
/* os_type is declared in opposite order for the two bitfield layouts */
#if defined(__BIG_ENDIAN_BITFIELD)
u8 os_type : 4,
reserved : 4;
#elif defined(__LITTLE_ENDIAN_BITFIELD)
u8 reserved : 4,
os_type : 4;
#endif
u8 reserved2[3];
/* SMC_DECL_DIAG_COUNT_V2 additional peer diagnosis codes */
__be32 peer_diagnosis_v2[SMC_DECL_DIAG_COUNT_V2];
struct smc_clc_msg_trail trl; /* eye catcher "SMCD" or "SMCR" EBCDIC */
} __aligned(4);
/* determine start of the prefix area within the proposal message */ /* determine start of the prefix area within the proposal message */
static inline struct smc_clc_msg_proposal_prefix * static inline struct smc_clc_msg_proposal_prefix *
smc_clc_proposal_get_prefix(struct smc_clc_msg_proposal *pclc) smc_clc_proposal_get_prefix(struct smc_clc_msg_proposal *pclc)
...@@ -282,6 +317,17 @@ static inline bool smcd_indicated(int smc_type) ...@@ -282,6 +317,17 @@ static inline bool smcd_indicated(int smc_type)
return smc_type == SMC_TYPE_D || smc_type == SMC_TYPE_B; return smc_type == SMC_TYPE_D || smc_type == SMC_TYPE_B;
} }
/* Map the pair of "SMC-D indicated" / "SMC-R indicated" flags to the
 * matching SMC type code: both -> SMC_TYPE_B, only SMC-D -> SMC_TYPE_D,
 * only SMC-R -> SMC_TYPE_R, neither -> SMC_TYPE_N.
 */
static inline u8 smc_indicated_type(int is_smcd, int is_smcr)
{
	if (!is_smcd)
		return is_smcr ? SMC_TYPE_R : SMC_TYPE_N;

	return is_smcr ? SMC_TYPE_B : SMC_TYPE_D;
}
/* get SMC-D info from proposal message */ /* get SMC-D info from proposal message */
static inline struct smc_clc_msg_smcd * static inline struct smc_clc_msg_smcd *
smc_get_clc_msg_smcd(struct smc_clc_msg_proposal *prop) smc_get_clc_msg_smcd(struct smc_clc_msg_proposal *prop)
...@@ -334,7 +380,7 @@ int smc_clc_wait_msg(struct smc_sock *smc, void *buf, int buflen, ...@@ -334,7 +380,7 @@ int smc_clc_wait_msg(struct smc_sock *smc, void *buf, int buflen,
int smc_clc_send_decline(struct smc_sock *smc, u32 peer_diag_info, u8 version); int smc_clc_send_decline(struct smc_sock *smc, u32 peer_diag_info, u8 version);
int smc_clc_send_proposal(struct smc_sock *smc, struct smc_init_info *ini); int smc_clc_send_proposal(struct smc_sock *smc, struct smc_init_info *ini);
int smc_clc_send_confirm(struct smc_sock *smc, bool clnt_first_contact, int smc_clc_send_confirm(struct smc_sock *smc, bool clnt_first_contact,
u8 version, u8 *eid); u8 version, u8 *eid, struct smc_init_info *ini);
int smc_clc_send_accept(struct smc_sock *smc, bool srv_first_contact, int smc_clc_send_accept(struct smc_sock *smc, bool srv_first_contact,
u8 version, u8 *negotiated_eid); u8 version, u8 *negotiated_eid);
void smc_clc_init(void) __init; void smc_clc_init(void) __init;
...@@ -343,6 +389,7 @@ void smc_clc_get_hostname(u8 **host); ...@@ -343,6 +389,7 @@ void smc_clc_get_hostname(u8 **host);
bool smc_clc_match_eid(u8 *negotiated_eid, bool smc_clc_match_eid(u8 *negotiated_eid,
struct smc_clc_v2_extension *smc_v2_ext, struct smc_clc_v2_extension *smc_v2_ext,
u8 *peer_eid, u8 *local_eid); u8 *peer_eid, u8 *local_eid);
int smc_clc_ueid_count(void);
int smc_nl_dump_ueid(struct sk_buff *skb, struct netlink_callback *cb); int smc_nl_dump_ueid(struct sk_buff *skb, struct netlink_callback *cb);
int smc_nl_add_ueid(struct sk_buff *skb, struct genl_info *info); int smc_nl_add_ueid(struct sk_buff *skb, struct genl_info *info);
int smc_nl_remove_ueid(struct sk_buff *skb, struct genl_info *info); int smc_nl_remove_ueid(struct sk_buff *skb, struct genl_info *info);
......
...@@ -244,6 +244,8 @@ int smc_nl_get_sys_info(struct sk_buff *skb, struct netlink_callback *cb) ...@@ -244,6 +244,8 @@ int smc_nl_get_sys_info(struct sk_buff *skb, struct netlink_callback *cb)
goto errattr; goto errattr;
if (nla_put_u8(skb, SMC_NLA_SYS_IS_ISM_V2, smc_ism_is_v2_capable())) if (nla_put_u8(skb, SMC_NLA_SYS_IS_ISM_V2, smc_ism_is_v2_capable()))
goto errattr; goto errattr;
if (nla_put_u8(skb, SMC_NLA_SYS_IS_SMCR_V2, true))
goto errattr;
smc_clc_get_hostname(&host); smc_clc_get_hostname(&host);
if (host) { if (host) {
memcpy(hostname, host, SMC_MAX_HOSTNAME_LEN); memcpy(hostname, host, SMC_MAX_HOSTNAME_LEN);
...@@ -271,12 +273,65 @@ int smc_nl_get_sys_info(struct sk_buff *skb, struct netlink_callback *cb) ...@@ -271,12 +273,65 @@ int smc_nl_get_sys_info(struct sk_buff *skb, struct netlink_callback *cb)
return skb->len; return skb->len;
} }
/* Fill SMC_NLA_LGR_D_V2_COMMON/SMC_NLA_LGR_R_V2_COMMON nested attributes
 *
 * Emits version, peer release, peer OS, peer hostname and negotiated EID of
 * @lgr into @skb and closes the nest @v2_attrs that the caller has opened.
 * If any attribute cannot be added the nest is cancelled instead.
 *
 * Returns 0 on success, -EMSGSIZE otherwise.
 */
static int smc_nl_fill_lgr_v2_common(struct smc_link_group *lgr,
				     struct sk_buff *skb,
				     struct netlink_callback *cb,
				     struct nlattr *v2_attrs)
{
	char peer_host[SMC_MAX_HOSTNAME_LEN + 1];
	char neg_eid[SMC_MAX_EID_LEN + 1];

	/* copy the fixed-width fields and terminate them explicitly so that
	 * they can be emitted as string attributes
	 */
	memcpy(peer_host, lgr->peer_hostname, SMC_MAX_HOSTNAME_LEN);
	peer_host[SMC_MAX_HOSTNAME_LEN] = 0;
	memcpy(neg_eid, lgr->negotiated_eid, SMC_MAX_EID_LEN);
	neg_eid[SMC_MAX_EID_LEN] = 0;

	/* attributes are added in order; the first failure stops the chain */
	if (nla_put_u8(skb, SMC_NLA_LGR_V2_VER, lgr->smc_version) ||
	    nla_put_u8(skb, SMC_NLA_LGR_V2_REL, lgr->peer_smc_release) ||
	    nla_put_u8(skb, SMC_NLA_LGR_V2_OS, lgr->peer_os) ||
	    nla_put_string(skb, SMC_NLA_LGR_V2_PEER_HOST, peer_host) ||
	    nla_put_string(skb, SMC_NLA_LGR_V2_NEG_EID, neg_eid)) {
		nla_nest_cancel(skb, v2_attrs);
		return -EMSGSIZE;
	}

	nla_nest_end(skb, v2_attrs);
	return 0;
}
/* Add the SMC_NLA_LGR_R_V2 nest to @skb. It holds a single u8 attribute,
 * SMC_NLA_LGR_R_V2_DIRECT, which is set when @lgr does not use a gateway.
 *
 * Returns 0 on success, -EMSGSIZE if the nest could not be opened or the
 * attribute could not be added (the nest is cancelled in the latter case).
 */
static int smc_nl_fill_smcr_lgr_v2(struct smc_link_group *lgr,
				   struct sk_buff *skb,
				   struct netlink_callback *cb)
{
	struct nlattr *nest = nla_nest_start(skb, SMC_NLA_LGR_R_V2);

	if (!nest)
		return -EMSGSIZE;

	if (nla_put_u8(skb, SMC_NLA_LGR_R_V2_DIRECT, !lgr->uses_gateway)) {
		nla_nest_cancel(skb, nest);
		return -EMSGSIZE;
	}

	nla_nest_end(skb, nest);
	return 0;
}
static int smc_nl_fill_lgr(struct smc_link_group *lgr, static int smc_nl_fill_lgr(struct smc_link_group *lgr,
struct sk_buff *skb, struct sk_buff *skb,
struct netlink_callback *cb) struct netlink_callback *cb)
{ {
char smc_target[SMC_MAX_PNETID_LEN + 1]; char smc_target[SMC_MAX_PNETID_LEN + 1];
struct nlattr *attrs; struct nlattr *attrs, *v2_attrs;
attrs = nla_nest_start(skb, SMC_GEN_LGR_SMCR); attrs = nla_nest_start(skb, SMC_GEN_LGR_SMCR);
if (!attrs) if (!attrs)
...@@ -296,6 +351,15 @@ static int smc_nl_fill_lgr(struct smc_link_group *lgr, ...@@ -296,6 +351,15 @@ static int smc_nl_fill_lgr(struct smc_link_group *lgr,
smc_target[SMC_MAX_PNETID_LEN] = 0; smc_target[SMC_MAX_PNETID_LEN] = 0;
if (nla_put_string(skb, SMC_NLA_LGR_R_PNETID, smc_target)) if (nla_put_string(skb, SMC_NLA_LGR_R_PNETID, smc_target))
goto errattr; goto errattr;
if (lgr->smc_version > SMC_V1) {
v2_attrs = nla_nest_start(skb, SMC_NLA_LGR_R_V2_COMMON);
if (!v2_attrs)
goto errattr;
if (smc_nl_fill_lgr_v2_common(lgr, skb, cb, v2_attrs))
goto errattr;
if (smc_nl_fill_smcr_lgr_v2(lgr, skb, cb))
goto errattr;
}
nla_nest_end(skb, attrs); nla_nest_end(skb, attrs);
return 0; return 0;
...@@ -428,10 +492,7 @@ static int smc_nl_fill_smcd_lgr(struct smc_link_group *lgr, ...@@ -428,10 +492,7 @@ static int smc_nl_fill_smcd_lgr(struct smc_link_group *lgr,
struct sk_buff *skb, struct sk_buff *skb,
struct netlink_callback *cb) struct netlink_callback *cb)
{ {
char smc_host[SMC_MAX_HOSTNAME_LEN + 1];
char smc_pnet[SMC_MAX_PNETID_LEN + 1]; char smc_pnet[SMC_MAX_PNETID_LEN + 1];
char smc_eid[SMC_MAX_EID_LEN + 1];
struct nlattr *v2_attrs;
struct nlattr *attrs; struct nlattr *attrs;
void *nlh; void *nlh;
...@@ -463,32 +524,19 @@ static int smc_nl_fill_smcd_lgr(struct smc_link_group *lgr, ...@@ -463,32 +524,19 @@ static int smc_nl_fill_smcd_lgr(struct smc_link_group *lgr,
smc_pnet[SMC_MAX_PNETID_LEN] = 0; smc_pnet[SMC_MAX_PNETID_LEN] = 0;
if (nla_put_string(skb, SMC_NLA_LGR_D_PNETID, smc_pnet)) if (nla_put_string(skb, SMC_NLA_LGR_D_PNETID, smc_pnet))
goto errattr; goto errattr;
if (lgr->smc_version > SMC_V1) {
struct nlattr *v2_attrs;
v2_attrs = nla_nest_start(skb, SMC_NLA_LGR_V2); v2_attrs = nla_nest_start(skb, SMC_NLA_LGR_D_V2_COMMON);
if (!v2_attrs) if (!v2_attrs)
goto errattr; goto errattr;
if (nla_put_u8(skb, SMC_NLA_LGR_V2_VER, lgr->smc_version)) if (smc_nl_fill_lgr_v2_common(lgr, skb, cb, v2_attrs))
goto errv2attr; goto errattr;
if (nla_put_u8(skb, SMC_NLA_LGR_V2_REL, lgr->peer_smc_release)) }
goto errv2attr;
if (nla_put_u8(skb, SMC_NLA_LGR_V2_OS, lgr->peer_os))
goto errv2attr;
memcpy(smc_host, lgr->peer_hostname, SMC_MAX_HOSTNAME_LEN);
smc_host[SMC_MAX_HOSTNAME_LEN] = 0;
if (nla_put_string(skb, SMC_NLA_LGR_V2_PEER_HOST, smc_host))
goto errv2attr;
memcpy(smc_eid, lgr->negotiated_eid, SMC_MAX_EID_LEN);
smc_eid[SMC_MAX_EID_LEN] = 0;
if (nla_put_string(skb, SMC_NLA_LGR_V2_NEG_EID, smc_eid))
goto errv2attr;
nla_nest_end(skb, v2_attrs);
nla_nest_end(skb, attrs); nla_nest_end(skb, attrs);
genlmsg_end(skb, nlh); genlmsg_end(skb, nlh);
return 0; return 0;
errv2attr:
nla_nest_cancel(skb, v2_attrs);
errattr: errattr:
nla_nest_cancel(skb, attrs); nla_nest_cancel(skb, attrs);
errout: errout:
...@@ -684,24 +732,30 @@ static void smcr_copy_dev_info_to_link(struct smc_link *link) ...@@ -684,24 +732,30 @@ static void smcr_copy_dev_info_to_link(struct smc_link *link)
int smcr_link_init(struct smc_link_group *lgr, struct smc_link *lnk, int smcr_link_init(struct smc_link_group *lgr, struct smc_link *lnk,
u8 link_idx, struct smc_init_info *ini) u8 link_idx, struct smc_init_info *ini)
{ {
struct smc_ib_device *smcibdev;
u8 rndvec[3]; u8 rndvec[3];
int rc; int rc;
get_device(&ini->ib_dev->ibdev->dev); if (lgr->smc_version == SMC_V2) {
atomic_inc(&ini->ib_dev->lnk_cnt); lnk->smcibdev = ini->smcrv2.ib_dev_v2;
lnk->ibport = ini->smcrv2.ib_port_v2;
} else {
lnk->smcibdev = ini->ib_dev;
lnk->ibport = ini->ib_port;
}
get_device(&lnk->smcibdev->ibdev->dev);
atomic_inc(&lnk->smcibdev->lnk_cnt);
lnk->path_mtu = lnk->smcibdev->pattr[lnk->ibport - 1].active_mtu;
lnk->link_id = smcr_next_link_id(lgr); lnk->link_id = smcr_next_link_id(lgr);
lnk->lgr = lgr; lnk->lgr = lgr;
lnk->link_idx = link_idx; lnk->link_idx = link_idx;
lnk->smcibdev = ini->ib_dev;
lnk->ibport = ini->ib_port;
smc_ibdev_cnt_inc(lnk); smc_ibdev_cnt_inc(lnk);
smcr_copy_dev_info_to_link(lnk); smcr_copy_dev_info_to_link(lnk);
lnk->path_mtu = ini->ib_dev->pattr[ini->ib_port - 1].active_mtu;
atomic_set(&lnk->conn_cnt, 0); atomic_set(&lnk->conn_cnt, 0);
smc_llc_link_set_uid(lnk); smc_llc_link_set_uid(lnk);
INIT_WORK(&lnk->link_down_wrk, smc_link_down_work); INIT_WORK(&lnk->link_down_wrk, smc_link_down_work);
if (!ini->ib_dev->initialized) { if (!lnk->smcibdev->initialized) {
rc = (int)smc_ib_setup_per_ibdev(ini->ib_dev); rc = (int)smc_ib_setup_per_ibdev(lnk->smcibdev);
if (rc) if (rc)
goto out; goto out;
} }
...@@ -709,7 +763,9 @@ int smcr_link_init(struct smc_link_group *lgr, struct smc_link *lnk, ...@@ -709,7 +763,9 @@ int smcr_link_init(struct smc_link_group *lgr, struct smc_link *lnk,
lnk->psn_initial = rndvec[0] + (rndvec[1] << 8) + lnk->psn_initial = rndvec[0] + (rndvec[1] << 8) +
(rndvec[2] << 16); (rndvec[2] << 16);
rc = smc_ib_determine_gid(lnk->smcibdev, lnk->ibport, rc = smc_ib_determine_gid(lnk->smcibdev, lnk->ibport,
ini->vlan_id, lnk->gid, &lnk->sgid_index); ini->vlan_id, lnk->gid, &lnk->sgid_index,
lgr->smc_version == SMC_V2 ?
&ini->smcrv2 : NULL);
if (rc) if (rc)
goto out; goto out;
rc = smc_llc_link_init(lnk); rc = smc_llc_link_init(lnk);
...@@ -740,11 +796,12 @@ int smcr_link_init(struct smc_link_group *lgr, struct smc_link *lnk, ...@@ -740,11 +796,12 @@ int smcr_link_init(struct smc_link_group *lgr, struct smc_link *lnk,
smc_llc_link_clear(lnk, false); smc_llc_link_clear(lnk, false);
out: out:
smc_ibdev_cnt_dec(lnk); smc_ibdev_cnt_dec(lnk);
put_device(&ini->ib_dev->ibdev->dev); put_device(&lnk->smcibdev->ibdev->dev);
smcibdev = lnk->smcibdev;
memset(lnk, 0, sizeof(struct smc_link)); memset(lnk, 0, sizeof(struct smc_link));
lnk->state = SMC_LNK_UNUSED; lnk->state = SMC_LNK_UNUSED;
if (!atomic_dec_return(&ini->ib_dev->lnk_cnt)) if (!atomic_dec_return(&smcibdev->lnk_cnt))
wake_up(&ini->ib_dev->lnks_deleted); wake_up(&smcibdev->lnks_deleted);
return rc; return rc;
} }
...@@ -808,18 +865,37 @@ static int smc_lgr_create(struct smc_sock *smc, struct smc_init_info *ini) ...@@ -808,18 +865,37 @@ static int smc_lgr_create(struct smc_sock *smc, struct smc_init_info *ini)
atomic_inc(&ini->ism_dev[ini->ism_selected]->lgr_cnt); atomic_inc(&ini->ism_dev[ini->ism_selected]->lgr_cnt);
} else { } else {
/* SMC-R specific settings */ /* SMC-R specific settings */
struct smc_ib_device *ibdev;
int ibport;
lgr->role = smc->listen_smc ? SMC_SERV : SMC_CLNT; lgr->role = smc->listen_smc ? SMC_SERV : SMC_CLNT;
memcpy(lgr->peer_systemid, ini->ib_lcl->id_for_peer, lgr->smc_version = ini->smcr_version;
memcpy(lgr->peer_systemid, ini->peer_systemid,
SMC_SYSTEMID_LEN); SMC_SYSTEMID_LEN);
memcpy(lgr->pnet_id, ini->ib_dev->pnetid[ini->ib_port - 1], if (lgr->smc_version == SMC_V2) {
ibdev = ini->smcrv2.ib_dev_v2;
ibport = ini->smcrv2.ib_port_v2;
lgr->saddr = ini->smcrv2.saddr;
lgr->uses_gateway = ini->smcrv2.uses_gateway;
memcpy(lgr->nexthop_mac, ini->smcrv2.nexthop_mac,
ETH_ALEN);
} else {
ibdev = ini->ib_dev;
ibport = ini->ib_port;
}
memcpy(lgr->pnet_id, ibdev->pnetid[ibport - 1],
SMC_MAX_PNETID_LEN); SMC_MAX_PNETID_LEN);
if (smc_wr_alloc_lgr_mem(lgr))
goto free_wq;
smc_llc_lgr_init(lgr, smc); smc_llc_lgr_init(lgr, smc);
link_idx = SMC_SINGLE_LINK; link_idx = SMC_SINGLE_LINK;
lnk = &lgr->lnk[link_idx]; lnk = &lgr->lnk[link_idx];
rc = smcr_link_init(lgr, lnk, link_idx, ini); rc = smcr_link_init(lgr, lnk, link_idx, ini);
if (rc) if (rc) {
smc_wr_free_lgr_mem(lgr);
goto free_wq; goto free_wq;
}
lgr_list = &smc_lgr_list.list; lgr_list = &smc_lgr_list.list;
lgr_lock = &smc_lgr_list.lock; lgr_lock = &smc_lgr_list.lock;
atomic_inc(&lgr_cnt); atomic_inc(&lgr_cnt);
...@@ -1226,6 +1302,7 @@ static void smc_lgr_free(struct smc_link_group *lgr) ...@@ -1226,6 +1302,7 @@ static void smc_lgr_free(struct smc_link_group *lgr)
if (!atomic_dec_return(&lgr->smcd->lgr_cnt)) if (!atomic_dec_return(&lgr->smcd->lgr_cnt))
wake_up(&lgr->smcd->lgrs_deleted); wake_up(&lgr->smcd->lgrs_deleted);
} else { } else {
smc_wr_free_lgr_mem(lgr);
if (!atomic_dec_return(&lgr_cnt)) if (!atomic_dec_return(&lgr_cnt))
wake_up(&lgrs_deleted); wake_up(&lgrs_deleted);
} }
...@@ -1636,13 +1713,15 @@ int smc_vlan_by_tcpsk(struct socket *clcsock, struct smc_init_info *ini) ...@@ -1636,13 +1713,15 @@ int smc_vlan_by_tcpsk(struct socket *clcsock, struct smc_init_info *ini)
return rc; return rc;
} }
static bool smcr_lgr_match(struct smc_link_group *lgr, static bool smcr_lgr_match(struct smc_link_group *lgr, u8 smcr_version,
struct smc_clc_msg_local *lcl, u8 peer_systemid[],
u8 peer_gid[],
u8 peer_mac_v1[],
enum smc_lgr_role role, u32 clcqpn) enum smc_lgr_role role, u32 clcqpn)
{ {
int i; int i;
if (memcmp(lgr->peer_systemid, lcl->id_for_peer, SMC_SYSTEMID_LEN) || if (memcmp(lgr->peer_systemid, peer_systemid, SMC_SYSTEMID_LEN) ||
lgr->role != role) lgr->role != role)
return false; return false;
...@@ -1650,8 +1729,9 @@ static bool smcr_lgr_match(struct smc_link_group *lgr, ...@@ -1650,8 +1729,9 @@ static bool smcr_lgr_match(struct smc_link_group *lgr,
if (!smc_link_active(&lgr->lnk[i])) if (!smc_link_active(&lgr->lnk[i]))
continue; continue;
if ((lgr->role == SMC_SERV || lgr->lnk[i].peer_qpn == clcqpn) && if ((lgr->role == SMC_SERV || lgr->lnk[i].peer_qpn == clcqpn) &&
!memcmp(lgr->lnk[i].peer_gid, &lcl->gid, SMC_GID_SIZE) && !memcmp(lgr->lnk[i].peer_gid, peer_gid, SMC_GID_SIZE) &&
!memcmp(lgr->lnk[i].peer_mac, lcl->mac, sizeof(lcl->mac))) (smcr_version == SMC_V2 ||
!memcmp(lgr->lnk[i].peer_mac, peer_mac_v1, ETH_ALEN)))
return true; return true;
} }
return false; return false;
...@@ -1690,7 +1770,10 @@ int smc_conn_create(struct smc_sock *smc, struct smc_init_info *ini) ...@@ -1690,7 +1770,10 @@ int smc_conn_create(struct smc_sock *smc, struct smc_init_info *ini)
if ((ini->is_smcd ? if ((ini->is_smcd ?
smcd_lgr_match(lgr, ini->ism_dev[ini->ism_selected], smcd_lgr_match(lgr, ini->ism_dev[ini->ism_selected],
ini->ism_peer_gid[ini->ism_selected]) : ini->ism_peer_gid[ini->ism_selected]) :
smcr_lgr_match(lgr, ini->ib_lcl, role, ini->ib_clcqpn)) && smcr_lgr_match(lgr, ini->smcr_version,
ini->peer_systemid,
ini->peer_gid, ini->peer_mac, role,
ini->ib_clcqpn)) &&
!lgr->sync_err && !lgr->sync_err &&
(ini->smcd_version == SMC_V2 || (ini->smcd_version == SMC_V2 ||
lgr->vlan_id == ini->vlan_id) && lgr->vlan_id == ini->vlan_id) &&
......
...@@ -42,11 +42,16 @@ enum smc_link_state { /* possible states of a link */ ...@@ -42,11 +42,16 @@ enum smc_link_state { /* possible states of a link */
}; };
#define SMC_WR_BUF_SIZE 48 /* size of work request buffer */ #define SMC_WR_BUF_SIZE 48 /* size of work request buffer */
#define SMC_WR_BUF_V2_SIZE 8192 /* size of v2 work request buffer */
struct smc_wr_buf { struct smc_wr_buf {
u8 raw[SMC_WR_BUF_SIZE]; u8 raw[SMC_WR_BUF_SIZE];
}; };
/* storage for one v2 work request buffer: SMC_WR_BUF_V2_SIZE raw bytes */
struct smc_wr_v2_buf {
u8 raw[SMC_WR_BUF_V2_SIZE];
};
#define SMC_WR_REG_MR_WAIT_TIME (5 * HZ)/* wait time for ib_wr_reg_mr result */ #define SMC_WR_REG_MR_WAIT_TIME (5 * HZ)/* wait time for ib_wr_reg_mr result */
enum smc_wr_reg_state { enum smc_wr_reg_state {
...@@ -92,7 +97,11 @@ struct smc_link { ...@@ -92,7 +97,11 @@ struct smc_link {
struct smc_wr_tx_pend *wr_tx_pends; /* WR send waiting for CQE */ struct smc_wr_tx_pend *wr_tx_pends; /* WR send waiting for CQE */
struct completion *wr_tx_compl; /* WR send CQE completion */ struct completion *wr_tx_compl; /* WR send CQE completion */
/* above four vectors have wr_tx_cnt elements and use the same index */ /* above four vectors have wr_tx_cnt elements and use the same index */
struct ib_send_wr *wr_tx_v2_ib; /* WR send v2 meta data */
struct ib_sge *wr_tx_v2_sge; /* WR send v2 gather meta data*/
struct smc_wr_tx_pend *wr_tx_v2_pend; /* WR send v2 waiting for CQE */
dma_addr_t wr_tx_dma_addr; /* DMA address of wr_tx_bufs */ dma_addr_t wr_tx_dma_addr; /* DMA address of wr_tx_bufs */
dma_addr_t wr_tx_v2_dma_addr; /* DMA address of v2 tx buf*/
atomic_long_t wr_tx_id; /* seq # of last sent WR */ atomic_long_t wr_tx_id; /* seq # of last sent WR */
unsigned long *wr_tx_mask; /* bit mask of used indexes */ unsigned long *wr_tx_mask; /* bit mask of used indexes */
u32 wr_tx_cnt; /* number of WR send buffers */ u32 wr_tx_cnt; /* number of WR send buffers */
...@@ -104,6 +113,7 @@ struct smc_link { ...@@ -104,6 +113,7 @@ struct smc_link {
struct ib_sge *wr_rx_sges; /* WR recv scatter meta data */ struct ib_sge *wr_rx_sges; /* WR recv scatter meta data */
/* above three vectors have wr_rx_cnt elements and use the same index */ /* above three vectors have wr_rx_cnt elements and use the same index */
dma_addr_t wr_rx_dma_addr; /* DMA address of wr_rx_bufs */ dma_addr_t wr_rx_dma_addr; /* DMA address of wr_rx_bufs */
dma_addr_t wr_rx_v2_dma_addr; /* DMA address of v2 rx buf*/
u64 wr_rx_id; /* seq # of last recv WR */ u64 wr_rx_id; /* seq # of last recv WR */
u32 wr_rx_cnt; /* number of WR recv buffers */ u32 wr_rx_cnt; /* number of WR recv buffers */
unsigned long wr_rx_tstamp; /* jiffies when last buf rx */ unsigned long wr_rx_tstamp; /* jiffies when last buf rx */
...@@ -208,6 +218,7 @@ enum smc_llc_flowtype { ...@@ -208,6 +218,7 @@ enum smc_llc_flowtype {
SMC_LLC_FLOW_NONE = 0, SMC_LLC_FLOW_NONE = 0,
SMC_LLC_FLOW_ADD_LINK = 2, SMC_LLC_FLOW_ADD_LINK = 2,
SMC_LLC_FLOW_DEL_LINK = 4, SMC_LLC_FLOW_DEL_LINK = 4,
SMC_LLC_FLOW_REQ_ADD_LINK = 5,
SMC_LLC_FLOW_RKEY = 6, SMC_LLC_FLOW_RKEY = 6,
}; };
...@@ -250,6 +261,10 @@ struct smc_link_group { ...@@ -250,6 +261,10 @@ struct smc_link_group {
/* client or server */ /* client or server */
struct smc_link lnk[SMC_LINKS_PER_LGR_MAX]; struct smc_link lnk[SMC_LINKS_PER_LGR_MAX];
/* smc link */ /* smc link */
struct smc_wr_v2_buf *wr_rx_buf_v2;
/* WR v2 recv payload buffer */
struct smc_wr_v2_buf *wr_tx_buf_v2;
/* WR v2 send payload buffer */
char peer_systemid[SMC_SYSTEMID_LEN]; char peer_systemid[SMC_SYSTEMID_LEN];
/* unique system_id of peer */ /* unique system_id of peer */
struct smc_rtoken rtokens[SMC_RMBS_PER_LGR_MAX] struct smc_rtoken rtokens[SMC_RMBS_PER_LGR_MAX]
...@@ -288,6 +303,9 @@ struct smc_link_group { ...@@ -288,6 +303,9 @@ struct smc_link_group {
/* link keep alive time */ /* link keep alive time */
u32 llc_termination_rsn; u32 llc_termination_rsn;
/* rsn code for termination */ /* rsn code for termination */
u8 nexthop_mac[ETH_ALEN];
u8 uses_gateway;
__be32 saddr;
}; };
struct { /* SMC-D */ struct { /* SMC-D */
u64 peer_gid; u64 peer_gid;
...@@ -302,6 +320,31 @@ struct smc_link_group { ...@@ -302,6 +320,31 @@ struct smc_link_group {
struct smc_clc_msg_local; struct smc_clc_msg_local;
#define GID_LIST_SIZE 2 /* capacity of smc_gidlist.list[] */
/* fixed-capacity array of GIDs, each SMC_GID_SIZE bytes;
 * len is presumably the number of valid entries in list[] - TODO confirm
 * against the code that fills the list
 */
struct smc_gidlist {
u8 len;
u8 list[GID_LIST_SIZE][SMC_GID_SIZE];
};
/* SMC-Rv2 specific part of struct smc_init_info; handed to
 * smc_ib_determine_gid() when a v2 link is set up
 */
struct smc_init_info_smcrv2 {
/* Input fields */
__be32 saddr; /* matched against the interface addresses of the ndev */
struct sock *clc_sk;
__be32 daddr; /* when non-zero, a route from saddr to daddr is looked up */
/* Output fields when saddr is set */
struct smc_ib_device *ib_dev_v2;
u8 ib_port_v2;
u8 ib_gid_v2[SMC_GID_SIZE];
/* Additional output fields when clc_sk and daddr are set as well */
u8 uses_gateway; /* filled by smc_ib_find_route() */
u8 nexthop_mac[ETH_ALEN]; /* filled by smc_ib_find_route() */
struct smc_gidlist gidlist;
};
struct smc_init_info { struct smc_init_info {
u8 is_smcd; u8 is_smcd;
u8 smc_type_v1; u8 smc_type_v1;
...@@ -312,11 +355,16 @@ struct smc_init_info { ...@@ -312,11 +355,16 @@ struct smc_init_info {
u32 rc; u32 rc;
u8 negotiated_eid[SMC_MAX_EID_LEN]; u8 negotiated_eid[SMC_MAX_EID_LEN];
/* SMC-R */ /* SMC-R */
struct smc_clc_msg_local *ib_lcl; u8 smcr_version;
u8 check_smcrv2;
u8 peer_gid[SMC_GID_SIZE];
u8 peer_mac[ETH_ALEN];
u8 peer_systemid[SMC_SYSTEMID_LEN];
struct smc_ib_device *ib_dev; struct smc_ib_device *ib_dev;
u8 ib_gid[SMC_GID_SIZE]; u8 ib_gid[SMC_GID_SIZE];
u8 ib_port; u8 ib_port;
u32 ib_clcqpn; u32 ib_clcqpn;
struct smc_init_info_smcrv2 smcrv2;
/* SMC-D */ /* SMC-D */
u64 ism_peer_gid[SMC_MAX_ISM_DEVS + 1]; u64 ism_peer_gid[SMC_MAX_ISM_DEVS + 1];
struct smcd_dev *ism_dev[SMC_MAX_ISM_DEVS + 1]; struct smcd_dev *ism_dev[SMC_MAX_ISM_DEVS + 1];
......
...@@ -17,6 +17,7 @@ ...@@ -17,6 +17,7 @@
#include <linux/scatterlist.h> #include <linux/scatterlist.h>
#include <linux/wait.h> #include <linux/wait.h>
#include <linux/mutex.h> #include <linux/mutex.h>
#include <linux/inetdevice.h>
#include <rdma/ib_verbs.h> #include <rdma/ib_verbs.h>
#include <rdma/ib_cache.h> #include <rdma/ib_cache.h>
...@@ -62,14 +63,21 @@ static int smc_ib_modify_qp_rtr(struct smc_link *lnk) ...@@ -62,14 +63,21 @@ static int smc_ib_modify_qp_rtr(struct smc_link *lnk)
IB_QP_STATE | IB_QP_AV | IB_QP_PATH_MTU | IB_QP_DEST_QPN | IB_QP_STATE | IB_QP_AV | IB_QP_PATH_MTU | IB_QP_DEST_QPN |
IB_QP_RQ_PSN | IB_QP_MAX_DEST_RD_ATOMIC | IB_QP_MIN_RNR_TIMER; IB_QP_RQ_PSN | IB_QP_MAX_DEST_RD_ATOMIC | IB_QP_MIN_RNR_TIMER;
struct ib_qp_attr qp_attr; struct ib_qp_attr qp_attr;
u8 hop_lim = 1;
memset(&qp_attr, 0, sizeof(qp_attr)); memset(&qp_attr, 0, sizeof(qp_attr));
qp_attr.qp_state = IB_QPS_RTR; qp_attr.qp_state = IB_QPS_RTR;
qp_attr.path_mtu = min(lnk->path_mtu, lnk->peer_mtu); qp_attr.path_mtu = min(lnk->path_mtu, lnk->peer_mtu);
qp_attr.ah_attr.type = RDMA_AH_ATTR_TYPE_ROCE; qp_attr.ah_attr.type = RDMA_AH_ATTR_TYPE_ROCE;
rdma_ah_set_port_num(&qp_attr.ah_attr, lnk->ibport); rdma_ah_set_port_num(&qp_attr.ah_attr, lnk->ibport);
rdma_ah_set_grh(&qp_attr.ah_attr, NULL, 0, lnk->sgid_index, 1, 0); if (lnk->lgr->smc_version == SMC_V2 && lnk->lgr->uses_gateway)
hop_lim = IPV6_DEFAULT_HOPLIMIT;
rdma_ah_set_grh(&qp_attr.ah_attr, NULL, 0, lnk->sgid_index, hop_lim, 0);
rdma_ah_set_dgid_raw(&qp_attr.ah_attr, lnk->peer_gid); rdma_ah_set_dgid_raw(&qp_attr.ah_attr, lnk->peer_gid);
if (lnk->lgr->smc_version == SMC_V2 && lnk->lgr->uses_gateway)
memcpy(&qp_attr.ah_attr.roce.dmac, lnk->lgr->nexthop_mac,
sizeof(lnk->lgr->nexthop_mac));
else
memcpy(&qp_attr.ah_attr.roce.dmac, lnk->peer_mac, memcpy(&qp_attr.ah_attr.roce.dmac, lnk->peer_mac,
sizeof(lnk->peer_mac)); sizeof(lnk->peer_mac));
qp_attr.dest_qp_num = lnk->peer_qpn; qp_attr.dest_qp_num = lnk->peer_qpn;
...@@ -183,9 +191,81 @@ bool smc_ib_port_active(struct smc_ib_device *smcibdev, u8 ibport) ...@@ -183,9 +191,81 @@ bool smc_ib_port_active(struct smc_ib_device *smcibdev, u8 ibport)
return smcibdev->pattr[ibport - 1].state == IB_PORT_ACTIVE; return smcibdev->pattr[ibport - 1].state == IB_PORT_ACTIVE;
} }
/* Look up the IPv4 route from saddr to daddr in init_net and report the
 * next hop that has to be addressed on the wire.
 *
 * @saddr:        local IPv4 address
 * @daddr:        peer IPv4 address; INADDR_NONE is rejected
 * @nexthop_mac:  out, ETH_ALEN bytes: hardware address of the next hop
 * @uses_gateway: out: rt_uses_gateway of the route that was found
 *
 * Returns 0 when a route and a neighbour entry were found and both output
 * parameters were filled, -ENOENT otherwise (outputs untouched).
 */
int smc_ib_find_route(__be32 saddr, __be32 daddr,
		      u8 nexthop_mac[], u8 *uses_gateway)
{
	struct neighbour *neigh;
	struct rtable *rt;
	struct flowi4 fl4 = {
		.saddr = saddr,
		.daddr = daddr
	};
	int rc = -ENOENT;

	if (daddr == cpu_to_be32(INADDR_NONE))
		return rc;
	rt = ip_route_output_flow(&init_net, &fl4, NULL);
	if (IS_ERR(rt))
		return rc;
	/* only an IPv4 gateway can be handled here */
	if (rt->rt_uses_gateway && rt->rt_gw_family != AF_INET)
		goto out_rt;
	/* dst_neigh_lookup() maps an error pointer from the lookup to NULL */
	neigh = dst_neigh_lookup(&rt->dst, &fl4.daddr);
	if (!neigh)
		goto out_rt;
	memcpy(nexthop_mac, neigh->ha, ETH_ALEN);
	*uses_gateway = rt->rt_uses_gateway;
	/* the lookup took a reference on the neighbour, drop it again */
	neigh_release(neigh);
	rc = 0;
out_rt:
	/* drop the route reference obtained from ip_route_output_flow() */
	ip_rt_put(rt);
	return rc;
}
/* Decide whether one GID table entry is usable and, if so, hand it out.
 *
 * Without @smcrv2 the entry must be of type IB_GID_TYPE_ROCE.
 * With @smcrv2 the entry must be of type IB_GID_TYPE_ROCE_UDP_ENCAP, must
 * carry an IPv4 address, smcrv2->saddr must match one of the IPv4 addresses
 * configured on @ndev, and - when smcrv2->daddr is set - a route to daddr
 * must exist (nexthop_mac and uses_gateway in @smcrv2 are filled then).
 *
 * On success the GID is copied to @gid and its table index to @sgid_index
 * (both optional, may be NULL) and 0 is returned; otherwise -ENODEV.
 * The _rcu suffix and the RCU accessors used suggest the caller holds the
 * RCU read lock.
 */
static int smc_ib_determine_gid_rcu(const struct net_device *ndev,
				    const struct ib_gid_attr *attr,
				    u8 gid[], u8 *sgid_index,
				    struct smc_init_info_smcrv2 *smcrv2)
{
	const struct in_ifaddr *ifa;
	struct in_device *in_dev;
	bool saddr_on_ndev = false;

	if (!smcrv2) {
		/* SMC-Rv1: plain RoCE GID is all that is required */
		if (attr->gid_type != IB_GID_TYPE_ROCE)
			return -ENODEV;
		goto found;
	}

	/* SMC-Rv2: need a RoCEv2 (UDP encapsulated) GID holding an IPv4 addr */
	if (attr->gid_type != IB_GID_TYPE_ROCE_UDP_ENCAP ||
	    smc_ib_gid_to_ipv4((u8 *)&attr->gid) == cpu_to_be32(INADDR_NONE))
		return -ENODEV;

	in_dev = __in_dev_get_rcu(ndev);
	if (!in_dev)
		return -ENODEV;
	in_dev_for_each_ifa_rcu(ifa, in_dev) {
		if (inet_ifa_match(smcrv2->saddr, ifa)) {
			saddr_on_ndev = true;
			break;
		}
	}
	if (!saddr_on_ndev)
		return -ENODEV;

	/* the route lookup is only done when a destination is known */
	if (smcrv2->daddr &&
	    smc_ib_find_route(smcrv2->saddr, smcrv2->daddr,
			      smcrv2->nexthop_mac, &smcrv2->uses_gateway))
		return -ENODEV;

found:
	if (gid)
		memcpy(gid, &attr->gid, SMC_GID_SIZE);
	if (sgid_index)
		*sgid_index = attr->index;
	return 0;
}
/* determine the gid for an ib-device port and vlan id */ /* determine the gid for an ib-device port and vlan id */
int smc_ib_determine_gid(struct smc_ib_device *smcibdev, u8 ibport, int smc_ib_determine_gid(struct smc_ib_device *smcibdev, u8 ibport,
unsigned short vlan_id, u8 gid[], u8 *sgid_index) unsigned short vlan_id, u8 gid[], u8 *sgid_index,
struct smc_init_info_smcrv2 *smcrv2)
{ {
const struct ib_gid_attr *attr; const struct ib_gid_attr *attr;
const struct net_device *ndev; const struct net_device *ndev;
...@@ -201,22 +281,72 @@ int smc_ib_determine_gid(struct smc_ib_device *smcibdev, u8 ibport, ...@@ -201,22 +281,72 @@ int smc_ib_determine_gid(struct smc_ib_device *smcibdev, u8 ibport,
if (!IS_ERR(ndev) && if (!IS_ERR(ndev) &&
((!vlan_id && !is_vlan_dev(ndev)) || ((!vlan_id && !is_vlan_dev(ndev)) ||
(vlan_id && is_vlan_dev(ndev) && (vlan_id && is_vlan_dev(ndev) &&
vlan_dev_vlan_id(ndev) == vlan_id)) && vlan_dev_vlan_id(ndev) == vlan_id))) {
attr->gid_type == IB_GID_TYPE_ROCE) { if (!smc_ib_determine_gid_rcu(ndev, attr, gid,
sgid_index, smcrv2)) {
rcu_read_unlock(); rcu_read_unlock();
if (gid)
memcpy(gid, &attr->gid, SMC_GID_SIZE);
if (sgid_index)
*sgid_index = attr->index;
rdma_put_gid_attr(attr); rdma_put_gid_attr(attr);
return 0; return 0;
} }
}
rcu_read_unlock(); rcu_read_unlock();
rdma_put_gid_attr(attr); rdma_put_gid_attr(attr);
} }
return -ENODEV; return -ENODEV;
} }
/* Check whether @gid is still present in the GID table of @ibport on
 * @smcibdev.
 * For SMC-Rv1 (@smcrv2 false) only entries of type IB_GID_TYPE_ROCE are
 * considered; for SMC-Rv2 only IB_GID_TYPE_ROCE_UDP_ENCAP entries that are
 * not link-local IPv6 addresses.
 * Returns true as soon as a matching entry is found, false otherwise.
 */
static bool smc_ib_check_link_gid(u8 gid[SMC_GID_SIZE], bool smcrv2,
				  struct smc_ib_device *smcibdev, u8 ibport)
{
	const struct ib_gid_attr *attr;
	bool found = false;
	bool type_ok;
	int idx;

	for (idx = 0; idx < smcibdev->pattr[ibport - 1].gid_tbl_len; idx++) {
		attr = rdma_get_gid_attr(smcibdev->ibdev, ibport, idx);
		if (IS_ERR(attr))
			continue;

		rcu_read_lock();
		if (smcrv2)
			type_ok = attr->gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP &&
				  !(ipv6_addr_type((const struct in6_addr *)&attr->gid)
				    & IPV6_ADDR_LINKLOCAL);
		else
			type_ok = attr->gid_type == IB_GID_TYPE_ROCE;
		if (type_ok && !memcmp(gid, &attr->gid, SMC_GID_SIZE))
			found = true;
		rcu_read_unlock();
		rdma_put_gid_attr(attr);

		if (found)
			break;
	}
	return found;
}
/* Walk all SMC-R link groups that share the pnetid of @ibport on @smcibdev
 * and verify for every used link on this device that its GID is still
 * defined; smcr_port_err() is called for each link whose GID has vanished.
 * Runs under smc_lgr_list.lock.
 */
static void smc_ib_gid_check(struct smc_ib_device *smcibdev, u8 ibport)
{
	struct smc_link_group *lgr;
	struct smc_link *lnk;
	int idx;

	spin_lock_bh(&smc_lgr_list.lock);
	list_for_each_entry(lgr, &smc_lgr_list.list, list) {
		/* skip link groups on another pnetid or with an empty list head */
		if (strncmp(smcibdev->pnetid[ibport - 1], lgr->pnet_id,
			    SMC_MAX_PNETID_LEN) ||
		    list_empty(&lgr->list))
			continue;

		for (idx = 0; idx < SMC_LINKS_PER_LGR_MAX; idx++) {
			lnk = &lgr->lnk[idx];
			if (lnk->state != SMC_LNK_UNUSED &&
			    lnk->smcibdev == smcibdev &&
			    !smc_ib_check_link_gid(lnk->gid,
						   lgr->smc_version == SMC_V2,
						   smcibdev, ibport))
				smcr_port_err(smcibdev, ibport);
		}
	}
	spin_unlock_bh(&smc_lgr_list.lock);
}
static int smc_ib_remember_port_attr(struct smc_ib_device *smcibdev, u8 ibport) static int smc_ib_remember_port_attr(struct smc_ib_device *smcibdev, u8 ibport)
{ {
int rc; int rc;
...@@ -255,6 +385,7 @@ static void smc_ib_port_event_work(struct work_struct *work) ...@@ -255,6 +385,7 @@ static void smc_ib_port_event_work(struct work_struct *work)
} else { } else {
clear_bit(port_idx, smcibdev->ports_going_away); clear_bit(port_idx, smcibdev->ports_going_away);
smcr_port_add(smcibdev, port_idx + 1); smcr_port_add(smcibdev, port_idx + 1);
smc_ib_gid_check(smcibdev, port_idx + 1);
} }
} }
} }
...@@ -523,6 +654,7 @@ void smc_ib_destroy_queue_pair(struct smc_link *lnk) ...@@ -523,6 +654,7 @@ void smc_ib_destroy_queue_pair(struct smc_link *lnk)
/* create a queue pair within the protection domain for a link */ /* create a queue pair within the protection domain for a link */
int smc_ib_create_queue_pair(struct smc_link *lnk) int smc_ib_create_queue_pair(struct smc_link *lnk)
{ {
int sges_per_buf = (lnk->lgr->smc_version == SMC_V2) ? 2 : 1;
struct ib_qp_init_attr qp_attr = { struct ib_qp_init_attr qp_attr = {
.event_handler = smc_ib_qp_event_handler, .event_handler = smc_ib_qp_event_handler,
.qp_context = lnk, .qp_context = lnk,
...@@ -536,7 +668,7 @@ int smc_ib_create_queue_pair(struct smc_link *lnk) ...@@ -536,7 +668,7 @@ int smc_ib_create_queue_pair(struct smc_link *lnk)
.max_send_wr = SMC_WR_BUF_CNT * 3, .max_send_wr = SMC_WR_BUF_CNT * 3,
.max_recv_wr = SMC_WR_BUF_CNT * 3, .max_recv_wr = SMC_WR_BUF_CNT * 3,
.max_send_sge = SMC_IB_MAX_SEND_SGE, .max_send_sge = SMC_IB_MAX_SEND_SGE,
.max_recv_sge = 1, .max_recv_sge = sges_per_buf,
}, },
.sq_sig_type = IB_SIGNAL_REQ_WR, .sq_sig_type = IB_SIGNAL_REQ_WR,
.qp_type = IB_QPT_RC, .qp_type = IB_QPT_RC,
......
...@@ -59,6 +59,17 @@ struct smc_ib_device { /* ib-device infos for smc */ ...@@ -59,6 +59,17 @@ struct smc_ib_device { /* ib-device infos for smc */
int ndev_ifidx[SMC_MAX_PORTS]; /* ndev if indexes */ int ndev_ifidx[SMC_MAX_PORTS]; /* ndev if indexes */
}; };
/* Extract the IPv4 address carried in a GID.
 * The GID is interpreted as an IPv6 address; when it is IPv4-mapped or its
 * upper 96 bits are all zero, the last 32 bits are returned as they are.
 * Any other GID yields INADDR_NONE (as __be32).
 */
static inline __be32 smc_ib_gid_to_ipv4(u8 gid[SMC_GID_SIZE])
{
	struct in6_addr *addr6 = (struct in6_addr *)gid;

	if (!ipv6_addr_v4mapped(addr6) &&
	    (addr6->s6_addr32[0] | addr6->s6_addr32[1] | addr6->s6_addr32[2]))
		return cpu_to_be32(INADDR_NONE);
	return addr6->s6_addr32[3];
}
struct smc_init_info_smcrv2;
struct smc_buf_desc; struct smc_buf_desc;
struct smc_link; struct smc_link;
...@@ -90,7 +101,10 @@ void smc_ib_sync_sg_for_device(struct smc_link *lnk, ...@@ -90,7 +101,10 @@ void smc_ib_sync_sg_for_device(struct smc_link *lnk,
struct smc_buf_desc *buf_slot, struct smc_buf_desc *buf_slot,
enum dma_data_direction data_direction); enum dma_data_direction data_direction);
int smc_ib_determine_gid(struct smc_ib_device *smcibdev, u8 ibport, int smc_ib_determine_gid(struct smc_ib_device *smcibdev, u8 ibport,
unsigned short vlan_id, u8 gid[], u8 *sgid_index); unsigned short vlan_id, u8 gid[], u8 *sgid_index,
struct smc_init_info_smcrv2 *smcrv2);
int smc_ib_find_route(__be32 saddr, __be32 daddr,
u8 nexthop_mac[], u8 *uses_gateway);
bool smc_ib_is_valid_local_systemid(void); bool smc_ib_is_valid_local_systemid(void);
int smcr_nl_get_device(struct sk_buff *skb, struct netlink_callback *cb); int smcr_nl_get_device(struct sk_buff *skb, struct netlink_callback *cb);
#endif #endif
...@@ -23,16 +23,24 @@ ...@@ -23,16 +23,24 @@
struct smc_llc_hdr { struct smc_llc_hdr {
struct smc_wr_rx_hdr common; struct smc_wr_rx_hdr common;
union {
struct {
u8 length; /* 44 */ u8 length; /* 44 */
#if defined(__BIG_ENDIAN_BITFIELD) #if defined(__BIG_ENDIAN_BITFIELD)
u8 reserved:4, u8 reserved:4,
add_link_rej_rsn:4; add_link_rej_rsn:4;
#elif defined(__LITTLE_ENDIAN_BITFIELD) #elif defined(__LITTLE_ENDIAN_BITFIELD)
u8 add_link_rej_rsn:4, u8 add_link_rej_rsn:4,
reserved:4; reserved:4;
#endif #endif
};
u16 length_v2; /* 44 - 8192*/
};
u8 flags; u8 flags;
}; } __packed; /* format defined in
* IBM Shared Memory Communications Version 2
* (https://www.ibm.com/support/pages/node/6326337)
*/
#define SMC_LLC_FLAG_NO_RMBE_EYEC 0x03 #define SMC_LLC_FLAG_NO_RMBE_EYEC 0x03
...@@ -76,6 +84,32 @@ struct smc_llc_msg_add_link_cont_rt { ...@@ -76,6 +84,32 @@ struct smc_llc_msg_add_link_cont_rt {
__be64 rmb_vaddr_new; __be64 rmb_vaddr_new;
}; };
/* v2 extension placed behind struct smc_llc_msg_add_link in the v2 send
 * buffer; filled by smc_llc_fill_ext_v2()
 */
struct smc_llc_msg_add_link_v2_ext {
#if defined(__BIG_ENDIAN_BITFIELD)
u8 v2_direct : 1,
reserved : 7;
#elif defined(__LITTLE_ENDIAN_BITFIELD)
u8 reserved : 7,
v2_direct : 1;
#endif
u8 reserved2;
u8 client_target_gid[SMC_GID_SIZE];
u8 reserved3[8];
u16 num_rkeys;
/* variable-length trailer, one entry per rkey that is announced */
struct smc_llc_msg_add_link_cont_rt rt[];
} __packed; /* format defined in
* IBM Shared Memory Communications Version 2
* (https://www.ibm.com/support/pages/node/6326337)
*/
/* LLC request-add-link message (v2 only): LLC header, reserved bytes and a
 * variable-length GID array; gid_cnt is presumably the number of entries in
 * gid[] - TODO confirm against the sender/receiver code
 */
struct smc_llc_msg_req_add_link_v2 {
struct smc_llc_hdr hd;
u8 reserved[20];
u8 gid_cnt;
u8 reserved2[3];
u8 gid[][SMC_GID_SIZE];
};
#define SMC_LLC_RKEYS_PER_CONT_MSG 2 #define SMC_LLC_RKEYS_PER_CONT_MSG 2
struct smc_llc_msg_add_link_cont { /* type 0x03 */ struct smc_llc_msg_add_link_cont { /* type 0x03 */
...@@ -115,6 +149,7 @@ struct smc_rmb_rtoken { ...@@ -115,6 +149,7 @@ struct smc_rmb_rtoken {
} __packed; /* format defined in RFC7609 */ } __packed; /* format defined in RFC7609 */
#define SMC_LLC_RKEYS_PER_MSG 3 #define SMC_LLC_RKEYS_PER_MSG 3
#define SMC_LLC_RKEYS_PER_MSG_V2 255
struct smc_llc_msg_confirm_rkey { /* type 0x06 */ struct smc_llc_msg_confirm_rkey { /* type 0x06 */
struct smc_llc_hdr hd; struct smc_llc_hdr hd;
...@@ -135,9 +170,18 @@ struct smc_llc_msg_delete_rkey { /* type 0x09 */ ...@@ -135,9 +170,18 @@ struct smc_llc_msg_delete_rkey { /* type 0x09 */
u8 reserved2[4]; u8 reserved2[4];
}; };
/* v2 form of the delete rkey message with a variable-length rkey array;
 * num_rkeys presumably counts the entries in rkey[] - TODO confirm
 */
struct smc_llc_msg_delete_rkey_v2 { /* type 0x29 */
struct smc_llc_hdr hd;
u8 num_rkeys;
u8 num_inval_rkeys;
u8 reserved[2];
__be32 rkey[]; /* rkeys in network byte order */
};
union smc_llc_msg { union smc_llc_msg {
struct smc_llc_msg_confirm_link confirm_link; struct smc_llc_msg_confirm_link confirm_link;
struct smc_llc_msg_add_link add_link; struct smc_llc_msg_add_link add_link;
struct smc_llc_msg_req_add_link_v2 req_add_link;
struct smc_llc_msg_add_link_cont add_link_cont; struct smc_llc_msg_add_link_cont add_link_cont;
struct smc_llc_msg_del_link delete_link; struct smc_llc_msg_del_link delete_link;
...@@ -189,7 +233,7 @@ static inline void smc_llc_flow_qentry_set(struct smc_llc_flow *flow, ...@@ -189,7 +233,7 @@ static inline void smc_llc_flow_qentry_set(struct smc_llc_flow *flow,
static void smc_llc_flow_parallel(struct smc_link_group *lgr, u8 flow_type, static void smc_llc_flow_parallel(struct smc_link_group *lgr, u8 flow_type,
struct smc_llc_qentry *qentry) struct smc_llc_qentry *qentry)
{ {
u8 msg_type = qentry->msg.raw.hdr.common.type; u8 msg_type = qentry->msg.raw.hdr.common.llc_type;
if ((msg_type == SMC_LLC_ADD_LINK || msg_type == SMC_LLC_DELETE_LINK) && if ((msg_type == SMC_LLC_ADD_LINK || msg_type == SMC_LLC_DELETE_LINK) &&
flow_type != msg_type && !lgr->delayed_event) { flow_type != msg_type && !lgr->delayed_event) {
...@@ -219,7 +263,7 @@ static bool smc_llc_flow_start(struct smc_llc_flow *flow, ...@@ -219,7 +263,7 @@ static bool smc_llc_flow_start(struct smc_llc_flow *flow,
spin_unlock_bh(&lgr->llc_flow_lock); spin_unlock_bh(&lgr->llc_flow_lock);
return false; return false;
} }
switch (qentry->msg.raw.hdr.common.type) { switch (qentry->msg.raw.hdr.common.llc_type) {
case SMC_LLC_ADD_LINK: case SMC_LLC_ADD_LINK:
flow->type = SMC_LLC_FLOW_ADD_LINK; flow->type = SMC_LLC_FLOW_ADD_LINK;
break; break;
...@@ -306,7 +350,7 @@ struct smc_llc_qentry *smc_llc_wait(struct smc_link_group *lgr, ...@@ -306,7 +350,7 @@ struct smc_llc_qentry *smc_llc_wait(struct smc_link_group *lgr,
smc_llc_flow_qentry_del(flow); smc_llc_flow_qentry_del(flow);
goto out; goto out;
} }
rcv_msg = flow->qentry->msg.raw.hdr.common.type; rcv_msg = flow->qentry->msg.raw.hdr.common.llc_type;
if (exp_msg && rcv_msg != exp_msg) { if (exp_msg && rcv_msg != exp_msg) {
if (exp_msg == SMC_LLC_ADD_LINK && if (exp_msg == SMC_LLC_ADD_LINK &&
rcv_msg == SMC_LLC_DELETE_LINK) { rcv_msg == SMC_LLC_DELETE_LINK) {
...@@ -374,6 +418,30 @@ static int smc_llc_add_pending_send(struct smc_link *link, ...@@ -374,6 +418,30 @@ static int smc_llc_add_pending_send(struct smc_link *link,
return 0; return 0;
} }
/* Reserve the v2 send slot of @link for an LLC message.
 * @wr_buf and @pend receive the v2 buffer and the pending-send context
 * obtained from smc_wr_tx_get_v2_slot(); smc_llc_tx_handler is registered
 * as handler for the slot.
 * Returns 0 on success or the negative error code of the slot request.
 */
static int smc_llc_add_pending_send_v2(struct smc_link *link,
					struct smc_wr_v2_buf **wr_buf,
					struct smc_wr_tx_pend_priv **pend)
{
	int rc = smc_wr_tx_get_v2_slot(link, smc_llc_tx_handler, wr_buf, pend);

	/* non-negative results are all reported as plain success */
	return rc < 0 ? rc : 0;
}
/* Set LLC version and message length in @hdr according to the SMC version
 * of @lgr: v2 link groups use llc_version SMC_V2 and the 16 bit length_v2
 * field, all others use llc_version 0 and the 8 bit length field.
 */
static void smc_llc_init_msg_hdr(struct smc_llc_hdr *hdr,
				 struct smc_link_group *lgr, size_t len)
{
	if (lgr->smc_version != SMC_V2) {
		hdr->common.llc_version = 0;
		hdr->length = len;
		return;
	}
	hdr->common.llc_version = SMC_V2;
	hdr->length_v2 = len;
}
/* high-level API to send LLC confirm link */ /* high-level API to send LLC confirm link */
int smc_llc_send_confirm_link(struct smc_link *link, int smc_llc_send_confirm_link(struct smc_link *link,
enum smc_llc_reqresp reqresp) enum smc_llc_reqresp reqresp)
...@@ -390,8 +458,8 @@ int smc_llc_send_confirm_link(struct smc_link *link, ...@@ -390,8 +458,8 @@ int smc_llc_send_confirm_link(struct smc_link *link,
goto put_out; goto put_out;
confllc = (struct smc_llc_msg_confirm_link *)wr_buf; confllc = (struct smc_llc_msg_confirm_link *)wr_buf;
memset(confllc, 0, sizeof(*confllc)); memset(confllc, 0, sizeof(*confllc));
confllc->hd.common.type = SMC_LLC_CONFIRM_LINK; confllc->hd.common.llc_type = SMC_LLC_CONFIRM_LINK;
confllc->hd.length = sizeof(struct smc_llc_msg_confirm_link); smc_llc_init_msg_hdr(&confllc->hd, link->lgr, sizeof(*confllc));
confllc->hd.flags |= SMC_LLC_FLAG_NO_RMBE_EYEC; confllc->hd.flags |= SMC_LLC_FLAG_NO_RMBE_EYEC;
if (reqresp == SMC_LLC_RESP) if (reqresp == SMC_LLC_RESP)
confllc->hd.flags |= SMC_LLC_FLAG_RESP; confllc->hd.flags |= SMC_LLC_FLAG_RESP;
...@@ -426,8 +494,8 @@ static int smc_llc_send_confirm_rkey(struct smc_link *send_link, ...@@ -426,8 +494,8 @@ static int smc_llc_send_confirm_rkey(struct smc_link *send_link,
goto put_out; goto put_out;
rkeyllc = (struct smc_llc_msg_confirm_rkey *)wr_buf; rkeyllc = (struct smc_llc_msg_confirm_rkey *)wr_buf;
memset(rkeyllc, 0, sizeof(*rkeyllc)); memset(rkeyllc, 0, sizeof(*rkeyllc));
rkeyllc->hd.common.type = SMC_LLC_CONFIRM_RKEY; rkeyllc->hd.common.llc_type = SMC_LLC_CONFIRM_RKEY;
rkeyllc->hd.length = sizeof(struct smc_llc_msg_confirm_rkey); smc_llc_init_msg_hdr(&rkeyllc->hd, send_link->lgr, sizeof(*rkeyllc));
rtok_ix = 1; rtok_ix = 1;
for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) { for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
...@@ -471,8 +539,8 @@ static int smc_llc_send_delete_rkey(struct smc_link *link, ...@@ -471,8 +539,8 @@ static int smc_llc_send_delete_rkey(struct smc_link *link,
goto put_out; goto put_out;
rkeyllc = (struct smc_llc_msg_delete_rkey *)wr_buf; rkeyllc = (struct smc_llc_msg_delete_rkey *)wr_buf;
memset(rkeyllc, 0, sizeof(*rkeyllc)); memset(rkeyllc, 0, sizeof(*rkeyllc));
rkeyllc->hd.common.type = SMC_LLC_DELETE_RKEY; rkeyllc->hd.common.llc_type = SMC_LLC_DELETE_RKEY;
rkeyllc->hd.length = sizeof(struct smc_llc_msg_delete_rkey); smc_llc_init_msg_hdr(&rkeyllc->hd, link->lgr, sizeof(*rkeyllc));
rkeyllc->num_rkeys = 1; rkeyllc->num_rkeys = 1;
rkeyllc->rkey[0] = htonl(rmb_desc->mr_rx[link->link_idx]->rkey); rkeyllc->rkey[0] = htonl(rmb_desc->mr_rx[link->link_idx]->rkey);
/* send llc message */ /* send llc message */
...@@ -482,26 +550,116 @@ static int smc_llc_send_delete_rkey(struct smc_link *link, ...@@ -482,26 +550,116 @@ static int smc_llc_send_delete_rkey(struct smc_link *link,
return rc; return rc;
} }
/* Starting at list index *buf_lst, return the first buffer of the first
 * non-empty lgr->rmbs[] list. *buf_lst is advanced to the list the buffer
 * was found in, or to SMC_RMBE_SIZES when all remaining lists are empty
 * (NULL is returned then).
 */
static struct smc_buf_desc *_smc_llc_get_next_rmb(struct smc_link_group *lgr,
						  int *buf_lst)
{
	struct smc_buf_desc *first;

	for (; *buf_lst < SMC_RMBE_SIZES; (*buf_lst)++) {
		first = list_first_entry_or_null(&lgr->rmbs[*buf_lst],
						 struct smc_buf_desc, list);
		if (first)
			return first;
	}
	return NULL;
}
/* Return the rmb that follows @buf_pos in the buffer lists of @lgr.
 *
 * @buf_lst: in/out, index of the lgr->rmbs[] list @buf_pos belongs to;
 *           updated when the search moves on to a later list
 * @buf_pos: current position, or NULL to start the search at list *buf_lst
 *
 * Returns the next buffer or NULL when no buffer is left.
 */
static struct smc_buf_desc *smc_llc_get_next_rmb(struct smc_link_group *lgr,
						 int *buf_lst,
						 struct smc_buf_desc *buf_pos)
{
	/* no current position: search from *buf_lst itself, so that a caller
	 * starting with *buf_lst == 0 does not skip the first list
	 */
	if (!buf_pos)
		return _smc_llc_get_next_rmb(lgr, buf_lst);

	/* end of the current list reached, continue with the next lists */
	if (list_is_last(&buf_pos->list, &lgr->rmbs[*buf_lst])) {
		(*buf_lst)++;
		return _smc_llc_get_next_rmb(lgr, buf_lst);
	}
	return list_next_entry(buf_pos, list);
}
/* Start an iteration over the rmb lists of @lgr: reset *buf_lst to 0 and
 * return whatever smc_llc_get_next_rmb() yields for a NULL position.
 */
static struct smc_buf_desc *smc_llc_get_first_rmb(struct smc_link_group *lgr,
int *buf_lst)
{
*buf_lst = 0;
return smc_llc_get_next_rmb(lgr, buf_lst, NULL);
}
/* Fill the v2 extension of an ADD LINK message.
 *
 * @ext:      extension area to fill; the caller must provide room for the
 *            rkey entries that get appended
 * @link:     existing (primary) link, supplies the currently used rkeys
 * @link_new: link being added, supplies the GID, the new rkeys and the DMA
 *            addresses
 *
 * v2_direct is set when the link group does not use a gateway. Under
 * lgr->rmbs_lock up to lgr->conns_num rkey entries are written, one per rmb
 * that is in use.
 *
 * Returns the number of bytes used: sizeof(*ext) plus the rt[] entries
 * actually written.
 */
static int smc_llc_fill_ext_v2(struct smc_llc_msg_add_link_v2_ext *ext,
			       struct smc_link *link, struct smc_link *link_new)
{
	struct smc_link_group *lgr = link->lgr;
	struct smc_buf_desc *buf_pos;
	int prim_lnk_idx, lnk_idx, i;
	int len = sizeof(*ext);
	int buf_lst;

	ext->v2_direct = !lgr->uses_gateway;
	memcpy(ext->client_target_gid, link_new->gid, SMC_GID_SIZE);

	prim_lnk_idx = link->link_idx;
	lnk_idx = link_new->link_idx;
	mutex_lock(&lgr->rmbs_lock);
	ext->num_rkeys = lgr->conns_num;
	if (!ext->num_rkeys)
		goto out;
	buf_pos = smc_llc_get_first_rmb(lgr, &buf_lst);
	for (i = 0; i < ext->num_rkeys; i++) {
		/* skip unused buffers before every entry, so the very first
		 * buffer is filtered just like all later ones
		 */
		while (buf_pos && !buf_pos->used)
			buf_pos = smc_llc_get_next_rmb(lgr, &buf_lst, buf_pos);
		if (!buf_pos)
			break;
		ext->rt[i].rmb_key = htonl(buf_pos->mr_rx[prim_lnk_idx]->rkey);
		ext->rt[i].rmb_key_new = htonl(buf_pos->mr_rx[lnk_idx]->rkey);
		ext->rt[i].rmb_vaddr_new =
			cpu_to_be64((u64)sg_dma_address(buf_pos->sgt[lnk_idx].sgl));
		buf_pos = smc_llc_get_next_rmb(lgr, &buf_lst, buf_pos);
	}
	/* NOTE(review): when the loop ends early, num_rkeys stays at
	 * conns_num although only i entries were written - confirm that the
	 * receiver tolerates this
	 */
	len += i * sizeof(ext->rt[0]);
out:
	mutex_unlock(&lgr->rmbs_lock);
	return len;
}
/* send ADD LINK request or response */ /* send ADD LINK request or response */
int smc_llc_send_add_link(struct smc_link *link, u8 mac[], u8 gid[], int smc_llc_send_add_link(struct smc_link *link, u8 mac[], u8 gid[],
struct smc_link *link_new, struct smc_link *link_new,
enum smc_llc_reqresp reqresp) enum smc_llc_reqresp reqresp)
{ {
struct smc_llc_msg_add_link_v2_ext *ext = NULL;
struct smc_llc_msg_add_link *addllc; struct smc_llc_msg_add_link *addllc;
struct smc_wr_tx_pend_priv *pend; struct smc_wr_tx_pend_priv *pend;
struct smc_wr_buf *wr_buf; int len = sizeof(*addllc);
int rc; int rc;
if (!smc_wr_tx_link_hold(link)) if (!smc_wr_tx_link_hold(link))
return -ENOLINK; return -ENOLINK;
if (link->lgr->smc_version == SMC_V2) {
struct smc_wr_v2_buf *wr_buf;
rc = smc_llc_add_pending_send_v2(link, &wr_buf, &pend);
if (rc)
goto put_out;
addllc = (struct smc_llc_msg_add_link *)wr_buf;
ext = (struct smc_llc_msg_add_link_v2_ext *)
&wr_buf->raw[sizeof(*addllc)];
memset(ext, 0, SMC_WR_TX_SIZE);
} else {
struct smc_wr_buf *wr_buf;
rc = smc_llc_add_pending_send(link, &wr_buf, &pend); rc = smc_llc_add_pending_send(link, &wr_buf, &pend);
if (rc) if (rc)
goto put_out; goto put_out;
addllc = (struct smc_llc_msg_add_link *)wr_buf; addllc = (struct smc_llc_msg_add_link *)wr_buf;
}
memset(addllc, 0, sizeof(*addllc)); memset(addllc, 0, sizeof(*addllc));
addllc->hd.common.type = SMC_LLC_ADD_LINK; addllc->hd.common.llc_type = SMC_LLC_ADD_LINK;
addllc->hd.length = sizeof(struct smc_llc_msg_add_link);
if (reqresp == SMC_LLC_RESP) if (reqresp == SMC_LLC_RESP)
addllc->hd.flags |= SMC_LLC_FLAG_RESP; addllc->hd.flags |= SMC_LLC_FLAG_RESP;
memcpy(addllc->sender_mac, mac, ETH_ALEN); memcpy(addllc->sender_mac, mac, ETH_ALEN);
...@@ -516,7 +674,13 @@ int smc_llc_send_add_link(struct smc_link *link, u8 mac[], u8 gid[], ...@@ -516,7 +674,13 @@ int smc_llc_send_add_link(struct smc_link *link, u8 mac[], u8 gid[],
addllc->qp_mtu = min(link_new->path_mtu, addllc->qp_mtu = min(link_new->path_mtu,
link_new->peer_mtu); link_new->peer_mtu);
} }
if (ext && link_new)
len += smc_llc_fill_ext_v2(ext, link, link_new);
smc_llc_init_msg_hdr(&addllc->hd, link->lgr, len);
/* send llc message */ /* send llc message */
if (link->lgr->smc_version == SMC_V2)
rc = smc_wr_tx_v2_send(link, pend, len);
else
rc = smc_wr_tx_send(link, pend); rc = smc_wr_tx_send(link, pend);
put_out: put_out:
smc_wr_tx_link_put(link); smc_wr_tx_link_put(link);
...@@ -541,8 +705,8 @@ int smc_llc_send_delete_link(struct smc_link *link, u8 link_del_id, ...@@ -541,8 +705,8 @@ int smc_llc_send_delete_link(struct smc_link *link, u8 link_del_id,
delllc = (struct smc_llc_msg_del_link *)wr_buf; delllc = (struct smc_llc_msg_del_link *)wr_buf;
memset(delllc, 0, sizeof(*delllc)); memset(delllc, 0, sizeof(*delllc));
delllc->hd.common.type = SMC_LLC_DELETE_LINK; delllc->hd.common.llc_type = SMC_LLC_DELETE_LINK;
delllc->hd.length = sizeof(struct smc_llc_msg_del_link); smc_llc_init_msg_hdr(&delllc->hd, link->lgr, sizeof(*delllc));
if (reqresp == SMC_LLC_RESP) if (reqresp == SMC_LLC_RESP)
delllc->hd.flags |= SMC_LLC_FLAG_RESP; delllc->hd.flags |= SMC_LLC_FLAG_RESP;
if (orderly) if (orderly)
...@@ -574,8 +738,8 @@ static int smc_llc_send_test_link(struct smc_link *link, u8 user_data[16]) ...@@ -574,8 +738,8 @@ static int smc_llc_send_test_link(struct smc_link *link, u8 user_data[16])
goto put_out; goto put_out;
testllc = (struct smc_llc_msg_test_link *)wr_buf; testllc = (struct smc_llc_msg_test_link *)wr_buf;
memset(testllc, 0, sizeof(*testllc)); memset(testllc, 0, sizeof(*testllc));
testllc->hd.common.type = SMC_LLC_TEST_LINK; testllc->hd.common.llc_type = SMC_LLC_TEST_LINK;
testllc->hd.length = sizeof(struct smc_llc_msg_test_link); smc_llc_init_msg_hdr(&testllc->hd, link->lgr, sizeof(*testllc));
memcpy(testllc->user_data, user_data, sizeof(testllc->user_data)); memcpy(testllc->user_data, user_data, sizeof(testllc->user_data));
/* send llc message */ /* send llc message */
rc = smc_wr_tx_send(link, pend); rc = smc_wr_tx_send(link, pend);
...@@ -651,44 +815,6 @@ static int smc_llc_alloc_alt_link(struct smc_link_group *lgr, ...@@ -651,44 +815,6 @@ static int smc_llc_alloc_alt_link(struct smc_link_group *lgr,
return -EMLINK; return -EMLINK;
} }
/* Return the head of the first non-empty RMB list of @lgr, scanning from
 * list index *buf_lst upwards. *buf_lst is advanced past every empty list
 * that is skipped and is left at the index of the list the returned buffer
 * belongs to. Returns NULL (with *buf_lst == SMC_RMBE_SIZES or above) when
 * no remaining list holds a buffer.
 */
static struct smc_buf_desc *_smc_llc_get_next_rmb(struct smc_link_group *lgr,
						  int *buf_lst)
{
	struct smc_buf_desc *first;

	for (; *buf_lst < SMC_RMBE_SIZES; (*buf_lst)++) {
		first = list_first_entry_or_null(&lgr->rmbs[*buf_lst],
						 struct smc_buf_desc, list);
		if (first)
			return first;
	}
	return NULL;
}
/* return next rmb from buffer lists
 *
 * @lgr:     link group whose rmbs[] lists are walked
 * @buf_lst: in/out index of the list currently being walked
 * @buf_pos: current buffer, or NULL when there is no current position yet
 *
 * With a current position, the successor in the same list is returned; when
 * @buf_pos is the last entry of its list the walk continues with the first
 * buffer of the next non-empty list. Returns NULL when all lists are
 * exhausted.
 */
static struct smc_buf_desc *smc_llc_get_next_rmb(struct smc_link_group *lgr,
						 int *buf_lst,
						 struct smc_buf_desc *buf_pos)
{
	/* No current position: scan starting at list *buf_lst itself. The
	 * index must not be advanced first, otherwise a walk that starts at
	 * index 0 would never look at lgr->rmbs[0].
	 */
	if (!buf_pos)
		return _smc_llc_get_next_rmb(lgr, buf_lst);

	if (list_is_last(&buf_pos->list, &lgr->rmbs[*buf_lst])) {
		/* current list is finished, move on to the next one */
		(*buf_lst)++;
		return _smc_llc_get_next_rmb(lgr, buf_lst);
	}
	return list_next_entry(buf_pos, list);
}
/* Start a walk over all RMB lists of @lgr: reset *buf_lst to 0 and return
 * the first buffer found, or NULL when every list is empty.
 *
 * The scan helper is called directly so that list 0 is part of the walk;
 * going through smc_llc_get_next_rmb() with a NULL position would bump the
 * index to 1 before the first lookup.
 */
static struct smc_buf_desc *smc_llc_get_first_rmb(struct smc_link_group *lgr,
						  int *buf_lst)
{
	*buf_lst = 0;
	return _smc_llc_get_next_rmb(lgr, buf_lst);
}
/* send one add_link_continue msg */ /* send one add_link_continue msg */
static int smc_llc_add_link_cont(struct smc_link *link, static int smc_llc_add_link_cont(struct smc_link *link,
struct smc_link *link_new, u8 *num_rkeys_todo, struct smc_link *link_new, u8 *num_rkeys_todo,
...@@ -734,7 +860,7 @@ static int smc_llc_add_link_cont(struct smc_link *link, ...@@ -734,7 +860,7 @@ static int smc_llc_add_link_cont(struct smc_link *link,
while (*buf_pos && !(*buf_pos)->used) while (*buf_pos && !(*buf_pos)->used)
*buf_pos = smc_llc_get_next_rmb(lgr, buf_lst, *buf_pos); *buf_pos = smc_llc_get_next_rmb(lgr, buf_lst, *buf_pos);
} }
addc_llc->hd.common.type = SMC_LLC_ADD_LINK_CONT; addc_llc->hd.common.llc_type = SMC_LLC_ADD_LINK_CONT;
addc_llc->hd.length = sizeof(struct smc_llc_msg_add_link_cont); addc_llc->hd.length = sizeof(struct smc_llc_msg_add_link_cont);
if (lgr->role == SMC_CLNT) if (lgr->role == SMC_CLNT)
addc_llc->hd.flags |= SMC_LLC_FLAG_RESP; addc_llc->hd.flags |= SMC_LLC_FLAG_RESP;
...@@ -793,6 +919,8 @@ static int smc_llc_cli_add_link_reject(struct smc_llc_qentry *qentry) ...@@ -793,6 +919,8 @@ static int smc_llc_cli_add_link_reject(struct smc_llc_qentry *qentry)
qentry->msg.raw.hdr.flags |= SMC_LLC_FLAG_RESP; qentry->msg.raw.hdr.flags |= SMC_LLC_FLAG_RESP;
qentry->msg.raw.hdr.flags |= SMC_LLC_FLAG_ADD_LNK_REJ; qentry->msg.raw.hdr.flags |= SMC_LLC_FLAG_ADD_LNK_REJ;
qentry->msg.raw.hdr.add_link_rej_rsn = SMC_LLC_REJ_RSN_NO_ALT_PATH; qentry->msg.raw.hdr.add_link_rej_rsn = SMC_LLC_REJ_RSN_NO_ALT_PATH;
smc_llc_init_msg_hdr(&qentry->msg.raw.hdr, qentry->link->lgr,
sizeof(qentry->msg));
return smc_llc_send_message(qentry->link, &qentry->msg); return smc_llc_send_message(qentry->link, &qentry->msg);
} }
...@@ -813,7 +941,7 @@ static int smc_llc_cli_conf_link(struct smc_link *link, ...@@ -813,7 +941,7 @@ static int smc_llc_cli_conf_link(struct smc_link *link,
SMC_LLC_DEL_LOST_PATH); SMC_LLC_DEL_LOST_PATH);
return -ENOLINK; return -ENOLINK;
} }
if (qentry->msg.raw.hdr.common.type != SMC_LLC_CONFIRM_LINK) { if (qentry->msg.raw.hdr.common.llc_type != SMC_LLC_CONFIRM_LINK) {
/* received DELETE_LINK instead */ /* received DELETE_LINK instead */
qentry->msg.raw.hdr.flags |= SMC_LLC_FLAG_RESP; qentry->msg.raw.hdr.flags |= SMC_LLC_FLAG_RESP;
smc_llc_send_message(link, &qentry->msg); smc_llc_send_message(link, &qentry->msg);
...@@ -854,6 +982,26 @@ static int smc_llc_cli_conf_link(struct smc_link *link, ...@@ -854,6 +982,26 @@ static int smc_llc_cli_conf_link(struct smc_link *link,
return 0; return 0;
} }
static void smc_llc_save_add_link_rkeys(struct smc_link *link,
struct smc_link *link_new)
{
struct smc_llc_msg_add_link_v2_ext *ext;
struct smc_link_group *lgr = link->lgr;
int max, i;
ext = (struct smc_llc_msg_add_link_v2_ext *)((u8 *)lgr->wr_rx_buf_v2 +
SMC_WR_TX_SIZE);
max = min_t(u8, ext->num_rkeys, SMC_LLC_RKEYS_PER_MSG_V2);
mutex_lock(&lgr->rmbs_lock);
for (i = 0; i < max; i++) {
smc_rtoken_set(lgr, link->link_idx, link_new->link_idx,
ext->rt[i].rmb_key,
ext->rt[i].rmb_vaddr_new,
ext->rt[i].rmb_key_new);
}
mutex_unlock(&lgr->rmbs_lock);
}
static void smc_llc_save_add_link_info(struct smc_link *link, static void smc_llc_save_add_link_info(struct smc_link *link,
struct smc_llc_msg_add_link *add_llc) struct smc_llc_msg_add_link *add_llc)
{ {
...@@ -870,31 +1018,47 @@ int smc_llc_cli_add_link(struct smc_link *link, struct smc_llc_qentry *qentry) ...@@ -870,31 +1018,47 @@ int smc_llc_cli_add_link(struct smc_link *link, struct smc_llc_qentry *qentry)
struct smc_llc_msg_add_link *llc = &qentry->msg.add_link; struct smc_llc_msg_add_link *llc = &qentry->msg.add_link;
enum smc_lgr_type lgr_new_t = SMC_LGR_SYMMETRIC; enum smc_lgr_type lgr_new_t = SMC_LGR_SYMMETRIC;
struct smc_link_group *lgr = smc_get_lgr(link); struct smc_link_group *lgr = smc_get_lgr(link);
struct smc_init_info *ini = NULL;
struct smc_link *lnk_new = NULL; struct smc_link *lnk_new = NULL;
struct smc_init_info ini;
int lnk_idx, rc = 0; int lnk_idx, rc = 0;
if (!llc->qp_mtu) if (!llc->qp_mtu)
goto out_reject; goto out_reject;
ini.vlan_id = lgr->vlan_id; ini = kzalloc(sizeof(*ini), GFP_KERNEL);
smc_pnet_find_alt_roce(lgr, &ini, link->smcibdev); if (!ini) {
rc = -ENOMEM;
goto out_reject;
}
ini->vlan_id = lgr->vlan_id;
if (lgr->smc_version == SMC_V2) {
ini->check_smcrv2 = true;
ini->smcrv2.saddr = lgr->saddr;
ini->smcrv2.daddr = smc_ib_gid_to_ipv4(llc->sender_gid);
}
smc_pnet_find_alt_roce(lgr, ini, link->smcibdev);
if (!memcmp(llc->sender_gid, link->peer_gid, SMC_GID_SIZE) && if (!memcmp(llc->sender_gid, link->peer_gid, SMC_GID_SIZE) &&
!memcmp(llc->sender_mac, link->peer_mac, ETH_ALEN)) { (lgr->smc_version == SMC_V2 ||
if (!ini.ib_dev) !memcmp(llc->sender_mac, link->peer_mac, ETH_ALEN))) {
if (!ini->ib_dev && !ini->smcrv2.ib_dev_v2)
goto out_reject; goto out_reject;
lgr_new_t = SMC_LGR_ASYMMETRIC_PEER; lgr_new_t = SMC_LGR_ASYMMETRIC_PEER;
} }
if (!ini.ib_dev) { if (lgr->smc_version == SMC_V2 && !ini->smcrv2.ib_dev_v2) {
lgr_new_t = SMC_LGR_ASYMMETRIC_LOCAL;
ini->smcrv2.ib_dev_v2 = link->smcibdev;
ini->smcrv2.ib_port_v2 = link->ibport;
} else if (lgr->smc_version < SMC_V2 && !ini->ib_dev) {
lgr_new_t = SMC_LGR_ASYMMETRIC_LOCAL; lgr_new_t = SMC_LGR_ASYMMETRIC_LOCAL;
ini.ib_dev = link->smcibdev; ini->ib_dev = link->smcibdev;
ini.ib_port = link->ibport; ini->ib_port = link->ibport;
} }
lnk_idx = smc_llc_alloc_alt_link(lgr, lgr_new_t); lnk_idx = smc_llc_alloc_alt_link(lgr, lgr_new_t);
if (lnk_idx < 0) if (lnk_idx < 0)
goto out_reject; goto out_reject;
lnk_new = &lgr->lnk[lnk_idx]; lnk_new = &lgr->lnk[lnk_idx];
rc = smcr_link_init(lgr, lnk_new, lnk_idx, &ini); rc = smcr_link_init(lgr, lnk_new, lnk_idx, ini);
if (rc) if (rc)
goto out_reject; goto out_reject;
smc_llc_save_add_link_info(lnk_new, llc); smc_llc_save_add_link_info(lnk_new, llc);
...@@ -910,16 +1074,20 @@ int smc_llc_cli_add_link(struct smc_link *link, struct smc_llc_qentry *qentry) ...@@ -910,16 +1074,20 @@ int smc_llc_cli_add_link(struct smc_link *link, struct smc_llc_qentry *qentry)
goto out_clear_lnk; goto out_clear_lnk;
rc = smc_llc_send_add_link(link, rc = smc_llc_send_add_link(link,
lnk_new->smcibdev->mac[ini.ib_port - 1], lnk_new->smcibdev->mac[lnk_new->ibport - 1],
lnk_new->gid, lnk_new, SMC_LLC_RESP); lnk_new->gid, lnk_new, SMC_LLC_RESP);
if (rc) if (rc)
goto out_clear_lnk; goto out_clear_lnk;
if (lgr->smc_version == SMC_V2) {
smc_llc_save_add_link_rkeys(link, lnk_new);
} else {
rc = smc_llc_cli_rkey_exchange(link, lnk_new); rc = smc_llc_cli_rkey_exchange(link, lnk_new);
if (rc) { if (rc) {
rc = 0; rc = 0;
goto out_clear_lnk; goto out_clear_lnk;
} }
rc = smc_llc_cli_conf_link(link, &ini, lnk_new, lgr_new_t); }
rc = smc_llc_cli_conf_link(link, ini, lnk_new, lgr_new_t);
if (!rc) if (!rc)
goto out; goto out;
out_clear_lnk: out_clear_lnk:
...@@ -928,29 +1096,78 @@ int smc_llc_cli_add_link(struct smc_link *link, struct smc_llc_qentry *qentry) ...@@ -928,29 +1096,78 @@ int smc_llc_cli_add_link(struct smc_link *link, struct smc_llc_qentry *qentry)
out_reject: out_reject:
smc_llc_cli_add_link_reject(qentry); smc_llc_cli_add_link_reject(qentry);
out: out:
kfree(ini);
kfree(qentry); kfree(qentry);
return rc; return rc;
} }
/* Send a REQUEST ADD LINK (v2) message on @link.
 *
 * Nothing is sent when the link cannot be held for sending, when the link
 * group type is already SMC_LGR_SYMMETRIC or SMC_LGR_ASYMMETRIC_PEER, when
 * smc_fill_gid_list() yields at most one gid, or when no v2 send buffer can
 * be obtained. On a successful send the local LLC flow type is set to
 * SMC_LLC_FLOW_REQ_ADD_LINK.
 */
static void smc_llc_send_request_add_link(struct smc_link *link)
{
	struct smc_llc_msg_req_add_link_v2 *req;
	struct smc_wr_tx_pend_priv *pend;
	struct smc_wr_v2_buf *wr_buf;
	struct smc_gidlist gidlist;
	int rc, len, i;

	if (!smc_wr_tx_link_hold(link))
		return;

	if (link->lgr->type == SMC_LGR_SYMMETRIC)
		goto put_out;
	if (link->lgr->type == SMC_LGR_ASYMMETRIC_PEER)
		goto put_out;

	smc_fill_gid_list(link->lgr, &gidlist, link->smcibdev, link->gid);
	if (gidlist.len <= 1)
		goto put_out;

	rc = smc_llc_add_pending_send_v2(link, &wr_buf, &pend);
	if (rc)
		goto put_out;

	req = (struct smc_llc_msg_req_add_link_v2 *)wr_buf;
	memset(req, 0, SMC_WR_TX_SIZE);
	req->hd.common.llc_type = SMC_LLC_REQ_ADD_LINK;
	for (i = 0; i < gidlist.len; i++)
		memcpy(req->gid[i], gidlist.list[i], sizeof(gidlist.list[0]));
	req->gid_cnt = gidlist.len;

	/* fixed part of the message plus one gid slot per list entry */
	len = sizeof(*req) + (gidlist.len * sizeof(gidlist.list[0]));
	smc_llc_init_msg_hdr(&req->hd, link->lgr, len);

	rc = smc_wr_tx_v2_send(link, pend, len);
	if (rc)
		goto put_out;
	/* set REQ_ADD_LINK flow and wait for response from peer */
	link->lgr->llc_flow_lcl.type = SMC_LLC_FLOW_REQ_ADD_LINK;

put_out:
	smc_wr_tx_link_put(link);
}
/* as an SMC client, invite server to start the add_link processing */ /* as an SMC client, invite server to start the add_link processing */
static void smc_llc_cli_add_link_invite(struct smc_link *link, static void smc_llc_cli_add_link_invite(struct smc_link *link,
struct smc_llc_qentry *qentry) struct smc_llc_qentry *qentry)
{ {
struct smc_link_group *lgr = smc_get_lgr(link); struct smc_link_group *lgr = smc_get_lgr(link);
struct smc_init_info ini; struct smc_init_info *ini = NULL;
if (lgr->smc_version == SMC_V2) {
smc_llc_send_request_add_link(link);
goto out;
}
if (lgr->type == SMC_LGR_SYMMETRIC || if (lgr->type == SMC_LGR_SYMMETRIC ||
lgr->type == SMC_LGR_ASYMMETRIC_PEER) lgr->type == SMC_LGR_ASYMMETRIC_PEER)
goto out; goto out;
ini.vlan_id = lgr->vlan_id; ini = kzalloc(sizeof(*ini), GFP_KERNEL);
smc_pnet_find_alt_roce(lgr, &ini, link->smcibdev); if (!ini)
if (!ini.ib_dev) goto out;
ini->vlan_id = lgr->vlan_id;
smc_pnet_find_alt_roce(lgr, ini, link->smcibdev);
if (!ini->ib_dev)
goto out; goto out;
smc_llc_send_add_link(link, ini.ib_dev->mac[ini.ib_port - 1], smc_llc_send_add_link(link, ini->ib_dev->mac[ini->ib_port - 1],
ini.ib_gid, NULL, SMC_LLC_REQ); ini->ib_gid, NULL, SMC_LLC_REQ);
out: out:
kfree(ini);
kfree(qentry); kfree(qentry);
} }
...@@ -966,7 +1183,7 @@ static bool smc_llc_is_empty_llc_message(union smc_llc_msg *llc) ...@@ -966,7 +1183,7 @@ static bool smc_llc_is_empty_llc_message(union smc_llc_msg *llc)
static bool smc_llc_is_local_add_link(union smc_llc_msg *llc) static bool smc_llc_is_local_add_link(union smc_llc_msg *llc)
{ {
if (llc->raw.hdr.common.type == SMC_LLC_ADD_LINK && if (llc->raw.hdr.common.llc_type == SMC_LLC_ADD_LINK &&
smc_llc_is_empty_llc_message(llc)) smc_llc_is_empty_llc_message(llc))
return true; return true;
return false; return false;
...@@ -1133,7 +1350,7 @@ static int smc_llc_srv_conf_link(struct smc_link *link, ...@@ -1133,7 +1350,7 @@ static int smc_llc_srv_conf_link(struct smc_link *link,
/* receive CONFIRM LINK response over the RoCE fabric */ /* receive CONFIRM LINK response over the RoCE fabric */
qentry = smc_llc_wait(lgr, link, SMC_LLC_WAIT_FIRST_TIME, 0); qentry = smc_llc_wait(lgr, link, SMC_LLC_WAIT_FIRST_TIME, 0);
if (!qentry || if (!qentry ||
qentry->msg.raw.hdr.common.type != SMC_LLC_CONFIRM_LINK) { qentry->msg.raw.hdr.common.llc_type != SMC_LLC_CONFIRM_LINK) {
/* send DELETE LINK */ /* send DELETE LINK */
smc_llc_send_delete_link(link, link_new->link_id, SMC_LLC_REQ, smc_llc_send_delete_link(link, link_new->link_id, SMC_LLC_REQ,
false, SMC_LLC_DEL_LOST_PATH); false, SMC_LLC_DEL_LOST_PATH);
...@@ -1152,37 +1369,80 @@ static int smc_llc_srv_conf_link(struct smc_link *link, ...@@ -1152,37 +1369,80 @@ static int smc_llc_srv_conf_link(struct smc_link *link,
return 0; return 0;
} }
int smc_llc_srv_add_link(struct smc_link *link) static void smc_llc_send_req_add_link_response(struct smc_llc_qentry *qentry)
{
qentry->msg.raw.hdr.flags |= SMC_LLC_FLAG_RESP;
smc_llc_init_msg_hdr(&qentry->msg.raw.hdr, qentry->link->lgr,
sizeof(qentry->msg));
memset(&qentry->msg.raw.data, 0, sizeof(qentry->msg.raw.data));
smc_llc_send_message(qentry->link, &qentry->msg);
}
int smc_llc_srv_add_link(struct smc_link *link,
struct smc_llc_qentry *req_qentry)
{ {
enum smc_lgr_type lgr_new_t = SMC_LGR_SYMMETRIC; enum smc_lgr_type lgr_new_t = SMC_LGR_SYMMETRIC;
struct smc_link_group *lgr = link->lgr; struct smc_link_group *lgr = link->lgr;
struct smc_llc_msg_add_link *add_llc; struct smc_llc_msg_add_link *add_llc;
struct smc_llc_qentry *qentry = NULL; struct smc_llc_qentry *qentry = NULL;
struct smc_link *link_new; bool send_req_add_link_resp = false;
struct smc_init_info ini; struct smc_link *link_new = NULL;
struct smc_init_info *ini = NULL;
int lnk_idx, rc = 0; int lnk_idx, rc = 0;
if (req_qentry &&
req_qentry->msg.raw.hdr.common.llc_type == SMC_LLC_REQ_ADD_LINK)
send_req_add_link_resp = true;
ini = kzalloc(sizeof(*ini), GFP_KERNEL);
if (!ini) {
rc = -ENOMEM;
goto out;
}
/* ignore client add link recommendation, start new flow */ /* ignore client add link recommendation, start new flow */
ini.vlan_id = lgr->vlan_id; ini->vlan_id = lgr->vlan_id;
smc_pnet_find_alt_roce(lgr, &ini, link->smcibdev); if (lgr->smc_version == SMC_V2) {
if (!ini.ib_dev) { ini->check_smcrv2 = true;
ini->smcrv2.saddr = lgr->saddr;
if (send_req_add_link_resp) {
struct smc_llc_msg_req_add_link_v2 *req_add =
&req_qentry->msg.req_add_link;
ini->smcrv2.daddr = smc_ib_gid_to_ipv4(req_add->gid[0]);
}
}
smc_pnet_find_alt_roce(lgr, ini, link->smcibdev);
if (lgr->smc_version == SMC_V2 && !ini->smcrv2.ib_dev_v2) {
lgr_new_t = SMC_LGR_ASYMMETRIC_LOCAL;
ini->smcrv2.ib_dev_v2 = link->smcibdev;
ini->smcrv2.ib_port_v2 = link->ibport;
} else if (lgr->smc_version < SMC_V2 && !ini->ib_dev) {
lgr_new_t = SMC_LGR_ASYMMETRIC_LOCAL; lgr_new_t = SMC_LGR_ASYMMETRIC_LOCAL;
ini.ib_dev = link->smcibdev; ini->ib_dev = link->smcibdev;
ini.ib_port = link->ibport; ini->ib_port = link->ibport;
} }
lnk_idx = smc_llc_alloc_alt_link(lgr, lgr_new_t); lnk_idx = smc_llc_alloc_alt_link(lgr, lgr_new_t);
if (lnk_idx < 0) if (lnk_idx < 0) {
return 0; rc = 0;
goto out;
}
rc = smcr_link_init(lgr, &lgr->lnk[lnk_idx], lnk_idx, &ini); rc = smcr_link_init(lgr, &lgr->lnk[lnk_idx], lnk_idx, ini);
if (rc) if (rc)
return rc; goto out;
link_new = &lgr->lnk[lnk_idx]; link_new = &lgr->lnk[lnk_idx];
rc = smcr_buf_map_lgr(link_new);
if (rc)
goto out_err;
rc = smc_llc_send_add_link(link, rc = smc_llc_send_add_link(link,
link_new->smcibdev->mac[ini.ib_port - 1], link_new->smcibdev->mac[link_new->ibport-1],
link_new->gid, link_new, SMC_LLC_REQ); link_new->gid, link_new, SMC_LLC_REQ);
if (rc) if (rc)
goto out_err; goto out_err;
send_req_add_link_resp = false;
/* receive ADD LINK response over the RoCE fabric */ /* receive ADD LINK response over the RoCE fabric */
qentry = smc_llc_wait(lgr, link, SMC_LLC_WAIT_TIME, SMC_LLC_ADD_LINK); qentry = smc_llc_wait(lgr, link, SMC_LLC_WAIT_TIME, SMC_LLC_ADD_LINK);
if (!qentry) { if (!qentry) {
...@@ -1197,48 +1457,59 @@ int smc_llc_srv_add_link(struct smc_link *link) ...@@ -1197,48 +1457,59 @@ int smc_llc_srv_add_link(struct smc_link *link)
} }
if (lgr->type == SMC_LGR_SINGLE && if (lgr->type == SMC_LGR_SINGLE &&
(!memcmp(add_llc->sender_gid, link->peer_gid, SMC_GID_SIZE) && (!memcmp(add_llc->sender_gid, link->peer_gid, SMC_GID_SIZE) &&
!memcmp(add_llc->sender_mac, link->peer_mac, ETH_ALEN))) { (lgr->smc_version == SMC_V2 ||
!memcmp(add_llc->sender_mac, link->peer_mac, ETH_ALEN)))) {
lgr_new_t = SMC_LGR_ASYMMETRIC_PEER; lgr_new_t = SMC_LGR_ASYMMETRIC_PEER;
} }
smc_llc_save_add_link_info(link_new, add_llc); smc_llc_save_add_link_info(link_new, add_llc);
smc_llc_flow_qentry_del(&lgr->llc_flow_lcl); smc_llc_flow_qentry_del(&lgr->llc_flow_lcl);
rc = smc_ib_ready_link(link_new); rc = smc_ib_ready_link(link_new);
if (rc)
goto out_err;
rc = smcr_buf_map_lgr(link_new);
if (rc) if (rc)
goto out_err; goto out_err;
rc = smcr_buf_reg_lgr(link_new); rc = smcr_buf_reg_lgr(link_new);
if (rc) if (rc)
goto out_err; goto out_err;
if (lgr->smc_version == SMC_V2) {
smc_llc_save_add_link_rkeys(link, link_new);
} else {
rc = smc_llc_srv_rkey_exchange(link, link_new); rc = smc_llc_srv_rkey_exchange(link, link_new);
if (rc) if (rc)
goto out_err; goto out_err;
}
rc = smc_llc_srv_conf_link(link, link_new, lgr_new_t); rc = smc_llc_srv_conf_link(link, link_new, lgr_new_t);
if (rc) if (rc)
goto out_err; goto out_err;
kfree(ini);
return 0; return 0;
out_err: out_err:
if (link_new) {
link_new->state = SMC_LNK_INACTIVE; link_new->state = SMC_LNK_INACTIVE;
smcr_link_clear(link_new, false); smcr_link_clear(link_new, false);
}
out:
kfree(ini);
if (send_req_add_link_resp)
smc_llc_send_req_add_link_response(req_qentry);
return rc; return rc;
} }
static void smc_llc_process_srv_add_link(struct smc_link_group *lgr) static void smc_llc_process_srv_add_link(struct smc_link_group *lgr)
{ {
struct smc_link *link = lgr->llc_flow_lcl.qentry->link; struct smc_link *link = lgr->llc_flow_lcl.qentry->link;
struct smc_llc_qentry *qentry;
int rc; int rc;
smc_llc_flow_qentry_del(&lgr->llc_flow_lcl); qentry = smc_llc_flow_qentry_clr(&lgr->llc_flow_lcl);
mutex_lock(&lgr->llc_conf_mutex); mutex_lock(&lgr->llc_conf_mutex);
rc = smc_llc_srv_add_link(link); rc = smc_llc_srv_add_link(link, qentry);
if (!rc && lgr->type == SMC_LGR_SYMMETRIC) { if (!rc && lgr->type == SMC_LGR_SYMMETRIC) {
/* delete any asymmetric link */ /* delete any asymmetric link */
smc_llc_delete_asym_link(lgr); smc_llc_delete_asym_link(lgr);
} }
mutex_unlock(&lgr->llc_conf_mutex); mutex_unlock(&lgr->llc_conf_mutex);
kfree(qentry);
} }
/* enqueue a local add_link req to trigger a new add_link flow */ /* enqueue a local add_link req to trigger a new add_link flow */
...@@ -1246,8 +1517,8 @@ void smc_llc_add_link_local(struct smc_link *link) ...@@ -1246,8 +1517,8 @@ void smc_llc_add_link_local(struct smc_link *link)
{ {
struct smc_llc_msg_add_link add_llc = {}; struct smc_llc_msg_add_link add_llc = {};
add_llc.hd.length = sizeof(add_llc); add_llc.hd.common.llc_type = SMC_LLC_ADD_LINK;
add_llc.hd.common.type = SMC_LLC_ADD_LINK; smc_llc_init_msg_hdr(&add_llc.hd, link->lgr, sizeof(add_llc));
/* no dev and port needed */ /* no dev and port needed */
smc_llc_enqueue(link, (union smc_llc_msg *)&add_llc); smc_llc_enqueue(link, (union smc_llc_msg *)&add_llc);
} }
...@@ -1269,6 +1540,7 @@ static void smc_llc_add_link_work(struct work_struct *work) ...@@ -1269,6 +1540,7 @@ static void smc_llc_add_link_work(struct work_struct *work)
else else
smc_llc_process_srv_add_link(lgr); smc_llc_process_srv_add_link(lgr);
out: out:
if (lgr->llc_flow_lcl.type != SMC_LLC_FLOW_REQ_ADD_LINK)
smc_llc_flow_stop(lgr, &lgr->llc_flow_lcl); smc_llc_flow_stop(lgr, &lgr->llc_flow_lcl);
} }
...@@ -1279,8 +1551,8 @@ void smc_llc_srv_delete_link_local(struct smc_link *link, u8 del_link_id) ...@@ -1279,8 +1551,8 @@ void smc_llc_srv_delete_link_local(struct smc_link *link, u8 del_link_id)
{ {
struct smc_llc_msg_del_link del_llc = {}; struct smc_llc_msg_del_link del_llc = {};
del_llc.hd.length = sizeof(del_llc); del_llc.hd.common.llc_type = SMC_LLC_DELETE_LINK;
del_llc.hd.common.type = SMC_LLC_DELETE_LINK; smc_llc_init_msg_hdr(&del_llc.hd, link->lgr, sizeof(del_llc));
del_llc.link_num = del_link_id; del_llc.link_num = del_link_id;
del_llc.reason = htonl(SMC_LLC_DEL_LOST_PATH); del_llc.reason = htonl(SMC_LLC_DEL_LOST_PATH);
del_llc.hd.flags |= SMC_LLC_FLAG_DEL_LINK_ORDERLY; del_llc.hd.flags |= SMC_LLC_FLAG_DEL_LINK_ORDERLY;
...@@ -1350,8 +1622,8 @@ void smc_llc_send_link_delete_all(struct smc_link_group *lgr, bool ord, u32 rsn) ...@@ -1350,8 +1622,8 @@ void smc_llc_send_link_delete_all(struct smc_link_group *lgr, bool ord, u32 rsn)
struct smc_llc_msg_del_link delllc = {}; struct smc_llc_msg_del_link delllc = {};
int i; int i;
delllc.hd.common.type = SMC_LLC_DELETE_LINK; delllc.hd.common.llc_type = SMC_LLC_DELETE_LINK;
delllc.hd.length = sizeof(delllc); smc_llc_init_msg_hdr(&delllc.hd, lgr, sizeof(delllc));
if (ord) if (ord)
delllc.hd.flags |= SMC_LLC_FLAG_DEL_LINK_ORDERLY; delllc.hd.flags |= SMC_LLC_FLAG_DEL_LINK_ORDERLY;
delllc.hd.flags |= SMC_LLC_FLAG_DEL_LINK_ALL; delllc.hd.flags |= SMC_LLC_FLAG_DEL_LINK_ALL;
...@@ -1467,6 +1739,8 @@ static void smc_llc_rmt_conf_rkey(struct smc_link_group *lgr) ...@@ -1467,6 +1739,8 @@ static void smc_llc_rmt_conf_rkey(struct smc_link_group *lgr)
link = qentry->link; link = qentry->link;
num_entries = llc->rtoken[0].num_rkeys; num_entries = llc->rtoken[0].num_rkeys;
if (num_entries > SMC_LLC_RKEYS_PER_MSG)
goto out_err;
/* first rkey entry is for receiving link */ /* first rkey entry is for receiving link */
rk_idx = smc_rtoken_add(link, rk_idx = smc_rtoken_add(link,
llc->rtoken[0].rmb_vaddr, llc->rtoken[0].rmb_vaddr,
...@@ -1485,6 +1759,7 @@ static void smc_llc_rmt_conf_rkey(struct smc_link_group *lgr) ...@@ -1485,6 +1759,7 @@ static void smc_llc_rmt_conf_rkey(struct smc_link_group *lgr)
llc->hd.flags |= SMC_LLC_FLAG_RKEY_RETRY; llc->hd.flags |= SMC_LLC_FLAG_RKEY_RETRY;
out: out:
llc->hd.flags |= SMC_LLC_FLAG_RESP; llc->hd.flags |= SMC_LLC_FLAG_RESP;
smc_llc_init_msg_hdr(&llc->hd, link->lgr, sizeof(*llc));
smc_llc_send_message(link, &qentry->msg); smc_llc_send_message(link, &qentry->msg);
smc_llc_flow_qentry_del(&lgr->llc_flow_rmt); smc_llc_flow_qentry_del(&lgr->llc_flow_rmt);
} }
...@@ -1502,6 +1777,28 @@ static void smc_llc_rmt_delete_rkey(struct smc_link_group *lgr) ...@@ -1502,6 +1777,28 @@ static void smc_llc_rmt_delete_rkey(struct smc_link_group *lgr)
llc = &qentry->msg.delete_rkey; llc = &qentry->msg.delete_rkey;
link = qentry->link; link = qentry->link;
if (lgr->smc_version == SMC_V2) {
struct smc_llc_msg_delete_rkey_v2 *llcv2;
memcpy(lgr->wr_rx_buf_v2, llc, sizeof(*llc));
llcv2 = (struct smc_llc_msg_delete_rkey_v2 *)lgr->wr_rx_buf_v2;
llcv2->num_inval_rkeys = 0;
max = min_t(u8, llcv2->num_rkeys, SMC_LLC_RKEYS_PER_MSG_V2);
for (i = 0; i < max; i++) {
if (smc_rtoken_delete(link, llcv2->rkey[i]))
llcv2->num_inval_rkeys++;
}
memset(&llc->rkey[0], 0, sizeof(llc->rkey));
memset(&llc->reserved2, 0, sizeof(llc->reserved2));
smc_llc_init_msg_hdr(&llc->hd, link->lgr, sizeof(*llc));
if (llcv2->num_inval_rkeys) {
llc->hd.flags |= SMC_LLC_FLAG_RKEY_NEG;
llc->err_mask = llcv2->num_inval_rkeys;
}
goto finish;
}
max = min_t(u8, llc->num_rkeys, SMC_LLC_DEL_RKEY_MAX); max = min_t(u8, llc->num_rkeys, SMC_LLC_DEL_RKEY_MAX);
for (i = 0; i < max; i++) { for (i = 0; i < max; i++) {
if (smc_rtoken_delete(link, llc->rkey[i])) if (smc_rtoken_delete(link, llc->rkey[i]))
...@@ -1511,6 +1808,7 @@ static void smc_llc_rmt_delete_rkey(struct smc_link_group *lgr) ...@@ -1511,6 +1808,7 @@ static void smc_llc_rmt_delete_rkey(struct smc_link_group *lgr)
llc->hd.flags |= SMC_LLC_FLAG_RKEY_NEG; llc->hd.flags |= SMC_LLC_FLAG_RKEY_NEG;
llc->err_mask = err_mask; llc->err_mask = err_mask;
} }
finish:
llc->hd.flags |= SMC_LLC_FLAG_RESP; llc->hd.flags |= SMC_LLC_FLAG_RESP;
smc_llc_send_message(link, &qentry->msg); smc_llc_send_message(link, &qentry->msg);
smc_llc_flow_qentry_del(&lgr->llc_flow_rmt); smc_llc_flow_qentry_del(&lgr->llc_flow_rmt);
...@@ -1546,7 +1844,7 @@ static void smc_llc_event_handler(struct smc_llc_qentry *qentry) ...@@ -1546,7 +1844,7 @@ static void smc_llc_event_handler(struct smc_llc_qentry *qentry)
if (!smc_link_usable(link)) if (!smc_link_usable(link))
goto out; goto out;
switch (llc->raw.hdr.common.type) { switch (llc->raw.hdr.common.llc_type) {
case SMC_LLC_TEST_LINK: case SMC_LLC_TEST_LINK:
llc->test_link.hd.flags |= SMC_LLC_FLAG_RESP; llc->test_link.hd.flags |= SMC_LLC_FLAG_RESP;
smc_llc_send_message(link, llc); smc_llc_send_message(link, llc);
...@@ -1571,8 +1869,18 @@ static void smc_llc_event_handler(struct smc_llc_qentry *qentry) ...@@ -1571,8 +1869,18 @@ static void smc_llc_event_handler(struct smc_llc_qentry *qentry)
smc_llc_flow_qentry_set(&lgr->llc_flow_lcl, smc_llc_flow_qentry_set(&lgr->llc_flow_lcl,
qentry); qentry);
wake_up(&lgr->llc_msg_waiter); wake_up(&lgr->llc_msg_waiter);
} else if (smc_llc_flow_start(&lgr->llc_flow_lcl, return;
qentry)) { }
if (lgr->llc_flow_lcl.type ==
SMC_LLC_FLOW_REQ_ADD_LINK) {
/* server started add_link processing */
lgr->llc_flow_lcl.type = SMC_LLC_FLOW_ADD_LINK;
smc_llc_flow_qentry_set(&lgr->llc_flow_lcl,
qentry);
schedule_work(&lgr->llc_add_link_work);
return;
}
if (smc_llc_flow_start(&lgr->llc_flow_lcl, qentry)) {
schedule_work(&lgr->llc_add_link_work); schedule_work(&lgr->llc_add_link_work);
} }
} else if (smc_llc_flow_start(&lgr->llc_flow_lcl, qentry)) { } else if (smc_llc_flow_start(&lgr->llc_flow_lcl, qentry)) {
...@@ -1620,6 +1928,23 @@ static void smc_llc_event_handler(struct smc_llc_qentry *qentry) ...@@ -1620,6 +1928,23 @@ static void smc_llc_event_handler(struct smc_llc_qentry *qentry)
smc_llc_flow_stop(lgr, &lgr->llc_flow_rmt); smc_llc_flow_stop(lgr, &lgr->llc_flow_rmt);
} }
return; return;
case SMC_LLC_REQ_ADD_LINK:
/* handle response here, smc_llc_flow_stop() cannot be called
* in tasklet context
*/
if (lgr->role == SMC_CLNT &&
lgr->llc_flow_lcl.type == SMC_LLC_FLOW_REQ_ADD_LINK &&
(llc->raw.hdr.flags & SMC_LLC_FLAG_RESP)) {
smc_llc_flow_stop(link->lgr, &lgr->llc_flow_lcl);
} else if (lgr->role == SMC_SERV) {
if (smc_llc_flow_start(&lgr->llc_flow_lcl, qentry)) {
/* as smc server, handle client suggestion */
lgr->llc_flow_lcl.type = SMC_LLC_FLOW_ADD_LINK;
schedule_work(&lgr->llc_add_link_work);
}
return;
}
break;
default: default:
smc_llc_protocol_violation(lgr, llc->raw.hdr.common.type); smc_llc_protocol_violation(lgr, llc->raw.hdr.common.type);
break; break;
...@@ -1663,7 +1988,7 @@ static void smc_llc_rx_response(struct smc_link *link, ...@@ -1663,7 +1988,7 @@ static void smc_llc_rx_response(struct smc_link *link,
{ {
enum smc_llc_flowtype flowtype = link->lgr->llc_flow_lcl.type; enum smc_llc_flowtype flowtype = link->lgr->llc_flow_lcl.type;
struct smc_llc_flow *flow = &link->lgr->llc_flow_lcl; struct smc_llc_flow *flow = &link->lgr->llc_flow_lcl;
u8 llc_type = qentry->msg.raw.hdr.common.type; u8 llc_type = qentry->msg.raw.hdr.common.llc_type;
switch (llc_type) { switch (llc_type) {
case SMC_LLC_TEST_LINK: case SMC_LLC_TEST_LINK:
...@@ -1689,7 +2014,8 @@ static void smc_llc_rx_response(struct smc_link *link, ...@@ -1689,7 +2014,8 @@ static void smc_llc_rx_response(struct smc_link *link,
/* not used because max links is 3 */ /* not used because max links is 3 */
break; break;
default: default:
smc_llc_protocol_violation(link->lgr, llc_type); smc_llc_protocol_violation(link->lgr,
qentry->msg.raw.hdr.common.type);
break; break;
} }
kfree(qentry); kfree(qentry);
...@@ -1714,7 +2040,8 @@ static void smc_llc_enqueue(struct smc_link *link, union smc_llc_msg *llc) ...@@ -1714,7 +2040,8 @@ static void smc_llc_enqueue(struct smc_link *link, union smc_llc_msg *llc)
memcpy(&qentry->msg, llc, sizeof(union smc_llc_msg)); memcpy(&qentry->msg, llc, sizeof(union smc_llc_msg));
/* process responses immediately */ /* process responses immediately */
if (llc->raw.hdr.flags & SMC_LLC_FLAG_RESP) { if ((llc->raw.hdr.flags & SMC_LLC_FLAG_RESP) &&
llc->raw.hdr.common.llc_type != SMC_LLC_REQ_ADD_LINK) {
smc_llc_rx_response(link, qentry); smc_llc_rx_response(link, qentry);
return; return;
} }
...@@ -1734,8 +2061,13 @@ static void smc_llc_rx_handler(struct ib_wc *wc, void *buf) ...@@ -1734,8 +2061,13 @@ static void smc_llc_rx_handler(struct ib_wc *wc, void *buf)
if (wc->byte_len < sizeof(*llc)) if (wc->byte_len < sizeof(*llc))
return; /* short message */ return; /* short message */
if (!llc->raw.hdr.common.llc_version) {
if (llc->raw.hdr.length != sizeof(*llc)) if (llc->raw.hdr.length != sizeof(*llc))
return; /* invalid message */ return; /* invalid message */
} else {
if (llc->raw.hdr.length_v2 < sizeof(*llc))
return; /* invalid message */
}
smc_llc_enqueue(link, llc); smc_llc_enqueue(link, llc);
} }
...@@ -1954,6 +2286,35 @@ static struct smc_wr_rx_handler smc_llc_rx_handlers[] = { ...@@ -1954,6 +2286,35 @@ static struct smc_wr_rx_handler smc_llc_rx_handlers[] = {
.handler = smc_llc_rx_handler, .handler = smc_llc_rx_handler,
.type = SMC_LLC_DELETE_RKEY .type = SMC_LLC_DELETE_RKEY
}, },
/* V2 types */
{
.handler = smc_llc_rx_handler,
.type = SMC_LLC_CONFIRM_LINK_V2
},
{
.handler = smc_llc_rx_handler,
.type = SMC_LLC_TEST_LINK_V2
},
{
.handler = smc_llc_rx_handler,
.type = SMC_LLC_ADD_LINK_V2
},
{
.handler = smc_llc_rx_handler,
.type = SMC_LLC_DELETE_LINK_V2
},
{
.handler = smc_llc_rx_handler,
.type = SMC_LLC_REQ_ADD_LINK_V2
},
{
.handler = smc_llc_rx_handler,
.type = SMC_LLC_CONFIRM_RKEY_V2
},
{
.handler = smc_llc_rx_handler,
.type = SMC_LLC_DELETE_RKEY_V2
},
{ {
.handler = NULL, .handler = NULL,
} }
......
...@@ -30,10 +30,19 @@ enum smc_llc_msg_type { ...@@ -30,10 +30,19 @@ enum smc_llc_msg_type {
SMC_LLC_ADD_LINK = 0x02, SMC_LLC_ADD_LINK = 0x02,
SMC_LLC_ADD_LINK_CONT = 0x03, SMC_LLC_ADD_LINK_CONT = 0x03,
SMC_LLC_DELETE_LINK = 0x04, SMC_LLC_DELETE_LINK = 0x04,
SMC_LLC_REQ_ADD_LINK = 0x05,
SMC_LLC_CONFIRM_RKEY = 0x06, SMC_LLC_CONFIRM_RKEY = 0x06,
SMC_LLC_TEST_LINK = 0x07, SMC_LLC_TEST_LINK = 0x07,
SMC_LLC_CONFIRM_RKEY_CONT = 0x08, SMC_LLC_CONFIRM_RKEY_CONT = 0x08,
SMC_LLC_DELETE_RKEY = 0x09, SMC_LLC_DELETE_RKEY = 0x09,
/* V2 types */
SMC_LLC_CONFIRM_LINK_V2 = 0x21,
SMC_LLC_ADD_LINK_V2 = 0x22,
SMC_LLC_DELETE_LINK_V2 = 0x24,
SMC_LLC_REQ_ADD_LINK_V2 = 0x25,
SMC_LLC_CONFIRM_RKEY_V2 = 0x26,
SMC_LLC_TEST_LINK_V2 = 0x27,
SMC_LLC_DELETE_RKEY_V2 = 0x29,
}; };
#define smc_link_downing(state) \ #define smc_link_downing(state) \
...@@ -102,7 +111,8 @@ void smc_llc_flow_qentry_del(struct smc_llc_flow *flow); ...@@ -102,7 +111,8 @@ void smc_llc_flow_qentry_del(struct smc_llc_flow *flow);
void smc_llc_send_link_delete_all(struct smc_link_group *lgr, bool ord, void smc_llc_send_link_delete_all(struct smc_link_group *lgr, bool ord,
u32 rsn); u32 rsn);
int smc_llc_cli_add_link(struct smc_link *link, struct smc_llc_qentry *qentry); int smc_llc_cli_add_link(struct smc_link *link, struct smc_llc_qentry *qentry);
int smc_llc_srv_add_link(struct smc_link *link); int smc_llc_srv_add_link(struct smc_link *link,
struct smc_llc_qentry *req_qentry);
void smc_llc_add_link_local(struct smc_link *link); void smc_llc_add_link_local(struct smc_link *link);
int smc_llc_init(void) __init; int smc_llc_init(void) __init;
......
...@@ -953,6 +953,26 @@ static int smc_pnet_find_ndev_pnetid_by_table(struct net_device *ndev, ...@@ -953,6 +953,26 @@ static int smc_pnet_find_ndev_pnetid_by_table(struct net_device *ndev,
return rc; return rc;
} }
/* Look up the GID for port @i of @ibdev and, if smc_ib_determine_gid()
 * returns 0, remember device and port in @ini.
 *
 * With ini->check_smcrv2 set, the GID is stored in ini->smcrv2.ib_gid_v2 and
 * device/port in ini->smcrv2.ib_dev_v2 / ib_port_v2. Otherwise the GID goes to
 * ini->ib_gid and device/port to ini->ib_dev / ini->ib_port.
 *
 * Returns 0 when a GID was determined, -ENODEV otherwise.
 */
static int smc_pnet_determine_gid(struct smc_ib_device *ibdev, int i,
				  struct smc_init_info *ini)
{
	if (ini->check_smcrv2) {
		/* SMC-Rv2 lookup, results are kept in the smcrv2 sub-struct */
		if (smc_ib_determine_gid(ibdev, i, ini->vlan_id,
					 ini->smcrv2.ib_gid_v2, NULL,
					 &ini->smcrv2))
			return -ENODEV;
		ini->smcrv2.ib_dev_v2 = ibdev;
		ini->smcrv2.ib_port_v2 = i;
		return 0;
	}

	/* lookup without SMC-Rv2 info */
	if (smc_ib_determine_gid(ibdev, i, ini->vlan_id, ini->ib_gid, NULL,
				 NULL))
		return -ENODEV;
	ini->ib_dev = ibdev;
	ini->ib_port = i;
	return 0;
}
/* find a roce device for the given pnetid */ /* find a roce device for the given pnetid */
static void _smc_pnet_find_roce_by_pnetid(u8 *pnet_id, static void _smc_pnet_find_roce_by_pnetid(u8 *pnet_id,
struct smc_init_info *ini, struct smc_init_info *ini,
...@@ -961,7 +981,6 @@ static void _smc_pnet_find_roce_by_pnetid(u8 *pnet_id, ...@@ -961,7 +981,6 @@ static void _smc_pnet_find_roce_by_pnetid(u8 *pnet_id,
struct smc_ib_device *ibdev; struct smc_ib_device *ibdev;
int i; int i;
ini->ib_dev = NULL;
mutex_lock(&smc_ib_devices.mutex); mutex_lock(&smc_ib_devices.mutex);
list_for_each_entry(ibdev, &smc_ib_devices.list, list) { list_for_each_entry(ibdev, &smc_ib_devices.list, list) {
if (ibdev == known_dev) if (ibdev == known_dev)
...@@ -971,11 +990,8 @@ static void _smc_pnet_find_roce_by_pnetid(u8 *pnet_id, ...@@ -971,11 +990,8 @@ static void _smc_pnet_find_roce_by_pnetid(u8 *pnet_id,
continue; continue;
if (smc_pnet_match(ibdev->pnetid[i - 1], pnet_id) && if (smc_pnet_match(ibdev->pnetid[i - 1], pnet_id) &&
smc_ib_port_active(ibdev, i) && smc_ib_port_active(ibdev, i) &&
!test_bit(i - 1, ibdev->ports_going_away) && !test_bit(i - 1, ibdev->ports_going_away)) {
!smc_ib_determine_gid(ibdev, i, ini->vlan_id, if (!smc_pnet_determine_gid(ibdev, i, ini))
ini->ib_gid, NULL)) {
ini->ib_dev = ibdev;
ini->ib_port = i;
goto out; goto out;
} }
} }
...@@ -1016,11 +1032,8 @@ static void smc_pnet_find_rdma_dev(struct net_device *netdev, ...@@ -1016,11 +1032,8 @@ static void smc_pnet_find_rdma_dev(struct net_device *netdev,
dev_put(ndev); dev_put(ndev);
if (netdev == ndev && if (netdev == ndev &&
smc_ib_port_active(ibdev, i) && smc_ib_port_active(ibdev, i) &&
!test_bit(i - 1, ibdev->ports_going_away) && !test_bit(i - 1, ibdev->ports_going_away)) {
!smc_ib_determine_gid(ibdev, i, ini->vlan_id, if (!smc_pnet_determine_gid(ibdev, i, ini))
ini->ib_gid, NULL)) {
ini->ib_dev = ibdev;
ini->ib_port = i;
break; break;
} }
} }
...@@ -1083,8 +1096,6 @@ void smc_pnet_find_roce_resource(struct sock *sk, struct smc_init_info *ini) ...@@ -1083,8 +1096,6 @@ void smc_pnet_find_roce_resource(struct sock *sk, struct smc_init_info *ini)
{ {
struct dst_entry *dst = sk_dst_get(sk); struct dst_entry *dst = sk_dst_get(sk);
ini->ib_dev = NULL;
ini->ib_port = 0;
if (!dst) if (!dst)
goto out; goto out;
if (!dst->dev) if (!dst->dev)
......
...@@ -101,12 +101,23 @@ static inline void smc_wr_tx_process_cqe(struct ib_wc *wc) ...@@ -101,12 +101,23 @@ static inline void smc_wr_tx_process_cqe(struct ib_wc *wc)
} }
pnd_snd_idx = smc_wr_tx_find_pending_index(link, wc->wr_id); pnd_snd_idx = smc_wr_tx_find_pending_index(link, wc->wr_id);
if (pnd_snd_idx == link->wr_tx_cnt) if (pnd_snd_idx == link->wr_tx_cnt) {
if (link->lgr->smc_version != SMC_V2 ||
link->wr_tx_v2_pend->wr_id != wc->wr_id)
return; return;
link->wr_tx_v2_pend->wc_status = wc->status;
memcpy(&pnd_snd, link->wr_tx_v2_pend, sizeof(pnd_snd));
/* clear the full struct smc_wr_tx_pend including .priv */
memset(link->wr_tx_v2_pend, 0,
sizeof(*link->wr_tx_v2_pend));
memset(link->lgr->wr_tx_buf_v2, 0,
sizeof(*link->lgr->wr_tx_buf_v2));
} else {
link->wr_tx_pends[pnd_snd_idx].wc_status = wc->status; link->wr_tx_pends[pnd_snd_idx].wc_status = wc->status;
if (link->wr_tx_pends[pnd_snd_idx].compl_requested) if (link->wr_tx_pends[pnd_snd_idx].compl_requested)
complete(&link->wr_tx_compl[pnd_snd_idx]); complete(&link->wr_tx_compl[pnd_snd_idx]);
memcpy(&pnd_snd, &link->wr_tx_pends[pnd_snd_idx], sizeof(pnd_snd)); memcpy(&pnd_snd, &link->wr_tx_pends[pnd_snd_idx],
sizeof(pnd_snd));
/* clear the full struct smc_wr_tx_pend including .priv */ /* clear the full struct smc_wr_tx_pend including .priv */
memset(&link->wr_tx_pends[pnd_snd_idx], 0, memset(&link->wr_tx_pends[pnd_snd_idx], 0,
sizeof(link->wr_tx_pends[pnd_snd_idx])); sizeof(link->wr_tx_pends[pnd_snd_idx]));
...@@ -114,6 +125,8 @@ static inline void smc_wr_tx_process_cqe(struct ib_wc *wc) ...@@ -114,6 +125,8 @@ static inline void smc_wr_tx_process_cqe(struct ib_wc *wc)
sizeof(link->wr_tx_bufs[pnd_snd_idx])); sizeof(link->wr_tx_bufs[pnd_snd_idx]));
if (!test_and_clear_bit(pnd_snd_idx, link->wr_tx_mask)) if (!test_and_clear_bit(pnd_snd_idx, link->wr_tx_mask))
return; return;
}
if (wc->status) { if (wc->status) {
for_each_set_bit(i, link->wr_tx_mask, link->wr_tx_cnt) { for_each_set_bit(i, link->wr_tx_mask, link->wr_tx_cnt) {
/* clear full struct smc_wr_tx_pend including .priv */ /* clear full struct smc_wr_tx_pend including .priv */
...@@ -123,6 +136,12 @@ static inline void smc_wr_tx_process_cqe(struct ib_wc *wc) ...@@ -123,6 +136,12 @@ static inline void smc_wr_tx_process_cqe(struct ib_wc *wc)
sizeof(link->wr_tx_bufs[i])); sizeof(link->wr_tx_bufs[i]));
clear_bit(i, link->wr_tx_mask); clear_bit(i, link->wr_tx_mask);
} }
if (link->lgr->smc_version == SMC_V2) {
memset(link->wr_tx_v2_pend, 0,
sizeof(*link->wr_tx_v2_pend));
memset(link->lgr->wr_tx_buf_v2, 0,
sizeof(*link->lgr->wr_tx_buf_v2));
}
/* terminate link */ /* terminate link */
smcr_link_down_cond_sched(link); smcr_link_down_cond_sched(link);
} }
...@@ -239,6 +258,33 @@ int smc_wr_tx_get_free_slot(struct smc_link *link, ...@@ -239,6 +258,33 @@ int smc_wr_tx_get_free_slot(struct smc_link *link,
return 0; return 0;
} }
/* Reserve the link's v2 send slot.
 * @link:		link whose v2 slot is requested
 * @handler:		send completion handler stored in the pending entry
 * @wr_buf:		out: set to the link group's v2 send buffer
 * @wr_pend_priv:	out: set to the private area of the pending entry
 *
 * The slot is marked as taken by storing link->wr_tx_cnt in the pending
 * entry's idx field; if idx already holds that value, -EBUSY is returned and
 * the out parameters are left untouched.
 *
 * Returns 0 on success, -EBUSY if the slot is already in use.
 */
int smc_wr_tx_get_v2_slot(struct smc_link *link,
			  smc_wr_tx_handler handler,
			  struct smc_wr_v2_buf **wr_buf,
			  struct smc_wr_tx_pend_priv **wr_pend_priv)
{
	struct smc_wr_tx_pend *v2_pend;
	u64 next_id;

	if (link->wr_tx_v2_pend->idx == link->wr_tx_cnt)
		return -EBUSY;

	*wr_buf = NULL;
	*wr_pend_priv = NULL;

	next_id = smc_wr_tx_get_next_wr_id(link);

	/* fill the pending entry; idx == wr_tx_cnt flags the slot as taken */
	v2_pend = link->wr_tx_v2_pend;
	v2_pend->wr_id = next_id;
	v2_pend->handler = handler;
	v2_pend->link = link;
	v2_pend->idx = link->wr_tx_cnt;

	/* same id in the send work request */
	link->wr_tx_v2_ib->wr_id = next_id;

	*wr_buf = link->lgr->wr_tx_buf_v2;
	*wr_pend_priv = &v2_pend->priv;
	return 0;
}
int smc_wr_tx_put_slot(struct smc_link *link, int smc_wr_tx_put_slot(struct smc_link *link,
struct smc_wr_tx_pend_priv *wr_pend_priv) struct smc_wr_tx_pend_priv *wr_pend_priv)
{ {
...@@ -256,6 +302,14 @@ int smc_wr_tx_put_slot(struct smc_link *link, ...@@ -256,6 +302,14 @@ int smc_wr_tx_put_slot(struct smc_link *link,
test_and_clear_bit(idx, link->wr_tx_mask); test_and_clear_bit(idx, link->wr_tx_mask);
wake_up(&link->wr_tx_wait); wake_up(&link->wr_tx_wait);
return 1; return 1;
} else if (link->lgr->smc_version == SMC_V2 &&
pend->idx == link->wr_tx_cnt) {
/* Large v2 buffer */
memset(&link->wr_tx_v2_pend, 0,
sizeof(link->wr_tx_v2_pend));
memset(&link->lgr->wr_tx_buf_v2, 0,
sizeof(link->lgr->wr_tx_buf_v2));
return 1;
} }
return 0; return 0;
...@@ -280,6 +334,22 @@ int smc_wr_tx_send(struct smc_link *link, struct smc_wr_tx_pend_priv *priv) ...@@ -280,6 +334,22 @@ int smc_wr_tx_send(struct smc_link *link, struct smc_wr_tx_pend_priv *priv)
return rc; return rc;
} }
/* Post the link's v2 send work request.
 * @link:	link to send on
 * @priv:	pending-entry private area, handed to smc_wr_tx_put_slot() if
 *		posting fails
 * @len:	length stored in the first sge of the v2 work request
 *
 * If ib_post_send() fails, the slot is given back and
 * smcr_link_down_cond_sched() is called for the link.
 *
 * Returns the return code of ib_post_send().
 */
int smc_wr_tx_v2_send(struct smc_link *link, struct smc_wr_tx_pend_priv *priv,
		      int len)
{
	int rc;

	link->wr_tx_v2_ib->sg_list[0].length = len;
	ib_req_notify_cq(link->smcibdev->roce_cq_send,
			 IB_CQ_NEXT_COMP | IB_CQ_REPORT_MISSED_EVENTS);
	rc = ib_post_send(link->roce_qp, link->wr_tx_v2_ib, NULL);
	if (!rc)
		return 0;

	/* posting failed: release the slot and bring the link down */
	smc_wr_tx_put_slot(link, priv);
	smcr_link_down_cond_sched(link);
	return rc;
}
/* Send prepared WR slot via ib_post_send and wait for send completion /* Send prepared WR slot via ib_post_send and wait for send completion
* notification. * notification.
* @priv: pointer to smc_wr_tx_pend_priv identifying prepared message buffer * @priv: pointer to smc_wr_tx_pend_priv identifying prepared message buffer
...@@ -517,6 +587,7 @@ void smc_wr_remember_qp_attr(struct smc_link *lnk) ...@@ -517,6 +587,7 @@ void smc_wr_remember_qp_attr(struct smc_link *lnk)
static void smc_wr_init_sge(struct smc_link *lnk) static void smc_wr_init_sge(struct smc_link *lnk)
{ {
int sges_per_buf = (lnk->lgr->smc_version == SMC_V2) ? 2 : 1;
u32 i; u32 i;
for (i = 0; i < lnk->wr_tx_cnt; i++) { for (i = 0; i < lnk->wr_tx_cnt; i++) {
...@@ -545,14 +616,44 @@ static void smc_wr_init_sge(struct smc_link *lnk) ...@@ -545,14 +616,44 @@ static void smc_wr_init_sge(struct smc_link *lnk)
lnk->wr_tx_rdmas[i].wr_tx_rdma[1].wr.sg_list = lnk->wr_tx_rdmas[i].wr_tx_rdma[1].wr.sg_list =
lnk->wr_tx_rdma_sges[i].tx_rdma_sge[1].wr_tx_rdma_sge; lnk->wr_tx_rdma_sges[i].tx_rdma_sge[1].wr_tx_rdma_sge;
} }
if (lnk->lgr->smc_version == SMC_V2) {
lnk->wr_tx_v2_sge->addr = lnk->wr_tx_v2_dma_addr;
lnk->wr_tx_v2_sge->length = SMC_WR_BUF_V2_SIZE;
lnk->wr_tx_v2_sge->lkey = lnk->roce_pd->local_dma_lkey;
lnk->wr_tx_v2_ib->next = NULL;
lnk->wr_tx_v2_ib->sg_list = lnk->wr_tx_v2_sge;
lnk->wr_tx_v2_ib->num_sge = 1;
lnk->wr_tx_v2_ib->opcode = IB_WR_SEND;
lnk->wr_tx_v2_ib->send_flags =
IB_SEND_SIGNALED | IB_SEND_SOLICITED;
}
/* With SMC-Rv2 there can be messages larger than SMC_WR_TX_SIZE.
* Each ib_recv_wr gets 2 sges, the second one is a spillover buffer
* and the same buffer for all sges. When a larger message arrived then
* the content of the first small sge is copied to the beginning of
* the larger spillover buffer, allowing easy data mapping.
*/
for (i = 0; i < lnk->wr_rx_cnt; i++) { for (i = 0; i < lnk->wr_rx_cnt; i++) {
lnk->wr_rx_sges[i].addr = int x = i * sges_per_buf;
lnk->wr_rx_sges[x].addr =
lnk->wr_rx_dma_addr + i * SMC_WR_BUF_SIZE; lnk->wr_rx_dma_addr + i * SMC_WR_BUF_SIZE;
lnk->wr_rx_sges[i].length = SMC_WR_BUF_SIZE; lnk->wr_rx_sges[x].length = SMC_WR_TX_SIZE;
lnk->wr_rx_sges[i].lkey = lnk->roce_pd->local_dma_lkey; lnk->wr_rx_sges[x].lkey = lnk->roce_pd->local_dma_lkey;
if (lnk->lgr->smc_version == SMC_V2) {
lnk->wr_rx_sges[x + 1].addr =
lnk->wr_rx_v2_dma_addr + SMC_WR_TX_SIZE;
lnk->wr_rx_sges[x + 1].length =
SMC_WR_BUF_V2_SIZE - SMC_WR_TX_SIZE;
lnk->wr_rx_sges[x + 1].lkey =
lnk->roce_pd->local_dma_lkey;
}
lnk->wr_rx_ibs[i].next = NULL; lnk->wr_rx_ibs[i].next = NULL;
lnk->wr_rx_ibs[i].sg_list = &lnk->wr_rx_sges[i]; lnk->wr_rx_ibs[i].sg_list = &lnk->wr_rx_sges[x];
lnk->wr_rx_ibs[i].num_sge = 1; lnk->wr_rx_ibs[i].num_sge = sges_per_buf;
} }
lnk->wr_reg.wr.next = NULL; lnk->wr_reg.wr.next = NULL;
lnk->wr_reg.wr.num_sge = 0; lnk->wr_reg.wr.num_sge = 0;
...@@ -585,16 +686,45 @@ void smc_wr_free_link(struct smc_link *lnk) ...@@ -585,16 +686,45 @@ void smc_wr_free_link(struct smc_link *lnk)
DMA_FROM_DEVICE); DMA_FROM_DEVICE);
lnk->wr_rx_dma_addr = 0; lnk->wr_rx_dma_addr = 0;
} }
if (lnk->wr_rx_v2_dma_addr) {
ib_dma_unmap_single(ibdev, lnk->wr_rx_v2_dma_addr,
SMC_WR_BUF_V2_SIZE,
DMA_FROM_DEVICE);
lnk->wr_rx_v2_dma_addr = 0;
}
if (lnk->wr_tx_dma_addr) { if (lnk->wr_tx_dma_addr) {
ib_dma_unmap_single(ibdev, lnk->wr_tx_dma_addr, ib_dma_unmap_single(ibdev, lnk->wr_tx_dma_addr,
SMC_WR_BUF_SIZE * lnk->wr_tx_cnt, SMC_WR_BUF_SIZE * lnk->wr_tx_cnt,
DMA_TO_DEVICE); DMA_TO_DEVICE);
lnk->wr_tx_dma_addr = 0; lnk->wr_tx_dma_addr = 0;
} }
if (lnk->wr_tx_v2_dma_addr) {
ib_dma_unmap_single(ibdev, lnk->wr_tx_v2_dma_addr,
SMC_WR_BUF_V2_SIZE,
DMA_TO_DEVICE);
lnk->wr_tx_v2_dma_addr = 0;
}
}
/* Free the link group's v2 receive and send buffers and reset the pointers.
 * Does nothing for link groups with smc_version below SMC_V2.
 */
void smc_wr_free_lgr_mem(struct smc_link_group *lgr)
{
	if (lgr->smc_version >= SMC_V2) {
		kfree(lgr->wr_rx_buf_v2);
		lgr->wr_rx_buf_v2 = NULL;
		kfree(lgr->wr_tx_buf_v2);
		lgr->wr_tx_buf_v2 = NULL;
	}
}
void smc_wr_free_link_mem(struct smc_link *lnk) void smc_wr_free_link_mem(struct smc_link *lnk)
{ {
kfree(lnk->wr_tx_v2_ib);
lnk->wr_tx_v2_ib = NULL;
kfree(lnk->wr_tx_v2_sge);
lnk->wr_tx_v2_sge = NULL;
kfree(lnk->wr_tx_v2_pend);
lnk->wr_tx_v2_pend = NULL;
kfree(lnk->wr_tx_compl); kfree(lnk->wr_tx_compl);
lnk->wr_tx_compl = NULL; lnk->wr_tx_compl = NULL;
kfree(lnk->wr_tx_pends); kfree(lnk->wr_tx_pends);
...@@ -619,8 +749,26 @@ void smc_wr_free_link_mem(struct smc_link *lnk) ...@@ -619,8 +749,26 @@ void smc_wr_free_link_mem(struct smc_link *lnk)
lnk->wr_rx_bufs = NULL; lnk->wr_rx_bufs = NULL;
} }
int smc_wr_alloc_lgr_mem(struct smc_link_group *lgr)
{
if (lgr->smc_version < SMC_V2)
return 0;
lgr->wr_rx_buf_v2 = kzalloc(SMC_WR_BUF_V2_SIZE, GFP_KERNEL);
if (!lgr->wr_rx_buf_v2)
return -ENOMEM;
lgr->wr_tx_buf_v2 = kzalloc(SMC_WR_BUF_V2_SIZE, GFP_KERNEL);
if (!lgr->wr_tx_buf_v2) {
kfree(lgr->wr_rx_buf_v2);
return -ENOMEM;
}
return 0;
}
int smc_wr_alloc_link_mem(struct smc_link *link) int smc_wr_alloc_link_mem(struct smc_link *link)
{ {
int sges_per_buf = link->lgr->smc_version == SMC_V2 ? 2 : 1;
/* allocate link related memory */ /* allocate link related memory */
link->wr_tx_bufs = kcalloc(SMC_WR_BUF_CNT, SMC_WR_BUF_SIZE, GFP_KERNEL); link->wr_tx_bufs = kcalloc(SMC_WR_BUF_CNT, SMC_WR_BUF_SIZE, GFP_KERNEL);
if (!link->wr_tx_bufs) if (!link->wr_tx_bufs)
...@@ -653,7 +801,7 @@ int smc_wr_alloc_link_mem(struct smc_link *link) ...@@ -653,7 +801,7 @@ int smc_wr_alloc_link_mem(struct smc_link *link)
if (!link->wr_tx_sges) if (!link->wr_tx_sges)
goto no_mem_wr_tx_rdma_sges; goto no_mem_wr_tx_rdma_sges;
link->wr_rx_sges = kcalloc(SMC_WR_BUF_CNT * 3, link->wr_rx_sges = kcalloc(SMC_WR_BUF_CNT * 3,
sizeof(link->wr_rx_sges[0]), sizeof(link->wr_rx_sges[0]) * sges_per_buf,
GFP_KERNEL); GFP_KERNEL);
if (!link->wr_rx_sges) if (!link->wr_rx_sges)
goto no_mem_wr_tx_sges; goto no_mem_wr_tx_sges;
...@@ -672,8 +820,29 @@ int smc_wr_alloc_link_mem(struct smc_link *link) ...@@ -672,8 +820,29 @@ int smc_wr_alloc_link_mem(struct smc_link *link)
GFP_KERNEL); GFP_KERNEL);
if (!link->wr_tx_compl) if (!link->wr_tx_compl)
goto no_mem_wr_tx_pends; goto no_mem_wr_tx_pends;
if (link->lgr->smc_version == SMC_V2) {
link->wr_tx_v2_ib = kzalloc(sizeof(*link->wr_tx_v2_ib),
GFP_KERNEL);
if (!link->wr_tx_v2_ib)
goto no_mem_tx_compl;
link->wr_tx_v2_sge = kzalloc(sizeof(*link->wr_tx_v2_sge),
GFP_KERNEL);
if (!link->wr_tx_v2_sge)
goto no_mem_v2_ib;
link->wr_tx_v2_pend = kzalloc(sizeof(*link->wr_tx_v2_pend),
GFP_KERNEL);
if (!link->wr_tx_v2_pend)
goto no_mem_v2_sge;
}
return 0; return 0;
no_mem_v2_sge:
kfree(link->wr_tx_v2_sge);
no_mem_v2_ib:
kfree(link->wr_tx_v2_ib);
no_mem_tx_compl:
kfree(link->wr_tx_compl);
no_mem_wr_tx_pends: no_mem_wr_tx_pends:
kfree(link->wr_tx_pends); kfree(link->wr_tx_pends);
no_mem_wr_tx_mask: no_mem_wr_tx_mask:
...@@ -725,6 +894,24 @@ int smc_wr_create_link(struct smc_link *lnk) ...@@ -725,6 +894,24 @@ int smc_wr_create_link(struct smc_link *lnk)
rc = -EIO; rc = -EIO;
goto out; goto out;
} }
if (lnk->lgr->smc_version == SMC_V2) {
lnk->wr_rx_v2_dma_addr = ib_dma_map_single(ibdev,
lnk->lgr->wr_rx_buf_v2, SMC_WR_BUF_V2_SIZE,
DMA_FROM_DEVICE);
if (ib_dma_mapping_error(ibdev, lnk->wr_rx_v2_dma_addr)) {
lnk->wr_rx_v2_dma_addr = 0;
rc = -EIO;
goto dma_unmap;
}
lnk->wr_tx_v2_dma_addr = ib_dma_map_single(ibdev,
lnk->lgr->wr_tx_buf_v2, SMC_WR_BUF_V2_SIZE,
DMA_TO_DEVICE);
if (ib_dma_mapping_error(ibdev, lnk->wr_tx_v2_dma_addr)) {
lnk->wr_tx_v2_dma_addr = 0;
rc = -EIO;
goto dma_unmap;
}
}
lnk->wr_tx_dma_addr = ib_dma_map_single( lnk->wr_tx_dma_addr = ib_dma_map_single(
ibdev, lnk->wr_tx_bufs, SMC_WR_BUF_SIZE * lnk->wr_tx_cnt, ibdev, lnk->wr_tx_bufs, SMC_WR_BUF_SIZE * lnk->wr_tx_cnt,
DMA_TO_DEVICE); DMA_TO_DEVICE);
...@@ -742,6 +929,18 @@ int smc_wr_create_link(struct smc_link *lnk) ...@@ -742,6 +929,18 @@ int smc_wr_create_link(struct smc_link *lnk)
return rc; return rc;
dma_unmap: dma_unmap:
if (lnk->wr_rx_v2_dma_addr) {
ib_dma_unmap_single(ibdev, lnk->wr_rx_v2_dma_addr,
SMC_WR_BUF_V2_SIZE,
DMA_FROM_DEVICE);
lnk->wr_rx_v2_dma_addr = 0;
}
if (lnk->wr_tx_v2_dma_addr) {
ib_dma_unmap_single(ibdev, lnk->wr_tx_v2_dma_addr,
SMC_WR_BUF_V2_SIZE,
DMA_TO_DEVICE);
lnk->wr_tx_v2_dma_addr = 0;
}
ib_dma_unmap_single(ibdev, lnk->wr_rx_dma_addr, ib_dma_unmap_single(ibdev, lnk->wr_rx_dma_addr,
SMC_WR_BUF_SIZE * lnk->wr_rx_cnt, SMC_WR_BUF_SIZE * lnk->wr_rx_cnt,
DMA_FROM_DEVICE); DMA_FROM_DEVICE);
......
...@@ -101,8 +101,10 @@ static inline int smc_wr_rx_post(struct smc_link *link) ...@@ -101,8 +101,10 @@ static inline int smc_wr_rx_post(struct smc_link *link)
int smc_wr_create_link(struct smc_link *lnk); int smc_wr_create_link(struct smc_link *lnk);
int smc_wr_alloc_link_mem(struct smc_link *lnk); int smc_wr_alloc_link_mem(struct smc_link *lnk);
int smc_wr_alloc_lgr_mem(struct smc_link_group *lgr);
void smc_wr_free_link(struct smc_link *lnk); void smc_wr_free_link(struct smc_link *lnk);
void smc_wr_free_link_mem(struct smc_link *lnk); void smc_wr_free_link_mem(struct smc_link *lnk);
void smc_wr_free_lgr_mem(struct smc_link_group *lgr);
void smc_wr_remember_qp_attr(struct smc_link *lnk); void smc_wr_remember_qp_attr(struct smc_link *lnk);
void smc_wr_remove_dev(struct smc_ib_device *smcibdev); void smc_wr_remove_dev(struct smc_ib_device *smcibdev);
void smc_wr_add_dev(struct smc_ib_device *smcibdev); void smc_wr_add_dev(struct smc_ib_device *smcibdev);
...@@ -111,10 +113,16 @@ int smc_wr_tx_get_free_slot(struct smc_link *link, smc_wr_tx_handler handler, ...@@ -111,10 +113,16 @@ int smc_wr_tx_get_free_slot(struct smc_link *link, smc_wr_tx_handler handler,
struct smc_wr_buf **wr_buf, struct smc_wr_buf **wr_buf,
struct smc_rdma_wr **wrs, struct smc_rdma_wr **wrs,
struct smc_wr_tx_pend_priv **wr_pend_priv); struct smc_wr_tx_pend_priv **wr_pend_priv);
int smc_wr_tx_get_v2_slot(struct smc_link *link,
smc_wr_tx_handler handler,
struct smc_wr_v2_buf **wr_buf,
struct smc_wr_tx_pend_priv **wr_pend_priv);
int smc_wr_tx_put_slot(struct smc_link *link, int smc_wr_tx_put_slot(struct smc_link *link,
struct smc_wr_tx_pend_priv *wr_pend_priv); struct smc_wr_tx_pend_priv *wr_pend_priv);
int smc_wr_tx_send(struct smc_link *link, int smc_wr_tx_send(struct smc_link *link,
struct smc_wr_tx_pend_priv *wr_pend_priv); struct smc_wr_tx_pend_priv *wr_pend_priv);
int smc_wr_tx_v2_send(struct smc_link *link,
struct smc_wr_tx_pend_priv *priv, int len);
int smc_wr_tx_send_wait(struct smc_link *link, struct smc_wr_tx_pend_priv *priv, int smc_wr_tx_send_wait(struct smc_link *link, struct smc_wr_tx_pend_priv *priv,
unsigned long timeout); unsigned long timeout);
void smc_wr_tx_cq_handler(struct ib_cq *ib_cq, void *cq_context); void smc_wr_tx_cq_handler(struct ib_cq *ib_cq, void *cq_context);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment