Commit 4e6e167b authored by David S. Miller's avatar David S. Miller

Merge branch 'smc-rv23'

Karsten Graul says:

====================
net/smc: introduce SMC-Rv2 support

Please apply the following patch series for smc to netdev's net-next tree.

SMC-Rv2 support (see https://www.ibm.com/support/pages/node/6326337)
provides routable RoCE support for SMC-R, eliminating the current
same-subnet restriction, by exploiting the UDP encapsulation feature
of the RoCE adapter hardware.

v2: resend of the v1 patch series, and CC linux-rdma this time
v3: rebase after net tree was merged into net-next
====================
Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parents 24bcbe1c 29397e34
......@@ -84,17 +84,28 @@ enum {
SMC_NLA_SYS_IS_ISM_V2, /* u8 */
SMC_NLA_SYS_LOCAL_HOST, /* string */
SMC_NLA_SYS_SEID, /* string */
SMC_NLA_SYS_IS_SMCR_V2, /* u8 */
__SMC_NLA_SYS_MAX,
SMC_NLA_SYS_MAX = __SMC_NLA_SYS_MAX - 1
};
/* SMC_NLA_LGR_V2 nested attributes */
/* SMC_NLA_LGR_D_V2_COMMON and SMC_NLA_LGR_R_V2_COMMON nested attributes */
enum {
SMC_NLA_LGR_V2_VER, /* u8 */
SMC_NLA_LGR_V2_REL, /* u8 */
SMC_NLA_LGR_V2_OS, /* u8 */
SMC_NLA_LGR_V2_NEG_EID, /* string */
SMC_NLA_LGR_V2_PEER_HOST, /* string */
__SMC_NLA_LGR_V2_MAX,
SMC_NLA_LGR_V2_MAX = __SMC_NLA_LGR_V2_MAX - 1
};
/* SMC_NLA_LGR_R_V2 nested attributes */
enum {
SMC_NLA_LGR_R_V2_UNSPEC,
SMC_NLA_LGR_R_V2_DIRECT, /* u8 */
__SMC_NLA_LGR_R_V2_MAX,
SMC_NLA_LGR_R_V2_MAX = __SMC_NLA_LGR_R_V2_MAX - 1
};
/* SMC_GEN_LGR_SMCR attributes */
......@@ -106,6 +117,8 @@ enum {
SMC_NLA_LGR_R_PNETID, /* string */
SMC_NLA_LGR_R_VLAN_ID, /* u8 */
SMC_NLA_LGR_R_CONNS_NUM, /* u32 */
SMC_NLA_LGR_R_V2_COMMON, /* nest */
SMC_NLA_LGR_R_V2, /* nest */
__SMC_NLA_LGR_R_MAX,
SMC_NLA_LGR_R_MAX = __SMC_NLA_LGR_R_MAX - 1
};
......@@ -138,7 +151,7 @@ enum {
SMC_NLA_LGR_D_PNETID, /* string */
SMC_NLA_LGR_D_CHID, /* u16 */
SMC_NLA_LGR_D_PAD, /* flag */
SMC_NLA_LGR_V2, /* nest */
SMC_NLA_LGR_D_V2_COMMON, /* nest */
__SMC_NLA_LGR_D_MAX,
SMC_NLA_LGR_D_MAX = __SMC_NLA_LGR_D_MAX - 1
};
......
This diff is collapsed.
......@@ -56,7 +56,20 @@ enum smc_state { /* possible states of an SMC socket */
struct smc_link_group;
struct smc_wr_rx_hdr { /* common prefix part of LLC and CDC to demultiplex */
u8 type;
union {
u8 type;
#if defined(__BIG_ENDIAN_BITFIELD)
struct {
u8 llc_version:4,
llc_type:4;
};
#elif defined(__LITTLE_ENDIAN_BITFIELD)
struct {
u8 llc_type:4,
llc_version:4;
};
#endif
};
} __aligned(1);
struct smc_cdc_conn_state_flags {
......@@ -286,7 +299,12 @@ static inline bool using_ipsec(struct smc_sock *smc)
}
#endif
struct smc_gidlist;
struct sock *smc_accept_dequeue(struct sock *parent, struct socket *new_sock);
void smc_close_non_accepted(struct sock *sk);
void smc_fill_gid_list(struct smc_link_group *lgr,
struct smc_gidlist *gidlist,
struct smc_ib_device *known_dev, u8 *known_gid);
#endif /* __SMC_H */
This diff is collapsed.
......@@ -44,6 +44,7 @@
#define SMC_CLC_DECL_NOV2DEXT 0x03030005 /* peer sent no clc SMC-Dv2 ext. */
#define SMC_CLC_DECL_NOSEID 0x03030006 /* peer sent no SEID */
#define SMC_CLC_DECL_NOSMCD2DEV 0x03030007 /* no SMC-Dv2 device found */
#define SMC_CLC_DECL_NOUEID 0x03030008 /* peer sent no UEID */
#define SMC_CLC_DECL_MODEUNSUPP 0x03040000 /* smc modes do not match (R or D)*/
#define SMC_CLC_DECL_RMBE_EC 0x03050000 /* peer has eyecatcher in RMBE */
#define SMC_CLC_DECL_OPTUNSUPP 0x03060000 /* fastopen sockopt not supported */
......@@ -54,6 +55,8 @@
#define SMC_CLC_DECL_NOSRVLINK 0x030b0000 /* SMC-R link from srv not found */
#define SMC_CLC_DECL_VERSMISMAT 0x030c0000 /* SMC version mismatch */
#define SMC_CLC_DECL_MAX_DMB 0x030d0000 /* SMC-D DMB limit exceeded */
#define SMC_CLC_DECL_NOROUTE 0x030e0000 /* SMC-Rv2 conn. no route to peer */
#define SMC_CLC_DECL_NOINDIRECT 0x030f0000 /* SMC-Rv2 conn. indirect mismatch*/
#define SMC_CLC_DECL_SYNCERR 0x04000000 /* synchronization error */
#define SMC_CLC_DECL_PEERDECL 0x05000000 /* peer declined during handshake */
#define SMC_CLC_DECL_INTERR 0x09990000 /* internal error */
......@@ -213,11 +216,14 @@ struct smcd_clc_msg_accept_confirm_common { /* SMCD accept/confirm */
#define SMC_CLC_OS_AIX 3
struct smc_clc_first_contact_ext {
u8 reserved1;
#if defined(__BIG_ENDIAN_BITFIELD)
u8 v2_direct : 1,
reserved : 7;
u8 os_type : 4,
release : 4;
#elif defined(__LITTLE_ENDIAN_BITFIELD)
u8 reserved : 7,
v2_direct : 1;
u8 release : 4,
os_type : 4;
#endif
......@@ -225,6 +231,13 @@ struct smc_clc_first_contact_ext {
u8 hostname[SMC_MAX_HOSTNAME_LEN];
};
struct smc_clc_fce_gid_ext {
u8 reserved[16];
u8 gid_cnt;
u8 reserved2[3];
u8 gid[][SMC_GID_SIZE];
};
struct smc_clc_msg_accept_confirm { /* clc accept / confirm message */
struct smc_clc_msg_hdr hdr;
union {
......@@ -239,13 +252,17 @@ struct smc_clc_msg_accept_confirm { /* clc accept / confirm message */
struct smc_clc_msg_accept_confirm_v2 { /* clc accept / confirm message */
struct smc_clc_msg_hdr hdr;
union {
struct smcr_clc_msg_accept_confirm r0; /* SMC-R */
struct { /* SMC-R */
struct smcr_clc_msg_accept_confirm r0;
u8 eid[SMC_MAX_EID_LEN];
u8 reserved6[8];
} r1;
struct { /* SMC-D */
struct smcd_clc_msg_accept_confirm_common d0;
__be16 chid;
u8 eid[SMC_MAX_EID_LEN];
u8 reserved5[8];
};
} d1;
};
};
......@@ -264,6 +281,24 @@ struct smc_clc_msg_decline { /* clc decline message */
struct smc_clc_msg_trail trl; /* eye catcher "SMCD" or "SMCR" EBCDIC */
} __aligned(4);
#define SMC_DECL_DIAG_COUNT_V2 4 /* no. of additional peer diagnosis codes */
struct smc_clc_msg_decline_v2 { /* clc decline message */
struct smc_clc_msg_hdr hdr;
u8 id_for_peer[SMC_SYSTEMID_LEN]; /* sender peer_id */
__be32 peer_diagnosis; /* diagnosis information */
#if defined(__BIG_ENDIAN_BITFIELD)
u8 os_type : 4,
reserved : 4;
#elif defined(__LITTLE_ENDIAN_BITFIELD)
u8 reserved : 4,
os_type : 4;
#endif
u8 reserved2[3];
__be32 peer_diagnosis_v2[SMC_DECL_DIAG_COUNT_V2];
struct smc_clc_msg_trail trl; /* eye catcher "SMCD" or "SMCR" EBCDIC */
} __aligned(4);
/* determine start of the prefix area within the proposal message */
static inline struct smc_clc_msg_proposal_prefix *
smc_clc_proposal_get_prefix(struct smc_clc_msg_proposal *pclc)
......@@ -282,6 +317,17 @@ static inline bool smcd_indicated(int smc_type)
return smc_type == SMC_TYPE_D || smc_type == SMC_TYPE_B;
}
static inline u8 smc_indicated_type(int is_smcd, int is_smcr)
{
if (is_smcd && is_smcr)
return SMC_TYPE_B;
if (is_smcd)
return SMC_TYPE_D;
if (is_smcr)
return SMC_TYPE_R;
return SMC_TYPE_N;
}
/* get SMC-D info from proposal message */
static inline struct smc_clc_msg_smcd *
smc_get_clc_msg_smcd(struct smc_clc_msg_proposal *prop)
......@@ -334,7 +380,7 @@ int smc_clc_wait_msg(struct smc_sock *smc, void *buf, int buflen,
int smc_clc_send_decline(struct smc_sock *smc, u32 peer_diag_info, u8 version);
int smc_clc_send_proposal(struct smc_sock *smc, struct smc_init_info *ini);
int smc_clc_send_confirm(struct smc_sock *smc, bool clnt_first_contact,
u8 version, u8 *eid);
u8 version, u8 *eid, struct smc_init_info *ini);
int smc_clc_send_accept(struct smc_sock *smc, bool srv_first_contact,
u8 version, u8 *negotiated_eid);
void smc_clc_init(void) __init;
......@@ -343,6 +389,7 @@ void smc_clc_get_hostname(u8 **host);
bool smc_clc_match_eid(u8 *negotiated_eid,
struct smc_clc_v2_extension *smc_v2_ext,
u8 *peer_eid, u8 *local_eid);
int smc_clc_ueid_count(void);
int smc_nl_dump_ueid(struct sk_buff *skb, struct netlink_callback *cb);
int smc_nl_add_ueid(struct sk_buff *skb, struct genl_info *info);
int smc_nl_remove_ueid(struct sk_buff *skb, struct genl_info *info);
......
......@@ -244,6 +244,8 @@ int smc_nl_get_sys_info(struct sk_buff *skb, struct netlink_callback *cb)
goto errattr;
if (nla_put_u8(skb, SMC_NLA_SYS_IS_ISM_V2, smc_ism_is_v2_capable()))
goto errattr;
if (nla_put_u8(skb, SMC_NLA_SYS_IS_SMCR_V2, true))
goto errattr;
smc_clc_get_hostname(&host);
if (host) {
memcpy(hostname, host, SMC_MAX_HOSTNAME_LEN);
......@@ -271,12 +273,65 @@ int smc_nl_get_sys_info(struct sk_buff *skb, struct netlink_callback *cb)
return skb->len;
}
/* Fill SMC_NLA_LGR_D_V2_COMMON/SMC_NLA_LGR_R_V2_COMMON nested attributes */
static int smc_nl_fill_lgr_v2_common(struct smc_link_group *lgr,
struct sk_buff *skb,
struct netlink_callback *cb,
struct nlattr *v2_attrs)
{
char smc_host[SMC_MAX_HOSTNAME_LEN + 1];
char smc_eid[SMC_MAX_EID_LEN + 1];
if (nla_put_u8(skb, SMC_NLA_LGR_V2_VER, lgr->smc_version))
goto errv2attr;
if (nla_put_u8(skb, SMC_NLA_LGR_V2_REL, lgr->peer_smc_release))
goto errv2attr;
if (nla_put_u8(skb, SMC_NLA_LGR_V2_OS, lgr->peer_os))
goto errv2attr;
memcpy(smc_host, lgr->peer_hostname, SMC_MAX_HOSTNAME_LEN);
smc_host[SMC_MAX_HOSTNAME_LEN] = 0;
if (nla_put_string(skb, SMC_NLA_LGR_V2_PEER_HOST, smc_host))
goto errv2attr;
memcpy(smc_eid, lgr->negotiated_eid, SMC_MAX_EID_LEN);
smc_eid[SMC_MAX_EID_LEN] = 0;
if (nla_put_string(skb, SMC_NLA_LGR_V2_NEG_EID, smc_eid))
goto errv2attr;
nla_nest_end(skb, v2_attrs);
return 0;
errv2attr:
nla_nest_cancel(skb, v2_attrs);
return -EMSGSIZE;
}
static int smc_nl_fill_smcr_lgr_v2(struct smc_link_group *lgr,
struct sk_buff *skb,
struct netlink_callback *cb)
{
struct nlattr *v2_attrs;
v2_attrs = nla_nest_start(skb, SMC_NLA_LGR_R_V2);
if (!v2_attrs)
goto errattr;
if (nla_put_u8(skb, SMC_NLA_LGR_R_V2_DIRECT, !lgr->uses_gateway))
goto errv2attr;
nla_nest_end(skb, v2_attrs);
return 0;
errv2attr:
nla_nest_cancel(skb, v2_attrs);
errattr:
return -EMSGSIZE;
}
static int smc_nl_fill_lgr(struct smc_link_group *lgr,
struct sk_buff *skb,
struct netlink_callback *cb)
{
char smc_target[SMC_MAX_PNETID_LEN + 1];
struct nlattr *attrs;
struct nlattr *attrs, *v2_attrs;
attrs = nla_nest_start(skb, SMC_GEN_LGR_SMCR);
if (!attrs)
......@@ -296,6 +351,15 @@ static int smc_nl_fill_lgr(struct smc_link_group *lgr,
smc_target[SMC_MAX_PNETID_LEN] = 0;
if (nla_put_string(skb, SMC_NLA_LGR_R_PNETID, smc_target))
goto errattr;
if (lgr->smc_version > SMC_V1) {
v2_attrs = nla_nest_start(skb, SMC_NLA_LGR_R_V2_COMMON);
if (!v2_attrs)
goto errattr;
if (smc_nl_fill_lgr_v2_common(lgr, skb, cb, v2_attrs))
goto errattr;
if (smc_nl_fill_smcr_lgr_v2(lgr, skb, cb))
goto errattr;
}
nla_nest_end(skb, attrs);
return 0;
......@@ -428,10 +492,7 @@ static int smc_nl_fill_smcd_lgr(struct smc_link_group *lgr,
struct sk_buff *skb,
struct netlink_callback *cb)
{
char smc_host[SMC_MAX_HOSTNAME_LEN + 1];
char smc_pnet[SMC_MAX_PNETID_LEN + 1];
char smc_eid[SMC_MAX_EID_LEN + 1];
struct nlattr *v2_attrs;
struct nlattr *attrs;
void *nlh;
......@@ -463,32 +524,19 @@ static int smc_nl_fill_smcd_lgr(struct smc_link_group *lgr,
smc_pnet[SMC_MAX_PNETID_LEN] = 0;
if (nla_put_string(skb, SMC_NLA_LGR_D_PNETID, smc_pnet))
goto errattr;
if (lgr->smc_version > SMC_V1) {
struct nlattr *v2_attrs;
v2_attrs = nla_nest_start(skb, SMC_NLA_LGR_V2);
if (!v2_attrs)
goto errattr;
if (nla_put_u8(skb, SMC_NLA_LGR_V2_VER, lgr->smc_version))
goto errv2attr;
if (nla_put_u8(skb, SMC_NLA_LGR_V2_REL, lgr->peer_smc_release))
goto errv2attr;
if (nla_put_u8(skb, SMC_NLA_LGR_V2_OS, lgr->peer_os))
goto errv2attr;
memcpy(smc_host, lgr->peer_hostname, SMC_MAX_HOSTNAME_LEN);
smc_host[SMC_MAX_HOSTNAME_LEN] = 0;
if (nla_put_string(skb, SMC_NLA_LGR_V2_PEER_HOST, smc_host))
goto errv2attr;
memcpy(smc_eid, lgr->negotiated_eid, SMC_MAX_EID_LEN);
smc_eid[SMC_MAX_EID_LEN] = 0;
if (nla_put_string(skb, SMC_NLA_LGR_V2_NEG_EID, smc_eid))
goto errv2attr;
nla_nest_end(skb, v2_attrs);
v2_attrs = nla_nest_start(skb, SMC_NLA_LGR_D_V2_COMMON);
if (!v2_attrs)
goto errattr;
if (smc_nl_fill_lgr_v2_common(lgr, skb, cb, v2_attrs))
goto errattr;
}
nla_nest_end(skb, attrs);
genlmsg_end(skb, nlh);
return 0;
errv2attr:
nla_nest_cancel(skb, v2_attrs);
errattr:
nla_nest_cancel(skb, attrs);
errout:
......@@ -684,24 +732,30 @@ static void smcr_copy_dev_info_to_link(struct smc_link *link)
int smcr_link_init(struct smc_link_group *lgr, struct smc_link *lnk,
u8 link_idx, struct smc_init_info *ini)
{
struct smc_ib_device *smcibdev;
u8 rndvec[3];
int rc;
get_device(&ini->ib_dev->ibdev->dev);
atomic_inc(&ini->ib_dev->lnk_cnt);
if (lgr->smc_version == SMC_V2) {
lnk->smcibdev = ini->smcrv2.ib_dev_v2;
lnk->ibport = ini->smcrv2.ib_port_v2;
} else {
lnk->smcibdev = ini->ib_dev;
lnk->ibport = ini->ib_port;
}
get_device(&lnk->smcibdev->ibdev->dev);
atomic_inc(&lnk->smcibdev->lnk_cnt);
lnk->path_mtu = lnk->smcibdev->pattr[lnk->ibport - 1].active_mtu;
lnk->link_id = smcr_next_link_id(lgr);
lnk->lgr = lgr;
lnk->link_idx = link_idx;
lnk->smcibdev = ini->ib_dev;
lnk->ibport = ini->ib_port;
smc_ibdev_cnt_inc(lnk);
smcr_copy_dev_info_to_link(lnk);
lnk->path_mtu = ini->ib_dev->pattr[ini->ib_port - 1].active_mtu;
atomic_set(&lnk->conn_cnt, 0);
smc_llc_link_set_uid(lnk);
INIT_WORK(&lnk->link_down_wrk, smc_link_down_work);
if (!ini->ib_dev->initialized) {
rc = (int)smc_ib_setup_per_ibdev(ini->ib_dev);
if (!lnk->smcibdev->initialized) {
rc = (int)smc_ib_setup_per_ibdev(lnk->smcibdev);
if (rc)
goto out;
}
......@@ -709,7 +763,9 @@ int smcr_link_init(struct smc_link_group *lgr, struct smc_link *lnk,
lnk->psn_initial = rndvec[0] + (rndvec[1] << 8) +
(rndvec[2] << 16);
rc = smc_ib_determine_gid(lnk->smcibdev, lnk->ibport,
ini->vlan_id, lnk->gid, &lnk->sgid_index);
ini->vlan_id, lnk->gid, &lnk->sgid_index,
lgr->smc_version == SMC_V2 ?
&ini->smcrv2 : NULL);
if (rc)
goto out;
rc = smc_llc_link_init(lnk);
......@@ -740,11 +796,12 @@ int smcr_link_init(struct smc_link_group *lgr, struct smc_link *lnk,
smc_llc_link_clear(lnk, false);
out:
smc_ibdev_cnt_dec(lnk);
put_device(&ini->ib_dev->ibdev->dev);
put_device(&lnk->smcibdev->ibdev->dev);
smcibdev = lnk->smcibdev;
memset(lnk, 0, sizeof(struct smc_link));
lnk->state = SMC_LNK_UNUSED;
if (!atomic_dec_return(&ini->ib_dev->lnk_cnt))
wake_up(&ini->ib_dev->lnks_deleted);
if (!atomic_dec_return(&smcibdev->lnk_cnt))
wake_up(&smcibdev->lnks_deleted);
return rc;
}
......@@ -808,18 +865,37 @@ static int smc_lgr_create(struct smc_sock *smc, struct smc_init_info *ini)
atomic_inc(&ini->ism_dev[ini->ism_selected]->lgr_cnt);
} else {
/* SMC-R specific settings */
struct smc_ib_device *ibdev;
int ibport;
lgr->role = smc->listen_smc ? SMC_SERV : SMC_CLNT;
memcpy(lgr->peer_systemid, ini->ib_lcl->id_for_peer,
lgr->smc_version = ini->smcr_version;
memcpy(lgr->peer_systemid, ini->peer_systemid,
SMC_SYSTEMID_LEN);
memcpy(lgr->pnet_id, ini->ib_dev->pnetid[ini->ib_port - 1],
if (lgr->smc_version == SMC_V2) {
ibdev = ini->smcrv2.ib_dev_v2;
ibport = ini->smcrv2.ib_port_v2;
lgr->saddr = ini->smcrv2.saddr;
lgr->uses_gateway = ini->smcrv2.uses_gateway;
memcpy(lgr->nexthop_mac, ini->smcrv2.nexthop_mac,
ETH_ALEN);
} else {
ibdev = ini->ib_dev;
ibport = ini->ib_port;
}
memcpy(lgr->pnet_id, ibdev->pnetid[ibport - 1],
SMC_MAX_PNETID_LEN);
if (smc_wr_alloc_lgr_mem(lgr))
goto free_wq;
smc_llc_lgr_init(lgr, smc);
link_idx = SMC_SINGLE_LINK;
lnk = &lgr->lnk[link_idx];
rc = smcr_link_init(lgr, lnk, link_idx, ini);
if (rc)
if (rc) {
smc_wr_free_lgr_mem(lgr);
goto free_wq;
}
lgr_list = &smc_lgr_list.list;
lgr_lock = &smc_lgr_list.lock;
atomic_inc(&lgr_cnt);
......@@ -1226,6 +1302,7 @@ static void smc_lgr_free(struct smc_link_group *lgr)
if (!atomic_dec_return(&lgr->smcd->lgr_cnt))
wake_up(&lgr->smcd->lgrs_deleted);
} else {
smc_wr_free_lgr_mem(lgr);
if (!atomic_dec_return(&lgr_cnt))
wake_up(&lgrs_deleted);
}
......@@ -1636,13 +1713,15 @@ int smc_vlan_by_tcpsk(struct socket *clcsock, struct smc_init_info *ini)
return rc;
}
static bool smcr_lgr_match(struct smc_link_group *lgr,
struct smc_clc_msg_local *lcl,
static bool smcr_lgr_match(struct smc_link_group *lgr, u8 smcr_version,
u8 peer_systemid[],
u8 peer_gid[],
u8 peer_mac_v1[],
enum smc_lgr_role role, u32 clcqpn)
{
int i;
if (memcmp(lgr->peer_systemid, lcl->id_for_peer, SMC_SYSTEMID_LEN) ||
if (memcmp(lgr->peer_systemid, peer_systemid, SMC_SYSTEMID_LEN) ||
lgr->role != role)
return false;
......@@ -1650,8 +1729,9 @@ static bool smcr_lgr_match(struct smc_link_group *lgr,
if (!smc_link_active(&lgr->lnk[i]))
continue;
if ((lgr->role == SMC_SERV || lgr->lnk[i].peer_qpn == clcqpn) &&
!memcmp(lgr->lnk[i].peer_gid, &lcl->gid, SMC_GID_SIZE) &&
!memcmp(lgr->lnk[i].peer_mac, lcl->mac, sizeof(lcl->mac)))
!memcmp(lgr->lnk[i].peer_gid, peer_gid, SMC_GID_SIZE) &&
(smcr_version == SMC_V2 ||
!memcmp(lgr->lnk[i].peer_mac, peer_mac_v1, ETH_ALEN)))
return true;
}
return false;
......@@ -1690,7 +1770,10 @@ int smc_conn_create(struct smc_sock *smc, struct smc_init_info *ini)
if ((ini->is_smcd ?
smcd_lgr_match(lgr, ini->ism_dev[ini->ism_selected],
ini->ism_peer_gid[ini->ism_selected]) :
smcr_lgr_match(lgr, ini->ib_lcl, role, ini->ib_clcqpn)) &&
smcr_lgr_match(lgr, ini->smcr_version,
ini->peer_systemid,
ini->peer_gid, ini->peer_mac, role,
ini->ib_clcqpn)) &&
!lgr->sync_err &&
(ini->smcd_version == SMC_V2 ||
lgr->vlan_id == ini->vlan_id) &&
......
......@@ -42,11 +42,16 @@ enum smc_link_state { /* possible states of a link */
};
#define SMC_WR_BUF_SIZE 48 /* size of work request buffer */
#define SMC_WR_BUF_V2_SIZE 8192 /* size of v2 work request buffer */
struct smc_wr_buf {
u8 raw[SMC_WR_BUF_SIZE];
};
struct smc_wr_v2_buf {
u8 raw[SMC_WR_BUF_V2_SIZE];
};
#define SMC_WR_REG_MR_WAIT_TIME (5 * HZ)/* wait time for ib_wr_reg_mr result */
enum smc_wr_reg_state {
......@@ -92,7 +97,11 @@ struct smc_link {
struct smc_wr_tx_pend *wr_tx_pends; /* WR send waiting for CQE */
struct completion *wr_tx_compl; /* WR send CQE completion */
/* above four vectors have wr_tx_cnt elements and use the same index */
struct ib_send_wr *wr_tx_v2_ib; /* WR send v2 meta data */
struct ib_sge *wr_tx_v2_sge; /* WR send v2 gather meta data*/
struct smc_wr_tx_pend *wr_tx_v2_pend; /* WR send v2 waiting for CQE */
dma_addr_t wr_tx_dma_addr; /* DMA address of wr_tx_bufs */
dma_addr_t wr_tx_v2_dma_addr; /* DMA address of v2 tx buf*/
atomic_long_t wr_tx_id; /* seq # of last sent WR */
unsigned long *wr_tx_mask; /* bit mask of used indexes */
u32 wr_tx_cnt; /* number of WR send buffers */
......@@ -104,6 +113,7 @@ struct smc_link {
struct ib_sge *wr_rx_sges; /* WR recv scatter meta data */
/* above three vectors have wr_rx_cnt elements and use the same index */
dma_addr_t wr_rx_dma_addr; /* DMA address of wr_rx_bufs */
dma_addr_t wr_rx_v2_dma_addr; /* DMA address of v2 rx buf*/
u64 wr_rx_id; /* seq # of last recv WR */
u32 wr_rx_cnt; /* number of WR recv buffers */
unsigned long wr_rx_tstamp; /* jiffies when last buf rx */
......@@ -208,6 +218,7 @@ enum smc_llc_flowtype {
SMC_LLC_FLOW_NONE = 0,
SMC_LLC_FLOW_ADD_LINK = 2,
SMC_LLC_FLOW_DEL_LINK = 4,
SMC_LLC_FLOW_REQ_ADD_LINK = 5,
SMC_LLC_FLOW_RKEY = 6,
};
......@@ -250,6 +261,10 @@ struct smc_link_group {
/* client or server */
struct smc_link lnk[SMC_LINKS_PER_LGR_MAX];
/* smc link */
struct smc_wr_v2_buf *wr_rx_buf_v2;
/* WR v2 recv payload buffer */
struct smc_wr_v2_buf *wr_tx_buf_v2;
/* WR v2 send payload buffer */
char peer_systemid[SMC_SYSTEMID_LEN];
/* unique system_id of peer */
struct smc_rtoken rtokens[SMC_RMBS_PER_LGR_MAX]
......@@ -288,6 +303,9 @@ struct smc_link_group {
/* link keep alive time */
u32 llc_termination_rsn;
/* rsn code for termination */
u8 nexthop_mac[ETH_ALEN];
u8 uses_gateway;
__be32 saddr;
};
struct { /* SMC-D */
u64 peer_gid;
......@@ -302,6 +320,31 @@ struct smc_link_group {
struct smc_clc_msg_local;
#define GID_LIST_SIZE 2
struct smc_gidlist {
u8 len;
u8 list[GID_LIST_SIZE][SMC_GID_SIZE];
};
struct smc_init_info_smcrv2 {
/* Input fields */
__be32 saddr;
struct sock *clc_sk;
__be32 daddr;
/* Output fields when saddr is set */
struct smc_ib_device *ib_dev_v2;
u8 ib_port_v2;
u8 ib_gid_v2[SMC_GID_SIZE];
/* Additional output fields when clc_sk and daddr is set as well */
u8 uses_gateway;
u8 nexthop_mac[ETH_ALEN];
struct smc_gidlist gidlist;
};
struct smc_init_info {
u8 is_smcd;
u8 smc_type_v1;
......@@ -312,11 +355,16 @@ struct smc_init_info {
u32 rc;
u8 negotiated_eid[SMC_MAX_EID_LEN];
/* SMC-R */
struct smc_clc_msg_local *ib_lcl;
u8 smcr_version;
u8 check_smcrv2;
u8 peer_gid[SMC_GID_SIZE];
u8 peer_mac[ETH_ALEN];
u8 peer_systemid[SMC_SYSTEMID_LEN];
struct smc_ib_device *ib_dev;
u8 ib_gid[SMC_GID_SIZE];
u8 ib_port;
u32 ib_clcqpn;
struct smc_init_info_smcrv2 smcrv2;
/* SMC-D */
u64 ism_peer_gid[SMC_MAX_ISM_DEVS + 1];
struct smcd_dev *ism_dev[SMC_MAX_ISM_DEVS + 1];
......
......@@ -17,6 +17,7 @@
#include <linux/scatterlist.h>
#include <linux/wait.h>
#include <linux/mutex.h>
#include <linux/inetdevice.h>
#include <rdma/ib_verbs.h>
#include <rdma/ib_cache.h>
......@@ -62,16 +63,23 @@ static int smc_ib_modify_qp_rtr(struct smc_link *lnk)
IB_QP_STATE | IB_QP_AV | IB_QP_PATH_MTU | IB_QP_DEST_QPN |
IB_QP_RQ_PSN | IB_QP_MAX_DEST_RD_ATOMIC | IB_QP_MIN_RNR_TIMER;
struct ib_qp_attr qp_attr;
u8 hop_lim = 1;
memset(&qp_attr, 0, sizeof(qp_attr));
qp_attr.qp_state = IB_QPS_RTR;
qp_attr.path_mtu = min(lnk->path_mtu, lnk->peer_mtu);
qp_attr.ah_attr.type = RDMA_AH_ATTR_TYPE_ROCE;
rdma_ah_set_port_num(&qp_attr.ah_attr, lnk->ibport);
rdma_ah_set_grh(&qp_attr.ah_attr, NULL, 0, lnk->sgid_index, 1, 0);
if (lnk->lgr->smc_version == SMC_V2 && lnk->lgr->uses_gateway)
hop_lim = IPV6_DEFAULT_HOPLIMIT;
rdma_ah_set_grh(&qp_attr.ah_attr, NULL, 0, lnk->sgid_index, hop_lim, 0);
rdma_ah_set_dgid_raw(&qp_attr.ah_attr, lnk->peer_gid);
memcpy(&qp_attr.ah_attr.roce.dmac, lnk->peer_mac,
sizeof(lnk->peer_mac));
if (lnk->lgr->smc_version == SMC_V2 && lnk->lgr->uses_gateway)
memcpy(&qp_attr.ah_attr.roce.dmac, lnk->lgr->nexthop_mac,
sizeof(lnk->lgr->nexthop_mac));
else
memcpy(&qp_attr.ah_attr.roce.dmac, lnk->peer_mac,
sizeof(lnk->peer_mac));
qp_attr.dest_qp_num = lnk->peer_qpn;
qp_attr.rq_psn = lnk->peer_psn; /* starting receive packet seq # */
qp_attr.max_dest_rd_atomic = 1; /* max # of resources for incoming
......@@ -183,9 +191,81 @@ bool smc_ib_port_active(struct smc_ib_device *smcibdev, u8 ibport)
return smcibdev->pattr[ibport - 1].state == IB_PORT_ACTIVE;
}
int smc_ib_find_route(__be32 saddr, __be32 daddr,
u8 nexthop_mac[], u8 *uses_gateway)
{
struct neighbour *neigh = NULL;
struct rtable *rt = NULL;
struct flowi4 fl4 = {
.saddr = saddr,
.daddr = daddr
};
if (daddr == cpu_to_be32(INADDR_NONE))
goto out;
rt = ip_route_output_flow(&init_net, &fl4, NULL);
if (IS_ERR(rt))
goto out;
if (rt->rt_uses_gateway && rt->rt_gw_family != AF_INET)
goto out;
neigh = rt->dst.ops->neigh_lookup(&rt->dst, NULL, &fl4.daddr);
if (neigh) {
memcpy(nexthop_mac, neigh->ha, ETH_ALEN);
*uses_gateway = rt->rt_uses_gateway;
return 0;
}
out:
return -ENOENT;
}
static int smc_ib_determine_gid_rcu(const struct net_device *ndev,
const struct ib_gid_attr *attr,
u8 gid[], u8 *sgid_index,
struct smc_init_info_smcrv2 *smcrv2)
{
if (!smcrv2 && attr->gid_type == IB_GID_TYPE_ROCE) {
if (gid)
memcpy(gid, &attr->gid, SMC_GID_SIZE);
if (sgid_index)
*sgid_index = attr->index;
return 0;
}
if (smcrv2 && attr->gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP &&
smc_ib_gid_to_ipv4((u8 *)&attr->gid) != cpu_to_be32(INADDR_NONE)) {
struct in_device *in_dev = __in_dev_get_rcu(ndev);
const struct in_ifaddr *ifa;
bool subnet_match = false;
if (!in_dev)
goto out;
in_dev_for_each_ifa_rcu(ifa, in_dev) {
if (!inet_ifa_match(smcrv2->saddr, ifa))
continue;
subnet_match = true;
break;
}
if (!subnet_match)
goto out;
if (smcrv2->daddr && smc_ib_find_route(smcrv2->saddr,
smcrv2->daddr,
smcrv2->nexthop_mac,
&smcrv2->uses_gateway))
goto out;
if (gid)
memcpy(gid, &attr->gid, SMC_GID_SIZE);
if (sgid_index)
*sgid_index = attr->index;
return 0;
}
out:
return -ENODEV;
}
/* determine the gid for an ib-device port and vlan id */
int smc_ib_determine_gid(struct smc_ib_device *smcibdev, u8 ibport,
unsigned short vlan_id, u8 gid[], u8 *sgid_index)
unsigned short vlan_id, u8 gid[], u8 *sgid_index,
struct smc_init_info_smcrv2 *smcrv2)
{
const struct ib_gid_attr *attr;
const struct net_device *ndev;
......@@ -201,15 +281,13 @@ int smc_ib_determine_gid(struct smc_ib_device *smcibdev, u8 ibport,
if (!IS_ERR(ndev) &&
((!vlan_id && !is_vlan_dev(ndev)) ||
(vlan_id && is_vlan_dev(ndev) &&
vlan_dev_vlan_id(ndev) == vlan_id)) &&
attr->gid_type == IB_GID_TYPE_ROCE) {
rcu_read_unlock();
if (gid)
memcpy(gid, &attr->gid, SMC_GID_SIZE);
if (sgid_index)
*sgid_index = attr->index;
rdma_put_gid_attr(attr);
return 0;
vlan_dev_vlan_id(ndev) == vlan_id))) {
if (!smc_ib_determine_gid_rcu(ndev, attr, gid,
sgid_index, smcrv2)) {
rcu_read_unlock();
rdma_put_gid_attr(attr);
return 0;
}
}
rcu_read_unlock();
rdma_put_gid_attr(attr);
......@@ -217,6 +295,58 @@ int smc_ib_determine_gid(struct smc_ib_device *smcibdev, u8 ibport,
return -ENODEV;
}
/* check if gid is still defined on smcibdev */
static bool smc_ib_check_link_gid(u8 gid[SMC_GID_SIZE], bool smcrv2,
struct smc_ib_device *smcibdev, u8 ibport)
{
const struct ib_gid_attr *attr;
bool rc = false;
int i;
for (i = 0; !rc && i < smcibdev->pattr[ibport - 1].gid_tbl_len; i++) {
attr = rdma_get_gid_attr(smcibdev->ibdev, ibport, i);
if (IS_ERR(attr))
continue;
rcu_read_lock();
if ((!smcrv2 && attr->gid_type == IB_GID_TYPE_ROCE) ||
(smcrv2 && attr->gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP &&
!(ipv6_addr_type((const struct in6_addr *)&attr->gid)
& IPV6_ADDR_LINKLOCAL)))
if (!memcmp(gid, &attr->gid, SMC_GID_SIZE))
rc = true;
rcu_read_unlock();
rdma_put_gid_attr(attr);
}
return rc;
}
/* check all links if the gid is still defined on smcibdev */
static void smc_ib_gid_check(struct smc_ib_device *smcibdev, u8 ibport)
{
struct smc_link_group *lgr;
int i;
spin_lock_bh(&smc_lgr_list.lock);
list_for_each_entry(lgr, &smc_lgr_list.list, list) {
if (strncmp(smcibdev->pnetid[ibport - 1], lgr->pnet_id,
SMC_MAX_PNETID_LEN))
continue; /* lgr is not affected */
if (list_empty(&lgr->list))
continue;
for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
if (lgr->lnk[i].state == SMC_LNK_UNUSED ||
lgr->lnk[i].smcibdev != smcibdev)
continue;
if (!smc_ib_check_link_gid(lgr->lnk[i].gid,
lgr->smc_version == SMC_V2,
smcibdev, ibport))
smcr_port_err(smcibdev, ibport);
}
}
spin_unlock_bh(&smc_lgr_list.lock);
}
static int smc_ib_remember_port_attr(struct smc_ib_device *smcibdev, u8 ibport)
{
int rc;
......@@ -255,6 +385,7 @@ static void smc_ib_port_event_work(struct work_struct *work)
} else {
clear_bit(port_idx, smcibdev->ports_going_away);
smcr_port_add(smcibdev, port_idx + 1);
smc_ib_gid_check(smcibdev, port_idx + 1);
}
}
}
......@@ -523,6 +654,7 @@ void smc_ib_destroy_queue_pair(struct smc_link *lnk)
/* create a queue pair within the protection domain for a link */
int smc_ib_create_queue_pair(struct smc_link *lnk)
{
int sges_per_buf = (lnk->lgr->smc_version == SMC_V2) ? 2 : 1;
struct ib_qp_init_attr qp_attr = {
.event_handler = smc_ib_qp_event_handler,
.qp_context = lnk,
......@@ -536,7 +668,7 @@ int smc_ib_create_queue_pair(struct smc_link *lnk)
.max_send_wr = SMC_WR_BUF_CNT * 3,
.max_recv_wr = SMC_WR_BUF_CNT * 3,
.max_send_sge = SMC_IB_MAX_SEND_SGE,
.max_recv_sge = 1,
.max_recv_sge = sges_per_buf,
},
.sq_sig_type = IB_SIGNAL_REQ_WR,
.qp_type = IB_QPT_RC,
......
......@@ -59,6 +59,17 @@ struct smc_ib_device { /* ib-device infos for smc */
int ndev_ifidx[SMC_MAX_PORTS]; /* ndev if indexes */
};
static inline __be32 smc_ib_gid_to_ipv4(u8 gid[SMC_GID_SIZE])
{
struct in6_addr *addr6 = (struct in6_addr *)gid;
if (ipv6_addr_v4mapped(addr6) ||
!(addr6->s6_addr32[0] | addr6->s6_addr32[1] | addr6->s6_addr32[2]))
return addr6->s6_addr32[3];
return cpu_to_be32(INADDR_NONE);
}
struct smc_init_info_smcrv2;
struct smc_buf_desc;
struct smc_link;
......@@ -90,7 +101,10 @@ void smc_ib_sync_sg_for_device(struct smc_link *lnk,
struct smc_buf_desc *buf_slot,
enum dma_data_direction data_direction);
int smc_ib_determine_gid(struct smc_ib_device *smcibdev, u8 ibport,
unsigned short vlan_id, u8 gid[], u8 *sgid_index);
unsigned short vlan_id, u8 gid[], u8 *sgid_index,
struct smc_init_info_smcrv2 *smcrv2);
int smc_ib_find_route(__be32 saddr, __be32 daddr,
u8 nexthop_mac[], u8 *uses_gateway);
bool smc_ib_is_valid_local_systemid(void);
int smcr_nl_get_device(struct sk_buff *skb, struct netlink_callback *cb);
#endif
This diff is collapsed.
......@@ -30,10 +30,19 @@ enum smc_llc_msg_type {
SMC_LLC_ADD_LINK = 0x02,
SMC_LLC_ADD_LINK_CONT = 0x03,
SMC_LLC_DELETE_LINK = 0x04,
SMC_LLC_REQ_ADD_LINK = 0x05,
SMC_LLC_CONFIRM_RKEY = 0x06,
SMC_LLC_TEST_LINK = 0x07,
SMC_LLC_CONFIRM_RKEY_CONT = 0x08,
SMC_LLC_DELETE_RKEY = 0x09,
/* V2 types */
SMC_LLC_CONFIRM_LINK_V2 = 0x21,
SMC_LLC_ADD_LINK_V2 = 0x22,
SMC_LLC_DELETE_LINK_V2 = 0x24,
SMC_LLC_REQ_ADD_LINK_V2 = 0x25,
SMC_LLC_CONFIRM_RKEY_V2 = 0x26,
SMC_LLC_TEST_LINK_V2 = 0x27,
SMC_LLC_DELETE_RKEY_V2 = 0x29,
};
#define smc_link_downing(state) \
......@@ -102,7 +111,8 @@ void smc_llc_flow_qentry_del(struct smc_llc_flow *flow);
void smc_llc_send_link_delete_all(struct smc_link_group *lgr, bool ord,
u32 rsn);
int smc_llc_cli_add_link(struct smc_link *link, struct smc_llc_qentry *qentry);
int smc_llc_srv_add_link(struct smc_link *link);
int smc_llc_srv_add_link(struct smc_link *link,
struct smc_llc_qentry *req_qentry);
void smc_llc_add_link_local(struct smc_link *link);
int smc_llc_init(void) __init;
......
......@@ -953,6 +953,26 @@ static int smc_pnet_find_ndev_pnetid_by_table(struct net_device *ndev,
return rc;
}
static int smc_pnet_determine_gid(struct smc_ib_device *ibdev, int i,
struct smc_init_info *ini)
{
if (!ini->check_smcrv2 &&
!smc_ib_determine_gid(ibdev, i, ini->vlan_id, ini->ib_gid, NULL,
NULL)) {
ini->ib_dev = ibdev;
ini->ib_port = i;
return 0;
}
if (ini->check_smcrv2 &&
!smc_ib_determine_gid(ibdev, i, ini->vlan_id, ini->smcrv2.ib_gid_v2,
NULL, &ini->smcrv2)) {
ini->smcrv2.ib_dev_v2 = ibdev;
ini->smcrv2.ib_port_v2 = i;
return 0;
}
return -ENODEV;
}
/* find a roce device for the given pnetid */
static void _smc_pnet_find_roce_by_pnetid(u8 *pnet_id,
struct smc_init_info *ini,
......@@ -961,7 +981,6 @@ static void _smc_pnet_find_roce_by_pnetid(u8 *pnet_id,
struct smc_ib_device *ibdev;
int i;
ini->ib_dev = NULL;
mutex_lock(&smc_ib_devices.mutex);
list_for_each_entry(ibdev, &smc_ib_devices.list, list) {
if (ibdev == known_dev)
......@@ -971,12 +990,9 @@ static void _smc_pnet_find_roce_by_pnetid(u8 *pnet_id,
continue;
if (smc_pnet_match(ibdev->pnetid[i - 1], pnet_id) &&
smc_ib_port_active(ibdev, i) &&
!test_bit(i - 1, ibdev->ports_going_away) &&
!smc_ib_determine_gid(ibdev, i, ini->vlan_id,
ini->ib_gid, NULL)) {
ini->ib_dev = ibdev;
ini->ib_port = i;
goto out;
!test_bit(i - 1, ibdev->ports_going_away)) {
if (!smc_pnet_determine_gid(ibdev, i, ini))
goto out;
}
}
}
......@@ -1016,12 +1032,9 @@ static void smc_pnet_find_rdma_dev(struct net_device *netdev,
dev_put(ndev);
if (netdev == ndev &&
smc_ib_port_active(ibdev, i) &&
!test_bit(i - 1, ibdev->ports_going_away) &&
!smc_ib_determine_gid(ibdev, i, ini->vlan_id,
ini->ib_gid, NULL)) {
ini->ib_dev = ibdev;
ini->ib_port = i;
break;
!test_bit(i - 1, ibdev->ports_going_away)) {
if (!smc_pnet_determine_gid(ibdev, i, ini))
break;
}
}
}
......@@ -1083,8 +1096,6 @@ void smc_pnet_find_roce_resource(struct sock *sk, struct smc_init_info *ini)
{
struct dst_entry *dst = sk_dst_get(sk);
ini->ib_dev = NULL;
ini->ib_port = 0;
if (!dst)
goto out;
if (!dst->dev)
......
This diff is collapsed.
......@@ -101,8 +101,10 @@ static inline int smc_wr_rx_post(struct smc_link *link)
int smc_wr_create_link(struct smc_link *lnk);
int smc_wr_alloc_link_mem(struct smc_link *lnk);
int smc_wr_alloc_lgr_mem(struct smc_link_group *lgr);
void smc_wr_free_link(struct smc_link *lnk);
void smc_wr_free_link_mem(struct smc_link *lnk);
void smc_wr_free_lgr_mem(struct smc_link_group *lgr);
void smc_wr_remember_qp_attr(struct smc_link *lnk);
void smc_wr_remove_dev(struct smc_ib_device *smcibdev);
void smc_wr_add_dev(struct smc_ib_device *smcibdev);
......@@ -111,10 +113,16 @@ int smc_wr_tx_get_free_slot(struct smc_link *link, smc_wr_tx_handler handler,
struct smc_wr_buf **wr_buf,
struct smc_rdma_wr **wrs,
struct smc_wr_tx_pend_priv **wr_pend_priv);
int smc_wr_tx_get_v2_slot(struct smc_link *link,
smc_wr_tx_handler handler,
struct smc_wr_v2_buf **wr_buf,
struct smc_wr_tx_pend_priv **wr_pend_priv);
int smc_wr_tx_put_slot(struct smc_link *link,
struct smc_wr_tx_pend_priv *wr_pend_priv);
int smc_wr_tx_send(struct smc_link *link,
struct smc_wr_tx_pend_priv *wr_pend_priv);
int smc_wr_tx_v2_send(struct smc_link *link,
struct smc_wr_tx_pend_priv *priv, int len);
int smc_wr_tx_send_wait(struct smc_link *link, struct smc_wr_tx_pend_priv *priv,
unsigned long timeout);
void smc_wr_tx_cq_handler(struct ib_cq *ib_cq, void *cq_context);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment