Commit 7263d72b authored by David S. Miller

Merge branch 'net-smc-preparations-for-SMC-R-link-failover'

Karsten Graul says:

====================
net/smc: preparations for SMC-R link failover

This patch series prepares the SMC code for the implementation of SMC-R link
failover capabilities which are still missing to reach full compliance with
RFC 7609.
The code changes are separated into 65 patches which together form the new
functionality. I tried to create meaningful patches that make it possible to
follow the implementation.

Question: how to handle the remaining 52 patches? All of them are needed for
link failover to work and should make it into the same merge window.
Can I send them all together?

The SMC-R implementation will transparently make use of the link failover
feature when matching RoCE devices are available, no special setup is required.
All RoCE devices with the same PNET ID as the TCP device (hardware-defined or
user-defined via the smc_pnet tool) are candidates to get used to form a link
in a link group. When at least 2 RoCE devices are available on both
communication endpoints then a symmetric link group is formed, meaning the link
group has 2 independent links. If one RoCE device goes down then all connections
on this link are moved to the surviving link. Upon recovery of the failing
device or availability of a new one, the symmetric link group will be restored.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents 21615efa 00a049cf
......@@ -338,36 +338,53 @@ static void smc_copy_sock_settings_to_smc(struct smc_sock *smc)
}
/* register a new rmb, send confirm_rkey msg to register with peer */
static int smc_reg_rmb(struct smc_link *link, struct smc_buf_desc *rmb_desc,
bool conf_rkey)
static int smcr_link_reg_rmb(struct smc_link *link,
struct smc_buf_desc *rmb_desc, bool conf_rkey)
{
if (!rmb_desc->wr_reg) {
if (!rmb_desc->is_reg_mr[link->link_idx]) {
/* register memory region for new rmb */
if (smc_wr_reg_send(link, rmb_desc->mr_rx[SMC_SINGLE_LINK])) {
rmb_desc->regerr = 1;
if (smc_wr_reg_send(link, rmb_desc->mr_rx[link->link_idx])) {
rmb_desc->is_reg_err = true;
return -EFAULT;
}
rmb_desc->wr_reg = 1;
rmb_desc->is_reg_mr[link->link_idx] = true;
}
if (!conf_rkey)
return 0;
/* exchange confirm_rkey msg with peer */
if (smc_llc_do_confirm_rkey(link, rmb_desc)) {
rmb_desc->regerr = 1;
return -EFAULT;
if (!rmb_desc->is_conf_rkey) {
if (smc_llc_do_confirm_rkey(link, rmb_desc)) {
rmb_desc->is_reg_err = true;
return -EFAULT;
}
rmb_desc->is_conf_rkey = true;
}
return 0;
}
static int smc_clnt_conf_first_link(struct smc_sock *smc)
/* register the new rmb on all links */
static int smcr_lgr_reg_rmbs(struct smc_link_group *lgr,
struct smc_buf_desc *rmb_desc)
{
struct net *net = sock_net(smc->clcsock->sk);
struct smc_link_group *lgr = smc->conn.lgr;
struct smc_link *link;
int i, rc;
for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
if (lgr->lnk[i].state != SMC_LNK_ACTIVE)
continue;
rc = smcr_link_reg_rmb(&lgr->lnk[i], rmb_desc, true);
if (rc)
return rc;
}
return 0;
}
static int smcr_clnt_conf_first_link(struct smc_sock *smc)
{
struct smc_link *link = smc->conn.lnk;
int rest;
int rc;
link = &lgr->lnk[SMC_SINGLE_LINK];
/* receive CONFIRM LINK request from server over RoCE fabric */
rest = wait_for_completion_interruptible_timeout(
&link->llc_confirm,
......@@ -389,7 +406,7 @@ static int smc_clnt_conf_first_link(struct smc_sock *smc)
smc_wr_remember_qp_attr(link);
if (smc_reg_rmb(link, smc->conn.rmb_desc, false))
if (smcr_link_reg_rmb(link, smc->conn.rmb_desc, false))
return SMC_CLC_DECL_ERR_REGRMB;
/* send CONFIRM LINK response over RoCE fabric */
......@@ -415,7 +432,7 @@ static int smc_clnt_conf_first_link(struct smc_sock *smc)
if (rc < 0)
return SMC_CLC_DECL_TIMEOUT_AL;
smc_llc_link_active(link, net->ipv4.sysctl_tcp_keepalive_time);
smc_llc_link_active(link);
return 0;
}
......@@ -610,7 +627,7 @@ static int smc_connect_rdma(struct smc_sock *smc,
mutex_unlock(&smc_client_lgr_pending);
return reason_code;
}
link = &smc->conn.lgr->lnk[SMC_SINGLE_LINK];
link = smc->conn.lnk;
smc_conn_save_peer_info(smc, aclc);
......@@ -622,7 +639,7 @@ static int smc_connect_rdma(struct smc_sock *smc,
if (ini->cln_first_contact == SMC_FIRST_CONTACT)
smc_link_save_peer_info(link, aclc);
if (smc_rmb_rtoken_handling(&smc->conn, aclc))
if (smc_rmb_rtoken_handling(&smc->conn, link, aclc))
return smc_connect_abort(smc, SMC_CLC_DECL_ERR_RTOK,
ini->cln_first_contact);
......@@ -634,7 +651,7 @@ static int smc_connect_rdma(struct smc_sock *smc,
return smc_connect_abort(smc, SMC_CLC_DECL_ERR_RDYLNK,
ini->cln_first_contact);
} else {
if (smc_reg_rmb(link, smc->conn.rmb_desc, true))
if (smcr_lgr_reg_rmbs(smc->conn.lgr, smc->conn.rmb_desc))
return smc_connect_abort(smc, SMC_CLC_DECL_ERR_REGRMB,
ini->cln_first_contact);
}
......@@ -649,7 +666,7 @@ static int smc_connect_rdma(struct smc_sock *smc,
if (ini->cln_first_contact == SMC_FIRST_CONTACT) {
/* QP confirmation over RoCE fabric */
reason_code = smc_clnt_conf_first_link(smc);
reason_code = smcr_clnt_conf_first_link(smc);
if (reason_code)
return smc_connect_abort(smc, reason_code,
ini->cln_first_contact);
......@@ -999,17 +1016,13 @@ void smc_close_non_accepted(struct sock *sk)
sock_put(sk); /* final sock_put */
}
static int smc_serv_conf_first_link(struct smc_sock *smc)
static int smcr_serv_conf_first_link(struct smc_sock *smc)
{
struct net *net = sock_net(smc->clcsock->sk);
struct smc_link_group *lgr = smc->conn.lgr;
struct smc_link *link;
struct smc_link *link = smc->conn.lnk;
int rest;
int rc;
link = &lgr->lnk[SMC_SINGLE_LINK];
if (smc_reg_rmb(link, smc->conn.rmb_desc, false))
if (smcr_link_reg_rmb(link, smc->conn.rmb_desc, false))
return SMC_CLC_DECL_ERR_REGRMB;
/* send CONFIRM LINK request to client over the RoCE fabric */
......@@ -1050,7 +1063,7 @@ static int smc_serv_conf_first_link(struct smc_sock *smc)
return rc == -EAGAIN ? SMC_CLC_DECL_TIMEOUT_AL : rc;
}
smc_llc_link_active(link, net->ipv4.sysctl_tcp_keepalive_time);
smc_llc_link_active(link);
return 0;
}
......@@ -1194,10 +1207,10 @@ static int smc_listen_ism_init(struct smc_sock *new_smc,
/* listen worker: register buffers */
static int smc_listen_rdma_reg(struct smc_sock *new_smc, int local_contact)
{
struct smc_link *link = &new_smc->conn.lgr->lnk[SMC_SINGLE_LINK];
struct smc_connection *conn = &new_smc->conn;
if (local_contact != SMC_FIRST_CONTACT) {
if (smc_reg_rmb(link, new_smc->conn.rmb_desc, true))
if (smcr_lgr_reg_rmbs(conn->lgr, conn->rmb_desc))
return SMC_CLC_DECL_ERR_REGRMB;
}
smc_rmb_sync_sg_for_device(&new_smc->conn);
......@@ -1210,13 +1223,13 @@ static int smc_listen_rdma_finish(struct smc_sock *new_smc,
struct smc_clc_msg_accept_confirm *cclc,
int local_contact)
{
struct smc_link *link = &new_smc->conn.lgr->lnk[SMC_SINGLE_LINK];
struct smc_link *link = new_smc->conn.lnk;
int reason_code = 0;
if (local_contact == SMC_FIRST_CONTACT)
smc_link_save_peer_info(link, cclc);
if (smc_rmb_rtoken_handling(&new_smc->conn, cclc)) {
if (smc_rmb_rtoken_handling(&new_smc->conn, link, cclc)) {
reason_code = SMC_CLC_DECL_ERR_RTOK;
goto decline;
}
......@@ -1227,7 +1240,7 @@ static int smc_listen_rdma_finish(struct smc_sock *new_smc,
goto decline;
}
/* QP confirmation over RoCE fabric */
reason_code = smc_serv_conf_first_link(new_smc);
reason_code = smcr_serv_conf_first_link(new_smc);
if (reason_code)
goto decline;
}
......
......@@ -121,6 +121,7 @@ enum smc_urg_state {
struct smc_connection {
struct rb_node alert_node;
struct smc_link_group *lgr; /* link group of connection */
struct smc_link *lnk; /* assigned SMC-R link */
u32 alert_token_local; /* unique conn. id */
u8 peer_rmbe_idx; /* from tcp handshake */
int peer_rmbe_size; /* size of peer rx buffer */
......
......@@ -57,7 +57,7 @@ int smc_cdc_get_free_slot(struct smc_connection *conn,
struct smc_rdma_wr **wr_rdma_buf,
struct smc_cdc_tx_pend **pend)
{
struct smc_link *link = &conn->lgr->lnk[SMC_SINGLE_LINK];
struct smc_link *link = conn->lnk;
int rc;
rc = smc_wr_tx_get_free_slot(link, smc_cdc_tx_handler, wr_buf,
......@@ -91,12 +91,10 @@ int smc_cdc_msg_send(struct smc_connection *conn,
struct smc_wr_buf *wr_buf,
struct smc_cdc_tx_pend *pend)
{
struct smc_link *link = conn->lnk;
union smc_host_cursor cfed;
struct smc_link *link;
int rc;
link = &conn->lgr->lnk[SMC_SINGLE_LINK];
smc_cdc_add_pending_send(conn, pend);
conn->tx_cdc_seq++;
......@@ -165,7 +163,7 @@ static void smc_cdc_tx_dismisser(struct smc_wr_tx_pend_priv *tx_pend)
void smc_cdc_tx_dismiss_slots(struct smc_connection *conn)
{
struct smc_link *link = &conn->lgr->lnk[SMC_SINGLE_LINK];
struct smc_link *link = conn->lnk;
smc_wr_tx_dismiss_slots(link, SMC_CDC_MSG_TYPE,
smc_cdc_tx_filter, smc_cdc_tx_dismisser,
......
......@@ -496,7 +496,7 @@ int smc_clc_send_confirm(struct smc_sock *smc)
sizeof(SMCD_EYECATCHER));
} else {
/* SMC-R specific settings */
link = &conn->lgr->lnk[SMC_SINGLE_LINK];
link = conn->lnk;
memcpy(cclc.hdr.eyecatcher, SMC_EYECATCHER,
sizeof(SMC_EYECATCHER));
cclc.hdr.path = SMC_TYPE_R;
......@@ -508,13 +508,13 @@ int smc_clc_send_confirm(struct smc_sock *smc)
ETH_ALEN);
hton24(cclc.qpn, link->roce_qp->qp_num);
cclc.rmb_rkey =
htonl(conn->rmb_desc->mr_rx[SMC_SINGLE_LINK]->rkey);
htonl(conn->rmb_desc->mr_rx[link->link_idx]->rkey);
cclc.rmbe_idx = 1; /* for now: 1 RMB = 1 RMBE */
cclc.rmbe_alert_token = htonl(conn->alert_token_local);
cclc.qp_mtu = min(link->path_mtu, link->peer_mtu);
cclc.rmbe_size = conn->rmbe_size_short;
cclc.rmb_dma_addr = cpu_to_be64((u64)sg_dma_address
(conn->rmb_desc->sgt[SMC_SINGLE_LINK].sgl));
(conn->rmb_desc->sgt[link->link_idx].sgl));
hton24(cclc.psn, link->psn_initial);
memcpy(cclc.smcr_trl.eyecatcher, SMC_EYECATCHER,
sizeof(SMC_EYECATCHER));
......@@ -572,7 +572,7 @@ int smc_clc_send_accept(struct smc_sock *new_smc, int srv_first_contact)
memcpy(aclc.hdr.eyecatcher, SMC_EYECATCHER,
sizeof(SMC_EYECATCHER));
aclc.hdr.path = SMC_TYPE_R;
link = &conn->lgr->lnk[SMC_SINGLE_LINK];
link = conn->lnk;
memcpy(aclc.lcl.id_for_peer, local_systemid,
sizeof(local_systemid));
memcpy(&aclc.lcl.gid, link->gid, SMC_GID_SIZE);
......@@ -580,13 +580,13 @@ int smc_clc_send_accept(struct smc_sock *new_smc, int srv_first_contact)
ETH_ALEN);
hton24(aclc.qpn, link->roce_qp->qp_num);
aclc.rmb_rkey =
htonl(conn->rmb_desc->mr_rx[SMC_SINGLE_LINK]->rkey);
htonl(conn->rmb_desc->mr_rx[link->link_idx]->rkey);
aclc.rmbe_idx = 1; /* as long as 1 RMB = 1 RMBE */
aclc.rmbe_alert_token = htonl(conn->alert_token_local);
aclc.qp_mtu = link->path_mtu;
aclc.rmbe_size = conn->rmbe_size_short,
aclc.rmb_dma_addr = cpu_to_be64((u64)sg_dma_address
(conn->rmb_desc->sgt[SMC_SINGLE_LINK].sgl));
(conn->rmb_desc->sgt[link->link_idx].sgl));
hton24(aclc.psn, link->psn_initial);
memcpy(aclc.smcr_trl.eyecatcher, SMC_EYECATCHER,
sizeof(SMC_EYECATCHER));
......
......@@ -44,6 +44,7 @@
#define SMC_CLC_DECL_DIFFPREFIX 0x03070000 /* IP prefix / subnet mismatch */
#define SMC_CLC_DECL_GETVLANERR 0x03080000 /* err to get vlan id of ip device*/
#define SMC_CLC_DECL_ISMVLANERR 0x03090000 /* err to reg vlan id on ism dev */
#define SMC_CLC_DECL_NOACTLINK 0x030a0000 /* no active smc-r link in lgr */
#define SMC_CLC_DECL_SYNCERR 0x04000000 /* synchronization error */
#define SMC_CLC_DECL_PEERDECL 0x05000000 /* peer declined during handshake */
#define SMC_CLC_DECL_INTERR 0x09990000 /* internal error */
......
......@@ -116,7 +116,7 @@ static void smc_lgr_add_alert_token(struct smc_connection *conn)
* Requires @conns_lock
* Note that '0' is a reserved value and not assigned.
*/
static void smc_lgr_register_conn(struct smc_connection *conn)
static int smc_lgr_register_conn(struct smc_connection *conn)
{
struct smc_sock *smc = container_of(conn, struct smc_sock, conn);
static atomic_t nexttoken = ATOMIC_INIT(0);
......@@ -131,7 +131,24 @@ static void smc_lgr_register_conn(struct smc_connection *conn)
conn->alert_token_local = 0;
}
smc_lgr_add_alert_token(conn);
/* assign the new connection to a link */
if (!conn->lgr->is_smcd) {
struct smc_link *lnk;
int i;
/* tbd - link balancing */
for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
lnk = &conn->lgr->lnk[i];
if (lnk->state == SMC_LNK_ACTIVATING ||
lnk->state == SMC_LNK_ACTIVE)
conn->lnk = lnk;
}
if (!conn->lnk)
return SMC_CLC_DECL_NOACTLINK;
}
conn->lgr->conns_num++;
return 0;
}
/* Unregister connection and reset the alert token of the given connection<
......@@ -179,7 +196,7 @@ void smc_lgr_cleanup_early(struct smc_connection *conn)
* of the DELETE LINK sequence from server; or as server to
* initiate the delete processing. See smc_llc_rx_delete_link().
*/
static int smc_link_send_delete(struct smc_link *lnk, bool orderly)
static int smcr_link_send_delete(struct smc_link *lnk, bool orderly)
{
if (lnk->state == SMC_LNK_ACTIVE &&
!smc_llc_send_delete_link(lnk, SMC_LLC_REQ, orderly)) {
......@@ -197,8 +214,8 @@ static void smc_lgr_free_work(struct work_struct *work)
struct smc_link_group,
free_work);
spinlock_t *lgr_lock;
struct smc_link *lnk;
bool conns;
int i;
smc_lgr_list_head(lgr, &lgr_lock);
spin_lock_bh(lgr_lock);
......@@ -215,25 +232,38 @@ static void smc_lgr_free_work(struct work_struct *work)
}
list_del_init(&lgr->list); /* remove from smc_lgr_list */
lnk = &lgr->lnk[SMC_SINGLE_LINK];
if (!lgr->is_smcd && !lgr->terminating) {
/* try to send del link msg, on error free lgr immediately */
if (lnk->state == SMC_LNK_ACTIVE &&
!smc_link_send_delete(lnk, true)) {
/* reschedule in case we never receive a response */
smc_lgr_schedule_free_work(lgr);
bool do_wait = false;
for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
struct smc_link *lnk = &lgr->lnk[i];
/* try to send del link msg, on err free immediately */
if (lnk->state == SMC_LNK_ACTIVE &&
!smcr_link_send_delete(lnk, true)) {
/* reschedule in case we never receive a resp */
smc_lgr_schedule_free_work(lgr);
do_wait = true;
}
}
if (do_wait) {
spin_unlock_bh(lgr_lock);
return;
return; /* wait for resp, see smc_llc_rx_delete_link */
}
}
lgr->freeing = 1; /* this instance does the freeing, no new schedule */
spin_unlock_bh(lgr_lock);
cancel_delayed_work(&lgr->free_work);
if (!lgr->is_smcd && lnk->state != SMC_LNK_INACTIVE)
smc_llc_link_inactive(lnk);
if (lgr->is_smcd && !lgr->terminating)
smc_ism_signal_shutdown(lgr);
if (!lgr->is_smcd) {
for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
struct smc_link *lnk = &lgr->lnk[i];
if (smc_link_usable(lnk))
lnk->state = SMC_LNK_INACTIVE;
}
}
smc_lgr_free(lgr);
}
......@@ -245,6 +275,87 @@ static void smc_lgr_terminate_work(struct work_struct *work)
__smc_lgr_terminate(lgr, true);
}
/* return next unique link id for the lgr
 *
 * Advances lgr->next_link_id (a u8 counter that wraps around) and returns the
 * first value that is non-zero and not currently assigned to a usable link of
 * @lgr. Zero is skipped because it is not used as a link id.
 *
 * The candidate is re-drawn whenever it collides with a link in use; a plain
 * 'continue' inside the scan loop would only advance the scan and never
 * reject the candidate. The search ends as soon as a candidate matches no
 * usable link.
 */
static u8 smcr_next_link_id(struct smc_link_group *lgr)
{
	bool in_use;
	u8 link_id;
	int i;

	do {
		link_id = ++lgr->next_link_id;
		if (!link_id)	/* skip zero as link_id */
			link_id = ++lgr->next_link_id;
		/* check the candidate against all links that are in use */
		in_use = false;
		for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
			if (smc_link_usable(&lgr->lnk[i]) &&
			    lgr->lnk[i].link_id == link_id) {
				in_use = true;	/* collision, try next id */
				break;
			}
		}
	} while (in_use);
	return link_id;
}
/* initialize one SMC-R link of a link group
 *
 * @lgr:      link group the link belongs to
 * @lnk:      link slot to initialize
 * @link_idx: index stored in lnk->link_idx
 * @ini:      supplies the ib device, ib port and vlan id for the link
 *
 * Returns 0 on success. On failure the return code of the failing step is
 * returned, everything set up so far is undone, and @lnk is zeroed and put
 * back into state SMC_LNK_UNUSED.
 */
static int smcr_link_init(struct smc_link_group *lgr, struct smc_link *lnk,
u8 link_idx, struct smc_init_info *ini)
{
u8 rndvec[3];
int rc;
/* device reference and link count are both dropped again at 'out' */
get_device(&ini->ib_dev->ibdev->dev);
atomic_inc(&ini->ib_dev->lnk_cnt);
lnk->state = SMC_LNK_ACTIVATING;
lnk->link_id = smcr_next_link_id(lgr);
lnk->lgr = lgr;
lnk->link_idx = link_idx;
lnk->smcibdev = ini->ib_dev;
lnk->ibport = ini->ib_port;
/* pattr[] is indexed with ib_port - 1 */
lnk->path_mtu = ini->ib_dev->pattr[ini->ib_port - 1].active_mtu;
if (!ini->ib_dev->initialized) {
rc = (int)smc_ib_setup_per_ibdev(ini->ib_dev);
if (rc)
goto out;
}
/* psn_initial is assembled from three random bytes (24 bit value) */
get_random_bytes(rndvec, sizeof(rndvec));
lnk->psn_initial = rndvec[0] + (rndvec[1] << 8) +
(rndvec[2] << 16);
rc = smc_ib_determine_gid(lnk->smcibdev, lnk->ibport,
ini->vlan_id, lnk->gid, &lnk->sgid_index);
if (rc)
goto out;
/* from here on each step has a matching undo label below */
rc = smc_llc_link_init(lnk);
if (rc)
goto out;
rc = smc_wr_alloc_link_mem(lnk);
if (rc)
goto clear_llc_lnk;
rc = smc_ib_create_protection_domain(lnk);
if (rc)
goto free_link_mem;
rc = smc_ib_create_queue_pair(lnk);
if (rc)
goto dealloc_pd;
rc = smc_wr_create_link(lnk);
if (rc)
goto destroy_qp;
return 0;
/* error unwind: labels undo the setup steps in reverse order */
destroy_qp:
smc_ib_destroy_queue_pair(lnk);
dealloc_pd:
smc_ib_dealloc_protection_domain(lnk);
free_link_mem:
smc_wr_free_link_mem(lnk);
clear_llc_lnk:
smc_llc_link_clear(lnk);
out:
put_device(&ini->ib_dev->ibdev->dev);
/* lnk is wiped here, so the device is reached through ini afterwards */
memset(lnk, 0, sizeof(struct smc_link));
lnk->state = SMC_LNK_UNUSED;
/* wake waiters on lnks_deleted when the last link of the device is gone */
if (!atomic_dec_return(&ini->ib_dev->lnk_cnt))
wake_up(&ini->ib_dev->lnks_deleted);
return rc;
}
/* create a new SMC link group */
static int smc_lgr_create(struct smc_sock *smc, struct smc_init_info *ini)
{
......@@ -252,7 +363,7 @@ static int smc_lgr_create(struct smc_sock *smc, struct smc_init_info *ini)
struct list_head *lgr_list;
struct smc_link *lnk;
spinlock_t *lgr_lock;
u8 rndvec[3];
u8 link_idx;
int rc = 0;
int i;
......@@ -274,13 +385,14 @@ static int smc_lgr_create(struct smc_sock *smc, struct smc_init_info *ini)
lgr->freefast = 0;
lgr->freeing = 0;
lgr->vlan_id = ini->vlan_id;
rwlock_init(&lgr->sndbufs_lock);
rwlock_init(&lgr->rmbs_lock);
mutex_init(&lgr->sndbufs_lock);
mutex_init(&lgr->rmbs_lock);
rwlock_init(&lgr->conns_lock);
for (i = 0; i < SMC_RMBE_SIZES; i++) {
INIT_LIST_HEAD(&lgr->sndbufs[i]);
INIT_LIST_HEAD(&lgr->rmbs[i]);
}
lgr->next_link_id = 0;
smc_lgr_list.num += SMC_LGR_NUM_INCR;
memcpy(&lgr->id, (u8 *)&smc_lgr_list.num, SMC_LGR_ID_SIZE);
INIT_DELAYED_WORK(&lgr->free_work, smc_lgr_free_work);
......@@ -297,48 +409,19 @@ static int smc_lgr_create(struct smc_sock *smc, struct smc_init_info *ini)
atomic_inc(&ini->ism_dev->lgr_cnt);
} else {
/* SMC-R specific settings */
get_device(&ini->ib_dev->ibdev->dev);
lgr->role = smc->listen_smc ? SMC_SERV : SMC_CLNT;
memcpy(lgr->peer_systemid, ini->ib_lcl->id_for_peer,
SMC_SYSTEMID_LEN);
smc_llc_lgr_init(lgr, smc);
lnk = &lgr->lnk[SMC_SINGLE_LINK];
/* initialize link */
lnk->state = SMC_LNK_ACTIVATING;
lnk->link_id = SMC_SINGLE_LINK;
lnk->smcibdev = ini->ib_dev;
lnk->ibport = ini->ib_port;
lgr_list = &smc_lgr_list.list;
lgr_lock = &smc_lgr_list.lock;
lnk->path_mtu =
ini->ib_dev->pattr[ini->ib_port - 1].active_mtu;
if (!ini->ib_dev->initialized)
smc_ib_setup_per_ibdev(ini->ib_dev);
get_random_bytes(rndvec, sizeof(rndvec));
lnk->psn_initial = rndvec[0] + (rndvec[1] << 8) +
(rndvec[2] << 16);
rc = smc_ib_determine_gid(lnk->smcibdev, lnk->ibport,
ini->vlan_id, lnk->gid,
&lnk->sgid_index);
if (rc)
goto free_lgr;
rc = smc_llc_link_init(lnk);
link_idx = SMC_SINGLE_LINK;
lnk = &lgr->lnk[link_idx];
rc = smcr_link_init(lgr, lnk, link_idx, ini);
if (rc)
goto free_lgr;
rc = smc_wr_alloc_link_mem(lnk);
if (rc)
goto clear_llc_lnk;
rc = smc_ib_create_protection_domain(lnk);
if (rc)
goto free_link_mem;
rc = smc_ib_create_queue_pair(lnk);
if (rc)
goto dealloc_pd;
rc = smc_wr_create_link(lnk);
if (rc)
goto destroy_qp;
lgr_list = &smc_lgr_list.list;
lgr_lock = &smc_lgr_list.lock;
atomic_inc(&lgr_cnt);
atomic_inc(&ini->ib_dev->lnk_cnt);
}
smc->conn.lgr = lgr;
spin_lock_bh(lgr_lock);
......@@ -346,14 +429,6 @@ static int smc_lgr_create(struct smc_sock *smc, struct smc_init_info *ini)
spin_unlock_bh(lgr_lock);
return 0;
destroy_qp:
smc_ib_destroy_queue_pair(lnk);
dealloc_pd:
smc_ib_dealloc_protection_domain(lnk);
free_link_mem:
smc_wr_free_link_mem(lnk);
clear_llc_lnk:
smc_llc_link_clear(lnk);
free_lgr:
kfree(lgr);
ism_put_vlan:
......@@ -369,29 +444,37 @@ static int smc_lgr_create(struct smc_sock *smc, struct smc_init_info *ini)
return rc;
}
/* give back an rmb that is no longer used
 *
 * If the rkey of @rmb_desc was confirmed and the link group of @lnk is still
 * on a list, the rkey is deleted via @lnk first. A buffer without a
 * registration error is then only marked unused; a buffer whose registration
 * failed is removed from the rmb list and freed.
 */
static void smcr_buf_unuse(struct smc_buf_desc *rmb_desc,
			   struct smc_link *lnk)
{
	struct smc_link_group *group = lnk->lgr;

	if (rmb_desc->is_conf_rkey) {
		if (!list_empty(&group->list)) {
			/* unregister rmb with peer */
			smc_llc_do_delete_rkey(lnk, rmb_desc);
			rmb_desc->is_conf_rkey = false;
		}
	}

	if (!rmb_desc->is_reg_err) {
		/* buffer stays in the list and may be reused */
		rmb_desc->used = 0;
		return;
	}

	/* buf registration failed, reuse not possible */
	mutex_lock(&group->rmbs_lock);
	list_del(&rmb_desc->list);
	mutex_unlock(&group->rmbs_lock);
	smc_buf_free(group, true, rmb_desc);
}
static void smc_buf_unuse(struct smc_connection *conn,
struct smc_link_group *lgr)
{
if (conn->sndbuf_desc)
conn->sndbuf_desc->used = 0;
if (conn->rmb_desc) {
if (!conn->rmb_desc->regerr) {
if (!lgr->is_smcd && !list_empty(&lgr->list)) {
/* unregister rmb with peer */
smc_llc_do_delete_rkey(
&lgr->lnk[SMC_SINGLE_LINK],
conn->rmb_desc);
}
conn->rmb_desc->used = 0;
} else {
/* buf registration failed, reuse not possible */
write_lock_bh(&lgr->rmbs_lock);
list_del(&conn->rmb_desc->list);
write_unlock_bh(&lgr->rmbs_lock);
smc_buf_free(lgr, true, conn->rmb_desc);
}
}
if (conn->rmb_desc && lgr->is_smcd)
conn->rmb_desc->used = 0;
else if (conn->rmb_desc)
smcr_buf_unuse(conn->rmb_desc, conn->lnk);
}
/* remove a finished connection from its link group */
......@@ -417,8 +500,12 @@ void smc_conn_free(struct smc_connection *conn)
smc_lgr_schedule_free_work(lgr);
}
static void smc_link_clear(struct smc_link *lnk)
static void smcr_link_clear(struct smc_link *lnk)
{
struct smc_ib_device *smcibdev;
if (lnk->peer_qpn == 0)
return;
lnk->peer_qpn = 0;
smc_llc_link_clear(lnk);
smc_ib_modify_qp_reset(lnk);
......@@ -426,26 +513,35 @@ static void smc_link_clear(struct smc_link *lnk)
smc_ib_destroy_queue_pair(lnk);
smc_ib_dealloc_protection_domain(lnk);
smc_wr_free_link_mem(lnk);
if (!atomic_dec_return(&lnk->smcibdev->lnk_cnt))
wake_up(&lnk->smcibdev->lnks_deleted);
put_device(&lnk->smcibdev->ibdev->dev);
smcibdev = lnk->smcibdev;
memset(lnk, 0, sizeof(struct smc_link));
lnk->state = SMC_LNK_UNUSED;
if (!atomic_dec_return(&smcibdev->lnk_cnt))
wake_up(&smcibdev->lnks_deleted);
}
static void smcr_buf_free(struct smc_link_group *lgr, bool is_rmb,
struct smc_buf_desc *buf_desc)
{
struct smc_link *lnk = &lgr->lnk[SMC_SINGLE_LINK];
struct smc_link *lnk;
int i;
if (is_rmb) {
if (buf_desc->mr_rx[SMC_SINGLE_LINK])
smc_ib_put_memory_region(
buf_desc->mr_rx[SMC_SINGLE_LINK]);
smc_ib_buf_unmap_sg(lnk->smcibdev, buf_desc,
DMA_FROM_DEVICE);
} else {
smc_ib_buf_unmap_sg(lnk->smcibdev, buf_desc,
DMA_TO_DEVICE);
for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
lnk = &lgr->lnk[i];
if (!buf_desc->is_map_ib[lnk->link_idx])
continue;
if (is_rmb) {
if (buf_desc->mr_rx[lnk->link_idx])
smc_ib_put_memory_region(
buf_desc->mr_rx[lnk->link_idx]);
smc_ib_buf_unmap_sg(lnk, buf_desc, DMA_FROM_DEVICE);
} else {
smc_ib_buf_unmap_sg(lnk, buf_desc, DMA_TO_DEVICE);
}
sg_free_table(&buf_desc->sgt[lnk->link_idx]);
}
sg_free_table(&buf_desc->sgt[SMC_SINGLE_LINK]);
if (buf_desc->pages)
__free_pages(buf_desc->pages, buf_desc->order);
kfree(buf_desc);
......@@ -503,6 +599,8 @@ static void smc_lgr_free_bufs(struct smc_link_group *lgr)
/* remove a link group */
static void smc_lgr_free(struct smc_link_group *lgr)
{
int i;
smc_lgr_free_bufs(lgr);
if (lgr->is_smcd) {
if (!lgr->terminating) {
......@@ -512,8 +610,11 @@ static void smc_lgr_free(struct smc_link_group *lgr)
if (!atomic_dec_return(&lgr->smcd->lgr_cnt))
wake_up(&lgr->smcd->lgrs_deleted);
} else {
smc_link_clear(&lgr->lnk[SMC_SINGLE_LINK]);
put_device(&lgr->lnk[SMC_SINGLE_LINK].smcibdev->ibdev->dev);
for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
if (lgr->lnk[i].state != SMC_LNK_UNUSED)
smcr_link_clear(&lgr->lnk[i]);
}
smc_llc_lgr_clear(lgr);
if (!atomic_dec_return(&lgr_cnt))
wake_up(&lgrs_deleted);
}
......@@ -581,16 +682,20 @@ static void smc_conn_kill(struct smc_connection *conn, bool soft)
static void smc_lgr_cleanup(struct smc_link_group *lgr)
{
int i;
if (lgr->is_smcd) {
smc_ism_signal_shutdown(lgr);
smcd_unregister_all_dmbs(lgr);
smc_ism_put_vlan(lgr->smcd, lgr->vlan_id);
put_device(&lgr->smcd->dev);
} else {
struct smc_link *lnk = &lgr->lnk[SMC_SINGLE_LINK];
for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
struct smc_link *lnk = &lgr->lnk[i];
if (lnk->state != SMC_LNK_INACTIVE)
smc_llc_link_inactive(lnk);
if (smc_link_usable(lnk))
lnk->state = SMC_LNK_INACTIVE;
}
}
}
......@@ -609,8 +714,6 @@ static void __smc_lgr_terminate(struct smc_link_group *lgr, bool soft)
if (!soft)
cancel_delayed_work_sync(&lgr->free_work);
lgr->terminating = 1;
if (!lgr->is_smcd)
smc_llc_link_inactive(&lgr->lnk[SMC_SINGLE_LINK]);
/* kill remaining link group connections */
read_lock_bh(&lgr->conns_lock);
......@@ -656,14 +759,22 @@ void smc_port_terminate(struct smc_ib_device *smcibdev, u8 ibport)
{
struct smc_link_group *lgr, *l;
LIST_HEAD(lgr_free_list);
int i;
spin_lock_bh(&smc_lgr_list.lock);
list_for_each_entry_safe(lgr, l, &smc_lgr_list.list, list) {
if (!lgr->is_smcd &&
lgr->lnk[SMC_SINGLE_LINK].smcibdev == smcibdev &&
lgr->lnk[SMC_SINGLE_LINK].ibport == ibport) {
list_move(&lgr->list, &lgr_free_list);
lgr->freeing = 1;
if (lgr->is_smcd)
continue;
/* tbd - terminate only when no more links are active */
for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
if (!smc_link_usable(&lgr->lnk[i]) ||
lgr->lnk[i].state == SMC_LNK_DELETING)
continue;
if (lgr->lnk[i].smcibdev == smcibdev &&
lgr->lnk[i].ibport == ibport) {
list_move(&lgr->list, &lgr_free_list);
lgr->freeing = 1;
}
}
}
spin_unlock_bh(&smc_lgr_list.lock);
......@@ -728,6 +839,7 @@ void smc_smcr_terminate_all(struct smc_ib_device *smcibdev)
{
struct smc_link_group *lgr, *lg;
LIST_HEAD(lgr_free_list);
int i;
spin_lock_bh(&smc_lgr_list.lock);
if (!smcibdev) {
......@@ -736,9 +848,12 @@ void smc_smcr_terminate_all(struct smc_ib_device *smcibdev)
lgr->freeing = 1;
} else {
list_for_each_entry_safe(lgr, lg, &smc_lgr_list.list, list) {
if (lgr->lnk[SMC_SINGLE_LINK].smcibdev == smcibdev) {
list_move(&lgr->list, &lgr_free_list);
lgr->freeing = 1;
for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
if (lgr->lnk[i].smcibdev == smcibdev) {
list_move(&lgr->list, &lgr_free_list);
lgr->freeing = 1;
break;
}
}
}
}
......@@ -810,15 +925,21 @@ static bool smcr_lgr_match(struct smc_link_group *lgr,
struct smc_clc_msg_local *lcl,
enum smc_lgr_role role, u32 clcqpn)
{
return !memcmp(lgr->peer_systemid, lcl->id_for_peer,
SMC_SYSTEMID_LEN) &&
!memcmp(lgr->lnk[SMC_SINGLE_LINK].peer_gid, &lcl->gid,
SMC_GID_SIZE) &&
!memcmp(lgr->lnk[SMC_SINGLE_LINK].peer_mac, lcl->mac,
sizeof(lcl->mac)) &&
lgr->role == role &&
(lgr->role == SMC_SERV ||
lgr->lnk[SMC_SINGLE_LINK].peer_qpn == clcqpn);
int i;
if (memcmp(lgr->peer_systemid, lcl->id_for_peer, SMC_SYSTEMID_LEN) ||
lgr->role != role)
return false;
for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
if (lgr->lnk[i].state != SMC_LNK_ACTIVE)
continue;
if ((lgr->role == SMC_SERV || lgr->lnk[i].peer_qpn == clcqpn) &&
!memcmp(lgr->lnk[i].peer_gid, &lcl->gid, SMC_GID_SIZE) &&
!memcmp(lgr->lnk[i].peer_mac, lcl->mac, sizeof(lcl->mac)))
return true;
}
return false;
}
static bool smcd_lgr_match(struct smc_link_group *lgr,
......@@ -859,15 +980,17 @@ int smc_conn_create(struct smc_sock *smc, struct smc_init_info *ini)
/* link group found */
ini->cln_first_contact = SMC_REUSE_CONTACT;
conn->lgr = lgr;
smc_lgr_register_conn(conn); /* add smc conn to lgr */
if (delayed_work_pending(&lgr->free_work))
cancel_delayed_work(&lgr->free_work);
rc = smc_lgr_register_conn(conn); /* add conn to lgr */
write_unlock_bh(&lgr->conns_lock);
if (!rc && delayed_work_pending(&lgr->free_work))
cancel_delayed_work(&lgr->free_work);
break;
}
write_unlock_bh(&lgr->conns_lock);
}
spin_unlock_bh(lgr_lock);
if (rc)
return rc;
if (role == SMC_CLNT && !ini->srv_first_contact &&
ini->cln_first_contact == SMC_FIRST_CONTACT) {
......@@ -885,8 +1008,10 @@ int smc_conn_create(struct smc_sock *smc, struct smc_init_info *ini)
goto out;
lgr = conn->lgr;
write_lock_bh(&lgr->conns_lock);
smc_lgr_register_conn(conn); /* add smc conn to lgr */
rc = smc_lgr_register_conn(conn); /* add smc conn to lgr */
write_unlock_bh(&lgr->conns_lock);
if (rc)
goto out;
}
conn->local_tx_ctrl.common.type = SMC_CDC_MSG_TYPE;
conn->local_tx_ctrl.len = SMC_WR_TX_SIZE;
......@@ -934,19 +1059,19 @@ int smc_uncompress_bufsize(u8 compressed)
* buffer size; if not available, return NULL
*/
static struct smc_buf_desc *smc_buf_get_slot(int compressed_bufsize,
rwlock_t *lock,
struct mutex *lock,
struct list_head *buf_list)
{
struct smc_buf_desc *buf_slot;
read_lock_bh(lock);
mutex_lock(lock);
list_for_each_entry(buf_slot, buf_list, list) {
if (cmpxchg(&buf_slot->used, 0, 1) == 0) {
read_unlock_bh(lock);
mutex_unlock(lock);
return buf_slot;
}
}
read_unlock_bh(lock);
mutex_unlock(lock);
return NULL;
}
......@@ -959,12 +1084,55 @@ static inline int smc_rmb_wnd_update_limit(int rmbe_size)
return min_t(int, rmbe_size / 10, SOCK_MIN_SNDBUF / 2);
}
/* map a buffer (rmb or send buffer, selected by @is_rmb) to a link
 *
 * @buf_desc: buffer to map; per-link state is kept at index lnk->link_idx
 * @is_rmb:   true maps with DMA_FROM_DEVICE and also creates a memory region,
 *            false maps with DMA_TO_DEVICE only
 * @lnk:      link to map the buffer to
 *
 * Returns 0 if the buffer is mapped (or already was), -EAGAIN if the DMA
 * mapping did not yield exactly one entry, otherwise the error code of
 * sg_alloc_table() or smc_ib_get_memory_region().
 */
static int smcr_buf_map_link(struct smc_buf_desc *buf_desc, bool is_rmb,
struct smc_link *lnk)
{
int rc;
/* nothing to do when the buffer is already mapped on this link */
if (buf_desc->is_map_ib[lnk->link_idx])
return 0;
/* single-entry sg table covering the whole buffer */
rc = sg_alloc_table(&buf_desc->sgt[lnk->link_idx], 1, GFP_KERNEL);
if (rc)
return rc;
sg_set_buf(buf_desc->sgt[lnk->link_idx].sgl,
buf_desc->cpu_addr, buf_desc->len);
/* map sg table to DMA address */
rc = smc_ib_buf_map_sg(lnk, buf_desc,
is_rmb ? DMA_FROM_DEVICE : DMA_TO_DEVICE);
/* SMC protocol depends on mapping to one DMA address only */
/* NOTE(review): rc is presumably the number of mapped sg entries - confirm */
if (rc != 1) {
rc = -EAGAIN;
goto free_table;
}
/* create a new memory region for the RMB */
if (is_rmb) {
rc = smc_ib_get_memory_region(lnk->roce_pd,
IB_ACCESS_REMOTE_WRITE |
IB_ACCESS_LOCAL_WRITE,
buf_desc, lnk->link_idx);
if (rc)
goto buf_unmap;
smc_ib_sync_sg_for_device(lnk, buf_desc, DMA_FROM_DEVICE);
}
/* remember the mapping so that later calls return early */
buf_desc->is_map_ib[lnk->link_idx] = true;
return 0;
/* error unwind: undo the DMA mapping, then release the sg table */
buf_unmap:
smc_ib_buf_unmap_sg(lnk, buf_desc,
is_rmb ? DMA_FROM_DEVICE : DMA_TO_DEVICE);
free_table:
sg_free_table(&buf_desc->sgt[lnk->link_idx]);
return rc;
}
static struct smc_buf_desc *smcr_new_buf_create(struct smc_link_group *lgr,
bool is_rmb, int bufsize)
{
struct smc_buf_desc *buf_desc;
struct smc_link *lnk;
int rc;
/* try to alloc a new buffer */
buf_desc = kzalloc(sizeof(*buf_desc), GFP_KERNEL);
......@@ -981,41 +1149,31 @@ static struct smc_buf_desc *smcr_new_buf_create(struct smc_link_group *lgr,
return ERR_PTR(-EAGAIN);
}
buf_desc->cpu_addr = (void *)page_address(buf_desc->pages);
buf_desc->len = bufsize;
return buf_desc;
}
/* build the sg table from the pages */
lnk = &lgr->lnk[SMC_SINGLE_LINK];
rc = sg_alloc_table(&buf_desc->sgt[SMC_SINGLE_LINK], 1,
GFP_KERNEL);
if (rc) {
smc_buf_free(lgr, is_rmb, buf_desc);
return ERR_PTR(rc);
}
sg_set_buf(buf_desc->sgt[SMC_SINGLE_LINK].sgl,
buf_desc->cpu_addr, bufsize);
/* map buf_desc on all usable links,
* unused buffers stay mapped as long as the link is up
*/
static int smcr_buf_map_usable_links(struct smc_link_group *lgr,
struct smc_buf_desc *buf_desc, bool is_rmb)
{
int i, rc = 0;
/* map sg table to DMA address */
rc = smc_ib_buf_map_sg(lnk->smcibdev, buf_desc,
is_rmb ? DMA_FROM_DEVICE : DMA_TO_DEVICE);
/* SMC protocol depends on mapping to one DMA address only */
if (rc != 1) {
smc_buf_free(lgr, is_rmb, buf_desc);
return ERR_PTR(-EAGAIN);
}
for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
struct smc_link *lnk = &lgr->lnk[i];
/* create a new memory region for the RMB */
if (is_rmb) {
rc = smc_ib_get_memory_region(lnk->roce_pd,
IB_ACCESS_REMOTE_WRITE |
IB_ACCESS_LOCAL_WRITE,
buf_desc);
if (rc) {
smc_buf_free(lgr, is_rmb, buf_desc);
return ERR_PTR(rc);
if (!smc_link_usable(lnk))
continue;
if (smcr_buf_map_link(buf_desc, is_rmb, lnk)) {
smcr_buf_unuse(buf_desc, lnk);
rc = -ENOMEM;
goto out;
}
}
buf_desc->len = bufsize;
return buf_desc;
out:
return rc;
}
#define SMCD_DMBE_SIZES 7 /* 0 -> 16KB, 1 -> 32KB, .. 6 -> 1MB */
......@@ -1062,8 +1220,8 @@ static int __smc_buf_create(struct smc_sock *smc, bool is_smcd, bool is_rmb)
struct smc_link_group *lgr = conn->lgr;
struct list_head *buf_list;
int bufsize, bufsize_short;
struct mutex *lock; /* lock buffer list */
int sk_buf_size;
rwlock_t *lock;
if (is_rmb)
/* use socket recv buffer size (w/o overhead) as start value */
......@@ -1104,15 +1262,21 @@ static int __smc_buf_create(struct smc_sock *smc, bool is_smcd, bool is_rmb)
continue;
buf_desc->used = 1;
write_lock_bh(lock);
mutex_lock(lock);
list_add(&buf_desc->list, buf_list);
write_unlock_bh(lock);
mutex_unlock(lock);
break; /* found */
}
if (IS_ERR(buf_desc))
return -ENOMEM;
if (!is_smcd) {
if (smcr_buf_map_usable_links(lgr, buf_desc, is_rmb)) {
return -ENOMEM;
}
}
if (is_rmb) {
conn->rmb_desc = buf_desc;
conn->rmbe_size_short = bufsize_short;
......@@ -1132,42 +1296,44 @@ static int __smc_buf_create(struct smc_sock *smc, bool is_smcd, bool is_rmb)
/* Sync the connection's send buffer (conn->sndbuf_desc) for CPU access,
 * using direction DMA_TO_DEVICE. Nothing is synced when the connection has
 * no link group or when the link group is SMC-D.
 *
 * NOTE(review): this span carries two forms of the guard and two forms of
 * the sync call (lgr->lnk[SMC_SINGLE_LINK].smcibdev vs. conn->lnk). They look
 * like the before/after lines of one change - confirm which pair is current.
 */
void smc_sndbuf_sync_sg_for_cpu(struct smc_connection *conn)
{
struct smc_link_group *lgr = conn->lgr;
if (!conn->lgr || conn->lgr->is_smcd)
/* variant of the guard that also bails out on an unusable conn->lnk */
if (!conn->lgr || conn->lgr->is_smcd || !smc_link_usable(conn->lnk))
return;
/* variant addressing the IB device of the single link */
smc_ib_sync_sg_for_cpu(lgr->lnk[SMC_SINGLE_LINK].smcibdev,
conn->sndbuf_desc, DMA_TO_DEVICE);
/* variant addressing the link the connection is assigned to */
smc_ib_sync_sg_for_cpu(conn->lnk, conn->sndbuf_desc, DMA_TO_DEVICE);
}
/* Sync the connection's send buffer (conn->sndbuf_desc) for device access,
 * using direction DMA_TO_DEVICE. Nothing is synced when the connection has
 * no link group or when the link group is SMC-D.
 *
 * NOTE(review): this span carries two forms of the guard and two forms of
 * the sync call (lgr->lnk[SMC_SINGLE_LINK].smcibdev vs. conn->lnk). They look
 * like the before/after lines of one change - confirm which pair is current.
 */
void smc_sndbuf_sync_sg_for_device(struct smc_connection *conn)
{
struct smc_link_group *lgr = conn->lgr;
if (!conn->lgr || conn->lgr->is_smcd)
/* variant of the guard that also bails out on an unusable conn->lnk */
if (!conn->lgr || conn->lgr->is_smcd || !smc_link_usable(conn->lnk))
return;
/* variant addressing the IB device of the single link */
smc_ib_sync_sg_for_device(lgr->lnk[SMC_SINGLE_LINK].smcibdev,
conn->sndbuf_desc, DMA_TO_DEVICE);
/* variant addressing the link the connection is assigned to */
smc_ib_sync_sg_for_device(conn->lnk, conn->sndbuf_desc, DMA_TO_DEVICE);
}
/* Sync the connection's receive buffer (conn->rmb_desc) for CPU access,
 * using direction DMA_FROM_DEVICE. Nothing is synced when the connection has
 * no link group or when the link group is SMC-D.
 *
 * NOTE(review): this span holds both a single call for
 * lgr->lnk[SMC_SINGLE_LINK] and a loop over all links of the group. They look
 * like the before/after lines of one change - confirm which one is current.
 */
void smc_rmb_sync_sg_for_cpu(struct smc_connection *conn)
{
struct smc_link_group *lgr = conn->lgr;
int i;
if (!conn->lgr || conn->lgr->is_smcd)
return;
/* single-link variant */
smc_ib_sync_sg_for_cpu(lgr->lnk[SMC_SINGLE_LINK].smcibdev,
conn->rmb_desc, DMA_FROM_DEVICE);
/* multi-link variant: sync on every link of the group that is usable */
for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
if (!smc_link_usable(&conn->lgr->lnk[i]))
continue;
smc_ib_sync_sg_for_cpu(&conn->lgr->lnk[i], conn->rmb_desc,
DMA_FROM_DEVICE);
}
}
/* Sync the connection's receive buffer (conn->rmb_desc) for device access,
 * using direction DMA_FROM_DEVICE. Nothing is synced when the connection has
 * no link group or when the link group is SMC-D.
 *
 * NOTE(review): this span holds both a single call for
 * lgr->lnk[SMC_SINGLE_LINK] and a loop over all links of the group. They look
 * like the before/after lines of one change - confirm which one is current.
 */
void smc_rmb_sync_sg_for_device(struct smc_connection *conn)
{
struct smc_link_group *lgr = conn->lgr;
int i;
if (!conn->lgr || conn->lgr->is_smcd)
return;
/* single-link variant */
smc_ib_sync_sg_for_device(lgr->lnk[SMC_SINGLE_LINK].smcibdev,
conn->rmb_desc, DMA_FROM_DEVICE);
/* multi-link variant: sync on every link of the group that is usable */
for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
if (!smc_link_usable(&conn->lgr->lnk[i]))
continue;
smc_ib_sync_sg_for_device(&conn->lgr->lnk[i], conn->rmb_desc,
DMA_FROM_DEVICE);
}
}
/* create the send and receive buffer for an SMC socket;
......@@ -1203,15 +1369,16 @@ static inline int smc_rmb_reserve_rtoken_idx(struct smc_link_group *lgr)
}
/* add a new rtoken from peer */
int smc_rtoken_add(struct smc_link_group *lgr, __be64 nw_vaddr, __be32 nw_rkey)
int smc_rtoken_add(struct smc_link *lnk, __be64 nw_vaddr, __be32 nw_rkey)
{
struct smc_link_group *lgr = smc_get_lgr(lnk);
u64 dma_addr = be64_to_cpu(nw_vaddr);
u32 rkey = ntohl(nw_rkey);
int i;
for (i = 0; i < SMC_RMBS_PER_LGR_MAX; i++) {
if ((lgr->rtokens[i][SMC_SINGLE_LINK].rkey == rkey) &&
(lgr->rtokens[i][SMC_SINGLE_LINK].dma_addr == dma_addr) &&
if (lgr->rtokens[i][lnk->link_idx].rkey == rkey &&
lgr->rtokens[i][lnk->link_idx].dma_addr == dma_addr &&
test_bit(i, lgr->rtokens_used_mask)) {
/* already in list */
return i;
......@@ -1220,23 +1387,25 @@ int smc_rtoken_add(struct smc_link_group *lgr, __be64 nw_vaddr, __be32 nw_rkey)
i = smc_rmb_reserve_rtoken_idx(lgr);
if (i < 0)
return i;
lgr->rtokens[i][SMC_SINGLE_LINK].rkey = rkey;
lgr->rtokens[i][SMC_SINGLE_LINK].dma_addr = dma_addr;
lgr->rtokens[i][lnk->link_idx].rkey = rkey;
lgr->rtokens[i][lnk->link_idx].dma_addr = dma_addr;
return i;
}
/* delete an rtoken */
int smc_rtoken_delete(struct smc_link_group *lgr, __be32 nw_rkey)
/* delete an rtoken from all links */
int smc_rtoken_delete(struct smc_link *lnk, __be32 nw_rkey)
{
struct smc_link_group *lgr = smc_get_lgr(lnk);
u32 rkey = ntohl(nw_rkey);
int i;
int i, j;
for (i = 0; i < SMC_RMBS_PER_LGR_MAX; i++) {
if (lgr->rtokens[i][SMC_SINGLE_LINK].rkey == rkey &&
if (lgr->rtokens[i][lnk->link_idx].rkey == rkey &&
test_bit(i, lgr->rtokens_used_mask)) {
lgr->rtokens[i][SMC_SINGLE_LINK].rkey = 0;
lgr->rtokens[i][SMC_SINGLE_LINK].dma_addr = 0;
for (j = 0; j < SMC_LINKS_PER_LGR_MAX; j++) {
lgr->rtokens[i][j].rkey = 0;
lgr->rtokens[i][j].dma_addr = 0;
}
clear_bit(i, lgr->rtokens_used_mask);
return 0;
}
......@@ -1246,9 +1415,10 @@ int smc_rtoken_delete(struct smc_link_group *lgr, __be32 nw_rkey)
/* save rkey and dma_addr received from peer during clc handshake */
int smc_rmb_rtoken_handling(struct smc_connection *conn,
struct smc_link *lnk,
struct smc_clc_msg_accept_confirm *clc)
{
conn->rtoken_idx = smc_rtoken_add(conn->lgr, clc->rmb_dma_addr,
conn->rtoken_idx = smc_rtoken_add(lnk, clc->rmb_dma_addr,
clc->rmb_rkey);
if (conn->rtoken_idx < 0)
return conn->rtoken_idx;
......
......@@ -32,6 +32,7 @@ enum smc_lgr_role { /* possible roles of a link group */
};
enum smc_link_state { /* possible states of a link */
SMC_LNK_UNUSED, /* link is unused */
SMC_LNK_INACTIVE, /* link is inactive */
SMC_LNK_ACTIVATING, /* link is being activated */
SMC_LNK_ACTIVE, /* link is active */
......@@ -115,9 +116,10 @@ struct smc_link {
u8 peer_mac[ETH_ALEN]; /* = gid[8:10||13:15] */
u8 peer_gid[SMC_GID_SIZE]; /* gid of peer*/
u8 link_id; /* unique # within link group */
u8 link_idx; /* index in lgr link array */
struct smc_link_group *lgr; /* parent link group */
enum smc_link_state state; /* state of link */
struct workqueue_struct *llc_wq; /* single thread work queue */
struct completion llc_confirm; /* wait for rx of conf link */
struct completion llc_confirm_resp; /* wait 4 rx of cnf lnk rsp */
int llc_confirm_rc; /* rc from confirm link msg */
......@@ -127,10 +129,10 @@ struct smc_link {
struct delayed_work llc_testlink_wrk; /* testlink worker */
struct completion llc_testlink_resp; /* wait for rx of testlink */
int llc_testlink_time; /* testlink interval */
struct completion llc_confirm_rkey; /* wait 4 rx of cnf rkey */
int llc_confirm_rkey_rc; /* rc from cnf rkey msg */
struct completion llc_delete_rkey; /* wait 4 rx of del rkey */
int llc_delete_rkey_rc; /* rc from del rkey msg */
struct completion llc_confirm_rkey_resp; /* w4 rx of cnf rkey */
int llc_confirm_rkey_resp_rc; /* rc from cnf rkey */
struct completion llc_delete_rkey_resp; /* w4 rx of del rkey */
int llc_delete_rkey_resp_rc; /* rc from del rkey */
struct mutex llc_delete_rkey_mutex; /* serialize usage */
};
......@@ -150,25 +152,32 @@ struct smc_buf_desc {
struct page *pages;
int len; /* length of buffer */
u32 used; /* currently used / unused */
u8 wr_reg : 1; /* mem region registered */
u8 regerr : 1; /* err during registration */
union {
struct { /* SMC-R */
struct sg_table sgt[SMC_LINKS_PER_LGR_MAX];
/* virtual buffer */
struct ib_mr *mr_rx[SMC_LINKS_PER_LGR_MAX];
/* for rmb only: memory region
* incl. rkey provided to peer
*/
u32 order; /* allocation order */
struct sg_table sgt[SMC_LINKS_PER_LGR_MAX];
/* virtual buffer */
struct ib_mr *mr_rx[SMC_LINKS_PER_LGR_MAX];
/* for rmb only: memory region
* incl. rkey provided to peer
*/
u32 order; /* allocation order */
u8 is_conf_rkey;
/* confirm_rkey done */
u8 is_reg_mr[SMC_LINKS_PER_LGR_MAX];
/* mem region registered */
u8 is_map_ib[SMC_LINKS_PER_LGR_MAX];
/* mem region mapped to lnk */
u8 is_reg_err;
/* buffer registration err */
};
struct { /* SMC-D */
unsigned short sba_idx;
/* SBA index number */
u64 token;
/* DMB token number */
dma_addr_t dma_addr;
/* DMA address */
unsigned short sba_idx;
/* SBA index number */
u64 token;
/* DMB token number */
dma_addr_t dma_addr;
/* DMA address */
};
};
};
......@@ -196,9 +205,9 @@ struct smc_link_group {
unsigned short vlan_id; /* vlan id of link group */
struct list_head sndbufs[SMC_RMBE_SIZES];/* tx buffers */
rwlock_t sndbufs_lock; /* protects tx buffers */
struct mutex sndbufs_lock; /* protects tx buffers */
struct list_head rmbs[SMC_RMBE_SIZES]; /* rx buffers */
rwlock_t rmbs_lock; /* protects rx buffers */
struct mutex rmbs_lock; /* protects rx buffers */
u8 id[SMC_LGR_ID_SIZE]; /* unique lgr id */
struct delayed_work free_work; /* delayed freeing of an lgr */
......@@ -222,6 +231,15 @@ struct smc_link_group {
/* remote addr/key pairs */
DECLARE_BITMAP(rtokens_used_mask, SMC_RMBS_PER_LGR_MAX);
/* used rtoken elements */
u8 next_link_id;
struct list_head llc_event_q;
/* queue for llc events */
spinlock_t llc_event_q_lock;
/* protects llc_event_q */
struct work_struct llc_event_work;
/* llc event worker */
int llc_testlink_time;
/* link keep alive time */
};
struct { /* SMC-D */
u64 peer_gid;
......@@ -285,6 +303,14 @@ static inline struct smc_connection *smc_lgr_find_conn(
return res;
}
/* A link is usable in every state except SMC_LNK_UNUSED and
 * SMC_LNK_INACTIVE; returns true for a usable link, false otherwise.
 */
static inline bool smc_link_usable(struct smc_link *lnk)
{
	switch (lnk->state) {
	case SMC_LNK_UNUSED:
	case SMC_LNK_INACTIVE:
		return false;
	default:
		return true;
	}
}
struct smc_sock;
struct smc_clc_msg_accept_confirm;
struct smc_clc_msg_local;
......@@ -299,10 +325,10 @@ void smc_smcd_terminate_all(struct smcd_dev *dev);
void smc_smcr_terminate_all(struct smc_ib_device *smcibdev);
int smc_buf_create(struct smc_sock *smc, bool is_smcd);
int smc_uncompress_bufsize(u8 compressed);
int smc_rmb_rtoken_handling(struct smc_connection *conn,
int smc_rmb_rtoken_handling(struct smc_connection *conn, struct smc_link *link,
struct smc_clc_msg_accept_confirm *clc);
int smc_rtoken_add(struct smc_link_group *lgr, __be64 nw_vaddr, __be32 nw_rkey);
int smc_rtoken_delete(struct smc_link_group *lgr, __be32 nw_rkey);
int smc_rtoken_add(struct smc_link *lnk, __be64 nw_vaddr, __be32 nw_rkey);
int smc_rtoken_delete(struct smc_link *lnk, __be32 nw_rkey);
void smc_sndbuf_sync_sg_for_cpu(struct smc_connection *conn);
void smc_sndbuf_sync_sg_for_device(struct smc_connection *conn);
void smc_rmb_sync_sg_for_cpu(struct smc_connection *conn);
......@@ -317,6 +343,6 @@ void smc_core_exit(void);
/* Return the link group that the given link belongs to.
 *
 * NOTE(review): two return statements are present. As written the
 * container_of() form runs and the link->lgr form is unreachable; they look
 * like the before/after lines of one change - confirm which one is current.
 */
static inline struct smc_link_group *smc_get_lgr(struct smc_link *link)
{
/* derives the group from the link's position as lnk[SMC_SINGLE_LINK] */
return container_of(link, struct smc_link_group, lnk[SMC_SINGLE_LINK]);
/* reads the parent pointer stored in the link itself */
return link->lgr;
}
#endif
......@@ -389,15 +389,15 @@ void smc_ib_put_memory_region(struct ib_mr *mr)
ib_dereg_mr(mr);
}
static int smc_ib_map_mr_sg(struct smc_buf_desc *buf_slot)
static int smc_ib_map_mr_sg(struct smc_buf_desc *buf_slot, u8 link_idx)
{
unsigned int offset = 0;
int sg_num;
/* map the largest prefix of a dma mapped SG list */
sg_num = ib_map_mr_sg(buf_slot->mr_rx[SMC_SINGLE_LINK],
buf_slot->sgt[SMC_SINGLE_LINK].sgl,
buf_slot->sgt[SMC_SINGLE_LINK].orig_nents,
sg_num = ib_map_mr_sg(buf_slot->mr_rx[link_idx],
buf_slot->sgt[link_idx].sgl,
buf_slot->sgt[link_idx].orig_nents,
&offset, PAGE_SIZE);
return sg_num;
......@@ -405,29 +405,29 @@ static int smc_ib_map_mr_sg(struct smc_buf_desc *buf_slot)
/* Allocate a memory region and map the dma mapped SG list of buf_slot */
int smc_ib_get_memory_region(struct ib_pd *pd, int access_flags,
struct smc_buf_desc *buf_slot)
struct smc_buf_desc *buf_slot, u8 link_idx)
{
if (buf_slot->mr_rx[SMC_SINGLE_LINK])
if (buf_slot->mr_rx[link_idx])
return 0; /* already done */
buf_slot->mr_rx[SMC_SINGLE_LINK] =
buf_slot->mr_rx[link_idx] =
ib_alloc_mr(pd, IB_MR_TYPE_MEM_REG, 1 << buf_slot->order);
if (IS_ERR(buf_slot->mr_rx[SMC_SINGLE_LINK])) {
if (IS_ERR(buf_slot->mr_rx[link_idx])) {
int rc;
rc = PTR_ERR(buf_slot->mr_rx[SMC_SINGLE_LINK]);
buf_slot->mr_rx[SMC_SINGLE_LINK] = NULL;
rc = PTR_ERR(buf_slot->mr_rx[link_idx]);
buf_slot->mr_rx[link_idx] = NULL;
return rc;
}
if (smc_ib_map_mr_sg(buf_slot) != 1)
if (smc_ib_map_mr_sg(buf_slot, link_idx) != 1)
return -EINVAL;
return 0;
}
/* synchronize buffer usage for cpu access */
void smc_ib_sync_sg_for_cpu(struct smc_ib_device *smcibdev,
void smc_ib_sync_sg_for_cpu(struct smc_link *lnk,
struct smc_buf_desc *buf_slot,
enum dma_data_direction data_direction)
{
......@@ -435,11 +435,11 @@ void smc_ib_sync_sg_for_cpu(struct smc_ib_device *smcibdev,
unsigned int i;
/* for now there is just one DMA address */
for_each_sg(buf_slot->sgt[SMC_SINGLE_LINK].sgl, sg,
buf_slot->sgt[SMC_SINGLE_LINK].nents, i) {
for_each_sg(buf_slot->sgt[lnk->link_idx].sgl, sg,
buf_slot->sgt[lnk->link_idx].nents, i) {
if (!sg_dma_len(sg))
break;
ib_dma_sync_single_for_cpu(smcibdev->ibdev,
ib_dma_sync_single_for_cpu(lnk->smcibdev->ibdev,
sg_dma_address(sg),
sg_dma_len(sg),
data_direction);
......@@ -447,7 +447,7 @@ void smc_ib_sync_sg_for_cpu(struct smc_ib_device *smcibdev,
}
/* synchronize buffer usage for device access */
void smc_ib_sync_sg_for_device(struct smc_ib_device *smcibdev,
void smc_ib_sync_sg_for_device(struct smc_link *lnk,
struct smc_buf_desc *buf_slot,
enum dma_data_direction data_direction)
{
......@@ -455,11 +455,11 @@ void smc_ib_sync_sg_for_device(struct smc_ib_device *smcibdev,
unsigned int i;
/* for now there is just one DMA address */
for_each_sg(buf_slot->sgt[SMC_SINGLE_LINK].sgl, sg,
buf_slot->sgt[SMC_SINGLE_LINK].nents, i) {
for_each_sg(buf_slot->sgt[lnk->link_idx].sgl, sg,
buf_slot->sgt[lnk->link_idx].nents, i) {
if (!sg_dma_len(sg))
break;
ib_dma_sync_single_for_device(smcibdev->ibdev,
ib_dma_sync_single_for_device(lnk->smcibdev->ibdev,
sg_dma_address(sg),
sg_dma_len(sg),
data_direction);
......@@ -467,15 +467,15 @@ void smc_ib_sync_sg_for_device(struct smc_ib_device *smcibdev,
}
/* Map a new TX or RX buffer SG-table to DMA */
int smc_ib_buf_map_sg(struct smc_ib_device *smcibdev,
int smc_ib_buf_map_sg(struct smc_link *lnk,
struct smc_buf_desc *buf_slot,
enum dma_data_direction data_direction)
{
int mapped_nents;
mapped_nents = ib_dma_map_sg(smcibdev->ibdev,
buf_slot->sgt[SMC_SINGLE_LINK].sgl,
buf_slot->sgt[SMC_SINGLE_LINK].orig_nents,
mapped_nents = ib_dma_map_sg(lnk->smcibdev->ibdev,
buf_slot->sgt[lnk->link_idx].sgl,
buf_slot->sgt[lnk->link_idx].orig_nents,
data_direction);
if (!mapped_nents)
return -ENOMEM;
......@@ -483,18 +483,18 @@ int smc_ib_buf_map_sg(struct smc_ib_device *smcibdev,
return mapped_nents;
}
void smc_ib_buf_unmap_sg(struct smc_ib_device *smcibdev,
void smc_ib_buf_unmap_sg(struct smc_link *lnk,
struct smc_buf_desc *buf_slot,
enum dma_data_direction data_direction)
{
if (!buf_slot->sgt[SMC_SINGLE_LINK].sgl->dma_address)
if (!buf_slot->sgt[lnk->link_idx].sgl->dma_address)
return; /* already unmapped */
ib_dma_unmap_sg(smcibdev->ibdev,
buf_slot->sgt[SMC_SINGLE_LINK].sgl,
buf_slot->sgt[SMC_SINGLE_LINK].orig_nents,
ib_dma_unmap_sg(lnk->smcibdev->ibdev,
buf_slot->sgt[lnk->link_idx].sgl,
buf_slot->sgt[lnk->link_idx].orig_nents,
data_direction);
buf_slot->sgt[SMC_SINGLE_LINK].sgl->dma_address = 0;
buf_slot->sgt[lnk->link_idx].sgl->dma_address = 0;
}
long smc_ib_setup_per_ibdev(struct smc_ib_device *smcibdev)
......@@ -579,8 +579,9 @@ static void smc_ib_add_dev(struct ib_device *ibdev)
i++) {
set_bit(i, &smcibdev->port_event_mask);
/* determine pnetids of the port */
smc_pnetid_by_dev_port(ibdev->dev.parent, i,
smcibdev->pnetid[i]);
if (smc_pnetid_by_dev_port(ibdev->dev.parent, i,
smcibdev->pnetid[i]))
smc_pnetid_by_table_ib(smcibdev, i + 1);
}
schedule_work(&smcibdev->port_event_work);
}
......
......@@ -59,10 +59,10 @@ struct smc_link;
int smc_ib_register_client(void) __init;
void smc_ib_unregister_client(void);
bool smc_ib_port_active(struct smc_ib_device *smcibdev, u8 ibport);
int smc_ib_buf_map_sg(struct smc_ib_device *smcibdev,
int smc_ib_buf_map_sg(struct smc_link *lnk,
struct smc_buf_desc *buf_slot,
enum dma_data_direction data_direction);
void smc_ib_buf_unmap_sg(struct smc_ib_device *smcibdev,
void smc_ib_buf_unmap_sg(struct smc_link *lnk,
struct smc_buf_desc *buf_slot,
enum dma_data_direction data_direction);
void smc_ib_dealloc_protection_domain(struct smc_link *lnk);
......@@ -74,12 +74,12 @@ int smc_ib_modify_qp_rts(struct smc_link *lnk);
int smc_ib_modify_qp_reset(struct smc_link *lnk);
long smc_ib_setup_per_ibdev(struct smc_ib_device *smcibdev);
int smc_ib_get_memory_region(struct ib_pd *pd, int access_flags,
struct smc_buf_desc *buf_slot);
struct smc_buf_desc *buf_slot, u8 link_idx);
void smc_ib_put_memory_region(struct ib_mr *mr);
void smc_ib_sync_sg_for_cpu(struct smc_ib_device *smcibdev,
void smc_ib_sync_sg_for_cpu(struct smc_link *lnk,
struct smc_buf_desc *buf_slot,
enum dma_data_direction data_direction);
void smc_ib_sync_sg_for_device(struct smc_ib_device *smcibdev,
void smc_ib_sync_sg_for_device(struct smc_link *lnk,
struct smc_buf_desc *buf_slot,
enum dma_data_direction data_direction);
int smc_ib_determine_gid(struct smc_ib_device *smcibdev, u8 ibport,
......
......@@ -296,7 +296,8 @@ struct smcd_dev *smcd_alloc_dev(struct device *parent, const char *name,
device_initialize(&smcd->dev);
dev_set_name(&smcd->dev, name);
smcd->ops = ops;
smc_pnetid_by_dev_port(parent, 0, smcd->pnetid);
if (smc_pnetid_by_dev_port(parent, 0, smcd->pnetid))
smc_pnetid_by_table_smcd(smcd);
spin_lock_init(&smcd->lock);
spin_lock_init(&smcd->lgr_lock);
......
......@@ -134,6 +134,12 @@ union smc_llc_msg {
#define SMC_LLC_FLAG_RESP 0x80
/* One entry of a link group's llc event queue: a copy of a received LLC
 * message together with the link it was received on.
 */
struct smc_llc_qentry {
struct list_head list; /* node in lgr->llc_event_q */
struct smc_link *link; /* link the message arrived on */
union smc_llc_msg msg; /* copy of the received LLC message */
};
/********************************** send *************************************/
struct smc_llc_tx_pend {
......@@ -231,9 +237,9 @@ static int smc_llc_send_confirm_rkey(struct smc_link *link,
rkeyllc->hd.common.type = SMC_LLC_CONFIRM_RKEY;
rkeyllc->hd.length = sizeof(struct smc_llc_msg_confirm_rkey);
rkeyllc->rtoken[0].rmb_key =
htonl(rmb_desc->mr_rx[SMC_SINGLE_LINK]->rkey);
htonl(rmb_desc->mr_rx[link->link_idx]->rkey);
rkeyllc->rtoken[0].rmb_vaddr = cpu_to_be64(
(u64)sg_dma_address(rmb_desc->sgt[SMC_SINGLE_LINK].sgl));
(u64)sg_dma_address(rmb_desc->sgt[link->link_idx].sgl));
/* send llc message */
rc = smc_wr_tx_send(link, pend);
return rc;
......@@ -256,7 +262,7 @@ static int smc_llc_send_delete_rkey(struct smc_link *link,
rkeyllc->hd.common.type = SMC_LLC_DELETE_RKEY;
rkeyllc->hd.length = sizeof(struct smc_llc_msg_delete_rkey);
rkeyllc->num_rkeys = 1;
rkeyllc->rkey[0] = htonl(rmb_desc->mr_rx[SMC_SINGLE_LINK]->rkey);
rkeyllc->rkey[0] = htonl(rmb_desc->mr_rx[link->link_idx]->rkey);
/* send llc message */
rc = smc_wr_tx_send(link, pend);
return rc;
......@@ -356,46 +362,20 @@ static int smc_llc_send_test_link(struct smc_link *link, u8 user_data[16])
return rc;
}
/* Work item that carries a copy of an LLC message so that it can be sent
 * later from a workqueue.
 */
struct smc_llc_send_work {
struct work_struct work; /* work item queued on link->llc_wq */
struct smc_link *link; /* link the message is to be sent on */
int llclen; /* number of valid bytes in llcbuf */
union smc_llc_msg llcbuf; /* copy of the message to send */
};
/* worker that sends a prepared message */
static void smc_llc_send_message_work(struct work_struct *work)
/* schedule an llc send on link, may wait for buffers */
static int smc_llc_send_message(struct smc_link *link, void *llcbuf)
{
struct smc_llc_send_work *llcwrk = container_of(work,
struct smc_llc_send_work, work);
struct smc_wr_tx_pend_priv *pend;
struct smc_wr_buf *wr_buf;
int rc;
if (llcwrk->link->state == SMC_LNK_INACTIVE)
goto out;
rc = smc_llc_add_pending_send(llcwrk->link, &wr_buf, &pend);
if (!smc_link_usable(link))
return -ENOLINK;
rc = smc_llc_add_pending_send(link, &wr_buf, &pend);
if (rc)
goto out;
memcpy(wr_buf, &llcwrk->llcbuf, llcwrk->llclen);
smc_wr_tx_send(llcwrk->link, pend);
out:
kfree(llcwrk);
}
/* copy llcbuf and schedule an llc send on link
 *
 * Allocates a smc_llc_send_work with GFP_ATOMIC, copies llclen bytes of
 * llcbuf into it and queues it on link->llc_wq.
 * Returns 0 once the work is queued, -ENOMEM if the allocation fails.
 *
 * NOTE(review): the three statements after "return 0;" use rc, wr_buf and
 * pend, which are not declared in this body. They look like the tail of
 * another variant of this function interleaved here - confirm.
 */
static int smc_llc_send_message(struct smc_link *link, void *llcbuf, int llclen)
{
struct smc_llc_send_work *wrk = kmalloc(sizeof(*wrk), GFP_ATOMIC);
if (!wrk)
return -ENOMEM;
INIT_WORK(&wrk->work, smc_llc_send_message_work);
wrk->link = link;
wrk->llclen = llclen;
/* the caller's buffer is copied, so it need not outlive this call */
memcpy(&wrk->llcbuf, llcbuf, llclen);
queue_work(link->llc_wq, &wrk->work);
return 0;
return rc;
memcpy(wr_buf, llcbuf, sizeof(union smc_llc_msg));
return smc_wr_tx_send(link, pend);
}
/********************************* receive ***********************************/
......@@ -404,27 +384,17 @@ static void smc_llc_rx_confirm_link(struct smc_link *link,
struct smc_llc_msg_confirm_link *llc)
{
struct smc_link_group *lgr = smc_get_lgr(link);
int conf_rc;
int conf_rc = 0;
/* RMBE eyecatchers are not supported */
if (llc->hd.flags & SMC_LLC_FLAG_NO_RMBE_EYEC)
conf_rc = 0;
else
if (!(llc->hd.flags & SMC_LLC_FLAG_NO_RMBE_EYEC))
conf_rc = ENOTSUPP;
if (llc->hd.flags & SMC_LLC_FLAG_RESP) {
if (lgr->role == SMC_SERV &&
link->state == SMC_LNK_ACTIVATING) {
link->llc_confirm_resp_rc = conf_rc;
complete(&link->llc_confirm_resp);
}
} else {
if (lgr->role == SMC_CLNT &&
link->state == SMC_LNK_ACTIVATING) {
link->llc_confirm_rc = conf_rc;
link->link_id = llc->link_num;
complete(&link->llc_confirm);
}
if (lgr->role == SMC_CLNT &&
link->state == SMC_LNK_ACTIVATING) {
link->llc_confirm_rc = conf_rc;
link->link_id = llc->link_num;
complete(&link->llc_confirm);
}
}
......@@ -433,27 +403,22 @@ static void smc_llc_rx_add_link(struct smc_link *link,
{
struct smc_link_group *lgr = smc_get_lgr(link);
if (llc->hd.flags & SMC_LLC_FLAG_RESP) {
if (link->state == SMC_LNK_ACTIVATING)
complete(&link->llc_add_resp);
} else {
if (link->state == SMC_LNK_ACTIVATING) {
complete(&link->llc_add);
return;
}
if (link->state == SMC_LNK_ACTIVATING) {
complete(&link->llc_add);
return;
}
if (lgr->role == SMC_SERV) {
smc_llc_prep_add_link(llc, link,
link->smcibdev->mac[link->ibport - 1],
link->gid, SMC_LLC_REQ);
if (lgr->role == SMC_SERV) {
smc_llc_prep_add_link(llc, link,
link->smcibdev->mac[link->ibport - 1],
link->gid, SMC_LLC_REQ);
} else {
smc_llc_prep_add_link(llc, link,
link->smcibdev->mac[link->ibport - 1],
link->gid, SMC_LLC_RESP);
}
smc_llc_send_message(link, llc, sizeof(*llc));
} else {
smc_llc_prep_add_link(llc, link,
link->smcibdev->mac[link->ibport - 1],
link->gid, SMC_LLC_RESP);
}
smc_llc_send_message(link, llc);
}
static void smc_llc_rx_delete_link(struct smc_link *link,
......@@ -461,34 +426,24 @@ static void smc_llc_rx_delete_link(struct smc_link *link,
{
struct smc_link_group *lgr = smc_get_lgr(link);
if (llc->hd.flags & SMC_LLC_FLAG_RESP) {
if (lgr->role == SMC_SERV)
smc_lgr_schedule_free_work_fast(lgr);
smc_lgr_forget(lgr);
smc_llc_link_deleting(link);
if (lgr->role == SMC_SERV) {
/* client asks to delete this link, send request */
smc_llc_prep_delete_link(llc, link, SMC_LLC_REQ, true);
} else {
smc_lgr_forget(lgr);
smc_llc_link_deleting(link);
if (lgr->role == SMC_SERV) {
/* client asks to delete this link, send request */
smc_llc_prep_delete_link(llc, link, SMC_LLC_REQ, true);
} else {
/* server requests to delete this link, send response */
smc_llc_prep_delete_link(llc, link, SMC_LLC_RESP, true);
}
smc_llc_send_message(link, llc, sizeof(*llc));
smc_lgr_terminate_sched(lgr);
/* server requests to delete this link, send response */
smc_llc_prep_delete_link(llc, link, SMC_LLC_RESP, true);
}
smc_llc_send_message(link, llc);
smc_lgr_terminate_sched(lgr);
}
/* Handle a received TEST LINK message by sending it back on the same link
 * with SMC_LLC_FLAG_RESP set.
 *
 * NOTE(review): this span holds two forms. One distinguishes responses
 * (completing llc_testlink_resp while the link is active) from requests and
 * uses the three-argument smc_llc_send_message(); the other only echoes the
 * message using the two-argument call. They look like the before/after lines
 * of one change - confirm which one is current.
 */
static void smc_llc_rx_test_link(struct smc_link *link,
struct smc_llc_msg_test_link *llc)
{
if (llc->hd.flags & SMC_LLC_FLAG_RESP) {
if (link->state == SMC_LNK_ACTIVE)
complete(&link->llc_testlink_resp);
} else {
llc->hd.flags |= SMC_LLC_FLAG_RESP;
smc_llc_send_message(link, llc, sizeof(*llc));
}
llc->hd.flags |= SMC_LLC_FLAG_RESP;
smc_llc_send_message(link, llc);
}
static void smc_llc_rx_confirm_rkey(struct smc_link *link,
......@@ -496,34 +451,24 @@ static void smc_llc_rx_confirm_rkey(struct smc_link *link,
{
int rc;
if (llc->hd.flags & SMC_LLC_FLAG_RESP) {
link->llc_confirm_rkey_rc = llc->hd.flags &
SMC_LLC_FLAG_RKEY_NEG;
complete(&link->llc_confirm_rkey);
} else {
rc = smc_rtoken_add(smc_get_lgr(link),
llc->rtoken[0].rmb_vaddr,
llc->rtoken[0].rmb_key);
rc = smc_rtoken_add(link,
llc->rtoken[0].rmb_vaddr,
llc->rtoken[0].rmb_key);
/* ignore rtokens for other links, we have only one link */
/* ignore rtokens for other links, we have only one link */
llc->hd.flags |= SMC_LLC_FLAG_RESP;
if (rc < 0)
llc->hd.flags |= SMC_LLC_FLAG_RKEY_NEG;
smc_llc_send_message(link, llc, sizeof(*llc));
}
llc->hd.flags |= SMC_LLC_FLAG_RESP;
if (rc < 0)
llc->hd.flags |= SMC_LLC_FLAG_RKEY_NEG;
smc_llc_send_message(link, llc);
}
/* Handle a received CONFIRM RKEY CONT message: the rtokens it carries are
 * not evaluated, the message is only sent back with SMC_LLC_FLAG_RESP set.
 *
 * NOTE(review): this span holds two forms, one with a response/request
 * branch and the three-argument smc_llc_send_message(), one without the
 * branch and with the two-argument call. They look like the before/after
 * lines of one change - confirm which one is current.
 */
static void smc_llc_rx_confirm_rkey_cont(struct smc_link *link,
struct smc_llc_msg_confirm_rkey_cont *llc)
{
if (llc->hd.flags & SMC_LLC_FLAG_RESP) {
/* unused as long as we don't send this type of msg */
} else {
/* ignore rtokens for other links, we have only one link */
llc->hd.flags |= SMC_LLC_FLAG_RESP;
smc_llc_send_message(link, llc, sizeof(*llc));
}
/* ignore rtokens for other links, we have only one link */
llc->hd.flags |= SMC_LLC_FLAG_RESP;
smc_llc_send_message(link, llc);
}
static void smc_llc_rx_delete_rkey(struct smc_link *link,
......@@ -532,38 +477,41 @@ static void smc_llc_rx_delete_rkey(struct smc_link *link,
u8 err_mask = 0;
int i, max;
if (llc->hd.flags & SMC_LLC_FLAG_RESP) {
link->llc_delete_rkey_rc = llc->hd.flags &
SMC_LLC_FLAG_RKEY_NEG;
complete(&link->llc_delete_rkey);
} else {
max = min_t(u8, llc->num_rkeys, SMC_LLC_DEL_RKEY_MAX);
for (i = 0; i < max; i++) {
if (smc_rtoken_delete(smc_get_lgr(link), llc->rkey[i]))
err_mask |= 1 << (SMC_LLC_DEL_RKEY_MAX - 1 - i);
}
max = min_t(u8, llc->num_rkeys, SMC_LLC_DEL_RKEY_MAX);
for (i = 0; i < max; i++) {
if (smc_rtoken_delete(link, llc->rkey[i]))
err_mask |= 1 << (SMC_LLC_DEL_RKEY_MAX - 1 - i);
}
if (err_mask) {
llc->hd.flags |= SMC_LLC_FLAG_RKEY_NEG;
llc->err_mask = err_mask;
}
if (err_mask) {
llc->hd.flags |= SMC_LLC_FLAG_RKEY_NEG;
llc->err_mask = err_mask;
}
llc->hd.flags |= SMC_LLC_FLAG_RESP;
smc_llc_send_message(link, llc);
}
/* flush the llc event queue
 *
 * Removes every entry from lgr->llc_event_q and frees it, holding
 * llc_event_q_lock (bottom halves disabled) for the whole walk. The queued
 * messages are discarded without being handled.
 *
 * NOTE(review): the two statements that use llc and link refer to names not
 * declared in this body. They look like lines of a neighbouring function
 * interleaved here - confirm.
 */
static void smc_llc_event_flush(struct smc_link_group *lgr)
{
struct smc_llc_qentry *qentry, *q;
llc->hd.flags |= SMC_LLC_FLAG_RESP;
smc_llc_send_message(link, llc, sizeof(*llc));
spin_lock_bh(&lgr->llc_event_q_lock);
/* _safe iteration: the current entry is unlinked and freed in the loop */
list_for_each_entry_safe(qentry, q, &lgr->llc_event_q, list) {
list_del_init(&qentry->list);
kfree(qentry);
}
spin_unlock_bh(&lgr->llc_event_q_lock);
}
static void smc_llc_rx_handler(struct ib_wc *wc, void *buf)
static void smc_llc_event_handler(struct smc_llc_qentry *qentry)
{
struct smc_link *link = (struct smc_link *)wc->qp->qp_context;
union smc_llc_msg *llc = buf;
union smc_llc_msg *llc = &qentry->msg;
struct smc_link *link = qentry->link;
if (wc->byte_len < sizeof(*llc))
return; /* short message */
if (llc->raw.hdr.length != sizeof(*llc))
return; /* invalid message */
if (link->state == SMC_LNK_INACTIVE)
return; /* link not active, drop msg */
if (!smc_link_usable(link))
goto out;
switch (llc->raw.hdr.common.type) {
case SMC_LLC_TEST_LINK:
......@@ -588,6 +536,103 @@ static void smc_llc_rx_handler(struct ib_wc *wc, void *buf)
smc_llc_rx_delete_rkey(link, &llc->delete_rkey);
break;
}
out:
kfree(qentry);
}
/* Worker of a link group's llc event queue: takes queued messages off
 * lgr->llc_event_q one at a time, oldest first, and passes each one to
 * smc_llc_event_handler() until the queue is empty.
 */
static void smc_llc_event_work(struct work_struct *work)
{
	struct smc_link_group *lgr = container_of(work, struct smc_link_group,
						  llc_event_work);
	struct smc_llc_qentry *next;

	for (;;) {
		spin_lock_bh(&lgr->llc_event_q_lock);
		if (list_empty(&lgr->llc_event_q)) {
			spin_unlock_bh(&lgr->llc_event_q_lock);
			break;
		}
		next = list_first_entry(&lgr->llc_event_q,
					struct smc_llc_qentry, list);
		list_del_init(&next->list);
		/* the lock is dropped before the entry is handled */
		spin_unlock_bh(&lgr->llc_event_q_lock);

		smc_llc_event_handler(next);
	}
}
/* process llc responses in tasklet context */
static void smc_llc_rx_response(struct smc_link *link, union smc_llc_msg *llc)
{
int rc = 0;
switch (llc->raw.hdr.common.type) {
case SMC_LLC_TEST_LINK:
if (link->state == SMC_LNK_ACTIVE)
complete(&link->llc_testlink_resp);
break;
case SMC_LLC_CONFIRM_LINK:
if (!(llc->raw.hdr.flags & SMC_LLC_FLAG_NO_RMBE_EYEC))
rc = ENOTSUPP;
if (link->lgr->role == SMC_SERV &&
link->state == SMC_LNK_ACTIVATING) {
link->llc_confirm_resp_rc = rc;
complete(&link->llc_confirm_resp);
}
break;
case SMC_LLC_ADD_LINK:
if (link->state == SMC_LNK_ACTIVATING)
complete(&link->llc_add_resp);
break;
case SMC_LLC_DELETE_LINK:
if (link->lgr->role == SMC_SERV)
smc_lgr_schedule_free_work_fast(link->lgr);
break;
case SMC_LLC_CONFIRM_RKEY:
link->llc_confirm_rkey_resp_rc = llc->raw.hdr.flags &
SMC_LLC_FLAG_RKEY_NEG;
complete(&link->llc_confirm_rkey_resp);
break;
case SMC_LLC_CONFIRM_RKEY_CONT:
/* unused as long as we don't send this type of msg */
break;
case SMC_LLC_DELETE_RKEY:
link->llc_delete_rkey_resp_rc = llc->raw.hdr.flags &
SMC_LLC_FLAG_RKEY_NEG;
complete(&link->llc_delete_rkey_resp);
break;
}
}
/* Receive handler for llc messages.
 * Messages that are too short or whose length field does not match are
 * dropped. Responses are handled right here; every other message is copied
 * into a queue entry, appended to the link group's llc event queue, and the
 * event worker is scheduled. A message is silently dropped when no memory
 * for the queue entry is available.
 */
static void smc_llc_rx_handler(struct ib_wc *wc, void *buf)
{
	struct smc_link *link = (struct smc_link *)wc->qp->qp_context;
	union smc_llc_msg *llc = buf;
	struct smc_llc_qentry *entry;
	struct smc_link_group *lgr;
	unsigned long irqflags;

	/* short message or invalid message */
	if (wc->byte_len < sizeof(*llc) ||
	    llc->raw.hdr.length != sizeof(*llc))
		return;

	if (llc->raw.hdr.flags & SMC_LLC_FLAG_RESP) {
		/* process responses immediately */
		smc_llc_rx_response(link, llc);
		return;
	}

	/* GFP_ATOMIC: this allocation must not sleep */
	entry = kmalloc(sizeof(*entry), GFP_ATOMIC);
	if (!entry)
		return;
	INIT_LIST_HEAD(&entry->list);
	entry->link = link;
	memcpy(&entry->msg, llc, sizeof(entry->msg));

	lgr = link->lgr;
	spin_lock_irqsave(&lgr->llc_event_q_lock, irqflags);
	list_add_tail(&entry->list, &lgr->llc_event_q);
	spin_unlock_irqrestore(&lgr->llc_event_q_lock, irqflags);
	schedule_work(&lgr->llc_event_work);
}
/***************************** worker, utils *********************************/
......@@ -613,43 +658,55 @@ static void smc_llc_testlink_work(struct work_struct *work)
/* receive TEST LINK response over RoCE fabric */
rc = wait_for_completion_interruptible_timeout(&link->llc_testlink_resp,
SMC_LLC_WAIT_TIME);
if (link->state != SMC_LNK_ACTIVE)
return; /* link state changed */
if (rc <= 0) {
smc_lgr_terminate_sched(smc_get_lgr(link));
return;
}
next_interval = link->llc_testlink_time;
out:
queue_delayed_work(link->llc_wq, &link->llc_testlink_wrk,
next_interval);
schedule_delayed_work(&link->llc_testlink_wrk, next_interval);
}
/* Set up the llc part of a new link group: the event queue with its lock
 * and worker, and the testlink time, which is taken from the TCP keepalive
 * time of the network namespace of the connection's clc socket.
 */
void smc_llc_lgr_init(struct smc_link_group *lgr, struct smc_sock *smc)
{
	spin_lock_init(&lgr->llc_event_q_lock);
	INIT_LIST_HEAD(&lgr->llc_event_q);
	INIT_WORK(&lgr->llc_event_work, smc_llc_event_work);
	lgr->llc_testlink_time =
		sock_net(smc->clcsock->sk)->ipv4.sysctl_tcp_keepalive_time;
}
/* called after lgr was removed from lgr_list
 *
 * Tears down the llc part of a link group: drops all messages still queued
 * on the llc event queue, then cancels the event worker and waits until it
 * is no longer running.
 */
void smc_llc_lgr_clear(struct smc_link_group *lgr)
{
/* discard queued messages first; they are freed without being handled */
smc_llc_event_flush(lgr);
cancel_work_sync(&lgr->llc_event_work);
}
/* Initialize the llc state of a link: the completions used to wait for llc
 * messages, the mutex serializing delete rkey usage, and the delayed
 * testlink work.
 * Returns 0 on success, -ENOMEM if the workqueue allocation fails.
 *
 * NOTE(review): this span initializes both llc_confirm_rkey/llc_delete_rkey
 * and llc_confirm_rkey_resp/llc_delete_rkey_resp, and it also allocates
 * link->llc_wq. These look like the before/after lines of one change
 * interleaved - confirm which set is current.
 */
int smc_llc_link_init(struct smc_link *link)
{
struct smc_link_group *lgr = smc_get_lgr(link);
/* workqueue name is built from the first 4 bytes of lgr->id and link_id */
link->llc_wq = alloc_ordered_workqueue("llc_wq-%x:%x)", WQ_MEM_RECLAIM,
*((u32 *)lgr->id),
link->link_id);
if (!link->llc_wq)
return -ENOMEM;
init_completion(&link->llc_confirm);
init_completion(&link->llc_confirm_resp);
init_completion(&link->llc_add);
init_completion(&link->llc_add_resp);
init_completion(&link->llc_confirm_rkey);
init_completion(&link->llc_delete_rkey);
init_completion(&link->llc_confirm_rkey_resp);
init_completion(&link->llc_delete_rkey_resp);
mutex_init(&link->llc_delete_rkey_mutex);
init_completion(&link->llc_testlink_resp);
INIT_DELAYED_WORK(&link->llc_testlink_wrk, smc_llc_testlink_work);
return 0;
}
void smc_llc_link_active(struct smc_link *link, int testlink_time)
void smc_llc_link_active(struct smc_link *link)
{
link->state = SMC_LNK_ACTIVE;
if (testlink_time) {
link->llc_testlink_time = testlink_time * HZ;
queue_delayed_work(link->llc_wq, &link->llc_testlink_wrk,
link->llc_testlink_time);
if (link->lgr->llc_testlink_time) {
link->llc_testlink_time = link->lgr->llc_testlink_time * HZ;
schedule_delayed_work(&link->llc_testlink_wrk,
link->llc_testlink_time);
}
}
......@@ -659,20 +716,13 @@ void smc_llc_link_deleting(struct smc_link *link)
smc_wr_wakeup_tx_wait(link);
}
/* called in tasklet context
 *
 * Mark the link inactive, cancel its pending testlink work and call the
 * reg-wait and tx-wait wakeup helpers for the link.
 * NOTE(review): presumably the wakeups release waiters blocked on memory
 * registration and on tx slots - verify against smc_wr.
 */
void smc_llc_link_inactive(struct smc_link *link)
{
/* set the state before the wakeups */
link->state = SMC_LNK_INACTIVE;
cancel_delayed_work(&link->llc_testlink_wrk);
smc_wr_wakeup_reg_wait(link);
smc_wr_wakeup_tx_wait(link);
}
/* called in worker context
 *
 * Shut down the llc activity of a link: complete llc_testlink_resp, cancel
 * the testlink work and wait for it to finish, then call the reg-wait and
 * tx-wait wakeup helpers for the link.
 *
 * NOTE(review): the flush_workqueue()/destroy_workqueue() pair on
 * link->llc_wq and the remaining statements look like the before/after lines
 * of one change interleaved - confirm which set is current.
 */
void smc_llc_link_clear(struct smc_link *link)
{
flush_workqueue(link->llc_wq);
destroy_workqueue(link->llc_wq);
/* presumably releases a testlink worker still waiting for a response */
complete(&link->llc_testlink_resp);
cancel_delayed_work_sync(&link->llc_testlink_wrk);
smc_wr_wakeup_reg_wait(link);
smc_wr_wakeup_tx_wait(link);
}
/* register a new rtoken at the remote peer */
......@@ -682,14 +732,14 @@ int smc_llc_do_confirm_rkey(struct smc_link *link,
int rc;
/* protected by mutex smc_create_lgr_pending */
reinit_completion(&link->llc_confirm_rkey);
reinit_completion(&link->llc_confirm_rkey_resp);
rc = smc_llc_send_confirm_rkey(link, rmb_desc);
if (rc)
return rc;
/* receive CONFIRM RKEY response from server over RoCE fabric */
rc = wait_for_completion_interruptible_timeout(&link->llc_confirm_rkey,
SMC_LLC_WAIT_TIME);
if (rc <= 0 || link->llc_confirm_rkey_rc)
rc = wait_for_completion_interruptible_timeout(
&link->llc_confirm_rkey_resp, SMC_LLC_WAIT_TIME);
if (rc <= 0 || link->llc_confirm_rkey_resp_rc)
return -EFAULT;
return 0;
}
......@@ -703,14 +753,14 @@ int smc_llc_do_delete_rkey(struct smc_link *link,
mutex_lock(&link->llc_delete_rkey_mutex);
if (link->state != SMC_LNK_ACTIVE)
goto out;
reinit_completion(&link->llc_delete_rkey);
reinit_completion(&link->llc_delete_rkey_resp);
rc = smc_llc_send_delete_rkey(link, rmb_desc);
if (rc)
goto out;
/* receive DELETE RKEY response from server over RoCE fabric */
rc = wait_for_completion_interruptible_timeout(&link->llc_delete_rkey,
SMC_LLC_WAIT_TIME);
if (rc <= 0 || link->llc_delete_rkey_rc)
rc = wait_for_completion_interruptible_timeout(
&link->llc_delete_rkey_resp, SMC_LLC_WAIT_TIME);
if (rc <= 0 || link->llc_delete_rkey_resp_rc)
rc = -EFAULT;
else
rc = 0;
......
......@@ -35,6 +35,17 @@ enum smc_llc_msg_type {
SMC_LLC_DELETE_RKEY = 0x09,
};
/* Look through the links of a link group in index order and return the
 * first one that smc_link_usable() accepts; NULL if there is none.
 */
static inline struct smc_link *smc_llc_usable_link(struct smc_link_group *lgr)
{
	struct smc_link *candidate;
	int idx = 0;

	while (idx < SMC_LINKS_PER_LGR_MAX) {
		candidate = &lgr->lnk[idx];
		if (smc_link_usable(candidate))
			return candidate;
		idx++;
	}
	return NULL;
}
/* transmit */
int smc_llc_send_confirm_link(struct smc_link *lnk,
enum smc_llc_reqresp reqresp);
......@@ -42,10 +53,11 @@ int smc_llc_send_add_link(struct smc_link *link, u8 mac[], u8 gid[],
enum smc_llc_reqresp reqresp);
int smc_llc_send_delete_link(struct smc_link *link,
enum smc_llc_reqresp reqresp, bool orderly);
void smc_llc_lgr_init(struct smc_link_group *lgr, struct smc_sock *smc);
void smc_llc_lgr_clear(struct smc_link_group *lgr);
int smc_llc_link_init(struct smc_link *link);
void smc_llc_link_active(struct smc_link *link, int testlink_time);
void smc_llc_link_active(struct smc_link *link);
void smc_llc_link_deleting(struct smc_link *link);
void smc_llc_link_inactive(struct smc_link *link);
void smc_llc_link_clear(struct smc_link *link);
int smc_llc_do_confirm_rkey(struct smc_link *link,
struct smc_buf_desc *rmb_desc);
......
......@@ -50,29 +50,26 @@ static struct nla_policy smc_pnet_policy[SMC_PNETID_MAX + 1] = {
static struct genl_family smc_pnet_nl_family;
/**
* struct smc_user_pnetentry - pnet identifier name entry for/from user
* @list: List node.
* @pnet_name: Pnet identifier name
* @ndev: pointer to network device.
* @smcibdev: Pointer to IB device.
* @ib_port: Port of IB device.
* @smcd_dev: Pointer to smcd device.
*/
struct smc_user_pnetentry {
struct list_head list;
char pnet_name[SMC_MAX_PNETID_LEN + 1];
struct net_device *ndev;
struct smc_ib_device *smcibdev;
u8 ib_port;
struct smcd_dev *smcd_dev;
enum smc_pnet_nametype {
SMC_PNET_ETH = 1,
SMC_PNET_IB = 2,
};
/* pnet entry stored in pnet table */
struct smc_pnetentry {
struct list_head list;
char pnet_name[SMC_MAX_PNETID_LEN + 1];
struct net_device *ndev;
enum smc_pnet_nametype type;
union {
struct {
char eth_name[IFNAMSIZ + 1];
struct net_device *ndev;
};
struct {
char ib_name[IB_DEVICE_NAME_MAX + 1];
u8 ib_port;
};
};
};
/* Check if two given pnetids match */
......@@ -106,14 +103,15 @@ static int smc_pnet_remove_by_pnetid(struct net *net, char *pnet_name)
sn = net_generic(net, smc_net_id);
pnettable = &sn->pnettable;
/* remove netdevices */
/* remove table entry */
write_lock(&pnettable->lock);
list_for_each_entry_safe(pnetelem, tmp_pe, &pnettable->pnetlist,
list) {
if (!pnet_name ||
smc_pnet_match(pnetelem->pnet_name, pnet_name)) {
list_del(&pnetelem->list);
dev_put(pnetelem->ndev);
if (pnetelem->type == SMC_PNET_ETH && pnetelem->ndev)
dev_put(pnetelem->ndev);
kfree(pnetelem);
rc = 0;
}
......@@ -155,9 +153,9 @@ static int smc_pnet_remove_by_pnetid(struct net *net, char *pnet_name)
return rc;
}
/* Remove a pnet entry mentioning a given network device from the pnet table.
/* Add the reference to a given network device to the pnet table.
*/
static int smc_pnet_remove_by_ndev(struct net_device *ndev)
static int smc_pnet_add_by_ndev(struct net_device *ndev)
{
struct smc_pnetentry *pnetelem, *tmp_pe;
struct smc_pnettable *pnettable;
......@@ -171,10 +169,10 @@ static int smc_pnet_remove_by_ndev(struct net_device *ndev)
write_lock(&pnettable->lock);
list_for_each_entry_safe(pnetelem, tmp_pe, &pnettable->pnetlist, list) {
if (pnetelem->ndev == ndev) {
list_del(&pnetelem->list);
dev_put(pnetelem->ndev);
kfree(pnetelem);
if (pnetelem->type == SMC_PNET_ETH && !pnetelem->ndev &&
!strncmp(pnetelem->eth_name, ndev->name, IFNAMSIZ)) {
dev_hold(ndev);
pnetelem->ndev = ndev;
rc = 0;
break;
}
......@@ -183,80 +181,67 @@ static int smc_pnet_remove_by_ndev(struct net_device *ndev)
return rc;
}
/* Append a pnetid to the end of the pnet table if not already on this list.
/* Remove the reference to a given network device from the pnet table.
*/
static int smc_pnet_enter(struct smc_pnettable *pnettable,
struct smc_user_pnetentry *new_pnetelem)
static int smc_pnet_remove_by_ndev(struct net_device *ndev)
{
u8 pnet_null[SMC_MAX_PNETID_LEN] = {0};
u8 ndev_pnetid[SMC_MAX_PNETID_LEN];
struct smc_pnetentry *tmp_pnetelem;
struct smc_pnetentry *pnetelem;
bool new_smcddev = false;
struct net_device *ndev;
bool new_netdev = true;
bool new_ibdev = false;
if (new_pnetelem->smcibdev) {
struct smc_ib_device *ib_dev = new_pnetelem->smcibdev;
int ib_port = new_pnetelem->ib_port;
struct smc_pnetentry *pnetelem, *tmp_pe;
struct smc_pnettable *pnettable;
struct net *net = dev_net(ndev);
struct smc_net *sn;
int rc = -ENOENT;
spin_lock(&smc_ib_devices.lock);
if (smc_pnet_match(ib_dev->pnetid[ib_port - 1], pnet_null)) {
memcpy(ib_dev->pnetid[ib_port - 1],
new_pnetelem->pnet_name, SMC_MAX_PNETID_LEN);
ib_dev->pnetid_by_user[ib_port - 1] = true;
new_ibdev = true;
}
spin_unlock(&smc_ib_devices.lock);
}
if (new_pnetelem->smcd_dev) {
struct smcd_dev *smcd_dev = new_pnetelem->smcd_dev;
/* get pnettable for namespace */
sn = net_generic(net, smc_net_id);
pnettable = &sn->pnettable;
spin_lock(&smcd_dev_list.lock);
if (smc_pnet_match(smcd_dev->pnetid, pnet_null)) {
memcpy(smcd_dev->pnetid, new_pnetelem->pnet_name,
SMC_MAX_PNETID_LEN);
smcd_dev->pnetid_by_user = true;
new_smcddev = true;
write_lock(&pnettable->lock);
list_for_each_entry_safe(pnetelem, tmp_pe, &pnettable->pnetlist, list) {
if (pnetelem->type == SMC_PNET_ETH && pnetelem->ndev == ndev) {
dev_put(pnetelem->ndev);
pnetelem->ndev = NULL;
rc = 0;
break;
}
spin_unlock(&smcd_dev_list.lock);
}
write_unlock(&pnettable->lock);
return rc;
}
if (!new_pnetelem->ndev)
return (new_ibdev || new_smcddev) ? 0 : -EEXIST;
/* Apply pnetid to ib device when no pnetid is set.
*/
static bool smc_pnet_apply_ib(struct smc_ib_device *ib_dev, u8 ib_port,
char *pnet_name)
{
u8 pnet_null[SMC_MAX_PNETID_LEN] = {0};
bool applied = false;
/* check if (base) netdev already has a pnetid. If there is one, we do
* not want to add a pnet table entry
*/
ndev = pnet_find_base_ndev(new_pnetelem->ndev);
if (!smc_pnetid_by_dev_port(ndev->dev.parent, ndev->dev_port,
ndev_pnetid))
return (new_ibdev || new_smcddev) ? 0 : -EEXIST;
spin_lock(&smc_ib_devices.lock);
if (smc_pnet_match(ib_dev->pnetid[ib_port - 1], pnet_null)) {
memcpy(ib_dev->pnetid[ib_port - 1], pnet_name,
SMC_MAX_PNETID_LEN);
ib_dev->pnetid_by_user[ib_port - 1] = true;
applied = true;
}
spin_unlock(&smc_ib_devices.lock);
return applied;
}
/* add a new netdev entry to the pnet table if there isn't one */
tmp_pnetelem = kzalloc(sizeof(*pnetelem), GFP_KERNEL);
if (!tmp_pnetelem)
return -ENOMEM;
memcpy(tmp_pnetelem->pnet_name, new_pnetelem->pnet_name,
SMC_MAX_PNETID_LEN);
tmp_pnetelem->ndev = new_pnetelem->ndev;
/* Apply pnetid to smcd device when no pnetid is set.
*/
static bool smc_pnet_apply_smcd(struct smcd_dev *smcd_dev, char *pnet_name)
{
u8 pnet_null[SMC_MAX_PNETID_LEN] = {0};
bool applied = false;
write_lock(&pnettable->lock);
list_for_each_entry(pnetelem, &pnettable->pnetlist, list) {
if (pnetelem->ndev == new_pnetelem->ndev)
new_netdev = false;
}
if (new_netdev) {
dev_hold(tmp_pnetelem->ndev);
list_add_tail(&tmp_pnetelem->list, &pnettable->pnetlist);
write_unlock(&pnettable->lock);
} else {
write_unlock(&pnettable->lock);
kfree(tmp_pnetelem);
spin_lock(&smcd_dev_list.lock);
if (smc_pnet_match(smcd_dev->pnetid, pnet_null)) {
memcpy(smcd_dev->pnetid, pnet_name, SMC_MAX_PNETID_LEN);
smcd_dev->pnetid_by_user = true;
applied = true;
}
return (new_netdev || new_ibdev || new_smcddev) ? 0 : -EEXIST;
spin_unlock(&smcd_dev_list.lock);
return applied;
}
/* The limit for pnetid is 16 characters.
......@@ -323,57 +308,167 @@ static struct smcd_dev *smc_pnet_find_smcd(char *smcd_name)
return smcd_dev;
}
/* Parse the supplied netlink attributes and fill a pnetentry structure.
* For ethernet and infiniband device names verify that the devices exist.
static int smc_pnet_add_eth(struct smc_pnettable *pnettable, struct net *net,
char *eth_name, char *pnet_name)
{
struct smc_pnetentry *tmp_pe, *new_pe;
struct net_device *ndev, *base_ndev;
u8 ndev_pnetid[SMC_MAX_PNETID_LEN];
bool new_netdev;
int rc;
/* check if (base) netdev already has a pnetid. If there is one, we do
* not want to add a pnet table entry
*/
rc = -EEXIST;
ndev = dev_get_by_name(net, eth_name); /* dev_hold() */
if (ndev) {
base_ndev = pnet_find_base_ndev(ndev);
if (!smc_pnetid_by_dev_port(base_ndev->dev.parent,
base_ndev->dev_port, ndev_pnetid))
goto out_put;
}
/* add a new netdev entry to the pnet table if there isn't one */
rc = -ENOMEM;
new_pe = kzalloc(sizeof(*new_pe), GFP_KERNEL);
if (!new_pe)
goto out_put;
new_pe->type = SMC_PNET_ETH;
memcpy(new_pe->pnet_name, pnet_name, SMC_MAX_PNETID_LEN);
strncpy(new_pe->eth_name, eth_name, IFNAMSIZ);
new_pe->ndev = ndev;
rc = -EEXIST;
new_netdev = true;
write_lock(&pnettable->lock);
list_for_each_entry(tmp_pe, &pnettable->pnetlist, list) {
if (tmp_pe->type == SMC_PNET_ETH &&
!strncmp(tmp_pe->eth_name, eth_name, IFNAMSIZ)) {
new_netdev = false;
break;
}
}
if (new_netdev) {
list_add_tail(&new_pe->list, &pnettable->pnetlist);
write_unlock(&pnettable->lock);
} else {
write_unlock(&pnettable->lock);
kfree(new_pe);
goto out_put;
}
return 0;
out_put:
if (ndev)
dev_put(ndev);
return rc;
}
/* Create a pnet table entry of type SMC_PNET_IB for the device name @ib_name
 * and port @ib_port with pnetid @pnet_name.
 *
 * Before touching the table, the pnetid is offered to a currently known ib
 * device and to a currently known smcd device of that name. If either of
 * them rejects it, nothing is added to the table.
 *
 * Returns 0 when the entry was added, -EEXIST when a device rejected the
 * pnetid or an IB entry with the same name is already in the table, -ENOMEM
 * on allocation failure.
 */
static int smc_pnet_add_ib(struct smc_pnettable *pnettable, char *ib_name,
			   u8 ib_port, char *pnet_name)
{
	struct smc_pnetentry *entry, *cur;
	struct smc_ib_device *ibdev;
	struct smcd_dev *smcd;
	bool duplicate = false;
	bool smcd_ok = true;
	bool ib_ok = true;

	/* both lookups and both apply attempts are always carried out */
	ibdev = smc_pnet_find_ib(ib_name);
	if (ibdev)
		ib_ok = smc_pnet_apply_ib(ibdev, ib_port, pnet_name);
	smcd = smc_pnet_find_smcd(ib_name);
	if (smcd)
		smcd_ok = smc_pnet_apply_smcd(smcd, pnet_name);
	if (!(ib_ok && smcd_ok))
		return -EEXIST;

	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
	if (!entry)
		return -ENOMEM;
	entry->type = SMC_PNET_IB;
	memcpy(entry->pnet_name, pnet_name, SMC_MAX_PNETID_LEN);
	/* ib_name[] has IB_DEVICE_NAME_MAX + 1 bytes, zeroed by kzalloc() */
	strncpy(entry->ib_name, ib_name, IB_DEVICE_NAME_MAX);
	entry->ib_port = ib_port;

	/* NOTE(review): the duplicate check compares the device name only,
	 * the port is not part of it - confirm this is intended
	 */
	write_lock(&pnettable->lock);
	list_for_each_entry(cur, &pnettable->pnetlist, list) {
		if (cur->type != SMC_PNET_IB)
			continue;
		if (strncmp(cur->ib_name, ib_name, IB_DEVICE_NAME_MAX) == 0) {
			duplicate = true;
			break;
		}
	}
	if (!duplicate)
		list_add_tail(&entry->list, &pnettable->pnetlist);
	write_unlock(&pnettable->lock);

	if (duplicate) {
		kfree(entry);
		return -EEXIST;
	}
	return 0;
}
/* Append a pnetid to the end of the pnet table if not already on this list.
*/
static int smc_pnet_fill_entry(struct net *net,
struct smc_user_pnetentry *pnetelem,
struct nlattr *tb[])
static int smc_pnet_enter(struct net *net, struct nlattr *tb[])
{
char *string, *ibname;
char pnet_name[SMC_MAX_PNETID_LEN + 1];
struct smc_pnettable *pnettable;
bool new_netdev = false;
bool new_ibdev = false;
struct smc_net *sn;
u8 ibport = 1;
char *string;
int rc;
memset(pnetelem, 0, sizeof(*pnetelem));
INIT_LIST_HEAD(&pnetelem->list);
/* get pnettable for namespace */
sn = net_generic(net, smc_net_id);
pnettable = &sn->pnettable;
rc = -EINVAL;
if (!tb[SMC_PNETID_NAME])
goto error;
string = (char *)nla_data(tb[SMC_PNETID_NAME]);
if (!smc_pnetid_valid(string, pnetelem->pnet_name))
if (!smc_pnetid_valid(string, pnet_name))
goto error;
rc = -EINVAL;
if (tb[SMC_PNETID_ETHNAME]) {
string = (char *)nla_data(tb[SMC_PNETID_ETHNAME]);
pnetelem->ndev = dev_get_by_name(net, string);
if (!pnetelem->ndev)
rc = smc_pnet_add_eth(pnettable, net, string, pnet_name);
if (!rc)
new_netdev = true;
else if (rc != -EEXIST)
goto error;
}
/* if this is not the initial namespace, stop here */
if (net != &init_net)
return 0;
return new_netdev ? 0 : -EEXIST;
rc = -EINVAL;
if (tb[SMC_PNETID_IBNAME]) {
ibname = (char *)nla_data(tb[SMC_PNETID_IBNAME]);
ibname = strim(ibname);
pnetelem->smcibdev = smc_pnet_find_ib(ibname);
pnetelem->smcd_dev = smc_pnet_find_smcd(ibname);
if (!pnetelem->smcibdev && !pnetelem->smcd_dev)
goto error;
if (pnetelem->smcibdev) {
if (!tb[SMC_PNETID_IBPORT])
goto error;
pnetelem->ib_port = nla_get_u8(tb[SMC_PNETID_IBPORT]);
if (pnetelem->ib_port < 1 ||
pnetelem->ib_port > SMC_MAX_PORTS)
string = (char *)nla_data(tb[SMC_PNETID_IBNAME]);
string = strim(string);
if (tb[SMC_PNETID_IBPORT]) {
ibport = nla_get_u8(tb[SMC_PNETID_IBPORT]);
if (ibport < 1 || ibport > SMC_MAX_PORTS)
goto error;
}
rc = smc_pnet_add_ib(pnettable, string, ibport, pnet_name);
if (!rc)
new_ibdev = true;
else if (rc != -EEXIST)
goto error;
}
return 0;
return (new_netdev || new_ibdev) ? 0 : -EEXIST;
error:
return rc;
......@@ -381,28 +476,22 @@ static int smc_pnet_fill_entry(struct net *net,
/* Convert an smc_pnetentry to a netlink attribute sequence */
static int smc_pnet_set_nla(struct sk_buff *msg,
struct smc_user_pnetentry *pnetelem)
struct smc_pnetentry *pnetelem)
{
if (nla_put_string(msg, SMC_PNETID_NAME, pnetelem->pnet_name))
return -1;
if (pnetelem->ndev) {
if (pnetelem->type == SMC_PNET_ETH) {
if (nla_put_string(msg, SMC_PNETID_ETHNAME,
pnetelem->ndev->name))
pnetelem->eth_name))
return -1;
} else {
if (nla_put_string(msg, SMC_PNETID_ETHNAME, "n/a"))
return -1;
}
if (pnetelem->smcibdev) {
if (nla_put_string(msg, SMC_PNETID_IBNAME,
dev_name(pnetelem->smcibdev->ibdev->dev.parent)) ||
if (pnetelem->type == SMC_PNET_IB) {
if (nla_put_string(msg, SMC_PNETID_IBNAME, pnetelem->ib_name) ||
nla_put_u8(msg, SMC_PNETID_IBPORT, pnetelem->ib_port))
return -1;
} else if (pnetelem->smcd_dev) {
if (nla_put_string(msg, SMC_PNETID_IBNAME,
dev_name(&pnetelem->smcd_dev->dev)) ||
nla_put_u8(msg, SMC_PNETID_IBPORT, 1))
return -1;
} else {
if (nla_put_string(msg, SMC_PNETID_IBNAME, "n/a") ||
nla_put_u8(msg, SMC_PNETID_IBPORT, 0xff))
......@@ -415,21 +504,8 @@ static int smc_pnet_set_nla(struct sk_buff *msg,
static int smc_pnet_add(struct sk_buff *skb, struct genl_info *info)
{
struct net *net = genl_info_net(info);
struct smc_user_pnetentry pnetelem;
struct smc_pnettable *pnettable;
struct smc_net *sn;
int rc;
/* get pnettable for namespace */
sn = net_generic(net, smc_net_id);
pnettable = &sn->pnettable;
rc = smc_pnet_fill_entry(net, &pnetelem, info->attrs);
if (!rc)
rc = smc_pnet_enter(pnettable, &pnetelem);
if (pnetelem.ndev)
dev_put(pnetelem.ndev);
return rc;
return smc_pnet_enter(net, info->attrs);
}
static int smc_pnet_del(struct sk_buff *skb, struct genl_info *info)
......@@ -450,7 +526,7 @@ static int smc_pnet_dump_start(struct netlink_callback *cb)
static int smc_pnet_dumpinfo(struct sk_buff *skb,
u32 portid, u32 seq, u32 flags,
struct smc_user_pnetentry *pnetelem)
struct smc_pnetentry *pnetelem)
{
void *hdr;
......@@ -469,91 +545,32 @@ static int smc_pnet_dumpinfo(struct sk_buff *skb,
static int _smc_pnet_dump(struct net *net, struct sk_buff *skb, u32 portid,
u32 seq, u8 *pnetid, int start_idx)
{
struct smc_user_pnetentry tmp_entry;
struct smc_pnettable *pnettable;
struct smc_pnetentry *pnetelem;
struct smc_ib_device *ibdev;
struct smcd_dev *smcd_dev;
struct smc_net *sn;
int idx = 0;
int ibport;
/* get pnettable for namespace */
sn = net_generic(net, smc_net_id);
pnettable = &sn->pnettable;
/* dump netdevices */
/* dump pnettable entries */
read_lock(&pnettable->lock);
list_for_each_entry(pnetelem, &pnettable->pnetlist, list) {
if (pnetid && !smc_pnet_match(pnetelem->pnet_name, pnetid))
continue;
if (idx++ < start_idx)
continue;
memset(&tmp_entry, 0, sizeof(tmp_entry));
memcpy(&tmp_entry.pnet_name, pnetelem->pnet_name,
SMC_MAX_PNETID_LEN);
tmp_entry.ndev = pnetelem->ndev;
/* if this is not the initial namespace, dump only netdev */
if (net != &init_net && pnetelem->type != SMC_PNET_ETH)
continue;
if (smc_pnet_dumpinfo(skb, portid, seq, NLM_F_MULTI,
&tmp_entry)) {
pnetelem)) {
--idx;
break;
}
}
read_unlock(&pnettable->lock);
/* if this is not the initial namespace, stop here */
if (net != &init_net)
return idx;
/* dump ib devices */
spin_lock(&smc_ib_devices.lock);
list_for_each_entry(ibdev, &smc_ib_devices.list, list) {
for (ibport = 0; ibport < SMC_MAX_PORTS; ibport++) {
if (ibdev->pnetid_by_user[ibport]) {
if (pnetid &&
!smc_pnet_match(ibdev->pnetid[ibport],
pnetid))
continue;
if (idx++ < start_idx)
continue;
memset(&tmp_entry, 0, sizeof(tmp_entry));
memcpy(&tmp_entry.pnet_name,
ibdev->pnetid[ibport],
SMC_MAX_PNETID_LEN);
tmp_entry.smcibdev = ibdev;
tmp_entry.ib_port = ibport + 1;
if (smc_pnet_dumpinfo(skb, portid, seq,
NLM_F_MULTI,
&tmp_entry)) {
--idx;
break;
}
}
}
}
spin_unlock(&smc_ib_devices.lock);
/* dump smcd devices */
spin_lock(&smcd_dev_list.lock);
list_for_each_entry(smcd_dev, &smcd_dev_list.list, list) {
if (smcd_dev->pnetid_by_user) {
if (pnetid && !smc_pnet_match(smcd_dev->pnetid, pnetid))
continue;
if (idx++ < start_idx)
continue;
memset(&tmp_entry, 0, sizeof(tmp_entry));
memcpy(&tmp_entry.pnet_name, smcd_dev->pnetid,
SMC_MAX_PNETID_LEN);
tmp_entry.smcd_dev = smcd_dev;
if (smc_pnet_dumpinfo(skb, portid, seq, NLM_F_MULTI,
&tmp_entry)) {
--idx;
break;
}
}
}
spin_unlock(&smcd_dev_list.lock);
return idx;
}
......@@ -659,6 +676,9 @@ static int smc_pnet_netdev_event(struct notifier_block *this,
case NETDEV_UNREGISTER:
smc_pnet_remove_by_ndev(event_dev);
return NOTIFY_OK;
case NETDEV_REGISTER:
smc_pnet_add_by_ndev(event_dev);
return NOTIFY_OK;
default:
return NOTIFY_DONE;
}
......@@ -744,7 +764,7 @@ static int smc_pnet_find_ndev_pnetid_by_table(struct net_device *ndev,
read_lock(&pnettable->lock);
list_for_each_entry(pnetelem, &pnettable->pnetlist, list) {
if (ndev == pnetelem->ndev) {
if (pnetelem->type == SMC_PNET_ETH && ndev == pnetelem->ndev) {
/* get pnetid of netdev device */
memcpy(pnetid, pnetelem->pnet_name, SMC_MAX_PNETID_LEN);
rc = 0;
......@@ -755,6 +775,34 @@ static int smc_pnet_find_ndev_pnetid_by_table(struct net_device *ndev,
return rc;
}
/* Pick the first ib device port that fits the given pnetid.
 *
 * ini->ib_dev is reset to NULL up front. The list of ib devices is then
 * scanned under smc_ib_devices.lock; for every valid port the checks are, in
 * this order: pnetid match, port active, port not flagged in
 * ports_going_away, and a successful smc_ib_determine_gid() for
 * ini->vlan_id (which fills ini->ib_gid). The first port passing all checks
 * is stored in ini->ib_dev / ini->ib_port.
 */
static void _smc_pnet_find_roce_by_pnetid(u8 *pnet_id,
					  struct smc_init_info *ini)
{
	struct smc_ib_device *dev;
	int port;

	ini->ib_dev = NULL;
	spin_lock(&smc_ib_devices.lock);
	list_for_each_entry(dev, &smc_ib_devices.list, list) {
		/* port numbers are 1-based, the per-port arrays 0-based */
		for (port = 1; port <= SMC_MAX_PORTS; port++) {
			if (!rdma_is_port_valid(dev->ibdev, port))
				continue;
			if (!smc_pnet_match(dev->pnetid[port - 1], pnet_id))
				continue;
			if (!smc_ib_port_active(dev, port))
				continue;
			if (test_bit(port - 1, dev->ports_going_away))
				continue;
			if (smc_ib_determine_gid(dev, port, ini->vlan_id,
						 ini->ib_gid, NULL))
				continue;
			ini->ib_dev = dev;
			ini->ib_port = port;
			goto unlock;
		}
	}
unlock:
	spin_unlock(&smc_ib_devices.lock);
}
/* if handshake network device belongs to a roce device, return its
* IB device and port
*/
......@@ -801,8 +849,6 @@ static void smc_pnet_find_roce_by_pnetid(struct net_device *ndev,
struct smc_init_info *ini)
{
u8 ndev_pnetid[SMC_MAX_PNETID_LEN];
struct smc_ib_device *ibdev;
int i;
ndev = pnet_find_base_ndev(ndev);
if (smc_pnetid_by_dev_port(ndev->dev.parent, ndev->dev_port,
......@@ -811,25 +857,7 @@ static void smc_pnet_find_roce_by_pnetid(struct net_device *ndev,
smc_pnet_find_rdma_dev(ndev, ini);
return; /* pnetid could not be determined */
}
spin_lock(&smc_ib_devices.lock);
list_for_each_entry(ibdev, &smc_ib_devices.list, list) {
for (i = 1; i <= SMC_MAX_PORTS; i++) {
if (!rdma_is_port_valid(ibdev->ibdev, i))
continue;
if (smc_pnet_match(ibdev->pnetid[i - 1], ndev_pnetid) &&
smc_ib_port_active(ibdev, i) &&
!test_bit(i - 1, ibdev->ports_going_away) &&
!smc_ib_determine_gid(ibdev, i, ini->vlan_id,
ini->ib_gid, NULL)) {
ini->ib_dev = ibdev;
ini->ib_port = i;
goto out;
}
}
}
out:
spin_unlock(&smc_ib_devices.lock);
_smc_pnet_find_roce_by_pnetid(ndev_pnetid, ini);
}
static void smc_pnet_find_ism_by_pnetid(struct net_device *ndev,
......@@ -895,3 +923,60 @@ void smc_pnet_find_ism_resource(struct sock *sk, struct smc_init_info *ini)
out:
return;
}
/* Search the pnet table of the init namespace for an IB entry naming this ib
 * device and port, and pass its pnetid to smc_pnet_apply_ib().
 *
 * Returns 0 when a matching entry was found (the outcome of the apply call
 * itself is not evaluated), -ENOENT otherwise.
 */
int smc_pnetid_by_table_ib(struct smc_ib_device *smcibdev, u8 ib_port)
{
	struct smc_net *sn = net_generic(&init_net, smc_net_id);
	struct smc_pnettable *table = &sn->pnettable;
	char *devname = smcibdev->ibdev->name;
	struct smc_pnetentry *pe;
	int rc = -ENOENT;

	read_lock(&table->lock);
	list_for_each_entry(pe, &table->pnetlist, list) {
		if (pe->type != SMC_PNET_IB)
			continue;
		if (strncmp(pe->ib_name, devname, IB_DEVICE_NAME_MAX))
			continue;
		if (pe->ib_port != ib_port)
			continue;
		/* first matching entry wins */
		smc_pnet_apply_ib(smcibdev, ib_port, pe->pnet_name);
		rc = 0;
		break;
	}
	read_unlock(&table->lock);

	return rc;
}
/* Search the pnet table of the init namespace for an IB entry whose name
 * equals the device name of this smcd device, and pass its pnetid to
 * smc_pnet_apply_smcd(). The port stored in the entry is not compared.
 *
 * Returns 0 when a matching entry was found (the outcome of the apply call
 * itself is not evaluated), -ENOENT otherwise.
 */
int smc_pnetid_by_table_smcd(struct smcd_dev *smcddev)
{
	struct smc_net *sn = net_generic(&init_net, smc_net_id);
	struct smc_pnettable *table = &sn->pnettable;
	const char *devname = dev_name(&smcddev->dev);
	struct smc_pnetentry *pe;
	int rc = -ENOENT;

	read_lock(&table->lock);
	list_for_each_entry(pe, &table->pnetlist, list) {
		if (pe->type != SMC_PNET_IB)
			continue;
		if (strncmp(pe->ib_name, devname, IB_DEVICE_NAME_MAX))
			continue;
		/* first matching entry wins */
		smc_pnet_apply_smcd(smcddev, pe->pnet_name);
		rc = 0;
		break;
	}
	read_unlock(&table->lock);

	return rc;
}
......@@ -46,5 +46,7 @@ void smc_pnet_exit(void);
void smc_pnet_net_exit(struct net *net);
void smc_pnet_find_roce_resource(struct sock *sk, struct smc_init_info *ini);
void smc_pnet_find_ism_resource(struct sock *sk, struct smc_init_info *ini);
int smc_pnetid_by_table_ib(struct smc_ib_device *smcibdev, u8 ib_port);
int smc_pnetid_by_table_smcd(struct smcd_dev *smcd);
#endif
......@@ -269,19 +269,18 @@ static int smc_tx_rdma_write(struct smc_connection *conn, int peer_rmbe_offset,
int num_sges, struct ib_rdma_wr *rdma_wr)
{
struct smc_link_group *lgr = conn->lgr;
struct smc_link *link;
struct smc_link *link = conn->lnk;
int rc;
link = &lgr->lnk[SMC_SINGLE_LINK];
rdma_wr->wr.wr_id = smc_wr_tx_get_next_wr_id(link);
rdma_wr->wr.num_sge = num_sges;
rdma_wr->remote_addr =
lgr->rtokens[conn->rtoken_idx][SMC_SINGLE_LINK].dma_addr +
lgr->rtokens[conn->rtoken_idx][link->link_idx].dma_addr +
/* RMBE within RMB */
conn->tx_off +
/* offset within RMBE */
peer_rmbe_offset;
rdma_wr->rkey = lgr->rtokens[conn->rtoken_idx][SMC_SINGLE_LINK].rkey;
rdma_wr->rkey = lgr->rtokens[conn->rtoken_idx][link->link_idx].rkey;
rc = ib_post_send(link->roce_qp, &rdma_wr->wr, NULL);
if (rc)
smc_lgr_terminate_sched(lgr);
......@@ -310,8 +309,10 @@ static int smcr_tx_rdma_writes(struct smc_connection *conn, size_t len,
size_t dst_off, size_t dst_len,
struct smc_rdma_wr *wr_rdma_buf)
{
struct smc_link *link = conn->lnk;
dma_addr_t dma_addr =
sg_dma_address(conn->sndbuf_desc->sgt[SMC_SINGLE_LINK].sgl);
sg_dma_address(conn->sndbuf_desc->sgt[link->link_idx].sgl);
int src_len_sum = src_len, dst_len_sum = dst_len;
int sent_count = src_off;
int srcchunk, dstchunk;
......@@ -507,7 +508,7 @@ static int smcr_tx_sndbuf_nonempty(struct smc_connection *conn)
if (!pflags->urg_data_present) {
rc = smc_tx_rdma_writes(conn, wr_rdma_buf);
if (rc) {
smc_wr_tx_put_slot(&conn->lgr->lnk[SMC_SINGLE_LINK],
smc_wr_tx_put_slot(conn->lnk,
(struct smc_wr_tx_pend_priv *)pend);
goto out_unlock;
}
......
......@@ -207,7 +207,7 @@ int smc_wr_tx_get_free_slot(struct smc_link *link,
} else {
rc = wait_event_interruptible_timeout(
link->wr_tx_wait,
link->state == SMC_LNK_INACTIVE ||
!smc_link_usable(link) ||
lgr->terminating ||
(smc_wr_tx_get_free_slot_index(link, &idx) != -EBUSY),
SMC_WR_TX_WAIT_FREE_SLOT_TIME);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment