Commit 25ed57b8 authored by David S. Miller

Merge branch 'smc-next'

Karsten Graul says:

====================
net/smc: updates 2020-09-10

Please apply the following patch series for smc to netdev's net-next tree.

This patch series is a mix of various improvements and cleanups.
Patches 1 and 10 improve the handling of large parallel workloads by
moving SMC work onto dedicated work queues.
Patch 8 corrects a kernel config default for CCWGROUP on s390.
Patch 9 fixes the smc_diag socket dump so it resumes correctly across
multiple netlink response buffers, allowing userspace tools to retrieve
socket information for more sockets.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents b55353e2 22ef473d
@@ -107,7 +107,7 @@ config QETH_OSX
 config CCWGROUP
 	tristate
-	default (LCS || CTCM || QETH)
+	default (LCS || CTCM || QETH || SMC)
 config ISM
 	tristate "Support for ISM vPCI Adapter"
...
[diff collapsed]
@@ -18,6 +18,8 @@
 #include "smc_ib.h"
+#define SMC_V1		1		/* SMC version V1 */
 #define SMCPROTO_SMC	0	/* SMC protocol, IPv4 */
 #define SMCPROTO_SMC6	1	/* SMC protocol, IPv6 */
@@ -201,6 +203,8 @@ struct smc_connection {
 struct smc_sock {				/* smc sock container */
 	struct sock		sk;
 	struct socket		*clcsock;	/* internal tcp socket */
+	void			(*clcsk_data_ready)(struct sock *sk);
+						/* original data_ready fct. */
 	struct smc_connection	conn;		/* smc connection */
 	struct smc_sock		*listen_smc;	/* listen parent */
 	struct work_struct	connect_work;	/* handle non-blocking connect*/
@@ -235,6 +239,9 @@ static inline struct smc_sock *smc_sk(const struct sock *sk)
 	return (struct smc_sock *)sk;
 }
+
+extern struct workqueue_struct	*smc_hs_wq;	/* wq for handshake work */
+extern struct workqueue_struct	*smc_close_wq;	/* wq for close work */
 #define SMC_SYSTEMID_LEN		8
 extern u8	local_systemid[SMC_SYSTEMID_LEN]; /* unique system identifier */
...
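The new clcsk_data_ready member saves the internal TCP socket's original
data_ready callback so SMC can hook clcsock events on a listening socket
and restore the callback on close (see the smc_close.c hunk further
down). A minimal sketch of the override half, assuming a hypothetical
smc_clcsock_data_ready() handler and wrapper (neither name is taken from
this diff):

	/* sketch: install an SMC hook on the internal TCP socket, keeping
	 * the original callback in smc->clcsk_data_ready for restoration
	 */
	static void smc_listen_hook_clcsock(struct smc_sock *smc)
	{
		struct sock *clcsk = smc->clcsock->sk;

		smc->clcsk_data_ready = clcsk->sk_data_ready;	/* save */
		clcsk->sk_user_data = smc;	/* let the hook find us */
		clcsk->sk_data_ready = smc_clcsock_data_ready;	/* hook */
	}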
@@ -299,7 +299,7 @@ static void smc_cdc_msg_validate(struct smc_sock *smc, struct smc_cdc_msg *cdc,
 		conn->lnk = link;
 		spin_unlock_bh(&conn->send_lock);
 		sock_hold(&smc->sk); /* sock_put in abort_work */
-		if (!schedule_work(&conn->abort_work))
+		if (!queue_work(smc_close_wq, &conn->abort_work))
 			sock_put(&smc->sk);
 	}
 }
@@ -368,7 +368,7 @@ static void smc_cdc_msg_recv_action(struct smc_sock *smc,
 		smc->clcsock->sk->sk_shutdown |= RCV_SHUTDOWN;
 		sock_set_flag(&smc->sk, SOCK_DONE);
 		sock_hold(&smc->sk); /* sock_put in close_work */
-		if (!schedule_work(&conn->close_work))
+		if (!queue_work(smc_close_wq, &conn->close_work))
 			sock_put(&smc->sk);
 	}
 }
...
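Both hunks move close/abort work from the global system_wq to the
dedicated smc_close_wq declared in smc.h above, so a burst of
terminating connections cannot delay unrelated system work items.
Creation of the queues is not visible in this excerpt; a sketch of what
module init would do, assuming smc_init() as the place (error handling
abbreviated):

	/* sketch: dedicated queues for handshake and close work */
	smc_hs_wq = alloc_workqueue("smc_hs_wq", 0, 0);
	if (!smc_hs_wq)
		return -ENOMEM;
	smc_close_wq = alloc_workqueue("smc_close_wq", 0, 0);
	if (!smc_close_wq) {
		destroy_workqueue(smc_hs_wq);
		return -ENOMEM;
	}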
[diff collapsed]
@@ -22,7 +22,6 @@
 #define SMC_CLC_CONFIRM		0x03
 #define SMC_CLC_DECLINE		0x04
-#define SMC_CLC_V1		0x1		/* SMC version */
 #define SMC_TYPE_R		0		/* SMC-R only */
 #define SMC_TYPE_D		1		/* SMC-D only */
 #define SMC_TYPE_N		2		/* neither SMC-R nor SMC-D */
@@ -38,7 +37,6 @@
 #define SMC_CLC_DECL_NOSMCDEV	0x03030000  /* no SMC device found (R or D) */
 #define SMC_CLC_DECL_NOSMCDDEV	0x03030001  /* no SMC-D device found */
 #define SMC_CLC_DECL_NOSMCRDEV	0x03030002  /* no SMC-R device found */
-#define SMC_CLC_DECL_SMCDNOTALK	0x03030003  /* SMC-D dev can't talk to peer */
 #define SMC_CLC_DECL_MODEUNSUPP	0x03040000  /* smc modes do not match (R or D)*/
 #define SMC_CLC_DECL_RMBE_EC	0x03050000  /* peer has eyecatcher in RMBE */
 #define SMC_CLC_DECL_OPTUNSUPP	0x03060000  /* fastopen sockopt not supported */
@@ -111,25 +109,21 @@ struct smc_clc_msg_proposal {	/* clc proposal message sent by Linux */
 	__be16	iparea_offset;	/* offset to IP address information area */
 } __aligned(4);
-#define SMC_CLC_PROPOSAL_MAX_OFFSET	0x28
-#define SMC_CLC_PROPOSAL_MAX_PREFIX	(SMC_CLC_MAX_V6_PREFIX * \
-					 sizeof(struct smc_clc_ipv6_prefix))
-#define SMC_CLC_MAX_LEN		(sizeof(struct smc_clc_msg_proposal) + \
-				 SMC_CLC_PROPOSAL_MAX_OFFSET + \
-				 sizeof(struct smc_clc_msg_proposal_prefix) + \
-				 SMC_CLC_PROPOSAL_MAX_PREFIX + \
-				 sizeof(struct smc_clc_msg_trail))
+struct smc_clc_msg_proposal_area {
+	struct smc_clc_msg_proposal		pclc_base;
+	struct smc_clc_msg_smcd			pclc_smcd;
+	struct smc_clc_msg_proposal_prefix	pclc_prfx;
+	struct smc_clc_ipv6_prefix	pclc_prfx_ipv6[SMC_CLC_MAX_V6_PREFIX];
+	struct smc_clc_msg_trail		pclc_trl;
+};
-struct smc_clc_msg_accept_confirm {	/* clc accept / confirm message */
-	struct smc_clc_msg_hdr hdr;
-	union {
-		struct { /* SMC-R */
+struct smcr_clc_msg_accept_confirm {	/* SMCR accept/confirm */
 	struct smc_clc_msg_local lcl;
 	u8 qpn[3];			/* QP number */
 	__be32 rmb_rkey;		/* RMB rkey */
 	u8 rmbe_idx;			/* Index of RMBE in RMB */
-		__be32 rmbe_alert_token;/* unique connection id */
+	__be32 rmbe_alert_token;	/* unique connection id */
 #if defined(__BIG_ENDIAN_BITFIELD)
 	u8 rmbe_size : 4,		/* buf size (compressed) */
 	   qp_mtu : 4;			/* QP mtu */
 #elif defined(__LITTLE_ENDIAN_BITFIELD)
@@ -142,8 +136,9 @@ struct smc_clc_msg_accept_confirm {	/* clc accept / confirm message */
 	u8 psn[3];			/* packet sequence number */
 	struct smc_clc_msg_trail smcr_trl;
 					/* eye catcher "SMCR" EBCDIC */
-		} __packed;
-		struct { /* SMC-D */
+} __packed;
+
+struct smcd_clc_msg_accept_confirm {	/* SMCD accept/confirm */
 	u64 gid;			/* Sender GID */
 	u64 token;			/* DMB token */
 	u8 dmbe_idx;			/* DMBE index */
@@ -159,7 +154,13 @@ struct smc_clc_msg_accept_confirm {	/* clc accept / confirm message */
 	u32 reserved5[3];
 	struct smc_clc_msg_trail smcd_trl;
 					/* eye catcher "SMCD" EBCDIC */
-		} __packed;
-	};
-} __packed;			/* format defined in RFC7609 */
+} __packed;
+
+struct smc_clc_msg_accept_confirm {	/* clc accept / confirm message */
+	struct smc_clc_msg_hdr hdr;
+	union {
+		struct smcr_clc_msg_accept_confirm r0; /* SMC-R */
+		struct smcd_clc_msg_accept_confirm d0; /* SMC-D */
+	};
+} __packed;			/* format defined in RFC7609 */
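Pulling the two variants out of the anonymous union into named structs
lets callers say explicitly which wire format they are touching; the
smc_rmb_rtoken_handling() hunk below (clc->r0.rmb_rkey) shows the
resulting style. A hedged usage sketch (the helper is illustrative only,
not part of this diff):

	/* sketch: select the variant view through the named union members */
	static void clc_show_variant(struct smc_clc_msg_accept_confirm *clc,
				     bool is_smcd)
	{
		if (is_smcd)
			pr_debug("SMC-D token %llu\n", clc->d0.token);
		else
			pr_debug("SMC-R rkey %u\n", ntohl(clc->r0.rmb_rkey));
	}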
@@ -200,6 +201,6 @@ int smc_clc_send_decline(struct smc_sock *smc, u32 peer_diag_info);
 int smc_clc_send_proposal(struct smc_sock *smc, int smc_type,
 			  struct smc_init_info *ini);
 int smc_clc_send_confirm(struct smc_sock *smc);
-int smc_clc_send_accept(struct smc_sock *smc, int srv_first_contact);
+int smc_clc_send_accept(struct smc_sock *smc, bool srv_first_contact);
#endif
@@ -208,12 +208,11 @@ int smc_close_active(struct smc_sock *smc)
 		break;
 	case SMC_LISTEN:
 		sk->sk_state = SMC_CLOSED;
+		smc->clcsock->sk->sk_data_ready = smc->clcsk_data_ready;
+		smc->clcsock->sk->sk_user_data = NULL;
 		sk->sk_state_change(sk); /* wake up accept */
-		if (smc->clcsock && smc->clcsock->sk) {
+		if (smc->clcsock && smc->clcsock->sk)
 			rc = kernel_sock_shutdown(smc->clcsock, SHUT_RDWR);
-			/* wake up kernel_accept of smc_tcp_listen_worker */
-			smc->clcsock->sk->sk_data_ready(smc->clcsock->sk);
-		}
 		smc_close_cleanup_listen(sk);
 		release_sock(sk);
 		flush_work(&smc->tcp_listen_work);
...
@@ -34,7 +34,6 @@
 #define SMC_LGR_NUM_INCR	256
 #define SMC_LGR_FREE_DELAY_SERV	(600 * HZ)
 #define SMC_LGR_FREE_DELAY_CLNT	(SMC_LGR_FREE_DELAY_SERV + 10 * HZ)
-#define SMC_LGR_FREE_DELAY_FAST	(8 * HZ)
 static struct smc_lgr_list smc_lgr_list = {	/* established link groups */
 	.lock = __SPIN_LOCK_UNLOCKED(smc_lgr_list.lock),
@@ -70,7 +69,7 @@ static void smc_lgr_schedule_free_work(struct smc_link_group *lgr)
 	 * creation. For client use a somewhat higher removal delay time,
 	 * otherwise there is a risk of out-of-sync link groups.
 	 */
-	if (!lgr->freeing && !lgr->freefast) {
+	if (!lgr->freeing) {
 		mod_delayed_work(system_wq, &lgr->free_work,
 				 (!lgr->is_smcd && lgr->role == SMC_CLNT) ?
 				 SMC_LGR_FREE_DELAY_CLNT :
@@ -78,15 +77,6 @@ static void smc_lgr_schedule_free_work(struct smc_link_group *lgr)
 	}
 }
 
-void smc_lgr_schedule_free_work_fast(struct smc_link_group *lgr)
-{
-	if (!lgr->freeing && !lgr->freefast) {
-		lgr->freefast = 1;
-		mod_delayed_work(system_wq, &lgr->free_work,
-				 SMC_LGR_FREE_DELAY_FAST);
-	}
-}
-
 /* Register connection's alert token in our lookup structure.
  * To use rbtrees we have to implement our own insert core.
  * Requires @conns_lock
@@ -227,7 +217,7 @@ void smc_lgr_cleanup_early(struct smc_connection *conn)
 	if (!list_empty(lgr_list))
 		list_del_init(lgr_list);
 	spin_unlock_bh(lgr_lock);
-	smc_lgr_schedule_free_work_fast(lgr);
+	__smc_lgr_terminate(lgr, true);
 }
 
 static void smcr_lgr_link_deactivate_all(struct smc_link_group *lgr)
@@ -396,10 +386,15 @@ static int smc_lgr_create(struct smc_sock *smc, struct smc_init_info *ini)
 		rc = SMC_CLC_DECL_MEM;
 		goto ism_put_vlan;
 	}
+	lgr->tx_wq = alloc_workqueue("smc_tx_wq-%*phN", 0, 0,
+				     SMC_LGR_ID_SIZE, &lgr->id);
+	if (!lgr->tx_wq) {
+		rc = -ENOMEM;
+		goto free_lgr;
+	}
 	lgr->is_smcd = ini->is_smcd;
 	lgr->sync_err = 0;
 	lgr->terminating = 0;
-	lgr->freefast = 0;
 	lgr->freeing = 0;
 	lgr->vlan_id = ini->vlan_id;
 	mutex_init(&lgr->sndbufs_lock);
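Each link group now owns a tx workqueue whose name embeds the lgr id:
"%*phN" is the kernel's printf extension that prints a field-width-sized
byte array as hex. A sketch of the expansion, assuming SMC_LGR_ID_SIZE
is 4 (the value is not shown in this excerpt):

	/* sketch: name expansion for an id of { 0x00, 0x00, 0x01, 0x2a } */
	u8 id[4] = { 0x00, 0x00, 0x01, 0x2a };
	pr_info("smc_tx_wq-%*phN\n", 4, id);	/* -> "smc_tx_wq-0000012a" */

The per-lgr queue also gives teardown a single point to drain: the
smc_lgr_free() hunk below calls destroy_workqueue(lgr->tx_wq), which
flushes all pending tx work of that link group.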
@@ -418,7 +413,7 @@ static int smc_lgr_create(struct smc_sock *smc, struct smc_init_info *ini)
 	if (ini->is_smcd) {
 		/* SMC-D specific settings */
 		get_device(&ini->ism_dev->dev);
-		lgr->peer_gid = ini->ism_gid;
+		lgr->peer_gid = ini->ism_peer_gid;
 		lgr->smcd = ini->ism_dev;
 		lgr_list = &ini->ism_dev->lgr_list;
 		lgr_lock = &lgr->smcd->lgr_lock;
@@ -437,7 +432,7 @@ static int smc_lgr_create(struct smc_sock *smc, struct smc_init_info *ini)
 		lnk = &lgr->lnk[link_idx];
 		rc = smcr_link_init(lgr, lnk, link_idx, ini);
 		if (rc)
-			goto free_lgr;
+			goto free_wq;
 		lgr_list = &smc_lgr_list.list;
 		lgr_lock = &smc_lgr_list.lock;
 		atomic_inc(&lgr_cnt);
@@ -448,6 +443,8 @@ static int smc_lgr_create(struct smc_sock *smc, struct smc_init_info *ini)
 	spin_unlock_bh(lgr_lock);
 	return 0;
 
+free_wq:
+	destroy_workqueue(lgr->tx_wq);
 free_lgr:
 	kfree(lgr);
 ism_put_vlan:
@@ -517,7 +514,7 @@ static int smc_switch_cursor(struct smc_sock *smc, struct smc_cdc_tx_pend *pend,
 	    smc->sk.sk_state != SMC_CLOSED) {
 		rc = smcr_cdc_msg_send_validation(conn, pend, wr_buf);
 		if (!rc) {
-			schedule_delayed_work(&conn->tx_work, 0);
+			queue_delayed_work(conn->lgr->tx_wq, &conn->tx_work, 0);
 			smc->sk.sk_data_ready(&smc->sk);
 		}
 	} else {
@@ -824,11 +821,10 @@ static void smc_lgr_free(struct smc_link_group *lgr)
 	}
 	smc_lgr_free_bufs(lgr);
+	destroy_workqueue(lgr->tx_wq);
 	if (lgr->is_smcd) {
-		if (!lgr->terminating) {
-			smc_ism_put_vlan(lgr->smcd, lgr->vlan_id);
-			put_device(&lgr->smcd->dev);
-		}
+		smc_ism_put_vlan(lgr->smcd, lgr->vlan_id);
+		put_device(&lgr->smcd->dev);
 		if (!atomic_dec_return(&lgr->smcd->lgr_cnt))
 			wake_up(&lgr->smcd->lgrs_deleted);
 	} else {
@@ -889,8 +885,6 @@ static void smc_lgr_cleanup(struct smc_link_group *lgr)
 	if (lgr->is_smcd) {
 		smc_ism_signal_shutdown(lgr);
 		smcd_unregister_all_dmbs(lgr);
-		smc_ism_put_vlan(lgr->smcd, lgr->vlan_id);
-		put_device(&lgr->smcd->dev);
 	} else {
 		u32 rsn = lgr->llc_termination_rsn;
@@ -1296,9 +1290,9 @@ int smc_conn_create(struct smc_sock *smc, struct smc_init_info *ini)
 	lgr_list = ini->is_smcd ? &ini->ism_dev->lgr_list : &smc_lgr_list.list;
 	lgr_lock = ini->is_smcd ? &ini->ism_dev->lgr_lock : &smc_lgr_list.lock;
-	ini->cln_first_contact = SMC_FIRST_CONTACT;
+	ini->first_contact_local = 1;
 	role = smc->listen_smc ? SMC_SERV : SMC_CLNT;
-	if (role == SMC_CLNT && ini->srv_first_contact)
+	if (role == SMC_CLNT && ini->first_contact_peer)
 		/* create new link group as well */
 		goto create;
@@ -1307,14 +1301,14 @@ int smc_conn_create(struct smc_sock *smc, struct smc_init_info *ini)
 	list_for_each_entry(lgr, lgr_list, list) {
 		write_lock_bh(&lgr->conns_lock);
 		if ((ini->is_smcd ?
-		     smcd_lgr_match(lgr, ini->ism_dev, ini->ism_gid) :
+		     smcd_lgr_match(lgr, ini->ism_dev, ini->ism_peer_gid) :
 		     smcr_lgr_match(lgr, ini->ib_lcl, role, ini->ib_clcqpn)) &&
 		    !lgr->sync_err &&
 		    lgr->vlan_id == ini->vlan_id &&
 		    (role == SMC_CLNT || ini->is_smcd ||
 		     lgr->conns_num < SMC_RMBS_PER_LGR_MAX)) {
 			/* link group found */
-			ini->cln_first_contact = SMC_REUSE_CONTACT;
+			ini->first_contact_local = 0;
 			conn->lgr = lgr;
 			rc = smc_lgr_register_conn(conn, false);
 			write_unlock_bh(&lgr->conns_lock);
@@ -1328,8 +1322,8 @@ int smc_conn_create(struct smc_sock *smc, struct smc_init_info *ini)
 	if (rc)
 		return rc;
-	if (role == SMC_CLNT && !ini->srv_first_contact &&
-	    ini->cln_first_contact == SMC_FIRST_CONTACT) {
+	if (role == SMC_CLNT && !ini->first_contact_peer &&
+	    ini->first_contact_local) {
 		/* Server reuses a link group, but Client wants to start
 		 * a new one
 		 * send out_of_sync decline, reason synchr. error
@@ -1338,7 +1332,7 @@ int smc_conn_create(struct smc_sock *smc, struct smc_init_info *ini)
 	}
 create:
-	if (ini->cln_first_contact == SMC_FIRST_CONTACT) {
+	if (ini->first_contact_local) {
 		rc = smc_lgr_create(smc, ini);
 		if (rc)
 			goto out;
@@ -1892,8 +1886,8 @@ int smc_rmb_rtoken_handling(struct smc_connection *conn,
 			    struct smc_link *lnk,
 			    struct smc_clc_msg_accept_confirm *clc)
 {
-	conn->rtoken_idx = smc_rtoken_add(lnk, clc->rmb_dma_addr,
-					  clc->rmb_rkey);
+	conn->rtoken_idx = smc_rtoken_add(lnk, clc->r0.rmb_dma_addr,
+					  clc->r0.rmb_rkey);
 	if (conn->rtoken_idx < 0)
 		return conn->rtoken_idx;
 	return 0;
...
@@ -137,9 +137,6 @@ struct smc_link {
 #define SMC_LINKS_PER_LGR_MAX	3
 #define SMC_SINGLE_LINK		0
 
-#define SMC_FIRST_CONTACT	1		/* first contact to a peer */
-#define SMC_REUSE_CONTACT	0		/* follow-on contact to a peer*/
-
 /* tx/rx buffer list element for sndbufs list and rmbs list of a lgr */
 struct smc_buf_desc {
 	struct list_head	list;
@@ -228,9 +225,9 @@ struct smc_link_group {
 	u8			id[SMC_LGR_ID_SIZE]; /* unique lgr id */
 	struct delayed_work	free_work;	/* delayed freeing of an lgr */
 	struct work_struct	terminate_work;	/* abnormal lgr termination */
+	struct workqueue_struct	*tx_wq;		/* wq for conn. tx workers */
 	u8			sync_err : 1;	/* lgr no longer fits to peer */
 	u8			terminating : 1;/* lgr is terminating */
-	u8			freefast : 1;	/* free worker scheduled fast */
 	u8			freeing : 1;	/* lgr is being freed */
 	bool			is_smcd;	/* SMC-R or SMC-D */
@@ -294,9 +291,9 @@ struct smc_clc_msg_local;
 struct smc_init_info {
 	u8			is_smcd;
+	u8			first_contact_peer;
+	u8			first_contact_local;
 	unsigned short		vlan_id;
-	int			srv_first_contact;
-	int			cln_first_contact;
 	/* SMC-R */
 	struct smc_clc_msg_local *ib_lcl;
 	struct smc_ib_device	*ib_dev;
@@ -304,7 +301,7 @@ struct smc_init_info {
 	u8			ib_port;
 	u32			ib_clcqpn;
 	/* SMC-D */
-	u64			ism_gid;
+	u64			ism_peer_gid;
 	struct smcd_dev		*ism_dev;
 };
...
@@ -22,6 +22,15 @@
 #include "smc.h"
 #include "smc_core.h"
+struct smc_diag_dump_ctx {
+	int pos[2];
+};
+
+static struct smc_diag_dump_ctx *smc_dump_context(struct netlink_callback *cb)
+{
+	return (struct smc_diag_dump_ctx *)cb->ctx;
+}
+
 static void smc_gid_be16_convert(__u8 *buf, u8 *gid_raw)
 {
 	sprintf(buf, "%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x",
@@ -193,13 +202,15 @@ static int __smc_diag_dump(struct sock *sk, struct sk_buff *skb,
 }
 
 static int smc_diag_dump_proto(struct proto *prot, struct sk_buff *skb,
-			       struct netlink_callback *cb)
+			       struct netlink_callback *cb, int p_type)
 {
+	struct smc_diag_dump_ctx *cb_ctx = smc_dump_context(cb);
 	struct net *net = sock_net(skb->sk);
+	int snum = cb_ctx->pos[p_type];
 	struct nlattr *bc = NULL;
 	struct hlist_head *head;
+	int rc = 0, num = 0;
 	struct sock *sk;
-	int rc = 0;
 
 	read_lock(&prot->h.smc_hash->lock);
 	head = &prot->h.smc_hash->ht;
@@ -209,13 +220,18 @@ static int smc_diag_dump_proto(struct proto *prot, struct sk_buff *skb,
 	sk_for_each(sk, head) {
 		if (!net_eq(sock_net(sk), net))
 			continue;
+		if (num < snum)
+			goto next;
 		rc = __smc_diag_dump(sk, skb, cb, nlmsg_data(cb->nlh), bc);
-		if (rc)
-			break;
+		if (rc < 0)
+			goto out;
+next:
+		num++;
 	}
+out:
 	read_unlock(&prot->h.smc_hash->lock);
+	cb_ctx->pos[p_type] = num;
 	return rc;
 }
@@ -223,10 +239,10 @@ static int smc_diag_dump(struct sk_buff *skb, struct netlink_callback *cb)
 {
 	int rc = 0;
 
-	rc = smc_diag_dump_proto(&smc_proto, skb, cb);
+	rc = smc_diag_dump_proto(&smc_proto, skb, cb, SMCPROTO_SMC);
 	if (!rc)
-		rc = smc_diag_dump_proto(&smc_proto6, skb, cb);
-	return rc;
+		smc_diag_dump_proto(&smc_proto6, skb, cb, SMCPROTO_SMC6);
+	return skb->len;
 }
 
 static int smc_diag_handler_dump(struct sk_buff *skb, struct nlmsghdr *h)
...
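Together, these smc_diag.c hunks implement the standard netlink dump
contract: the dump callback is invoked repeatedly, a positive return
(here skb->len) means "buffer full, call me again", and 0 ends the dump.
The per-protocol cursor stored in cb->ctx records how many sockets were
already emitted so each re-invocation skips them; previously the dump
ended after one buffer, which is why tools missed sockets. A
self-contained userspace C sketch of the same resume pattern (names and
sizes are illustrative only):

	#include <stdio.h>

	#define N_ITEMS	10	/* sockets in the hash */
	#define BUDGET	 4	/* "skb space" per dump call */

	static int dump(int *pos)
	{
		int emitted = 0, num;

		for (num = 0; num < N_ITEMS; num++) {
			if (num < *pos)
				continue;	/* sent in an earlier call */
			if (emitted == BUDGET)
				break;		/* buffer full, stop here */
			printf("item %d\n", num);
			emitted++;
		}
		*pos = num;		/* resume point, like cb->ctx */
		return emitted;		/* 0: dump complete */
	}

	int main(void)
	{
		int pos = 0;

		while (dump(&pos) > 0)	/* netlink re-calls until 0 */
			;
		return 0;
	}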
@@ -1691,7 +1691,7 @@ static void smc_llc_enqueue(struct smc_link *link, union smc_llc_msg *llc)
 	spin_lock_irqsave(&lgr->llc_event_q_lock, flags);
 	list_add_tail(&qentry->list, &lgr->llc_event_q);
 	spin_unlock_irqrestore(&lgr->llc_event_q_lock, flags);
-	schedule_work(&lgr->llc_event_work);
+	queue_work(system_highpri_wq, &lgr->llc_event_work);
 }
 
 /* copy received msg and add it to the event queue */
...
@@ -928,7 +928,10 @@ static void smc_pnet_find_ism_by_pnetid(struct net_device *ndev,
 	mutex_lock(&smcd_dev_list.mutex);
 	list_for_each_entry(ismdev, &smcd_dev_list.list, list) {
 		if (smc_pnet_match(ismdev->pnetid, ndev_pnetid) &&
-		    !ismdev->going_away) {
+		    !ismdev->going_away &&
+		    (!ini->ism_peer_gid ||
+		     !smc_ism_cantalk(ini->ism_peer_gid, ini->vlan_id,
+				      ismdev))) {
 			ini->ism_dev = ismdev;
 			break;
 		}
...
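An ISM device is now only selected when it can actually reach the peer:
if the peer GID is already known, smc_ism_cantalk() is consulted during
pnetid matching, which is what made the SMC_CLC_DECL_SMCDNOTALK decline
code removed from smc_clc.h above obsolete. The helper itself is not in
this excerpt; a sketch under the assumption that it is a thin wrapper
over the device's remote-GID query operation:

	/* sketch: nonzero return means the device cannot talk to peer_gid */
	int smc_ism_cantalk(u64 peer_gid, unsigned short vlan_id,
			    struct smcd_dev *smcd)
	{
		return smcd->ops->query_remote_gid(smcd, peer_gid,
						   vlan_id ? 1 : 0, vlan_id);
	}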
@@ -228,7 +228,7 @@ int smc_tx_sendmsg(struct smc_sock *smc, struct msghdr *msg, size_t len)
 			/* for a corked socket defer the RDMA writes if there
 			 * is still sufficient sndbuf_space available
 			 */
-			schedule_delayed_work(&conn->tx_work,
-					      SMC_TX_CORK_DELAY);
+			queue_delayed_work(conn->lgr->tx_wq, &conn->tx_work,
+					   SMC_TX_CORK_DELAY);
 		else
 			smc_tx_sndbuf_nonempty(conn);
@@ -499,7 +499,7 @@ static int smcr_tx_sndbuf_nonempty(struct smc_connection *conn)
 		if (conn->killed)
 			return -EPIPE;
 		rc = 0;
-		mod_delayed_work(system_wq, &conn->tx_work,
+		mod_delayed_work(conn->lgr->tx_wq, &conn->tx_work,
 				 SMC_TX_WORK_DELAY);
 	}
 	return rc;
@@ -623,7 +623,7 @@ void smc_tx_consumer_update(struct smc_connection *conn, bool force)
 		return;
 	if ((smc_cdc_get_slot_and_msg_send(conn) < 0) &&
 	    !conn->killed) {
-		schedule_delayed_work(&conn->tx_work,
-				      SMC_TX_WORK_DELAY);
+		queue_delayed_work(conn->lgr->tx_wq, &conn->tx_work,
+				   SMC_TX_WORK_DELAY);
 		return;
 	}
...
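All three tx paths keep their original queueing semantics and only
retarget the per-lgr tx_wq: queue_delayed_work() is a no-op when the
work is already pending, while mod_delayed_work() (re)arms the timer
either way. A sketch contrasting the two, with hypothetical helper
names:

	/* sketch: semantics of the two flavours used in these hunks */
	static void tx_kick(struct smc_connection *conn, unsigned long delay)
	{
		/* does nothing if conn->tx_work is already queued */
		queue_delayed_work(conn->lgr->tx_wq, &conn->tx_work, delay);
	}

	static void tx_kick_rearm(struct smc_connection *conn,
				  unsigned long delay)
	{
		/* replaces a pending timer with the new delay */
		mod_delayed_work(conn->lgr->tx_wq, &conn->tx_work, delay);
	}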