Commit 68e995a2 authored by Pradeep Satyanarayana, committed by Roland Dreier

IPoIB/cm: Add connected mode support for devices without SRQs

Some IB adapters (notably IBM's eHCA) do not implement SRQs (shared
receive queues).  The current IPoIB connected mode support only works
on devices that support SRQs.

Fix this by adding support for using the receive queue of each
connected mode receive QP.  The disadvantage of this compared to using
an SRQ is that it means a full queue of receives must be posted for
each remote connected mode peer, which means that total memory usage
is potentially much higher than when using SRQs.  To manage this, add
a new module parameter "max_nonsrq_conn_qp" that limits the number of
connections allowed per interface.

The rest of the changes are fairly straightforward: we use a table of
struct ipoib_cm_rx to hold all the active connections, and put the
table index of the connection in the high bits of receive WR IDs.
This is needed because we cannot rely on the struct ib_wc.qp field for
non-SRQ receive completions.  Most of the rest of the changes just
test whether or not an SRQ is available, and post receives or find
received packets in the right place depending on the answer.

Cleaning up dead connections actually becomes simpler, because we do
not have to do the "last WQE reached" dance that is required to
destroy QPs attached to an SRQ.  We just move the QP to the error
state and wait for all pending receives to be flushed.
Signed-off-by: Pradeep Satyanarayana <pradeeps@linux.vnet.ibm.com>

[ Completely rewritten and split up, based on Pradeep's work.  Several
  bugs fixed and no doubt several bugs introduced.  - Roland ]
Signed-off-by: Roland Dreier <rolandd@cisco.com>
parent efcd9971
...@@ -69,6 +69,7 @@ enum { ...@@ -69,6 +69,7 @@ enum {
IPOIB_TX_RING_SIZE = 64, IPOIB_TX_RING_SIZE = 64,
IPOIB_MAX_QUEUE_SIZE = 8192, IPOIB_MAX_QUEUE_SIZE = 8192,
IPOIB_MIN_QUEUE_SIZE = 2, IPOIB_MIN_QUEUE_SIZE = 2,
IPOIB_CM_MAX_CONN_QP = 4096,
IPOIB_NUM_WC = 4, IPOIB_NUM_WC = 4,
...@@ -188,10 +189,12 @@ enum ipoib_cm_state { ...@@ -188,10 +189,12 @@ enum ipoib_cm_state {
struct ipoib_cm_rx { struct ipoib_cm_rx {
struct ib_cm_id *id; struct ib_cm_id *id;
struct ib_qp *qp; struct ib_qp *qp;
struct ipoib_cm_rx_buf *rx_ring;
struct list_head list; struct list_head list;
struct net_device *dev; struct net_device *dev;
unsigned long jiffies; unsigned long jiffies;
enum ipoib_cm_state state; enum ipoib_cm_state state;
int recv_count;
}; };
struct ipoib_cm_tx { struct ipoib_cm_tx {
...@@ -234,6 +237,7 @@ struct ipoib_cm_dev_priv { ...@@ -234,6 +237,7 @@ struct ipoib_cm_dev_priv {
struct ib_wc ibwc[IPOIB_NUM_WC]; struct ib_wc ibwc[IPOIB_NUM_WC];
struct ib_sge rx_sge[IPOIB_CM_RX_SG]; struct ib_sge rx_sge[IPOIB_CM_RX_SG];
struct ib_recv_wr rx_wr; struct ib_recv_wr rx_wr;
int nonsrq_conn_qp;
}; };
/* /*
...@@ -461,6 +465,8 @@ void ipoib_drain_cq(struct net_device *dev); ...@@ -461,6 +465,8 @@ void ipoib_drain_cq(struct net_device *dev);
/* We don't support UC connections at the moment */ /* We don't support UC connections at the moment */
#define IPOIB_CM_SUPPORTED(ha) (ha[0] & (IPOIB_FLAGS_RC)) #define IPOIB_CM_SUPPORTED(ha) (ha[0] & (IPOIB_FLAGS_RC))
extern int ipoib_max_conn_qp;
static inline int ipoib_cm_admin_enabled(struct net_device *dev) static inline int ipoib_cm_admin_enabled(struct net_device *dev)
{ {
struct ipoib_dev_priv *priv = netdev_priv(dev); struct ipoib_dev_priv *priv = netdev_priv(dev);
...@@ -491,6 +497,12 @@ static inline void ipoib_cm_set(struct ipoib_neigh *neigh, struct ipoib_cm_tx *t ...@@ -491,6 +497,12 @@ static inline void ipoib_cm_set(struct ipoib_neigh *neigh, struct ipoib_cm_tx *t
neigh->cm = tx; neigh->cm = tx;
} }
/*
 * Tell whether the connected-mode state of @dev has an SRQ attached.
 *
 * Returns 1 when priv->cm.srq is non-NULL and 0 otherwise, so callers
 * can use the result directly as a boolean.
 */
static inline int ipoib_cm_has_srq(struct net_device *dev)
{
	struct ipoib_dev_priv *priv = netdev_priv(dev);

	return priv->cm.srq ? 1 : 0;
}
void ipoib_cm_send(struct net_device *dev, struct sk_buff *skb, struct ipoib_cm_tx *tx); void ipoib_cm_send(struct net_device *dev, struct sk_buff *skb, struct ipoib_cm_tx *tx);
int ipoib_cm_dev_open(struct net_device *dev); int ipoib_cm_dev_open(struct net_device *dev);
void ipoib_cm_dev_stop(struct net_device *dev); void ipoib_cm_dev_stop(struct net_device *dev);
...@@ -508,6 +520,8 @@ void ipoib_cm_handle_tx_wc(struct net_device *dev, struct ib_wc *wc); ...@@ -508,6 +520,8 @@ void ipoib_cm_handle_tx_wc(struct net_device *dev, struct ib_wc *wc);
struct ipoib_cm_tx; struct ipoib_cm_tx;
#define ipoib_max_conn_qp 0
static inline int ipoib_cm_admin_enabled(struct net_device *dev) static inline int ipoib_cm_admin_enabled(struct net_device *dev)
{ {
return 0; return 0;
...@@ -533,6 +547,11 @@ static inline void ipoib_cm_set(struct ipoib_neigh *neigh, struct ipoib_cm_tx *t ...@@ -533,6 +547,11 @@ static inline void ipoib_cm_set(struct ipoib_neigh *neigh, struct ipoib_cm_tx *t
{ {
} }
/*
 * Stub variant: unconditionally reports that no SRQ is available.
 * NOTE(review): presumably the build without connected-mode support;
 * confirm against the surrounding #ifdef. @dev is not examined.
 */
static inline int ipoib_cm_has_srq(struct net_device *dev)
{
	return 0;
}
static inline static inline
void ipoib_cm_send(struct net_device *dev, struct sk_buff *skb, struct ipoib_cm_tx *tx) void ipoib_cm_send(struct net_device *dev, struct sk_buff *skb, struct ipoib_cm_tx *tx)
{ {
......
This diff is collapsed.
...@@ -1268,6 +1268,9 @@ static int __init ipoib_init_module(void) ...@@ -1268,6 +1268,9 @@ static int __init ipoib_init_module(void)
ipoib_sendq_size = roundup_pow_of_two(ipoib_sendq_size); ipoib_sendq_size = roundup_pow_of_two(ipoib_sendq_size);
ipoib_sendq_size = min(ipoib_sendq_size, IPOIB_MAX_QUEUE_SIZE); ipoib_sendq_size = min(ipoib_sendq_size, IPOIB_MAX_QUEUE_SIZE);
ipoib_sendq_size = max(ipoib_sendq_size, IPOIB_MIN_QUEUE_SIZE); ipoib_sendq_size = max(ipoib_sendq_size, IPOIB_MIN_QUEUE_SIZE);
#ifdef CONFIG_INFINIBAND_IPOIB_CM
ipoib_max_conn_qp = min(ipoib_max_conn_qp, IPOIB_CM_MAX_CONN_QP);
#endif
ret = ipoib_register_debugfs(); ret = ipoib_register_debugfs();
if (ret) if (ret)
......
...@@ -172,8 +172,12 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca) ...@@ -172,8 +172,12 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca)
size = ipoib_sendq_size + ipoib_recvq_size + 1; size = ipoib_sendq_size + ipoib_recvq_size + 1;
ret = ipoib_cm_dev_init(dev); ret = ipoib_cm_dev_init(dev);
if (!ret) if (!ret) {
size += ipoib_recvq_size + 1 /* 1 extra for rx_drain_qp */; if (ipoib_cm_has_srq(dev))
size += ipoib_recvq_size + 1; /* 1 extra for rx_drain_qp */
else
size += ipoib_recvq_size * ipoib_max_conn_qp;
}
priv->cq = ib_create_cq(priv->ca, ipoib_ib_completion, NULL, dev, size, 0); priv->cq = ib_create_cq(priv->ca, ipoib_ib_completion, NULL, dev, size, 0);
if (IS_ERR(priv->cq)) { if (IS_ERR(priv->cq)) {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment