Commit 0f485251 authored by Shirley Ma, committed by Roland Dreier

IPoIB: Make send and receive queue sizes tunable

Make IPoIB's send and receive queue sizes tunable via module
parameters ("send_queue_size" and "recv_queue_size").  This allows the
queue sizes to be enlarged to fix disastrously bad performance on some
platforms and workloads, without bloating memory usage when large
queues aren't needed.
Signed-off-by: Shirley Ma <xma@us.ibm.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
parent f2de3b06
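
Usage (illustrative, not part of the commit): the two parameters are given at module load time, e.g. `modprobe ib_ipoib send_queue_size=512 recv_queue_size=512` (the ib_ipoib module name and the value 512 are examples, not taken from the patch). Because both parameters are registered with 0444 permissions, the values chosen at load time can be read back from sysfs but not changed on a running module.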
@@ -65,6 +65,8 @@ enum {
         IPOIB_RX_RING_SIZE = 128,
         IPOIB_TX_RING_SIZE = 64,
+        IPOIB_MAX_QUEUE_SIZE = 8192,
+        IPOIB_MIN_QUEUE_SIZE = 2,
         IPOIB_NUM_WC = 4,

@@ -332,6 +334,8 @@ static inline void ipoib_unregister_debugfs(void) { }
 #define ipoib_warn(priv, format, arg...) \
         ipoib_printk(KERN_WARNING, priv, format , ## arg)
+extern int ipoib_sendq_size;
+extern int ipoib_recvq_size;
 #ifdef CONFIG_INFINIBAND_IPOIB_DEBUG
 extern int ipoib_debug_level;

@@ -161,7 +161,7 @@ static int ipoib_ib_post_receives(struct net_device *dev)
         struct ipoib_dev_priv *priv = netdev_priv(dev);
         int i;
-        for (i = 0; i < IPOIB_RX_RING_SIZE; ++i) {
+        for (i = 0; i < ipoib_recvq_size; ++i) {
                 if (ipoib_alloc_rx_skb(dev, i)) {
                         ipoib_warn(priv, "failed to allocate receive buffer %d\n", i);
                         return -ENOMEM;

@@ -187,7 +187,7 @@ static void ipoib_ib_handle_wc(struct net_device *dev,
         if (wr_id & IPOIB_OP_RECV) {
                 wr_id &= ~IPOIB_OP_RECV;
-                if (wr_id < IPOIB_RX_RING_SIZE) {
+                if (wr_id < ipoib_recvq_size) {
                         struct sk_buff *skb = priv->rx_ring[wr_id].skb;
                         dma_addr_t addr = priv->rx_ring[wr_id].mapping;

@@ -252,9 +252,9 @@ static void ipoib_ib_handle_wc(struct net_device *dev,
                 struct ipoib_tx_buf *tx_req;
                 unsigned long flags;
-                if (wr_id >= IPOIB_TX_RING_SIZE) {
+                if (wr_id >= ipoib_sendq_size) {
                         ipoib_warn(priv, "completion event with wrid %d (> %d)\n",
-                                   wr_id, IPOIB_TX_RING_SIZE);
+                                   wr_id, ipoib_sendq_size);
                         return;
                 }

@@ -275,7 +275,7 @@ static void ipoib_ib_handle_wc(struct net_device *dev,
                 spin_lock_irqsave(&priv->tx_lock, flags);
                 ++priv->tx_tail;
                 if (netif_queue_stopped(dev) &&
-                    priv->tx_head - priv->tx_tail <= IPOIB_TX_RING_SIZE / 2)
+                    priv->tx_head - priv->tx_tail <= ipoib_sendq_size >> 1)
                         netif_wake_queue(dev);
                 spin_unlock_irqrestore(&priv->tx_lock, flags);

@@ -344,13 +344,13 @@ void ipoib_send(struct net_device *dev, struct sk_buff *skb,
          * means we have to make sure everything is properly recorded and
          * our state is consistent before we call post_send().
          */
-        tx_req = &priv->tx_ring[priv->tx_head & (IPOIB_TX_RING_SIZE - 1)];
+        tx_req = &priv->tx_ring[priv->tx_head & (ipoib_sendq_size - 1)];
         tx_req->skb = skb;
         addr = dma_map_single(priv->ca->dma_device, skb->data, skb->len,
                               DMA_TO_DEVICE);
         pci_unmap_addr_set(tx_req, mapping, addr);
-        if (unlikely(post_send(priv, priv->tx_head & (IPOIB_TX_RING_SIZE - 1),
+        if (unlikely(post_send(priv, priv->tx_head & (ipoib_sendq_size - 1),
                                address->ah, qpn, addr, skb->len))) {
                 ipoib_warn(priv, "post_send failed\n");
                 ++priv->stats.tx_errors;

@@ -363,7 +363,7 @@ void ipoib_send(struct net_device *dev, struct sk_buff *skb,
                 address->last_send = priv->tx_head;
                 ++priv->tx_head;
-                if (priv->tx_head - priv->tx_tail == IPOIB_TX_RING_SIZE) {
+                if (priv->tx_head - priv->tx_tail == ipoib_sendq_size) {
                         ipoib_dbg(priv, "TX ring full, stopping kernel net queue\n");
                         netif_stop_queue(dev);
                 }

@@ -488,7 +488,7 @@ static int recvs_pending(struct net_device *dev)
         int pending = 0;
         int i;
-        for (i = 0; i < IPOIB_RX_RING_SIZE; ++i)
+        for (i = 0; i < ipoib_recvq_size; ++i)
                 if (priv->rx_ring[i].skb)
                         ++pending;

@@ -527,7 +527,7 @@ int ipoib_ib_dev_stop(struct net_device *dev)
                  */
                 while ((int) priv->tx_tail - (int) priv->tx_head < 0) {
                         tx_req = &priv->tx_ring[priv->tx_tail &
-                                                (IPOIB_TX_RING_SIZE - 1)];
+                                                (ipoib_sendq_size - 1)];
                         dma_unmap_single(priv->ca->dma_device,
                                          pci_unmap_addr(tx_req, mapping),
                                          tx_req->skb->len,

@@ -536,7 +536,7 @@ int ipoib_ib_dev_stop(struct net_device *dev)
                         ++priv->tx_tail;
                 }
-                for (i = 0; i < IPOIB_RX_RING_SIZE; ++i)
+                for (i = 0; i < ipoib_recvq_size; ++i)
                         if (priv->rx_ring[i].skb) {
                                 dma_unmap_single(priv->ca->dma_device,
                                                  pci_unmap_addr(&priv->rx_ring[i],

@@ -41,6 +41,7 @@
 #include <linux/init.h>
 #include <linux/slab.h>
 #include <linux/vmalloc.h>
+#include <linux/kernel.h>
 #include <linux/if_arp.h>	/* For ARPHRD_xxx */

@@ -53,6 +54,14 @@ MODULE_AUTHOR("Roland Dreier");
 MODULE_DESCRIPTION("IP-over-InfiniBand net driver");
 MODULE_LICENSE("Dual BSD/GPL");
+int ipoib_sendq_size __read_mostly = IPOIB_TX_RING_SIZE;
+int ipoib_recvq_size __read_mostly = IPOIB_RX_RING_SIZE;
+
+module_param_named(send_queue_size, ipoib_sendq_size, int, 0444);
+MODULE_PARM_DESC(send_queue_size, "Number of descriptors in send queue");
+module_param_named(recv_queue_size, ipoib_recvq_size, int, 0444);
+MODULE_PARM_DESC(recv_queue_size, "Number of descriptors in receive queue");
 #ifdef CONFIG_INFINIBAND_IPOIB_DEBUG
 int ipoib_debug_level;

@@ -795,20 +804,19 @@ int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port)
         struct ipoib_dev_priv *priv = netdev_priv(dev);
         /* Allocate RX/TX "rings" to hold queued skbs */
-        priv->rx_ring = kzalloc(IPOIB_RX_RING_SIZE * sizeof (struct ipoib_rx_buf),
+        priv->rx_ring = kzalloc(ipoib_recvq_size * sizeof *priv->rx_ring,
                                 GFP_KERNEL);
         if (!priv->rx_ring) {
                 printk(KERN_WARNING "%s: failed to allocate RX ring (%d entries)\n",
-                       ca->name, IPOIB_RX_RING_SIZE);
+                       ca->name, ipoib_recvq_size);
                 goto out;
         }
-        priv->tx_ring = kzalloc(IPOIB_TX_RING_SIZE * sizeof (struct ipoib_tx_buf),
+        priv->tx_ring = kzalloc(ipoib_sendq_size * sizeof *priv->tx_ring,
                                 GFP_KERNEL);
         if (!priv->tx_ring) {
                 printk(KERN_WARNING "%s: failed to allocate TX ring (%d entries)\n",
-                       ca->name, IPOIB_TX_RING_SIZE);
+                       ca->name, ipoib_sendq_size);
                 goto out_rx_ring_cleanup;
         }

@@ -876,7 +884,7 @@ static void ipoib_setup(struct net_device *dev)
         dev->hard_header_len = IPOIB_ENCAP_LEN + INFINIBAND_ALEN;
         dev->addr_len        = INFINIBAND_ALEN;
         dev->type            = ARPHRD_INFINIBAND;
-        dev->tx_queue_len    = IPOIB_TX_RING_SIZE * 2;
+        dev->tx_queue_len    = ipoib_sendq_size * 2;
         dev->features        = NETIF_F_VLAN_CHALLENGED | NETIF_F_LLTX;
         /* MTU will be reset when mcast join happens */

@@ -1128,6 +1136,14 @@ static int __init ipoib_init_module(void)
 {
         int ret;
+        ipoib_recvq_size = roundup_pow_of_two(ipoib_recvq_size);
+        ipoib_recvq_size = min(ipoib_recvq_size, IPOIB_MAX_QUEUE_SIZE);
+        ipoib_recvq_size = max(ipoib_recvq_size, IPOIB_MIN_QUEUE_SIZE);
+
+        ipoib_sendq_size = roundup_pow_of_two(ipoib_sendq_size);
+        ipoib_sendq_size = min(ipoib_sendq_size, IPOIB_MAX_QUEUE_SIZE);
+        ipoib_sendq_size = max(ipoib_sendq_size, IPOIB_MIN_QUEUE_SIZE);
+
         ret = ipoib_register_debugfs();
         if (ret)
                 return ret;

@@ -159,8 +159,8 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca)
         struct ipoib_dev_priv *priv = netdev_priv(dev);
         struct ib_qp_init_attr init_attr = {
                 .cap = {
-                        .max_send_wr  = IPOIB_TX_RING_SIZE,
-                        .max_recv_wr  = IPOIB_RX_RING_SIZE,
+                        .max_send_wr  = ipoib_sendq_size,
+                        .max_recv_wr  = ipoib_recvq_size,
                         .max_send_sge = 1,
                         .max_recv_sge = 1
                 },

@@ -175,7 +175,7 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca)
         }
         priv->cq = ib_create_cq(priv->ca, ipoib_ib_completion, NULL, dev,
-                                IPOIB_TX_RING_SIZE + IPOIB_RX_RING_SIZE + 1);
+                                ipoib_sendq_size + ipoib_recvq_size + 1);
         if (IS_ERR(priv->cq)) {
                 printk(KERN_WARNING "%s: failed to create CQ\n", ca->name);
                 goto out_free_pd;
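
For context: the rounding in ipoib_init_module() above is needed because both rings are indexed by masking a free-running counter with the ring size minus one, e.g. priv->tx_head & (ipoib_sendq_size - 1), and that mask only behaves like a modulo when the size is a power of two. A minimal stand-alone sketch of the same round-up, clamp, and mask logic (plain userspace C; the helper names and sample values are illustrative, not code from the patch):

        #include <stdio.h>

        #define IPOIB_MIN_QUEUE_SIZE 2
        #define IPOIB_MAX_QUEUE_SIZE 8192

        /* Round up to the next power of two (userspace stand-in for the
         * kernel's roundup_pow_of_two()). */
        static unsigned int round_up_pow2(unsigned int v)
        {
                unsigned int p = 1;

                while (p < v)
                        p <<= 1;
                return p;
        }

        /* Same ordering as ipoib_init_module(): round up, then clamp. */
        static unsigned int clamp_queue_size(unsigned int requested)
        {
                unsigned int size = round_up_pow2(requested);

                if (size > IPOIB_MAX_QUEUE_SIZE)
                        size = IPOIB_MAX_QUEUE_SIZE;
                if (size < IPOIB_MIN_QUEUE_SIZE)
                        size = IPOIB_MIN_QUEUE_SIZE;
                return size;
        }

        int main(void)
        {
                unsigned int size = clamp_queue_size(300); /* e.g. send_queue_size=300 */
                unsigned int head = 1000;                  /* free-running tx_head counter */

                /* 300 rounds up to 512; the ring slot is head & (size - 1) = 488. */
                printf("size=%u slot=%u\n", size, head & (size - 1));
                return 0;
        }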