Commit c6215745 authored by Moni Shoua's avatar Moni Shoua Committed by David S. Miller

IB/mlx4: Load balance ports in port aggregation mode

When the mlx4 IB (RoCE) device works in link aggregation mode, it
exposes a single port to upper layers. Therefore, applications always
set '1' in port_num attribute when modifying a QP or creating an address handle.

To make sure that a node uses all available ports the mlx4 driver will
override the port_num attribute with a round robin policy.
Signed-off-by: default avatarMoni Shoua <monis@mellanox.com>
Signed-off-by: default avatarOr Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parent 146d6e19
...@@ -36,6 +36,7 @@ ...@@ -36,6 +36,7 @@
#include <linux/slab.h> #include <linux/slab.h>
#include <linux/inet.h> #include <linux/inet.h>
#include <linux/string.h> #include <linux/string.h>
#include <linux/mlx4/driver.h>
#include "mlx4_ib.h" #include "mlx4_ib.h"
......
...@@ -2153,6 +2153,7 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) ...@@ -2153,6 +2153,7 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
MLX4_INIT_DOORBELL_LOCK(&ibdev->uar_lock); MLX4_INIT_DOORBELL_LOCK(&ibdev->uar_lock);
ibdev->dev = dev; ibdev->dev = dev;
ibdev->bond_next_port = 0;
strlcpy(ibdev->ib_dev.name, "mlx4_%d", IB_DEVICE_NAME_MAX); strlcpy(ibdev->ib_dev.name, "mlx4_%d", IB_DEVICE_NAME_MAX);
ibdev->ib_dev.owner = THIS_MODULE; ibdev->ib_dev.owner = THIS_MODULE;
......
...@@ -534,6 +534,7 @@ struct mlx4_ib_dev { ...@@ -534,6 +534,7 @@ struct mlx4_ib_dev {
struct mlx4_ib_qp *qp1_proxy[MLX4_MAX_PORTS]; struct mlx4_ib_qp *qp1_proxy[MLX4_MAX_PORTS];
/* lock when destroying qp1_proxy and getting netdev events */ /* lock when destroying qp1_proxy and getting netdev events */
struct mutex qp1_proxy_lock[MLX4_MAX_PORTS]; struct mutex qp1_proxy_lock[MLX4_MAX_PORTS];
u8 bond_next_port;
}; };
struct ib_event_work { struct ib_event_work {
...@@ -629,6 +630,13 @@ static inline struct mlx4_ib_ah *to_mah(struct ib_ah *ibah) ...@@ -629,6 +630,13 @@ static inline struct mlx4_ib_ah *to_mah(struct ib_ah *ibah)
return container_of(ibah, struct mlx4_ib_ah, ibah); return container_of(ibah, struct mlx4_ib_ah, ibah);
} }
static inline u8 mlx4_ib_bond_next_port(struct mlx4_ib_dev *dev)
{
dev->bond_next_port = (dev->bond_next_port + 1) % dev->num_ports;
return dev->bond_next_port + 1;
}
int mlx4_ib_init_sriov(struct mlx4_ib_dev *dev); int mlx4_ib_init_sriov(struct mlx4_ib_dev *dev);
void mlx4_ib_close_sriov(struct mlx4_ib_dev *dev); void mlx4_ib_close_sriov(struct mlx4_ib_dev *dev);
......
...@@ -1905,6 +1905,22 @@ int mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, ...@@ -1905,6 +1905,22 @@ int mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
goto out; goto out;
} }
if (mlx4_is_bonded(dev->dev) && (attr_mask & IB_QP_PORT)) {
if ((cur_state == IB_QPS_RESET) && (new_state == IB_QPS_INIT)) {
if ((ibqp->qp_type == IB_QPT_RC) ||
(ibqp->qp_type == IB_QPT_UD) ||
(ibqp->qp_type == IB_QPT_UC) ||
(ibqp->qp_type == IB_QPT_RAW_PACKET) ||
(ibqp->qp_type == IB_QPT_XRC_INI)) {
attr->port_num = mlx4_ib_bond_next_port(dev);
}
} else {
/* no sense in changing port_num
* when ports are bonded */
attr_mask &= ~IB_QP_PORT;
}
}
if ((attr_mask & IB_QP_PORT) && if ((attr_mask & IB_QP_PORT) &&
(attr->port_num == 0 || attr->port_num > dev->num_ports)) { (attr->port_num == 0 || attr->port_num > dev->num_ports)) {
pr_debug("qpn 0x%x: invalid port number (%d) specified " pr_debug("qpn 0x%x: invalid port number (%d) specified "
...@@ -1955,6 +1971,9 @@ int mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, ...@@ -1955,6 +1971,9 @@ int mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
err = __mlx4_ib_modify_qp(ibqp, attr, attr_mask, cur_state, new_state); err = __mlx4_ib_modify_qp(ibqp, attr, attr_mask, cur_state, new_state);
if (mlx4_is_bonded(dev->dev) && (attr_mask & IB_QP_PORT))
attr->port_num = 1;
out: out:
mutex_unlock(&qp->mutex); mutex_unlock(&qp->mutex);
return err; return err;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment