Commit 6ee51a4e authored by Jack Morgenstein's avatar Jack Morgenstein Committed by David S. Miller

mlx4: Adjust QP1 multiplexing for RoCE/SRIOV

This requires the following modifications:
1. Fix build_mlx4_header to properly fill in the ETH fields
2. Adjust mux and demux QP1 flow to support RoCE.

This commit still assumes only one GID per slave for RoCE.
The commit enabling multiple GIDs is a subsequent commit, and
is done separately because of its complexity.
Signed-off-by: default avatarJack Morgenstein <jackm@dev.mellanox.co.il>
Signed-off-by: default avatarOr Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parent 36f6fdb7
...@@ -323,6 +323,9 @@ int mlx4_ib_demux_cm_handler(struct ib_device *ibdev, int port, int *slave, ...@@ -323,6 +323,9 @@ int mlx4_ib_demux_cm_handler(struct ib_device *ibdev, int port, int *slave,
if (mad->mad_hdr.attr_id == CM_REQ_ATTR_ID) { if (mad->mad_hdr.attr_id == CM_REQ_ATTR_ID) {
union ib_gid gid; union ib_gid gid;
if (!slave)
return 0;
gid = gid_from_req_msg(ibdev, mad); gid = gid_from_req_msg(ibdev, mad);
*slave = mlx4_ib_find_real_gid(ibdev, port, gid.global.interface_id); *slave = mlx4_ib_find_real_gid(ibdev, port, gid.global.interface_id);
if (*slave < 0) { if (*slave < 0) {
...@@ -341,6 +344,7 @@ int mlx4_ib_demux_cm_handler(struct ib_device *ibdev, int port, int *slave, ...@@ -341,6 +344,7 @@ int mlx4_ib_demux_cm_handler(struct ib_device *ibdev, int port, int *slave,
return -ENOENT; return -ENOENT;
} }
if (slave)
*slave = id->slave_id; *slave = id->slave_id;
set_remote_comm_id(mad, id->sl_cm_id); set_remote_comm_id(mad, id->sl_cm_id);
......
...@@ -467,6 +467,7 @@ int mlx4_ib_send_to_slave(struct mlx4_ib_dev *dev, int slave, u8 port, ...@@ -467,6 +467,7 @@ int mlx4_ib_send_to_slave(struct mlx4_ib_dev *dev, int slave, u8 port,
int ret = 0; int ret = 0;
u16 tun_pkey_ix; u16 tun_pkey_ix;
u16 cached_pkey; u16 cached_pkey;
u8 is_eth = dev->dev->caps.port_type[port] == MLX4_PORT_TYPE_ETH;
if (dest_qpt > IB_QPT_GSI) if (dest_qpt > IB_QPT_GSI)
return -EINVAL; return -EINVAL;
...@@ -509,6 +510,12 @@ int mlx4_ib_send_to_slave(struct mlx4_ib_dev *dev, int slave, u8 port, ...@@ -509,6 +510,12 @@ int mlx4_ib_send_to_slave(struct mlx4_ib_dev *dev, int slave, u8 port,
* The driver will set the force loopback bit in post_send */ * The driver will set the force loopback bit in post_send */
memset(&attr, 0, sizeof attr); memset(&attr, 0, sizeof attr);
attr.port_num = port; attr.port_num = port;
if (is_eth) {
ret = mlx4_get_roce_gid_from_slave(dev->dev, port, slave, attr.grh.dgid.raw);
if (ret)
return ret;
attr.ah_flags = IB_AH_GRH;
}
ah = ib_create_ah(tun_ctx->pd, &attr); ah = ib_create_ah(tun_ctx->pd, &attr);
if (IS_ERR(ah)) if (IS_ERR(ah))
return -ENOMEM; return -ENOMEM;
...@@ -580,6 +587,41 @@ static int mlx4_ib_demux_mad(struct ib_device *ibdev, u8 port, ...@@ -580,6 +587,41 @@ static int mlx4_ib_demux_mad(struct ib_device *ibdev, u8 port,
int err; int err;
int slave; int slave;
u8 *slave_id; u8 *slave_id;
int is_eth = 0;
if (rdma_port_get_link_layer(ibdev, port) == IB_LINK_LAYER_INFINIBAND)
is_eth = 0;
else
is_eth = 1;
if (is_eth) {
if (!(wc->wc_flags & IB_WC_GRH)) {
mlx4_ib_warn(ibdev, "RoCE grh not present.\n");
return -EINVAL;
}
if (mad->mad_hdr.mgmt_class != IB_MGMT_CLASS_CM) {
mlx4_ib_warn(ibdev, "RoCE mgmt class is not CM\n");
return -EINVAL;
}
if (mlx4_get_slave_from_roce_gid(dev->dev, port, grh->dgid.raw, &slave)) {
mlx4_ib_warn(ibdev, "failed matching grh\n");
return -ENOENT;
}
if (slave >= dev->dev->caps.sqp_demux) {
mlx4_ib_warn(ibdev, "slave id: %d is bigger than allowed:%d\n",
slave, dev->dev->caps.sqp_demux);
return -ENOENT;
}
if (mlx4_ib_demux_cm_handler(ibdev, port, NULL, mad))
return 0;
err = mlx4_ib_send_to_slave(dev, slave, port, wc->qp->qp_type, wc, grh, mad);
if (err)
pr_debug("failed sending to slave %d via tunnel qp (%d)\n",
slave, err);
return 0;
}
/* Initially assume that this mad is for us */ /* Initially assume that this mad is for us */
slave = mlx4_master_func_num(dev->dev); slave = mlx4_master_func_num(dev->dev);
...@@ -1260,12 +1302,8 @@ static void mlx4_ib_multiplex_mad(struct mlx4_ib_demux_pv_ctx *ctx, struct ib_wc ...@@ -1260,12 +1302,8 @@ static void mlx4_ib_multiplex_mad(struct mlx4_ib_demux_pv_ctx *ctx, struct ib_wc
memcpy(&ah.av, &tunnel->hdr.av, sizeof (struct mlx4_av)); memcpy(&ah.av, &tunnel->hdr.av, sizeof (struct mlx4_av));
ah.ibah.device = ctx->ib_dev; ah.ibah.device = ctx->ib_dev;
mlx4_ib_query_ah(&ah.ibah, &ah_attr); mlx4_ib_query_ah(&ah.ibah, &ah_attr);
if ((ah_attr.ah_flags & IB_AH_GRH) && if (ah_attr.ah_flags & IB_AH_GRH)
(ah_attr.grh.sgid_index != slave)) { ah_attr.grh.sgid_index = slave;
mlx4_ib_warn(ctx->ib_dev, "slave:%d accessed invalid sgid_index:%d\n",
slave, ah_attr.grh.sgid_index);
return;
}
mlx4_ib_send_to_wire(dev, slave, ctx->port, mlx4_ib_send_to_wire(dev, slave, ctx->port,
is_proxy_qp0(dev, wc->src_qp, slave) ? is_proxy_qp0(dev, wc->src_qp, slave) ?
......
...@@ -1842,9 +1842,9 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr, ...@@ -1842,9 +1842,9 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr,
{ {
struct ib_device *ib_dev = sqp->qp.ibqp.device; struct ib_device *ib_dev = sqp->qp.ibqp.device;
struct mlx4_wqe_mlx_seg *mlx = wqe; struct mlx4_wqe_mlx_seg *mlx = wqe;
struct mlx4_wqe_ctrl_seg *ctrl = wqe;
struct mlx4_wqe_inline_seg *inl = wqe + sizeof *mlx; struct mlx4_wqe_inline_seg *inl = wqe + sizeof *mlx;
struct mlx4_ib_ah *ah = to_mah(wr->wr.ud.ah); struct mlx4_ib_ah *ah = to_mah(wr->wr.ud.ah);
struct net_device *ndev;
union ib_gid sgid; union ib_gid sgid;
u16 pkey; u16 pkey;
int send_size; int send_size;
...@@ -1868,12 +1868,11 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr, ...@@ -1868,12 +1868,11 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr,
/* When multi-function is enabled, the ib_core gid /* When multi-function is enabled, the ib_core gid
* indexes don't necessarily match the hw ones, so * indexes don't necessarily match the hw ones, so
* we must use our own cache */ * we must use our own cache */
sgid.global.subnet_prefix = err = mlx4_get_roce_gid_from_slave(to_mdev(ib_dev)->dev,
to_mdev(ib_dev)->sriov.demux[sqp->qp.port - 1]. be32_to_cpu(ah->av.ib.port_pd) >> 24,
subnet_prefix; ah->av.ib.gid_index, &sgid.raw[0]);
sgid.global.interface_id = if (err)
to_mdev(ib_dev)->sriov.demux[sqp->qp.port - 1]. return err;
guid_cache[ah->av.ib.gid_index];
} else { } else {
err = ib_get_cached_gid(ib_dev, err = ib_get_cached_gid(ib_dev,
be32_to_cpu(ah->av.ib.port_pd) >> 24, be32_to_cpu(ah->av.ib.port_pd) >> 24,
...@@ -1902,6 +1901,9 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr, ...@@ -1902,6 +1901,9 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr,
sqp->ud_header.grh.flow_label = sqp->ud_header.grh.flow_label =
ah->av.ib.sl_tclass_flowlabel & cpu_to_be32(0xfffff); ah->av.ib.sl_tclass_flowlabel & cpu_to_be32(0xfffff);
sqp->ud_header.grh.hop_limit = ah->av.ib.hop_limit; sqp->ud_header.grh.hop_limit = ah->av.ib.hop_limit;
if (is_eth)
memcpy(sqp->ud_header.grh.source_gid.raw, sgid.raw, 16);
else {
if (mlx4_is_mfunc(to_mdev(ib_dev)->dev)) { if (mlx4_is_mfunc(to_mdev(ib_dev)->dev)) {
/* When multi-function is enabled, the ib_core gid /* When multi-function is enabled, the ib_core gid
* indexes don't necessarily match the hw ones, so * indexes don't necessarily match the hw ones, so
...@@ -1917,6 +1919,7 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr, ...@@ -1917,6 +1919,7 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr,
be32_to_cpu(ah->av.ib.port_pd) >> 24, be32_to_cpu(ah->av.ib.port_pd) >> 24,
ah->av.ib.gid_index, ah->av.ib.gid_index,
&sqp->ud_header.grh.source_gid); &sqp->ud_header.grh.source_gid);
}
memcpy(sqp->ud_header.grh.destination_gid.raw, memcpy(sqp->ud_header.grh.destination_gid.raw,
ah->av.ib.dgid, 16); ah->av.ib.dgid, 16);
} }
...@@ -1948,17 +1951,19 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr, ...@@ -1948,17 +1951,19 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr,
} }
if (is_eth) { if (is_eth) {
u8 *smac; u8 smac[6];
struct in6_addr in6;
u16 pcp = (be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) >> 29) << 13; u16 pcp = (be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) >> 29) << 13;
mlx->sched_prio = cpu_to_be16(pcp); mlx->sched_prio = cpu_to_be16(pcp);
memcpy(sqp->ud_header.eth.dmac_h, ah->av.eth.mac, 6); memcpy(sqp->ud_header.eth.dmac_h, ah->av.eth.mac, 6);
/* FIXME: cache smac value? */ /* FIXME: cache smac value? */
ndev = to_mdev(sqp->qp.ibqp.device)->iboe.netdevs[sqp->qp.port - 1]; memcpy(&ctrl->srcrb_flags16[0], ah->av.eth.mac, 2);
if (!ndev) memcpy(&ctrl->imm, ah->av.eth.mac + 2, 4);
return -ENODEV; memcpy(&in6, sgid.raw, sizeof(in6));
smac = ndev->dev_addr; rdma_get_ll_mac(&in6, smac);
memcpy(sqp->ud_header.eth.smac_h, smac, 6); memcpy(sqp->ud_header.eth.smac_h, smac, 6);
if (!memcmp(sqp->ud_header.eth.smac_h, sqp->ud_header.eth.dmac_h, 6)) if (!memcmp(sqp->ud_header.eth.smac_h, sqp->ud_header.eth.dmac_h, 6))
mlx->flags |= cpu_to_be32(MLX4_WQE_CTRL_FORCE_LOOPBACK); mlx->flags |= cpu_to_be32(MLX4_WQE_CTRL_FORCE_LOOPBACK);
......
...@@ -788,6 +788,10 @@ enum { ...@@ -788,6 +788,10 @@ enum {
MLX4_USE_RR = 1, MLX4_USE_RR = 1,
}; };
struct mlx4_roce_gid_entry {
u8 raw[16];
};
struct mlx4_priv { struct mlx4_priv {
struct mlx4_dev dev; struct mlx4_dev dev;
...@@ -834,6 +838,7 @@ struct mlx4_priv { ...@@ -834,6 +838,7 @@ struct mlx4_priv {
int fs_hash_mode; int fs_hash_mode;
u8 virt2phys_pkey[MLX4_MFUNC_MAX][MLX4_MAX_PORTS][MLX4_MAX_PORT_PKEYS]; u8 virt2phys_pkey[MLX4_MFUNC_MAX][MLX4_MAX_PORTS][MLX4_MAX_PORT_PKEYS];
__be64 slave_node_guids[MLX4_MFUNC_MAX]; __be64 slave_node_guids[MLX4_MFUNC_MAX];
struct mlx4_roce_gid_entry roce_gids[MLX4_MAX_PORTS][MLX4_ROCE_MAX_GIDS];
atomic_t opreq_count; atomic_t opreq_count;
struct work_struct opreq_task; struct work_struct opreq_task;
......
...@@ -927,3 +927,37 @@ void mlx4_set_stats_bitmap(struct mlx4_dev *dev, u64 *stats_bitmap) ...@@ -927,3 +927,37 @@ void mlx4_set_stats_bitmap(struct mlx4_dev *dev, u64 *stats_bitmap)
*stats_bitmap |= MLX4_STATS_ERROR_COUNTERS_MASK; *stats_bitmap |= MLX4_STATS_ERROR_COUNTERS_MASK;
} }
EXPORT_SYMBOL(mlx4_set_stats_bitmap); EXPORT_SYMBOL(mlx4_set_stats_bitmap);
int mlx4_get_slave_from_roce_gid(struct mlx4_dev *dev, int port, u8 *gid, int *slave_id)
{
struct mlx4_priv *priv = mlx4_priv(dev);
int i, found_ix = -1;
if (!mlx4_is_mfunc(dev))
return -EINVAL;
for (i = 0; i < MLX4_ROCE_MAX_GIDS; i++) {
if (!memcmp(priv->roce_gids[port - 1][i].raw, gid, 16)) {
found_ix = i;
break;
}
}
if (found_ix >= 0)
*slave_id = found_ix;
return (found_ix >= 0) ? 0 : -EINVAL;
}
EXPORT_SYMBOL(mlx4_get_slave_from_roce_gid);
int mlx4_get_roce_gid_from_slave(struct mlx4_dev *dev, int port, int slave_id, u8 *gid)
{
struct mlx4_priv *priv = mlx4_priv(dev);
if (!mlx4_is_master(dev))
return -EINVAL;
memcpy(gid, priv->roce_gids[port - 1][slave_id].raw, 16);
return 0;
}
EXPORT_SYMBOL(mlx4_get_roce_gid_from_slave);
...@@ -48,6 +48,8 @@ ...@@ -48,6 +48,8 @@
#define MSIX_LEGACY_SZ 4 #define MSIX_LEGACY_SZ 4
#define MIN_MSIX_P_PORT 5 #define MIN_MSIX_P_PORT 5
#define MLX4_ROCE_MAX_GIDS 128
enum { enum {
MLX4_FLAG_MSI_X = 1 << 0, MLX4_FLAG_MSI_X = 1 << 0,
MLX4_FLAG_OLD_PORT_CMDS = 1 << 1, MLX4_FLAG_OLD_PORT_CMDS = 1 << 1,
...@@ -1182,6 +1184,8 @@ int set_and_calc_slave_port_state(struct mlx4_dev *dev, int slave, u8 port, int ...@@ -1182,6 +1184,8 @@ int set_and_calc_slave_port_state(struct mlx4_dev *dev, int slave, u8 port, int
void mlx4_put_slave_node_guid(struct mlx4_dev *dev, int slave, __be64 guid); void mlx4_put_slave_node_guid(struct mlx4_dev *dev, int slave, __be64 guid);
__be64 mlx4_get_slave_node_guid(struct mlx4_dev *dev, int slave); __be64 mlx4_get_slave_node_guid(struct mlx4_dev *dev, int slave);
int mlx4_get_slave_from_roce_gid(struct mlx4_dev *dev, int port, u8 *gid, int *slave_id);
int mlx4_get_roce_gid_from_slave(struct mlx4_dev *dev, int port, int slave_id, u8 *gid);
int mlx4_FLOW_STEERING_IB_UC_QP_RANGE(struct mlx4_dev *dev, u32 min_range_qpn, int mlx4_FLOW_STEERING_IB_UC_QP_RANGE(struct mlx4_dev *dev, u32 min_range_qpn,
u32 max_range_qpn); u32 max_range_qpn);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment