Commit 08ff3235 authored by Or Gerlitz, committed by Roland Dreier

mlx4: 64-byte CQE/EQE support

ConnectX-3 devices can use either 64- or 32-byte completion queue
entries (CQEs) and event queue entries (EQEs).  Using 64-byte
EQEs/CQEs performs better because each entry is aligned to a complete
cacheline.  This patch queries the HCA's capabilities, and if it
supports 64-byte CQEs and EQEs the driver configures the HW to
work in 64-byte mode.
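
Condensed from the mlx4_INIT_HCA() hunk further down, this is roughly how
the PF (or a native driver) translates the capability flags into the global
64-byte mode; bit 29 of the INIT_HCA EQE/CQE word enables 64-byte EQEs and
bit 30 enables 64-byte CQEs:

        /* sketch of the INIT_HCA flow; see the fw.c hunk below for the real code */
        if (dev->caps.flags & MLX4_DEV_CAP_FLAG_64B_EQE) {
                *(inbox + INIT_HCA_EQE_CQE_OFFSETS / 4) |= cpu_to_be32(1 << 29);
                dev->caps.eqe_size   = 64;
                dev->caps.eqe_factor = 1;       /* entry size is 32 << eqe_factor */
        }
        if (dev->caps.flags & MLX4_DEV_CAP_FLAG_64B_CQE) {
                *(inbox + INIT_HCA_EQE_CQE_OFFSETS / 4) |= cpu_to_be32(1 << 30);
                dev->caps.cqe_size   = 64;
                dev->caps.userspace_caps |= MLX4_USER_DEV_CAP_64B_CQE;
        }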

The 32-byte vs 64-byte mode is global per HCA and not per CQ or EQ.

Since this mode is global, userspace (libmlx4) must be updated to work
with the configured CQE size, and guests using SR-IOV virtual
functions need to know both EQE and CQE size.
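
For illustration only (libmlx4 internals are assumed here and are not part
of this patch), user space reads the new cqe_size field from the ucontext
response and strides through the CQ ring by that size instead of a
hard-coded 32 bytes; with 64-byte entries the valid 32-byte CQE lives in the
upper half of each entry:

        /* hypothetical libmlx4-style helper; only resp.cqe_size comes from this patch */
        static void *get_cqe_uspace(unsigned char *cq_buf, int n, int nent, int cqe_size)
        {
                unsigned char *entry = cq_buf + (unsigned long)(n & (nent - 1)) * cqe_size;

                /* for 64-byte entries the reported CQE data is the second 32 bytes */
                return entry + (cqe_size == 64 ? 32 : 0);
        }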

In case one of the 64-byte CQE/EQE capabilities is activated, the
patch makes sure that older guest drivers that use the QUERY_DEV_FUNC
command (e.g. as done in mlx4_core of Linux 3.3..3.6) will notice that
they need an update to be able to work with the PPF.  This is done by
changing the returned pf_context_behaviour so that it is no longer
zero.  If neither capability is activated, that value remains zero and
older guest drivers continue to work unmodified.
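
For example, an older VF driver masks the returned PF behaviour against its
own PF_CONTEXT_BEHAVIOUR_MASK of zero, so any unknown bit makes its
QUERY_FUNC_CAP handling fail and the probe abort (paraphrased from those
older drivers, not part of this patch):

        /* approximate old-VF check: any behaviour bit it doesn't know is fatal */
        if (func_cap.pf_context_behaviour & ~PF_CONTEXT_BEHAVIOUR_MASK) {
                mlx4_err(dev, "Unknown pf context behaviour\n");
                return -ENOSYS;
        }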

The SR-IOV related flow is as follows:

1. The PPF detects the new capabilities using the QUERY_DEV_CAP
   command.

2. The PPF activates the new capabilities using INIT_HCA.

3. The VF detects whether the PPF activated the capabilities using
   QUERY_HCA, and if so activates them for itself too.

Note that the VF detects that it must be aware of the new PF behaviour
using QUERY_FUNC_CAP (the VF side of step 3 is sketched right after
this list).  Steps 1 and 2 also apply in native (non-SR-IOV) mode.
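
The VF side of step 3, condensed from the mlx4_slave_cap() hunk below (the
CQE case is handled the same way):

        /* VF: mirror whatever the PF enabled, as reported by QUERY_HCA */
        if (hca_param.dev_cap_enabled & MLX4_DEV_CAP_64B_EQE_ENABLED) {
                dev->caps.eqe_size   = 64;
                dev->caps.eqe_factor = 1;
        } else {
                dev->caps.eqe_size   = 32;
                dev->caps.eqe_factor = 0;
        }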

User space notification is done through a new field introduced in
struct mlx4_ib_ucontext which holds device capabilities for which user
space must take action.  This changes the binary interface, so the
uverbs ABI exposed to libmlx4 is bumped from 3 to 4, but only when
**needed**, i.e. only when the driver actually uses 64-byte CQEs or
future device capabilities that user space must be in sync with.  This
allows unmodified libmlx4 to keep working with older devices (e.g. A0,
B0) which don't support 64-byte CQEs.
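
Concretely, the driver advertises ABI version 4 only when it has device
capabilities user space must honour, and otherwise keeps the old version and
the old (v3) response layout (condensed from the mlx4_ib hunks below):

        if (dev->caps.userspace_caps)
                ibdev->ib_dev.uverbs_abi_ver = MLX4_IB_UVERBS_ABI_VERSION;              /* 4 */
        else
                ibdev->ib_dev.uverbs_abi_ver = MLX4_IB_UVERBS_NO_DEV_CAPS_ABI_VERSION;  /* 3 */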

In order to keep existing systems functional when they update to a
newer kernel that contains these changes in the VF and userspace ABI,
the module parameter enable_64b_cqe_eqe must be set to enable 64-byte
mode; the default is currently false.
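
For example, on a system whose libmlx4 and guest drivers have been updated,
the feature can be enabled at load time and the current setting inspected
via the usual module-parameter sysfs file:

        modprobe mlx4_core enable_64b_cqe_eqe=1
        cat /sys/module/mlx4_core/parameters/enable_64b_cqe_eqe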
Signed-off-by: Eli Cohen <eli@mellanox.com>
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: Roland Dreier <roland@purestorage.com>
parent f4a75d2e
@@ -66,7 +66,7 @@ static void mlx4_ib_cq_event(struct mlx4_cq *cq, enum mlx4_event type)
 
 static void *get_cqe_from_buf(struct mlx4_ib_cq_buf *buf, int n)
 {
-	return mlx4_buf_offset(&buf->buf, n * sizeof (struct mlx4_cqe));
+	return mlx4_buf_offset(&buf->buf, n * buf->entry_size);
 }
 
 static void *get_cqe(struct mlx4_ib_cq *cq, int n)
@@ -77,8 +77,9 @@ static void *get_cqe(struct mlx4_ib_cq *cq, int n)
 static void *get_sw_cqe(struct mlx4_ib_cq *cq, int n)
 {
 	struct mlx4_cqe *cqe = get_cqe(cq, n & cq->ibcq.cqe);
+	struct mlx4_cqe *tcqe = ((cq->buf.entry_size == 64) ? (cqe + 1) : cqe);
 
-	return (!!(cqe->owner_sr_opcode & MLX4_CQE_OWNER_MASK) ^
+	return (!!(tcqe->owner_sr_opcode & MLX4_CQE_OWNER_MASK) ^
 		!!(n & (cq->ibcq.cqe + 1))) ? NULL : cqe;
 }
@@ -99,12 +100,13 @@ static int mlx4_ib_alloc_cq_buf(struct mlx4_ib_dev *dev, struct mlx4_ib_cq_buf *
 {
 	int err;
 
-	err = mlx4_buf_alloc(dev->dev, nent * sizeof(struct mlx4_cqe),
+	err = mlx4_buf_alloc(dev->dev, nent * dev->dev->caps.cqe_size,
 			     PAGE_SIZE * 2, &buf->buf);
 	if (err)
 		goto out;
 
+	buf->entry_size = dev->dev->caps.cqe_size;
 	err = mlx4_mtt_init(dev->dev, buf->buf.npages, buf->buf.page_shift,
 			    &buf->mtt);
 	if (err)
@@ -120,8 +122,7 @@ static int mlx4_ib_alloc_cq_buf(struct mlx4_ib_dev *dev, struct mlx4_ib_cq_buf *
 	mlx4_mtt_cleanup(dev->dev, &buf->mtt);
 
 err_buf:
-	mlx4_buf_free(dev->dev, nent * sizeof(struct mlx4_cqe),
-		      &buf->buf);
+	mlx4_buf_free(dev->dev, nent * buf->entry_size, &buf->buf);
 
 out:
 	return err;
@@ -129,7 +130,7 @@ static int mlx4_ib_alloc_cq_buf(struct mlx4_ib_dev *dev, struct mlx4_ib_cq_buf *
 static void mlx4_ib_free_cq_buf(struct mlx4_ib_dev *dev, struct mlx4_ib_cq_buf *buf, int cqe)
 {
-	mlx4_buf_free(dev->dev, (cqe + 1) * sizeof(struct mlx4_cqe), &buf->buf);
+	mlx4_buf_free(dev->dev, (cqe + 1) * buf->entry_size, &buf->buf);
 }
 
 static int mlx4_ib_get_cq_umem(struct mlx4_ib_dev *dev, struct ib_ucontext *context,
@@ -137,8 +138,9 @@ static int mlx4_ib_get_cq_umem(struct mlx4_ib_dev *dev, struct ib_ucontext *cont
 			       u64 buf_addr, int cqe)
 {
 	int err;
+	int cqe_size = dev->dev->caps.cqe_size;
 
-	*umem = ib_umem_get(context, buf_addr, cqe * sizeof (struct mlx4_cqe),
+	*umem = ib_umem_get(context, buf_addr, cqe * cqe_size,
 			    IB_ACCESS_LOCAL_WRITE, 1);
 	if (IS_ERR(*umem))
 		return PTR_ERR(*umem);
@@ -331,16 +333,23 @@ static void mlx4_ib_cq_resize_copy_cqes(struct mlx4_ib_cq *cq)
 {
 	struct mlx4_cqe *cqe, *new_cqe;
 	int i;
+	int cqe_size = cq->buf.entry_size;
+	int cqe_inc = cqe_size == 64 ? 1 : 0;
 
 	i = cq->mcq.cons_index;
 	cqe = get_cqe(cq, i & cq->ibcq.cqe);
+	cqe += cqe_inc;
+
 	while ((cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) != MLX4_CQE_OPCODE_RESIZE) {
 		new_cqe = get_cqe_from_buf(&cq->resize_buf->buf,
 					   (i + 1) & cq->resize_buf->cqe);
-		memcpy(new_cqe, get_cqe(cq, i & cq->ibcq.cqe), sizeof(struct mlx4_cqe));
+		memcpy(new_cqe, get_cqe(cq, i & cq->ibcq.cqe), cqe_size);
+		new_cqe += cqe_inc;
+
 		new_cqe->owner_sr_opcode = (cqe->owner_sr_opcode & ~MLX4_CQE_OWNER_MASK) |
 			(((i + 1) & (cq->resize_buf->cqe + 1)) ? MLX4_CQE_OWNER_MASK : 0);
 		cqe = get_cqe(cq, ++i & cq->ibcq.cqe);
+		cqe += cqe_inc;
 	}
 	++cq->mcq.cons_index;
 }
@@ -438,6 +447,7 @@ int mlx4_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata)
 
 out:
 	mutex_unlock(&cq->resize_mutex);
+
 	return err;
 }
@@ -586,6 +596,9 @@ static int mlx4_ib_poll_one(struct mlx4_ib_cq *cq,
 	if (!cqe)
 		return -EAGAIN;
 
+	if (cq->buf.entry_size == 64)
+		cqe++;
+
 	++cq->mcq.cons_index;
 
 	/*
@@ -807,6 +820,7 @@ void __mlx4_ib_cq_clean(struct mlx4_ib_cq *cq, u32 qpn, struct mlx4_ib_srq *srq)
 	int nfreed = 0;
 	struct mlx4_cqe *cqe, *dest;
 	u8 owner_bit;
+	int cqe_inc = cq->buf.entry_size == 64 ? 1 : 0;
 
 	/*
 	 * First we need to find the current producer index, so we
@@ -825,12 +839,16 @@ void __mlx4_ib_cq_clean(struct mlx4_ib_cq *cq, u32 qpn, struct mlx4_ib_srq *srq)
 	 */
 	while ((int) --prod_index - (int) cq->mcq.cons_index >= 0) {
 		cqe = get_cqe(cq, prod_index & cq->ibcq.cqe);
+		cqe += cqe_inc;
+
 		if ((be32_to_cpu(cqe->vlan_my_qpn) & MLX4_CQE_QPN_MASK) == qpn) {
 			if (srq && !(cqe->owner_sr_opcode & MLX4_CQE_IS_SEND_MASK))
 				mlx4_ib_free_srq_wqe(srq, be16_to_cpu(cqe->wqe_index));
 			++nfreed;
 		} else if (nfreed) {
 			dest = get_cqe(cq, (prod_index + nfreed) & cq->ibcq.cqe);
+			dest += cqe_inc;
+
 			owner_bit = dest->owner_sr_opcode & MLX4_CQE_OWNER_MASK;
 			memcpy(dest, cqe, sizeof *cqe);
 			dest->owner_sr_opcode = owner_bit |
......
@@ -563,15 +563,24 @@ static struct ib_ucontext *mlx4_ib_alloc_ucontext(struct ib_device *ibdev,
 {
 	struct mlx4_ib_dev *dev = to_mdev(ibdev);
 	struct mlx4_ib_ucontext *context;
+	struct mlx4_ib_alloc_ucontext_resp_v3 resp_v3;
 	struct mlx4_ib_alloc_ucontext_resp resp;
 	int err;
 
 	if (!dev->ib_active)
 		return ERR_PTR(-EAGAIN);
 
-	resp.qp_tab_size      = dev->dev->caps.num_qps;
-	resp.bf_reg_size      = dev->dev->caps.bf_reg_size;
-	resp.bf_regs_per_page = dev->dev->caps.bf_regs_per_page;
+	if (ibdev->uverbs_abi_ver == MLX4_IB_UVERBS_NO_DEV_CAPS_ABI_VERSION) {
+		resp_v3.qp_tab_size      = dev->dev->caps.num_qps;
+		resp_v3.bf_reg_size      = dev->dev->caps.bf_reg_size;
+		resp_v3.bf_regs_per_page = dev->dev->caps.bf_regs_per_page;
+	} else {
+		resp.dev_caps	      = dev->dev->caps.userspace_caps;
+		resp.qp_tab_size      = dev->dev->caps.num_qps;
+		resp.bf_reg_size      = dev->dev->caps.bf_reg_size;
+		resp.bf_regs_per_page = dev->dev->caps.bf_regs_per_page;
+		resp.cqe_size	      = dev->dev->caps.cqe_size;
+	}
 
 	context = kmalloc(sizeof *context, GFP_KERNEL);
 	if (!context)
@@ -586,7 +595,11 @@ static struct ib_ucontext *mlx4_ib_alloc_ucontext(struct ib_device *ibdev,
 	INIT_LIST_HEAD(&context->db_page_list);
 	mutex_init(&context->db_page_mutex);
 
-	err = ib_copy_to_udata(udata, &resp, sizeof resp);
+	if (ibdev->uverbs_abi_ver == MLX4_IB_UVERBS_NO_DEV_CAPS_ABI_VERSION)
+		err = ib_copy_to_udata(udata, &resp_v3, sizeof(resp_v3));
+	else
+		err = ib_copy_to_udata(udata, &resp, sizeof(resp));
+
 	if (err) {
 		mlx4_uar_free(to_mdev(ibdev)->dev, &context->uar);
 		kfree(context);
@@ -1342,7 +1355,11 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
 	ibdev->ib_dev.num_comp_vectors	= dev->caps.num_comp_vectors;
 	ibdev->ib_dev.dma_device	= &dev->pdev->dev;
 
-	ibdev->ib_dev.uverbs_abi_ver	= MLX4_IB_UVERBS_ABI_VERSION;
+	if (dev->caps.userspace_caps)
+		ibdev->ib_dev.uverbs_abi_ver = MLX4_IB_UVERBS_ABI_VERSION;
+	else
+		ibdev->ib_dev.uverbs_abi_ver = MLX4_IB_UVERBS_NO_DEV_CAPS_ABI_VERSION;
+
 	ibdev->ib_dev.uverbs_cmd_mask	=
 		(1ull << IB_USER_VERBS_CMD_GET_CONTEXT)		|
 		(1ull << IB_USER_VERBS_CMD_QUERY_DEVICE)	|
......
@@ -90,6 +90,7 @@ struct mlx4_ib_xrcd {
 struct mlx4_ib_cq_buf {
 	struct mlx4_buf		buf;
 	struct mlx4_mtt		mtt;
+	int			entry_size;
 };
 
 struct mlx4_ib_cq_resize {
......
@@ -40,7 +40,9 @@
  * Increment this value if any changes that break userspace ABI
  * compatibility are made.
  */
-#define MLX4_IB_UVERBS_ABI_VERSION	3
+#define MLX4_IB_UVERBS_NO_DEV_CAPS_ABI_VERSION	3
+#define MLX4_IB_UVERBS_ABI_VERSION		4
 
 /*
  * Make sure that all structs defined in this file remain laid out so
@@ -50,10 +52,18 @@
  * instead.
  */
 
+struct mlx4_ib_alloc_ucontext_resp_v3 {
+	__u32	qp_tab_size;
+	__u16	bf_reg_size;
+	__u16	bf_regs_per_page;
+};
+
 struct mlx4_ib_alloc_ucontext_resp {
+	__u32	dev_caps;
 	__u32	qp_tab_size;
 	__u16	bf_reg_size;
 	__u16	bf_regs_per_page;
+	__u32	cqe_size;
 };
 
 struct mlx4_ib_alloc_pd_resp {
......
@@ -1755,7 +1755,7 @@ int mlx4_multi_func_init(struct mlx4_dev *dev)
 		spin_lock_init(&s_state->lock);
 	}
 
-	memset(&priv->mfunc.master.cmd_eqe, 0, sizeof(struct mlx4_eqe));
+	memset(&priv->mfunc.master.cmd_eqe, 0, dev->caps.eqe_size);
 	priv->mfunc.master.cmd_eqe.type = MLX4_EVENT_TYPE_CMD;
 	INIT_WORK(&priv->mfunc.master.comm_work,
 		  mlx4_master_comm_channel);
......
@@ -51,7 +51,7 @@ int mlx4_en_create_cq(struct mlx4_en_priv *priv,
 	int err;
 
 	cq->size = entries;
-	cq->buf_size = cq->size * sizeof(struct mlx4_cqe);
+	cq->buf_size = cq->size * mdev->dev->caps.cqe_size;
 
 	cq->ring = ring;
 	cq->is_tx = mode;
......
@@ -1600,6 +1600,7 @@ int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port,
 		goto out;
 	}
 	priv->rx_ring_num = prof->rx_ring_num;
+	priv->cqe_factor = (mdev->dev->caps.cqe_size == 64) ? 1 : 0;
 	priv->mac_index = -1;
 	priv->msg_enable = MLX4_EN_MSG_LEVEL;
 	spin_lock_init(&priv->stats_lock);
......
@@ -566,6 +566,7 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud
 	struct ethhdr *ethh;
 	dma_addr_t dma;
 	u64 s_mac;
+	int factor = priv->cqe_factor;
 
 	if (!priv->port_up)
 		return 0;
@@ -574,7 +575,7 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud
 	 * descriptor offset can be deduced from the CQE index instead of
 	 * reading 'cqe->index' */
 	index = cq->mcq.cons_index & ring->size_mask;
-	cqe = &cq->buf[index];
+	cqe = &cq->buf[(index << factor) + factor];
 
 	/* Process all completed CQEs */
 	while (XNOR(cqe->owner_sr_opcode & MLX4_CQE_OWNER_MASK,
@@ -709,7 +710,7 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud
 		++cq->mcq.cons_index;
 		index = (cq->mcq.cons_index) & ring->size_mask;
-		cqe = &cq->buf[index];
+		cqe = &cq->buf[(index << factor) + factor];
 		if (++polled == budget) {
 			/* We are here because we reached the NAPI budget -
 			 * flush only pending LRO sessions */
......
@@ -315,12 +315,13 @@ static void mlx4_en_process_tx_cq(struct net_device *dev, struct mlx4_en_cq *cq)
 	struct mlx4_cqe *buf = cq->buf;
 	u32 packets = 0;
 	u32 bytes = 0;
+	int factor = priv->cqe_factor;
 
 	if (!priv->port_up)
 		return;
 
 	index = cons_index & size_mask;
-	cqe = &buf[index];
+	cqe = &buf[(index << factor) + factor];
 	ring_index = ring->cons & size_mask;
 
 	/* Process all completed CQEs */
@@ -349,7 +350,7 @@ static void mlx4_en_process_tx_cq(struct net_device *dev, struct mlx4_en_cq *cq)
 
 		++cons_index;
 		index = cons_index & size_mask;
-		cqe = &buf[index];
+		cqe = &buf[(index << factor) + factor];
 	}
......
@@ -101,15 +101,21 @@ static void eq_set_ci(struct mlx4_eq *eq, int req_not)
 	mb();
 }
 
-static struct mlx4_eqe *get_eqe(struct mlx4_eq *eq, u32 entry)
+static struct mlx4_eqe *get_eqe(struct mlx4_eq *eq, u32 entry, u8 eqe_factor)
 {
-	unsigned long off = (entry & (eq->nent - 1)) * MLX4_EQ_ENTRY_SIZE;
-	return eq->page_list[off / PAGE_SIZE].buf + off % PAGE_SIZE;
+	/* (entry & (eq->nent - 1)) gives us a cyclic array */
+	unsigned long offset = (entry & (eq->nent - 1)) * (MLX4_EQ_ENTRY_SIZE << eqe_factor);
+
+	/* CX3 is capable of extending the EQE from 32 to 64 bytes.
+	 * When this feature is enabled, the first (in the lower addresses)
+	 * 32 bytes in the 64 byte EQE are reserved and the next 32 bytes
+	 * contain the legacy EQE information.
+	 */
+	return eq->page_list[offset / PAGE_SIZE].buf + (offset + (eqe_factor ? MLX4_EQ_ENTRY_SIZE : 0)) % PAGE_SIZE;
 }
 
-static struct mlx4_eqe *next_eqe_sw(struct mlx4_eq *eq)
+static struct mlx4_eqe *next_eqe_sw(struct mlx4_eq *eq, u8 eqe_factor)
 {
-	struct mlx4_eqe *eqe = get_eqe(eq, eq->cons_index);
+	struct mlx4_eqe *eqe = get_eqe(eq, eq->cons_index, eqe_factor);
 	return !!(eqe->owner & 0x80) ^ !!(eq->cons_index & eq->nent) ? NULL : eqe;
 }
@@ -177,7 +183,7 @@ static void slave_event(struct mlx4_dev *dev, u8 slave, struct mlx4_eqe *eqe)
 		return;
 	}
 
-	memcpy(s_eqe, eqe, sizeof(struct mlx4_eqe) - 1);
+	memcpy(s_eqe, eqe, dev->caps.eqe_size - 1);
 	s_eqe->slave_id = slave;
 	/* ensure all information is written before setting the ownersip bit */
 	wmb();
@@ -441,7 +447,7 @@ static int mlx4_eq_int(struct mlx4_dev *dev, struct mlx4_eq *eq)
 	int i;
 	enum slave_port_gen_event gen_event;
 
-	while ((eqe = next_eqe_sw(eq))) {
+	while ((eqe = next_eqe_sw(eq, dev->caps.eqe_factor))) {
 		/*
 		 * Make sure we read EQ entry contents after we've
 		 * checked the ownership bit.
@@ -864,7 +870,8 @@ static int mlx4_create_eq(struct mlx4_dev *dev, int nent,
 	eq->dev   = dev;
 	eq->nent  = roundup_pow_of_two(max(nent, 2));
-	npages = PAGE_ALIGN(eq->nent * MLX4_EQ_ENTRY_SIZE) / PAGE_SIZE;
+	/* CX3 is capable of extending the CQE/EQE from 32 to 64 bytes */
+	npages = PAGE_ALIGN(eq->nent * (MLX4_EQ_ENTRY_SIZE << dev->caps.eqe_factor)) / PAGE_SIZE;
 
 	eq->page_list = kmalloc(npages * sizeof *eq->page_list,
 				GFP_KERNEL);
@@ -966,8 +973,9 @@ static void mlx4_free_eq(struct mlx4_dev *dev,
 	struct mlx4_priv *priv = mlx4_priv(dev);
 	struct mlx4_cmd_mailbox *mailbox;
 	int err;
-	int npages = PAGE_ALIGN(MLX4_EQ_ENTRY_SIZE * eq->nent) / PAGE_SIZE;
 	int i;
+	/* CX3 is capable of extending the CQE/EQE from 32 to 64 bytes */
+	int npages = PAGE_ALIGN((MLX4_EQ_ENTRY_SIZE << dev->caps.eqe_factor) * eq->nent) / PAGE_SIZE;
 
 	mailbox = mlx4_alloc_cmd_mailbox(dev);
 	if (IS_ERR(mailbox))
......
@@ -110,6 +110,8 @@ static void dump_dev_cap_flags(struct mlx4_dev *dev, u64 flags)
 		[42] = "Multicast VEP steering support",
 		[48] = "Counters support",
 		[59] = "Port management change event support",
+		[61] = "64 byte EQE support",
+		[62] = "64 byte CQE support",
 	};
 	int i;
@@ -235,7 +237,7 @@ int mlx4_QUERY_FUNC_CAP_wrapper(struct mlx4_dev *dev, int slave,
 		field = dev->caps.num_ports;
 		MLX4_PUT(outbox->buf, field, QUERY_FUNC_CAP_NUM_PORTS_OFFSET);
 
-		size = 0; /* no PF behaviour is set for now */
+		size = dev->caps.function_caps; /* set PF behaviours */
 		MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_PF_BHVR_OFFSET);
 
 		field = 0; /* protected FMR support not available as yet */
@@ -1237,6 +1239,24 @@ int mlx4_INIT_HCA(struct mlx4_dev *dev, struct mlx4_init_hca_param *param)
 	if (dev->caps.flags & MLX4_DEV_CAP_FLAG_COUNTERS)
 		*(inbox + INIT_HCA_FLAGS_OFFSET / 4) |= cpu_to_be32(1 << 4);
 
+	/* CX3 is capable of extending CQEs/EQEs from 32 to 64 bytes */
+	if (dev->caps.flags & MLX4_DEV_CAP_FLAG_64B_EQE) {
+		*(inbox + INIT_HCA_EQE_CQE_OFFSETS / 4) |= cpu_to_be32(1 << 29);
+		dev->caps.eqe_size   = 64;
+		dev->caps.eqe_factor = 1;
+	} else {
+		dev->caps.eqe_size   = 32;
+		dev->caps.eqe_factor = 0;
+	}
+
+	if (dev->caps.flags & MLX4_DEV_CAP_FLAG_64B_CQE) {
+		*(inbox + INIT_HCA_EQE_CQE_OFFSETS / 4) |= cpu_to_be32(1 << 30);
+		dev->caps.cqe_size   = 64;
+		dev->caps.userspace_caps |= MLX4_USER_DEV_CAP_64B_CQE;
+	} else {
+		dev->caps.cqe_size   = 32;
+	}
+
 	/* QPC/EEC/CQC/EQC/RDMARC attributes */
 
 	MLX4_PUT(inbox, param->qpc_base, INIT_HCA_QPC_BASE_OFFSET);
@@ -1319,6 +1339,7 @@ int mlx4_QUERY_HCA(struct mlx4_dev *dev,
 	struct mlx4_cmd_mailbox *mailbox;
 	__be32 *outbox;
 	int err;
+	u8 byte_field;
 
 #define QUERY_HCA_GLOBAL_CAPS_OFFSET	0x04
@@ -1370,6 +1391,13 @@ int mlx4_QUERY_HCA(struct mlx4_dev *dev,
 			 INIT_HCA_LOG_MC_TABLE_SZ_OFFSET);
 	}
 
+	/* CX3 is capable of extending CQEs/EQEs from 32 to 64 bytes */
+	MLX4_GET(byte_field, outbox, INIT_HCA_EQE_CQE_OFFSETS);
+	if (byte_field & 0x20) /* 64-bytes eqe enabled */
+		param->dev_cap_enabled |= MLX4_DEV_CAP_64B_EQE_ENABLED;
+	if (byte_field & 0x40) /* 64-bytes cqe enabled */
+		param->dev_cap_enabled |= MLX4_DEV_CAP_64B_CQE_ENABLED;
+
 	/* TPT attributes */
 
 	MLX4_GET(param->dmpt_base, outbox, INIT_HCA_DMPT_BASE_OFFSET);
......
@@ -172,6 +172,7 @@ struct mlx4_init_hca_param {
 	u8  log_uar_sz;
 	u8  uar_page_sz; /* log pg sz in 4k chunks */
 	u8  fs_hash_enable_bits;
+	u64 dev_cap_enabled;
 };
 
 struct mlx4_init_ib_param {
......
@@ -95,8 +95,14 @@ MODULE_PARM_DESC(log_num_mgm_entry_size, "log mgm size, that defines the num"
 					 " Not in use with device managed"
 					 " flow steering");
 
+static bool enable_64b_cqe_eqe;
+module_param(enable_64b_cqe_eqe, bool, 0444);
+MODULE_PARM_DESC(enable_64b_cqe_eqe,
+		 "Enable 64 byte CQEs/EQEs when the FW supports this");
+
 #define HCA_GLOBAL_CAP_MASK		0
-#define PF_CONTEXT_BEHAVIOUR_MASK	0
+
+#define PF_CONTEXT_BEHAVIOUR_MASK	MLX4_FUNC_CAP_64B_EQE_CQE
 
 static char mlx4_version[] __devinitdata =
 	DRV_NAME ": Mellanox ConnectX core driver v"
@@ -386,6 +392,21 @@ static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
 		dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FC_EXCH];
 
 	dev->caps.sqp_demux = (mlx4_is_master(dev)) ? MLX4_MAX_NUM_SLAVES : 0;
 
+	if (!enable_64b_cqe_eqe) {
+		if (dev_cap->flags &
+		    (MLX4_DEV_CAP_FLAG_64B_CQE | MLX4_DEV_CAP_FLAG_64B_EQE)) {
+			mlx4_warn(dev, "64B EQEs/CQEs supported by the device but not enabled\n");
+			dev->caps.flags &= ~MLX4_DEV_CAP_FLAG_64B_CQE;
+			dev->caps.flags &= ~MLX4_DEV_CAP_FLAG_64B_EQE;
+		}
+	}
+
+	if ((dev_cap->flags &
+	    (MLX4_DEV_CAP_FLAG_64B_CQE | MLX4_DEV_CAP_FLAG_64B_EQE)) &&
+	    mlx4_is_master(dev))
+		dev->caps.function_caps |= MLX4_FUNC_CAP_64B_EQE_CQE;
+
 	return 0;
 }
 
 /*The function checks if there are live vf, return the num of them*/
@@ -599,6 +620,21 @@ static int mlx4_slave_cap(struct mlx4_dev *dev)
 		goto err_mem;
 	}
 
+	if (hca_param.dev_cap_enabled & MLX4_DEV_CAP_64B_EQE_ENABLED) {
+		dev->caps.eqe_size   = 64;
+		dev->caps.eqe_factor = 1;
+	} else {
+		dev->caps.eqe_size   = 32;
+		dev->caps.eqe_factor = 0;
+	}
+
+	if (hca_param.dev_cap_enabled & MLX4_DEV_CAP_64B_CQE_ENABLED) {
+		dev->caps.cqe_size   = 64;
+		dev->caps.userspace_caps |= MLX4_USER_DEV_CAP_64B_CQE;
+	} else {
+		dev->caps.cqe_size   = 32;
+	}
+
 	return 0;
 
 err_mem:
......
@@ -487,6 +487,7 @@ struct mlx4_en_priv {
 	int mac_index;
 	unsigned max_mtu;
 	int base_qpn;
+	int cqe_factor;
 
 	struct mlx4_en_rss_map rss_map;
 	__be32 ctrl_flags;
......
@@ -142,6 +142,8 @@ enum {
 	MLX4_DEV_CAP_FLAG_COUNTERS	= 1LL << 48,
 	MLX4_DEV_CAP_FLAG_SENSE_SUPPORT	= 1LL << 55,
 	MLX4_DEV_CAP_FLAG_PORT_MNG_CHG_EV = 1LL << 59,
+	MLX4_DEV_CAP_FLAG_64B_EQE	= 1LL << 61,
+	MLX4_DEV_CAP_FLAG_64B_CQE	= 1LL << 62
 };
 
 enum {
@@ -151,6 +153,20 @@ enum {
 	MLX4_DEV_CAP_FLAG2_FS_EN	= 1LL << 3
 };
 
+enum {
+	MLX4_DEV_CAP_64B_EQE_ENABLED	= 1LL << 0,
+	MLX4_DEV_CAP_64B_CQE_ENABLED	= 1LL << 1
+};
+
+enum {
+	MLX4_USER_DEV_CAP_64B_CQE	= 1L << 0
+};
+
+enum {
+	MLX4_FUNC_CAP_64B_EQE_CQE	= 1L << 0
+};
+
 #define MLX4_ATTR_EXTENDED_PORT_INFO	cpu_to_be16(0xff90)
 
 enum {
@@ -419,6 +435,11 @@ struct mlx4_caps {
 	u32			max_counters;
 	u8			port_ib_mtu[MLX4_MAX_PORTS + 1];
 	u16			sqp_demux;
+	u32			eqe_size;
+	u32			cqe_size;
+	u8			eqe_factor;
+	u32			userspace_caps; /* userspace must be aware of these */
+	u32			function_caps; /* VFs must be aware of these */
 };
 
 struct mlx4_buf_list {
......