Commit 85743f1e authored by Huy Nguyen's avatar Huy Nguyen Committed by David S. Miller

net/mlx4_core: Set UAR page size to 4KB regardless of system page size

problem description:

The current code sets UAR page size equal to system page size.
The ConnectX-3 and ConnectX-3 Pro HWs require minimum 128 UAR pages.
The mlx4 kernel drivers are not loaded if there is less than 128 UAR pages.

solution:

Always set the UAR page size to 4KB. This allows more UAR pages if the OS
has a PAGE_SIZE larger than 4KB. For example, the PowerPC kernel uses a 64KB
system page size; with a 4MB uar region, there are 4MB/2/64KB = 32
uars (half for uar, half for blueflame). This does not meet the minimum
128 UAR pages requirement. With a 4KB UAR page, there are 4MB/2/4KB = 512
uars, which meets the minimum requirement.

Note that only the code in mlx4_core that deals with firmware knows that the
uar page size is 4KB. Code that deals with the usr page in cq and qp contexts
(mlx4_ib, mlx4_en and part of mlx4_core) still has the same assumption
that the uar page size equals the system page size.

Note that with this implementation, on a 64KB system page size kernel, there
are 16 uars per system page but only one uar is used. The other 15
uars are ignored because of the above assumption.

Regarding SR-IOV, mlx4_core in the hypervisor will set the uar page size
to 4KB and the mlx4_core code in the virtual OS will obtain the uar page size
from firmware.

Regarding backward compatibility in SR-IOV: if the hypervisor has this new
code, the virtual OS must be updated. If the hypervisor has the old code and
the virtual OS has this new code, the new code will be backward compatible
with the old code. If the uar size is big enough, this new code in the VF
continues to work with a 64KB uar page size (on a PowerPC kernel). If the uar
size does not meet the 128 uars requirement, this new code is not loaded in
the VF and prints the same error message as the old code in the hypervisor.
Signed-off-by: default avatarHuy Nguyen <huyn@mellanox.com>
Reviewed-by: default avatarYishai Hadas <yishaih@mellanox.com>
Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parent 22e3817e
...@@ -1681,9 +1681,12 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, ...@@ -1681,9 +1681,12 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
} }
if (qp->ibqp.uobject) if (qp->ibqp.uobject)
context->usr_page = cpu_to_be32(to_mucontext(ibqp->uobject->context)->uar.index); context->usr_page = cpu_to_be32(
mlx4_to_hw_uar_index(dev->dev,
to_mucontext(ibqp->uobject->context)->uar.index));
else else
context->usr_page = cpu_to_be32(dev->priv_uar.index); context->usr_page = cpu_to_be32(
mlx4_to_hw_uar_index(dev->dev, dev->priv_uar.index));
if (attr_mask & IB_QP_DEST_QPN) if (attr_mask & IB_QP_DEST_QPN)
context->remote_qpn = cpu_to_be32(attr->dest_qp_num); context->remote_qpn = cpu_to_be32(attr->dest_qp_num);
......
...@@ -318,7 +318,9 @@ int mlx4_cq_alloc(struct mlx4_dev *dev, int nent, ...@@ -318,7 +318,9 @@ int mlx4_cq_alloc(struct mlx4_dev *dev, int nent,
if (timestamp_en) if (timestamp_en)
cq_context->flags |= cpu_to_be32(1 << 19); cq_context->flags |= cpu_to_be32(1 << 19);
cq_context->logsize_usrpage = cpu_to_be32((ilog2(nent) << 24) | uar->index); cq_context->logsize_usrpage =
cpu_to_be32((ilog2(nent) << 24) |
mlx4_to_hw_uar_index(dev, uar->index));
cq_context->comp_eqn = priv->eq_table.eq[MLX4_CQ_TO_EQ_VECTOR(vector)].eqn; cq_context->comp_eqn = priv->eq_table.eq[MLX4_CQ_TO_EQ_VECTOR(vector)].eqn;
cq_context->log_page_size = mtt->page_shift - MLX4_ICM_PAGE_SHIFT; cq_context->log_page_size = mtt->page_shift - MLX4_ICM_PAGE_SHIFT;
......
...@@ -58,7 +58,8 @@ void mlx4_en_fill_qp_context(struct mlx4_en_priv *priv, int size, int stride, ...@@ -58,7 +58,8 @@ void mlx4_en_fill_qp_context(struct mlx4_en_priv *priv, int size, int stride,
} else { } else {
context->sq_size_stride = ilog2(TXBB_SIZE) - 4; context->sq_size_stride = ilog2(TXBB_SIZE) - 4;
} }
context->usr_page = cpu_to_be32(mdev->priv_uar.index); context->usr_page = cpu_to_be32(mlx4_to_hw_uar_index(mdev->dev,
mdev->priv_uar.index));
context->local_qpn = cpu_to_be32(qpn); context->local_qpn = cpu_to_be32(qpn);
context->pri_path.ackto = 1 & 0x07; context->pri_path.ackto = 1 & 0x07;
context->pri_path.sched_queue = 0x83 | (priv->port - 1) << 6; context->pri_path.sched_queue = 0x83 | (priv->port - 1) << 6;
......
...@@ -213,7 +213,9 @@ int mlx4_en_activate_tx_ring(struct mlx4_en_priv *priv, ...@@ -213,7 +213,9 @@ int mlx4_en_activate_tx_ring(struct mlx4_en_priv *priv,
mlx4_en_fill_qp_context(priv, ring->size, ring->stride, 1, 0, ring->qpn, mlx4_en_fill_qp_context(priv, ring->size, ring->stride, 1, 0, ring->qpn,
ring->cqn, user_prio, &ring->context); ring->cqn, user_prio, &ring->context);
if (ring->bf_alloced) if (ring->bf_alloced)
ring->context.usr_page = cpu_to_be32(ring->bf.uar->index); ring->context.usr_page =
cpu_to_be32(mlx4_to_hw_uar_index(mdev->dev,
ring->bf.uar->index));
err = mlx4_qp_to_ready(mdev->dev, &ring->wqres.mtt, &ring->context, err = mlx4_qp_to_ready(mdev->dev, &ring->wqres.mtt, &ring->context,
&ring->qp, &ring->qp_state); &ring->qp, &ring->qp_state);
......
...@@ -940,9 +940,10 @@ static void __iomem *mlx4_get_eq_uar(struct mlx4_dev *dev, struct mlx4_eq *eq) ...@@ -940,9 +940,10 @@ static void __iomem *mlx4_get_eq_uar(struct mlx4_dev *dev, struct mlx4_eq *eq)
if (!priv->eq_table.uar_map[index]) { if (!priv->eq_table.uar_map[index]) {
priv->eq_table.uar_map[index] = priv->eq_table.uar_map[index] =
ioremap(pci_resource_start(dev->persist->pdev, 2) + ioremap(
((eq->eqn / 4) << PAGE_SHIFT), pci_resource_start(dev->persist->pdev, 2) +
PAGE_SIZE); ((eq->eqn / 4) << (dev->uar_page_shift)),
(1 << (dev->uar_page_shift)));
if (!priv->eq_table.uar_map[index]) { if (!priv->eq_table.uar_map[index]) {
mlx4_err(dev, "Couldn't map EQ doorbell for EQN 0x%06x\n", mlx4_err(dev, "Couldn't map EQ doorbell for EQN 0x%06x\n",
eq->eqn); eq->eqn);
......
...@@ -168,6 +168,20 @@ struct mlx4_port_config { ...@@ -168,6 +168,20 @@ struct mlx4_port_config {
static atomic_t pf_loading = ATOMIC_INIT(0); static atomic_t pf_loading = ATOMIC_INIT(0);
static inline void mlx4_set_num_reserved_uars(struct mlx4_dev *dev,
struct mlx4_dev_cap *dev_cap)
{
/* The reserved_uars is calculated by system page size unit.
* Therefore, adjustment is added when the uar page size is less
* than the system page size
*/
dev->caps.reserved_uars =
max_t(int,
mlx4_get_num_reserved_uar(dev),
dev_cap->reserved_uars /
(1 << (PAGE_SHIFT - dev->uar_page_shift)));
}
int mlx4_check_port_params(struct mlx4_dev *dev, int mlx4_check_port_params(struct mlx4_dev *dev,
enum mlx4_port_type *port_type) enum mlx4_port_type *port_type)
{ {
...@@ -386,8 +400,6 @@ static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) ...@@ -386,8 +400,6 @@ static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
dev->caps.reserved_mtts = dev_cap->reserved_mtts; dev->caps.reserved_mtts = dev_cap->reserved_mtts;
dev->caps.reserved_mrws = dev_cap->reserved_mrws; dev->caps.reserved_mrws = dev_cap->reserved_mrws;
/* The first 128 UARs are used for EQ doorbells */
dev->caps.reserved_uars = max_t(int, 128, dev_cap->reserved_uars);
dev->caps.reserved_pds = dev_cap->reserved_pds; dev->caps.reserved_pds = dev_cap->reserved_pds;
dev->caps.reserved_xrcds = (dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC) ? dev->caps.reserved_xrcds = (dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC) ?
dev_cap->reserved_xrcds : 0; dev_cap->reserved_xrcds : 0;
...@@ -405,6 +417,15 @@ static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) ...@@ -405,6 +417,15 @@ static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
dev->caps.max_gso_sz = dev_cap->max_gso_sz; dev->caps.max_gso_sz = dev_cap->max_gso_sz;
dev->caps.max_rss_tbl_sz = dev_cap->max_rss_tbl_sz; dev->caps.max_rss_tbl_sz = dev_cap->max_rss_tbl_sz;
/* Save uar page shift */
if (!mlx4_is_slave(dev)) {
/* Virtual PCI function needs to determine UAR page size from
* firmware. Only master PCI function can set the uar page size
*/
dev->uar_page_shift = DEFAULT_UAR_PAGE_SHIFT;
mlx4_set_num_reserved_uars(dev, dev_cap);
}
if (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_PHV_EN) { if (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_PHV_EN) {
struct mlx4_init_hca_param hca_param; struct mlx4_init_hca_param hca_param;
...@@ -815,16 +836,25 @@ static int mlx4_slave_cap(struct mlx4_dev *dev) ...@@ -815,16 +836,25 @@ static int mlx4_slave_cap(struct mlx4_dev *dev)
return -ENODEV; return -ENODEV;
} }
/* slave gets uar page size from QUERY_HCA fw command */ /* Set uar_page_shift for VF */
dev->caps.uar_page_size = 1 << (hca_param.uar_page_sz + 12); dev->uar_page_shift = hca_param.uar_page_sz + 12;
/* TODO: relax this assumption */ /* Make sure the master uar page size is valid */
if (dev->caps.uar_page_size != PAGE_SIZE) { if (dev->uar_page_shift > PAGE_SHIFT) {
mlx4_err(dev, "UAR size:%d != kernel PAGE_SIZE of %ld\n", mlx4_err(dev,
dev->caps.uar_page_size, PAGE_SIZE); "Invalid configuration: uar page size is larger than system page size\n");
return -ENODEV; return -ENODEV;
} }
/* Set reserved_uars based on the uar_page_shift */
mlx4_set_num_reserved_uars(dev, &dev_cap);
/* Although uar page size in FW differs from system page size,
* upper software layers (mlx4_ib, mlx4_en and part of mlx4_core)
* still works with assumption that uar page size == system page size
*/
dev->caps.uar_page_size = PAGE_SIZE;
memset(&func_cap, 0, sizeof(func_cap)); memset(&func_cap, 0, sizeof(func_cap));
err = mlx4_QUERY_FUNC_CAP(dev, 0, &func_cap); err = mlx4_QUERY_FUNC_CAP(dev, 0, &func_cap);
if (err) { if (err) {
...@@ -2179,8 +2209,12 @@ static int mlx4_init_hca(struct mlx4_dev *dev) ...@@ -2179,8 +2209,12 @@ static int mlx4_init_hca(struct mlx4_dev *dev)
dev->caps.max_fmr_maps = (1 << (32 - ilog2(dev->caps.num_mpts))) - 1; dev->caps.max_fmr_maps = (1 << (32 - ilog2(dev->caps.num_mpts))) - 1;
init_hca.log_uar_sz = ilog2(dev->caps.num_uars); /* Always set UAR page size 4KB, set log_uar_sz accordingly */
init_hca.uar_page_sz = PAGE_SHIFT - 12; init_hca.log_uar_sz = ilog2(dev->caps.num_uars) +
PAGE_SHIFT -
DEFAULT_UAR_PAGE_SHIFT;
init_hca.uar_page_sz = DEFAULT_UAR_PAGE_SHIFT - 12;
init_hca.mw_enabled = 0; init_hca.mw_enabled = 0;
if (dev->caps.flags & MLX4_DEV_CAP_FLAG_MEM_WINDOW || if (dev->caps.flags & MLX4_DEV_CAP_FLAG_MEM_WINDOW ||
dev->caps.bmme_flags & MLX4_BMME_FLAG_TYPE_2_WIN) dev->caps.bmme_flags & MLX4_BMME_FLAG_TYPE_2_WIN)
......
...@@ -269,9 +269,15 @@ EXPORT_SYMBOL_GPL(mlx4_bf_free); ...@@ -269,9 +269,15 @@ EXPORT_SYMBOL_GPL(mlx4_bf_free);
int mlx4_init_uar_table(struct mlx4_dev *dev) int mlx4_init_uar_table(struct mlx4_dev *dev)
{ {
if (dev->caps.num_uars <= 128) { int num_reserved_uar = mlx4_get_num_reserved_uar(dev);
mlx4_err(dev, "Only %d UAR pages (need more than 128)\n",
dev->caps.num_uars); mlx4_dbg(dev, "uar_page_shift = %d", dev->uar_page_shift);
mlx4_dbg(dev, "Effective reserved_uars=%d", dev->caps.reserved_uars);
if (dev->caps.num_uars <= num_reserved_uar) {
mlx4_err(
dev, "Only %d UAR pages (need more than %d)\n",
dev->caps.num_uars, num_reserved_uar);
mlx4_err(dev, "Increase firmware log2_uar_bar_megabytes?\n"); mlx4_err(dev, "Increase firmware log2_uar_bar_megabytes?\n");
return -ENODEV; return -ENODEV;
} }
......
...@@ -44,6 +44,8 @@ ...@@ -44,6 +44,8 @@
#include <linux/timecounter.h> #include <linux/timecounter.h>
#define DEFAULT_UAR_PAGE_SHIFT 12
#define MAX_MSIX_P_PORT 17 #define MAX_MSIX_P_PORT 17
#define MAX_MSIX 64 #define MAX_MSIX 64
#define MIN_MSIX_P_PORT 5 #define MIN_MSIX_P_PORT 5
...@@ -856,6 +858,7 @@ struct mlx4_dev { ...@@ -856,6 +858,7 @@ struct mlx4_dev {
u64 regid_promisc_array[MLX4_MAX_PORTS + 1]; u64 regid_promisc_array[MLX4_MAX_PORTS + 1];
u64 regid_allmulti_array[MLX4_MAX_PORTS + 1]; u64 regid_allmulti_array[MLX4_MAX_PORTS + 1];
struct mlx4_vf_dev *dev_vfs; struct mlx4_vf_dev *dev_vfs;
u8 uar_page_shift;
}; };
struct mlx4_clock_params { struct mlx4_clock_params {
...@@ -1528,4 +1531,14 @@ int mlx4_ACCESS_PTYS_REG(struct mlx4_dev *dev, ...@@ -1528,4 +1531,14 @@ int mlx4_ACCESS_PTYS_REG(struct mlx4_dev *dev,
int mlx4_get_internal_clock_params(struct mlx4_dev *dev, int mlx4_get_internal_clock_params(struct mlx4_dev *dev,
struct mlx4_clock_params *params); struct mlx4_clock_params *params);
/* Translate a driver UAR index (one UAR per system page) into the
 * hardware UAR index, which is expressed in uar_page_shift-sized pages.
 */
static inline int mlx4_to_hw_uar_index(struct mlx4_dev *dev, int index)
{
	int pages_per_sys_page_shift = PAGE_SHIFT - dev->uar_page_shift;

	return index << pages_per_sys_page_shift;
}
static inline int mlx4_get_num_reserved_uar(struct mlx4_dev *dev)
{
	/* The first 128 UARs are used for EQ doorbells; express that count
	 * in the driver's system-page-sized UAR units.
	 */
	return 128 >> (PAGE_SHIFT - dev->uar_page_shift);
}
#endif /* MLX4_DEVICE_H */ #endif /* MLX4_DEVICE_H */
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment