Commit 9f123f74 authored by Maxim Mikityanskiy, committed by Jakub Kicinski

net/mlx5e: Improve MTT/KSM alignment

Make mlx5e_mpwrq_mtts_per_wqe take into account that KSM requires
smaller alignment than MTT.

Ensure that there is always an even number of MTTs in a UMR WQE, so that
complete octwords are formed, and no garbage is mapped.

Drop the extra alignment in MLX5_MTT_OCTW that could set
ucseg->xlt_octowords too big, also leading to mapping garbage.

Generalize some calculations by introducing the MLX5_OCTWORD constant.
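
For reference, the driver's sizes make the alignment difference concrete: an
octword is 16 bytes (MLX5_OCTWORD), an MTT entry is 8 bytes, a KSM entry is
16 bytes, and a WQE basic block (WQEBB) is 64 bytes, so a WQEBB holds 8 MTT
entries but only 4 KSM entries. A minimal standalone sketch of that
arithmetic, with illustrative helper names that are not the driver's:

#include <stdio.h>

#define OCTWORD_BYTES 16 /* MLX5_OCTWORD */
#define WQEBB_BYTES   64 /* MLX5_SEND_WQE_BB */
#define MTT_BYTES      8 /* sizeof(struct mlx5_mtt) */
#define KSM_BYTES     16 /* sizeof(struct mlx5_ksm) */

/* Entries per WQEBB is the alignment unit: 8 for MTT, 4 for KSM,
 * i.e. the smaller alignment that KSM requires.
 */
static unsigned int wqe_align(unsigned int entry_bytes)
{
	return WQEBB_BYTES / entry_bytes;
}

/* Round an entry count up to whole WQEBBs. */
static unsigned int align_entries(unsigned int n, unsigned int entry_bytes)
{
	unsigned int a = wqe_align(entry_bytes);

	return (n + a - 1) / a * a;
}

int main(void)
{
	printf("MTT: align %u, 9 entries -> %u\n",
	       wqe_align(MTT_BYTES), align_entries(9, MTT_BYTES)); /* 8, 16 */
	printf("KSM: align %u, 9 entries -> %u\n",
	       wqe_align(KSM_BYTES), align_entries(9, KSM_BYTES)); /* 4, 12 */
	return 0;
}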
Signed-off-by: Maxim Mikityanskiy <maximmi@nvidia.com>
Reviewed-by: Tariq Toukan <tariqt@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
parent 168723c1
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -109,12 +109,8 @@ struct page_pool;
 #define MLX5_MPWRQ_MAX_PAGES_PER_WQE \
 	rounddown_pow_of_two(MLX5_UMR_MAX_MTT_SPACE / sizeof(struct mlx5_mtt))
 
-#define MLX5_ALIGN_MTTS(mtts)		(ALIGN(mtts, 8))
-#define MLX5_ALIGNED_MTTS_OCTW(mtts)	((mtts) / 2)
-#define MLX5_MTT_OCTW(mtts)		(MLX5_ALIGNED_MTTS_OCTW(MLX5_ALIGN_MTTS(mtts)))
-#define MLX5_KSM_OCTW(ksms)		(ksms)
 #define MLX5E_MAX_RQ_NUM_MTTS \
-	(ALIGN_DOWN(U16_MAX, 4) * 2) /* So that MLX5_MTT_OCTW(num_mtts) fits into u16 */
+	(ALIGN_DOWN(U16_MAX, 4) * 2) /* Fits into u16 and aligned by WQEBB. */
 #define MLX5E_MAX_RQ_NUM_KSMS (U16_MAX - 1) /* So that num_ksms fits into u16. */
 #define MLX5E_ORDER2_MAX_PACKET_MTU (order_base_2(10 * 1024))
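
To see the over-mapping the message refers to: the old MLX5_MTT_OCTW rounded
the MTT count up to a multiple of 8 before halving it, so it could report up
to three octwords more than the MTTs actually fill. A standalone check of
that arithmetic, not driver code:

#include <assert.h>

/* Old macro: round MTTs up to 8, then two 8-byte MTTs per octword. */
#define OLD_MTT_OCTW(mtts) (((((mtts) + 7) / 8) * 8) / 2)
/* New calculation: MTT bytes divided by the 16-byte octword. */
#define NEW_MTT_OCTW(mtts) ((mtts) * 8 / 16)

int main(void)
{
	/* 10 MTTs fill exactly 5 octwords, but the old macro reported 8,
	 * so ucseg->xlt_octowords could cover 3 octwords of garbage.
	 */
	assert(OLD_MTT_OCTW(10) == 8);
	assert(NEW_MTT_OCTW(10) == 5);
	return 0;
}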
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c
@@ -91,6 +91,13 @@ u8 mlx5e_mpwrq_pages_per_wqe(struct mlx5_core_dev *mdev, u8 page_shift,
 
 	pages_per_wqe = log_wqe_sz > page_shift ? (1 << (log_wqe_sz - page_shift)) : 1;
 
+	/* Two MTTs are needed to form an octword. The number of MTTs is encoded
+	 * in octwords in a UMR WQE, so we need at least two to avoid mapping
+	 * garbage addresses.
+	 */
+	if (WARN_ON_ONCE(pages_per_wqe < 2 && umr_mode == MLX5E_MPWRQ_UMR_MODE_ALIGNED))
+		pages_per_wqe = 2;
+
 	/* Sanity check for further calculations to succeed. */
 	BUILD_BUG_ON(MLX5_MPWRQ_MAX_PAGES_PER_WQE > 64);
 	if (WARN_ON_ONCE(pages_per_wqe > MLX5_MPWRQ_MAX_PAGES_PER_WQE))
@@ -131,7 +138,8 @@ u8 mlx5e_mpwrq_mtts_per_wqe(struct mlx5_core_dev *mdev, u8 page_shift,
 	 * MTU. These oversize packets are dropped by the driver at a later
 	 * stage.
 	 */
-	return MLX5_ALIGN_MTTS(pages_per_wqe + 1);
+	return ALIGN(pages_per_wqe + 1,
+		     MLX5_SEND_WQE_BB / mlx5e_mpwrq_umr_entry_size(umr_mode));
 }
 
 u32 mlx5e_mpwrq_max_num_entries(struct mlx5_core_dev *mdev,
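
Modeled outside the driver, the two params.c hunks combine as follows; a
simplified sketch assuming the 8-byte MTT and 16-byte KSM entry sizes, with
the enum and helpers as stand-ins for the driver's:

enum umr_mode { UMR_ALIGNED /* MTT */, UMR_UNALIGNED /* KSM */ };

static unsigned int entry_size(enum umr_mode mode)
{
	return mode == UMR_ALIGNED ? 8 : 16;
}

static unsigned int mtts_per_wqe(unsigned int pages_per_wqe, enum umr_mode mode)
{
	unsigned int align = 64 / entry_size(mode); /* WQEBB / entry size */

	/* First hunk: two 8-byte MTTs form one 16-byte octword, so the
	 * aligned (MTT) mode needs at least two entries per WQE.
	 */
	if (mode == UMR_ALIGNED && pages_per_wqe < 2)
		pages_per_wqe = 2;

	/* Second hunk: one extra entry for a packet that overflows the WQE,
	 * rounded up to a whole WQEBB (multiples of 8 for MTT, 4 for KSM).
	 */
	return (pages_per_wqe + 1 + align - 1) / align * align;
}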
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c
@@ -66,9 +66,10 @@ int mlx5e_xsk_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix)
 	umr_wqe->ctrl.opmod_idx_opcode =
 		cpu_to_be32((icosq->pc << MLX5_WQE_CTRL_WQE_INDEX_SHIFT) | MLX5_OPCODE_UMR);
 
+	/* Optimized for speed: keep in sync with mlx5e_mpwrq_umr_entry_size. */
 	offset = ix * rq->mpwqe.mtts_per_wqe;
 	if (likely(rq->mpwqe.umr_mode == MLX5E_MPWRQ_UMR_MODE_ALIGNED))
-		offset = MLX5_ALIGNED_MTTS_OCTW(offset);
+		offset = offset * sizeof(struct mlx5_mtt) / MLX5_OCTWORD;
 	umr_wqe->uctrl.xlt_offset = cpu_to_be16(offset);
 
 	icosq->db.wqe_info[pi] = (struct mlx5e_icosq_wqe_info) {
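
xlt_offset is measured in octwords: with 8-byte MTT entries the entry offset
halves, while 16-byte KSM entries already map one octword each, which is why
only the aligned branch converts. A standalone sketch of that conversion,
with illustrative names:

#define OCTWORD_BYTES 16 /* MLX5_OCTWORD */

/* Same arithmetic as the aligned branch above: an entry offset,
 * expressed in octwords.
 */
static unsigned int xlt_octword_offset(unsigned int entry_offset,
				       unsigned int entry_bytes)
{
	return entry_offset * entry_bytes / OCTWORD_BYTES;
}
/* xlt_octword_offset(64, 8) == 32 for MTT; for KSM, 64 entries stay 64. */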
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -205,14 +205,11 @@ static void mlx5e_disable_blocking_events(struct mlx5e_priv *priv)
 
 static u16 mlx5e_mpwrq_umr_octowords(u32 entries, enum mlx5e_mpwrq_umr_mode umr_mode)
 {
-	switch (umr_mode) {
-	case MLX5E_MPWRQ_UMR_MODE_ALIGNED:
-		return MLX5_MTT_OCTW(entries);
-	case MLX5E_MPWRQ_UMR_MODE_UNALIGNED:
-		return MLX5_KSM_OCTW(entries);
-	}
-	WARN_ONCE(1, "MPWRQ UMR mode %d is not known\n", umr_mode);
-	return 0;
+	u8 umr_entry_size = mlx5e_mpwrq_umr_entry_size(umr_mode);
+
+	WARN_ON_ONCE(entries * umr_entry_size % MLX5_OCTWORD);
+
+	return entries * umr_entry_size / MLX5_OCTWORD;
 }
 
 static inline void mlx5e_build_umr_wqe(struct mlx5e_rq *rq,
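
The rewritten helper replaces the per-mode switch with a single formula, and
its behavior for both modes can be checked in isolation. A minimal sketch
assuming the 8-byte MTT and 16-byte KSM entry sizes:

#include <assert.h>

#define OCTWORD_BYTES 16 /* MLX5_OCTWORD */

/* Octowords spanned by `entries` UMR entries of either size. The modulo
 * check mirrors the WARN_ON_ONCE above: the MTT count must be even, or
 * half an octword of garbage would be mapped.
 */
static unsigned int umr_octowords(unsigned int entries, unsigned int entry_bytes)
{
	assert(entries * entry_bytes % OCTWORD_BYTES == 0);
	return entries * entry_bytes / OCTWORD_BYTES;
}

int main(void)
{
	assert(umr_octowords(16, 8) == 8);   /* MTT: two entries per octword */
	assert(umr_octowords(16, 16) == 16); /* KSM: one entry per octword */
	return 0;
}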
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
@@ -682,7 +682,7 @@ static int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix)
 		cpu_to_be32((sq->pc << MLX5_WQE_CTRL_WQE_INDEX_SHIFT) |
 			    MLX5_OPCODE_UMR);
 
-	offset = MLX5_ALIGNED_MTTS_OCTW(ix * rq->mpwqe.mtts_per_wqe);
+	offset = (ix * rq->mpwqe.mtts_per_wqe) * sizeof(struct mlx5_mtt) / MLX5_OCTWORD;
 	umr_wqe->uctrl.xlt_offset = cpu_to_be16(offset);
 
 	sq->db.wqe_info[pi] = (struct mlx5e_icosq_wqe_info) {