Commit ab6013a5 authored by Dragos Tatulea, committed by Jakub Kicinski

net/mlx5e: SHAMPO, Increase timeout to improve latency

During latency tests (netperf TCP_RR), a 30% degradation of HW GRO versus SW
GRO was observed. This is due to SHAMPO triggering timeout filler CQEs
instead of delivering the CQE for the packet.

Having a short timeout for SHAMPO doesn't bring any benefits as it is
the driver that does the merging, not the hardware. On the contrary, it
can have a negative impact: additional filler CQEs are generated due to
the timeout. As there is no way to disable this timeout, this change
sets it to the maximum value.

Instead of using the packet_merge.timeout parameter which is also used
for LRO, set the value directly when filling in the rest of the SHAMPO
parameters in mlx5e_build_rq_param().

Fixes: 99be5617 ("net/mlx5e: SHAMPO, Re-enable HW-GRO")
Signed-off-by: Dragos Tatulea <dtatulea@nvidia.com>
Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
Link: https://patch.msgid.link/20240808144107.2095424-3-tariqt@nvidia.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
parent c31fe2b5
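For illustration, here is a minimal standalone sketch of the selection logic
that mlx5e_choose_lro_timeout implements: the device exposes an ascending list
of supported timeout periods, and the function returns the first period that
is >= the wanted value, falling back to the largest one. The period values
below are invented for the example; the real driver reads them from the
lro_timer_supported_periods device capability.

/* Standalone sketch only: the period values are made up for this
 * example and are not real device capabilities. */
#include <stdio.h>

#define LRO_TIMEOUT_ARR_SIZE 4

static unsigned int choose_timeout(const unsigned int *periods,
				   unsigned int wanted)
{
	int i;

	/* Periods are ascending: pick the first one >= wanted,
	 * or the largest if none qualifies. */
	for (i = 0; i < LRO_TIMEOUT_ARR_SIZE - 1; i++)
		if (periods[i] >= wanted)
			break;

	return periods[i];
}

int main(void)
{
	unsigned int periods[LRO_TIMEOUT_ARR_SIZE] = { 8, 16, 32, 1024 };

	/* With a wanted timeout of 1024 (MLX5E_DEFAULT_SHAMPO_TIMEOUT),
	 * the largest supported period is selected. */
	printf("chosen period: %u\n", choose_timeout(periods, 1024));
	return 0;
}

Passing a deliberately large wanted value such as MLX5E_DEFAULT_SHAMPO_TIMEOUT
makes the loop run past every smaller entry, so the reservation timeout is
effectively pinned to the device's maximum supported period.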
drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -130,7 +130,7 @@ struct page_pool;
 #define MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE_MPW 0x2
 #define MLX5E_DEFAULT_LRO_TIMEOUT 32
-#define MLX5E_LRO_TIMEOUT_ARR_SIZE 4
+#define MLX5E_DEFAULT_SHAMPO_TIMEOUT 1024
 #define MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC 0x10
 #define MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC_FROM_CQE 0x3
drivers/net/ethernet/mellanox/mlx5/core/en/params.c
@@ -928,7 +928,7 @@ int mlx5e_build_rq_param(struct mlx5_core_dev *mdev,
 		MLX5_SET(wq, wq, log_headers_entry_size,
 			 mlx5e_shampo_get_log_hd_entry_size(mdev, params));
 		MLX5_SET(rqc, rqc, reservation_timeout,
-			 params->packet_merge.timeout);
+			 mlx5e_choose_lro_timeout(mdev, MLX5E_DEFAULT_SHAMPO_TIMEOUT));
 		MLX5_SET(rqc, rqc, shampo_match_criteria_type,
 			 params->packet_merge.shampo.match_criteria_type);
 		MLX5_SET(rqc, rqc, shampo_no_match_alignment_granularity,
@@ -1087,6 +1087,20 @@ static u32 mlx5e_shampo_icosq_sz(struct mlx5_core_dev *mdev,
 	return wqebbs;
 }
 
+#define MLX5E_LRO_TIMEOUT_ARR_SIZE 4
+
+u32 mlx5e_choose_lro_timeout(struct mlx5_core_dev *mdev, u32 wanted_timeout)
+{
+	int i;
+
+	/* The supported periods are organized in ascending order */
+	for (i = 0; i < MLX5E_LRO_TIMEOUT_ARR_SIZE - 1; i++)
+		if (MLX5_CAP_ETH(mdev, lro_timer_supported_periods[i]) >= wanted_timeout)
+			break;
+
+	return MLX5_CAP_ETH(mdev, lro_timer_supported_periods[i]);
+}
+
 static u32 mlx5e_mpwrq_total_umr_wqebbs(struct mlx5_core_dev *mdev,
 					struct mlx5e_params *params,
 					struct mlx5e_xsk_param *xsk)
drivers/net/ethernet/mellanox/mlx5/core/en/params.h
@@ -108,6 +108,7 @@ u32 mlx5e_shampo_hd_per_wqe(struct mlx5_core_dev *mdev,
 u32 mlx5e_shampo_hd_per_wq(struct mlx5_core_dev *mdev,
 			   struct mlx5e_params *params,
 			   struct mlx5e_rq_param *rq_param);
+u32 mlx5e_choose_lro_timeout(struct mlx5_core_dev *mdev, u32 wanted_timeout);
 u8 mlx5e_mpwqe_get_log_stride_size(struct mlx5_core_dev *mdev,
 				   struct mlx5e_params *params,
 				   struct mlx5e_xsk_param *xsk);
drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -5167,18 +5167,6 @@ const struct net_device_ops mlx5e_netdev_ops = {
 #endif
 };
 
-static u32 mlx5e_choose_lro_timeout(struct mlx5_core_dev *mdev, u32 wanted_timeout)
-{
-	int i;
-
-	/* The supported periods are organized in ascending order */
-	for (i = 0; i < MLX5E_LRO_TIMEOUT_ARR_SIZE - 1; i++)
-		if (MLX5_CAP_ETH(mdev, lro_timer_supported_periods[i]) >= wanted_timeout)
-			break;
-
-	return MLX5_CAP_ETH(mdev, lro_timer_supported_periods[i]);
-}
-
 void mlx5e_build_nic_params(struct mlx5e_priv *priv, struct mlx5e_xsk *xsk, u16 mtu)
 {
 	struct mlx5e_params *params = &priv->channels.params;