Commit 4a2da0b8 authored by Parav Pandit's avatar Parav Pandit Committed by Doug Ledford

IB/mlx5: Add debug control parameters for congestion control

This patch adds debug control parameters for congestion control which
can be read or written through debugfs. They are for reaction point and
notification point nodes.

These control parameters are as below:
 +------------------------------+-----------------------------------------+
 |      Name                    |           Description                   |
 |------------------------------+-----------------------------------------|
 |rp_clamp_tgt_rate             | When set, target rate is updated to     |
 |                              | current rate                            |
 |------------------------------+-----------------------------------------|
 |rp_clamp_tgt_rate_ati         | When set, update target rate based on   |
 |                              | timer as well                           |
 |------------------------------+-----------------------------------------|
 |rp_time_reset                 | Time between rate increases if no       |
 |                              | CNP is received, unit in usec           |
 |------------------------------+-----------------------------------------|
 |rp_byte_reset                 | Number of bytes between rate increase if|
 |                              | no CNP is received                      |
 |------------------------------+-----------------------------------------|
 |rp_threshold                  | Threshold for reaction point rate       |
 |                              | control                                 |
 |------------------------------+-----------------------------------------|
 |rp_ai_rate                    | Rate for target rate, unit in Mbps      |
 |------------------------------+-----------------------------------------|
 |rp_hai_rate                   | Rate for hyper increase state           |
 |                              | unit in Mbps                            |
 |------------------------------+-----------------------------------------|
 |rp_min_dec_fac                | Minimum factor by which the current     |
 |                              | transmit rate can be changed when       |
 |                              | processing a CNP, unit is percentage    |
 |------------------------------+-----------------------------------------|
 |rp_min_rate                   | Minimum value for rate limit,           |
 |                              | unit in Mbps                            |
 |------------------------------+-----------------------------------------|
 |rp_rate_to_set_on_first_cnp   | Rate that is set when first CNP is      |
 |                              | received, unit is Mbps                  |
 |------------------------------+-----------------------------------------|
 |rp_dce_tcp_g                  | Used to calculate alpha                 |
 |------------------------------+-----------------------------------------|
 |rp_dce_tcp_rtt                | Time between updates of alpha value,    |
 |                              | unit is usec                            |
 |------------------------------+-----------------------------------------|
 |rp_rate_reduce_monitor_period | Minimum time between consecutive rate   |
 |                              | reductions                              |
 |------------------------------+-----------------------------------------|
 |rp_initial_alpha_value        | Initial value of alpha                  |
 |------------------------------+-----------------------------------------|
 |rp_gd                         | When CNP is received, flow rate is      |
 |                              | reduced based on gd, rp_gd is given as  |
 |                              | log2(rp_gd)                             |
 |------------------------------+-----------------------------------------|
 |np_cnp_dscp                   | dscp code point for generated cnp       |
 |------------------------------+-----------------------------------------|
 |np_cnp_prio_mode              | cnp priority mode                       |
 |------------------------------+-----------------------------------------|
 |np_cnp_prio                   | 802.1p priority for generated cnp       |
 +------------------------------+-----------------------------------------+
Signed-off-by: Parav Pandit <parav@mellanox.com>
Reviewed-by: Daniel Jurgens <danielj@mellanox.com>
Reviewed-by: Eli Cohen <eli@mellanox.com>
Signed-off-by: Leon Romanovsky <leon@kernel.org>
Signed-off-by: Doug Ledford <dledford@redhat.com>
parent fd65f1b8
obj-$(CONFIG_MLX5_INFINIBAND) += mlx5_ib.o
mlx5_ib-y := main.o cq.o doorbell.o qp.o mem.o srq.o mr.o ah.o mad.o gsi.o ib_virt.o cmd.o
mlx5_ib-y := main.o cq.o doorbell.o qp.o mem.o srq.o mr.o ah.o mad.o gsi.o ib_virt.o cmd.o cong.o
mlx5_ib-$(CONFIG_INFINIBAND_ON_DEMAND_PAGING) += odp.o
......@@ -57,3 +57,23 @@ int mlx5_cmd_query_cong_counter(struct mlx5_core_dev *dev,
MLX5_SET(query_cong_statistics_in, in, clear, reset);
return mlx5_cmd_exec(dev, in, sizeof(in), out, out_size);
}
/*
 * Issue a QUERY_CONG_PARAMS firmware command.
 *
 * @dev:        core device the command is executed on
 * @cong_point: value written to the cong_protocol field of the command
 * @out:        caller-provided buffer that receives the command output
 * @out_size:   size of @out in bytes
 *
 * Returns the result of mlx5_cmd_exec().
 */
int mlx5_cmd_query_cong_params(struct mlx5_core_dev *dev, int cong_point,
			       void *out, int out_size)
{
	/* Zero-initialised so every field not set below is sent as 0. */
	u32 cmd[MLX5_ST_SZ_DW(query_cong_params_in)] = { };
	int err;

	MLX5_SET(query_cong_params_in, cmd, opcode,
		 MLX5_CMD_OP_QUERY_CONG_PARAMS);
	MLX5_SET(query_cong_params_in, cmd, cong_protocol, cong_point);

	err = mlx5_cmd_exec(dev, cmd, sizeof(cmd), out, out_size);
	return err;
}
/*
 * Execute a caller-built command that modifies congestion parameters.
 *
 * @dev:     core device the command is executed on
 * @in:      fully prepared command input; passed to firmware unchanged
 * @in_size: size of @in in bytes
 *
 * The command output is collected in a local buffer and not handed back
 * to the caller; only the result of mlx5_cmd_exec() is returned.
 */
int mlx5_cmd_modify_cong_params(struct mlx5_core_dev *dev,
				void *in, int in_size)
{
	u32 reply[MLX5_ST_SZ_DW(modify_cong_params_out)] = { };
	int err;

	err = mlx5_cmd_exec(dev, in, in_size, reply, sizeof(reply));
	return err;
}
......@@ -39,4 +39,8 @@
int mlx5_cmd_null_mkey(struct mlx5_core_dev *dev, u32 *null_mkey);
/* @reset is written to the "clear" field of the statistics query. */
int mlx5_cmd_query_cong_counter(struct mlx5_core_dev *dev,
				bool reset, void *out, int out_size);
/*
 * Query congestion parameters; @cong_point is placed in the command's
 * cong_protocol field and the output is written to @out (@out_size bytes).
 * Returns the result of the command execution.
 */
int mlx5_cmd_query_cong_params(struct mlx5_core_dev *dev, int cong_point,
			       void *out, int out_size);
/*
 * Execute the caller-built modify command in @in (@in_size bytes).
 * The command output is not returned to the caller.
 * Returns the result of the command execution.
 */
int mlx5_cmd_modify_cong_params(struct mlx5_core_dev *dev,
				void *in, int in_size);
#endif /* MLX5_IB_CMD_H */
This diff is collapsed.
......@@ -3838,9 +3838,13 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
goto err_odp;
}
err = mlx5_ib_init_cong_debugfs(dev);
if (err)
goto err_cnt;
dev->mdev->priv.uar = mlx5_get_uars_page(dev->mdev);
if (!dev->mdev->priv.uar)
goto err_cnt;
goto err_cong;
err = mlx5_alloc_bfreg(dev->mdev, &dev->bfreg, false, false);
if (err)
......@@ -3889,6 +3893,8 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
mlx5_put_uars_page(dev->mdev, dev->mdev->priv.uar);
err_cnt:
mlx5_ib_cleanup_cong_debugfs(dev);
err_cong:
if (MLX5_CAP_GEN(dev->mdev, max_qp_cnt))
mlx5_ib_dealloc_counters(dev);
......@@ -3923,6 +3929,7 @@ static void mlx5_ib_remove(struct mlx5_core_dev *mdev, void *context)
mlx5_free_bfreg(dev->mdev, &dev->fp_bfreg);
mlx5_free_bfreg(dev->mdev, &dev->bfreg);
mlx5_put_uars_page(dev->mdev, mdev->priv.uar);
mlx5_ib_cleanup_cong_debugfs(dev);
if (MLX5_CAP_GEN(dev->mdev, max_qp_cnt))
mlx5_ib_dealloc_counters(dev);
destroy_umrc_res(dev);
......
......@@ -619,6 +619,39 @@ struct mlx5_roce {
enum ib_port_state last_port_state;
};
/*
 * Per-parameter state for one congestion control debug parameter.
 * NOTE(review): the code that fills in and consumes these fields is not
 * visible here; the field notes below are assumptions to confirm.
 */
struct mlx5_ib_dbg_param {
/* presumably this parameter's index in enum mlx5_ib_dbg_cc_types - TODO confirm */
int offset;
/* IB device this parameter belongs to */
struct mlx5_ib_dev *dev;
/* presumably the debugfs entry exposing this parameter - TODO confirm */
struct dentry *dentry;
};
/*
 * Identifiers for the congestion control debug parameters.
 * RP_* entries are reaction point parameters and NP_* entries are
 * notification point parameters; each name mirrors the corresponding
 * debugfs parameter (e.g. MLX5_IB_DBG_CC_RP_AI_RATE <-> rp_ai_rate).
 * Values are assigned sequentially from 0, so the order must not change
 * without updating every user of these indices.
 */
enum mlx5_ib_dbg_cc_types {
MLX5_IB_DBG_CC_RP_CLAMP_TGT_RATE,
MLX5_IB_DBG_CC_RP_CLAMP_TGT_RATE_ATI,
MLX5_IB_DBG_CC_RP_TIME_RESET,
MLX5_IB_DBG_CC_RP_BYTE_RESET,
MLX5_IB_DBG_CC_RP_THRESHOLD,
MLX5_IB_DBG_CC_RP_AI_RATE,
MLX5_IB_DBG_CC_RP_HAI_RATE,
MLX5_IB_DBG_CC_RP_MIN_DEC_FAC,
MLX5_IB_DBG_CC_RP_MIN_RATE,
MLX5_IB_DBG_CC_RP_RATE_TO_SET_ON_FIRST_CNP,
MLX5_IB_DBG_CC_RP_DCE_TCP_G,
MLX5_IB_DBG_CC_RP_DCE_TCP_RTT,
MLX5_IB_DBG_CC_RP_RATE_REDUCE_MONITOR_PERIOD,
MLX5_IB_DBG_CC_RP_INITIAL_ALPHA_VALUE,
MLX5_IB_DBG_CC_RP_GD,
MLX5_IB_DBG_CC_NP_CNP_DSCP,
MLX5_IB_DBG_CC_NP_CNP_PRIO_MODE,
MLX5_IB_DBG_CC_NP_CNP_PRIO,
/* Number of parameters above; keep last. Used to size parameter arrays. */
MLX5_IB_DBG_CC_MAX,
};
/*
 * Container for all congestion control debug parameters of one device.
 */
struct mlx5_ib_dbg_cc_params {
/* presumably the debugfs directory holding the parameter files - TODO confirm */
struct dentry *root;
/* one slot per enum mlx5_ib_dbg_cc_types value */
struct mlx5_ib_dbg_param params[MLX5_IB_DBG_CC_MAX];
};
struct mlx5_ib_dev {
struct ib_device ib_dev;
struct mlx5_core_dev *mdev;
......@@ -655,6 +688,7 @@ struct mlx5_ib_dev {
struct mlx5_ib_port *port;
struct mlx5_sq_bfreg bfreg;
struct mlx5_sq_bfreg fp_bfreg;
struct mlx5_ib_dbg_cc_params *dbg_cc_params;
/* protect the user_td */
struct mutex lb_mutex;
......@@ -909,6 +943,9 @@ __be16 mlx5_get_roce_udp_sport(struct mlx5_ib_dev *dev, u8 port_num,
int mlx5_get_roce_gid_type(struct mlx5_ib_dev *dev, u8 port_num,
int index, enum ib_gid_type *gid_type);
/*
 * Congestion control debugfs setup and teardown for an IB device.
 * mlx5_ib_init_cong_debugfs() returns non-zero on failure (the device add
 * path treats any non-zero result as an error); the cleanup routine is
 * called from both the add error unwinding and the device remove path.
 */
void mlx5_ib_cleanup_cong_debugfs(struct mlx5_ib_dev *dev);
int mlx5_ib_init_cong_debugfs(struct mlx5_ib_dev *dev);
/* GSI QP helper functions */
struct ib_qp *mlx5_ib_gsi_create_qp(struct ib_pd *pd,
struct ib_qp_init_attr *init_attr);
......
......@@ -1188,7 +1188,8 @@ struct mlx5_ifc_cong_control_r_roce_ecn_np_bits {
u8 reserved_at_c0[0x12];
u8 cnp_dscp[0x6];
u8 reserved_at_d8[0x5];
u8 reserved_at_d8[0x4];
u8 cnp_prio_mode[0x1];
u8 cnp_802p_prio[0x3];
u8 reserved_at_e0[0x720];
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment