Commit 0109841f authored by Jakub Kicinski's avatar Jakub Kicinski

Merge tag 'mlx5-updates-2021-12-02' of git://git.kernel.org/pub/scm/linux/kernel/git/saeed/linux

Saeed Mahameed says:

====================
mlx5-updates-2021-12-02

Misc updates to mlx5 driver

1) Various code cleanups
2) Error path handling fixes of latest features
3) Print more information on pci error handling
4) Dynamically resize flow counters query buffer
====================

Link: https://lore.kernel.org/r/20211203005622.183325-1-saeed@kernel.orgSigned-off-by: default avatarJakub Kicinski <kuba@kernel.org>
parents fc993be3 b247f32a
......@@ -115,6 +115,7 @@ config MLX5_TC_CT
config MLX5_TC_SAMPLE
bool "MLX5 TC sample offload support"
depends on MLX5_CLS_ACT
depends on PSAMPLE=y || PSAMPLE=n || MLX5_CORE=m
default y
help
Say Y here if you want to support offloading sample rules via tc
......
......@@ -173,7 +173,7 @@ struct page_pool;
#define MLX5E_KLM_ENTRIES_PER_WQE(wqe_size)\
ALIGN_DOWN(MLX5E_KLM_MAX_ENTRIES_PER_WQE(wqe_size), MLX5_UMR_KLM_ALIGNMENT)
#define MLX5E_MAX_KLM_PER_WQE(mdev) \
#define MLX5E_MAX_KLM_PER_WQE \
MLX5E_KLM_ENTRIES_PER_WQE(MLX5E_TX_MPW_MAX_NUM_DS << MLX5_MKEY_BSF_OCTO_SIZE)
#define MLX5E_MSG_LEVEL NETIF_MSG_LINK
......@@ -1057,7 +1057,6 @@ int mlx5e_safe_switch_params(struct mlx5e_priv *priv,
mlx5e_fp_preactivate preactivate,
void *context, bool reset);
int mlx5e_update_tx_netdev_queues(struct mlx5e_priv *priv);
int mlx5e_num_channels_changed(struct mlx5e_priv *priv);
int mlx5e_num_channels_changed_ctx(struct mlx5e_priv *priv, void *context);
void mlx5e_activate_priv_channels(struct mlx5e_priv *priv);
void mlx5e_deactivate_priv_channels(struct mlx5e_priv *priv);
......
......@@ -717,7 +717,7 @@ static u32 mlx5e_shampo_icosq_sz(struct mlx5_core_dev *mdev,
int wq_size = BIT(MLX5_GET(wq, wqc, log_wq_sz));
u32 wqebbs;
max_klm_per_umr = MLX5E_MAX_KLM_PER_WQE(mdev);
max_klm_per_umr = MLX5E_MAX_KLM_PER_WQE;
max_hd_per_wqe = mlx5e_shampo_hd_per_wqe(mdev, params, rq_param);
max_num_of_umr_per_wqe = max_hd_per_wqe / max_klm_per_umr;
rest = max_hd_per_wqe % max_klm_per_umr;
......
......@@ -2598,7 +2598,7 @@ static void mlx5e_set_default_xps_cpumasks(struct mlx5e_priv *priv,
}
}
int mlx5e_num_channels_changed(struct mlx5e_priv *priv)
static int mlx5e_num_channels_changed(struct mlx5e_priv *priv)
{
u16 count = priv->channels.params.num_channels;
int err;
......
......@@ -619,7 +619,7 @@ static int mlx5e_alloc_rx_hd_mpwqe(struct mlx5e_rq *rq)
struct mlx5e_icosq *sq = rq->icosq;
int i, err, max_klm_entries, len;
max_klm_entries = MLX5E_MAX_KLM_PER_WQE(rq->mdev);
max_klm_entries = MLX5E_MAX_KLM_PER_WQE;
klm_entries = bitmap_find_window(shampo->bitmap,
shampo->hd_per_wqe,
shampo->hd_per_wq, shampo->pi);
......
......@@ -590,6 +590,7 @@ static int esw_qos_create(struct mlx5_eswitch *esw, struct netlink_ext_ack *exta
if (IS_ERR(esw->qos.group0)) {
esw_warn(dev, "E-Switch create rate group 0 failed (%ld)\n",
PTR_ERR(esw->qos.group0));
err = PTR_ERR(esw->qos.group0);
goto err_group0;
}
}
......
......@@ -38,9 +38,10 @@
#include "fs_cmd.h"
#define MLX5_FC_STATS_PERIOD msecs_to_jiffies(1000)
#define MLX5_FC_BULK_QUERY_ALLOC_PERIOD msecs_to_jiffies(180 * 1000)
/* Max number of counters to query in bulk read is 32K */
#define MLX5_SW_MAX_COUNTERS_BULK BIT(15)
#define MLX5_SF_NUM_COUNTERS_BULK 8
#define MLX5_INIT_COUNTERS_BULK 8
#define MLX5_FC_POOL_MAX_THRESHOLD BIT(18)
#define MLX5_FC_POOL_USED_BUFF_RATIO 10
......@@ -145,13 +146,15 @@ static void mlx5_fc_stats_remove(struct mlx5_core_dev *dev,
spin_unlock(&fc_stats->counters_idr_lock);
}
static int get_max_bulk_query_len(struct mlx5_core_dev *dev)
static int get_init_bulk_query_len(struct mlx5_core_dev *dev)
{
int num_counters_bulk = mlx5_core_is_sf(dev) ?
MLX5_SF_NUM_COUNTERS_BULK :
MLX5_SW_MAX_COUNTERS_BULK;
return min_t(int, MLX5_INIT_COUNTERS_BULK,
(1 << MLX5_CAP_GEN(dev, log_max_flow_counter_bulk)));
}
return min_t(int, num_counters_bulk,
static int get_max_bulk_query_len(struct mlx5_core_dev *dev)
{
return min_t(int, MLX5_SW_MAX_COUNTERS_BULK,
(1 << MLX5_CAP_GEN(dev, log_max_flow_counter_bulk)));
}
......@@ -177,7 +180,7 @@ static void mlx5_fc_stats_query_counter_range(struct mlx5_core_dev *dev,
{
struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats;
bool query_more_counters = (first->id <= last_id);
int max_bulk_len = get_max_bulk_query_len(dev);
int cur_bulk_len = fc_stats->bulk_query_len;
u32 *data = fc_stats->bulk_query_out;
struct mlx5_fc *counter = first;
u32 bulk_base_id;
......@@ -189,7 +192,7 @@ static void mlx5_fc_stats_query_counter_range(struct mlx5_core_dev *dev,
bulk_base_id = counter->id & ~0x3;
/* number of counters to query inc. the last counter */
bulk_len = min_t(int, max_bulk_len,
bulk_len = min_t(int, cur_bulk_len,
ALIGN(last_id - bulk_base_id + 1, 4));
err = mlx5_cmd_fc_bulk_query(dev, bulk_base_id, bulk_len,
......@@ -230,6 +233,41 @@ static void mlx5_fc_release(struct mlx5_core_dev *dev, struct mlx5_fc *counter)
mlx5_fc_free(dev, counter);
}
static void mlx5_fc_stats_bulk_query_size_increase(struct mlx5_core_dev *dev)
{
struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats;
int max_bulk_len = get_max_bulk_query_len(dev);
unsigned long now = jiffies;
u32 *bulk_query_out_tmp;
int max_out_len;
if (fc_stats->bulk_query_alloc_failed &&
time_before(now, fc_stats->next_bulk_query_alloc))
return;
max_out_len = mlx5_cmd_fc_get_bulk_query_out_len(max_bulk_len);
bulk_query_out_tmp = kzalloc(max_out_len, GFP_KERNEL);
if (!bulk_query_out_tmp) {
mlx5_core_warn_once(dev,
"Can't increase flow counters bulk query buffer size, insufficient memory, bulk_size(%d)\n",
max_bulk_len);
fc_stats->bulk_query_alloc_failed = true;
fc_stats->next_bulk_query_alloc =
now + MLX5_FC_BULK_QUERY_ALLOC_PERIOD;
return;
}
kfree(fc_stats->bulk_query_out);
fc_stats->bulk_query_out = bulk_query_out_tmp;
fc_stats->bulk_query_len = max_bulk_len;
if (fc_stats->bulk_query_alloc_failed) {
mlx5_core_info(dev,
"Flow counters bulk query buffer size increased, bulk_size(%d)\n",
max_bulk_len);
fc_stats->bulk_query_alloc_failed = false;
}
}
static void mlx5_fc_stats_work(struct work_struct *work)
{
struct mlx5_core_dev *dev = container_of(work, struct mlx5_core_dev,
......@@ -247,15 +285,22 @@ static void mlx5_fc_stats_work(struct work_struct *work)
queue_delayed_work(fc_stats->wq, &fc_stats->work,
fc_stats->sampling_interval);
llist_for_each_entry(counter, addlist, addlist)
llist_for_each_entry(counter, addlist, addlist) {
mlx5_fc_stats_insert(dev, counter);
fc_stats->num_counters++;
}
llist_for_each_entry_safe(counter, tmp, dellist, dellist) {
mlx5_fc_stats_remove(dev, counter);
mlx5_fc_release(dev, counter);
fc_stats->num_counters--;
}
if (fc_stats->bulk_query_len < get_max_bulk_query_len(dev) &&
fc_stats->num_counters > get_init_bulk_query_len(dev))
mlx5_fc_stats_bulk_query_size_increase(dev);
if (time_before(now, fc_stats->next_query) ||
list_empty(&fc_stats->counters))
return;
......@@ -378,8 +423,8 @@ EXPORT_SYMBOL(mlx5_fc_destroy);
int mlx5_init_fc_stats(struct mlx5_core_dev *dev)
{
struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats;
int max_bulk_len;
int max_out_len;
int init_bulk_len;
int init_out_len;
spin_lock_init(&fc_stats->counters_idr_lock);
idr_init(&fc_stats->counters_idr);
......@@ -387,11 +432,12 @@ int mlx5_init_fc_stats(struct mlx5_core_dev *dev)
init_llist_head(&fc_stats->addlist);
init_llist_head(&fc_stats->dellist);
max_bulk_len = get_max_bulk_query_len(dev);
max_out_len = mlx5_cmd_fc_get_bulk_query_out_len(max_bulk_len);
fc_stats->bulk_query_out = kzalloc(max_out_len, GFP_KERNEL);
init_bulk_len = get_init_bulk_query_len(dev);
init_out_len = mlx5_cmd_fc_get_bulk_query_out_len(init_bulk_len);
fc_stats->bulk_query_out = kzalloc(init_out_len, GFP_KERNEL);
if (!fc_stats->bulk_query_out)
return -ENOMEM;
fc_stats->bulk_query_len = init_bulk_len;
fc_stats->wq = create_singlethread_workqueue("mlx5_fc");
if (!fc_stats->wq)
......
......@@ -110,7 +110,7 @@ void mlx5i_cleanup(struct mlx5e_priv *priv)
static void mlx5i_grp_sw_update_stats(struct mlx5e_priv *priv)
{
struct mlx5e_sw_stats s = { 0 };
struct rtnl_link_stats64 s = {};
int i, j;
for (i = 0; i < priv->stats_nch; i++) {
......@@ -128,11 +128,17 @@ static void mlx5i_grp_sw_update_stats(struct mlx5e_priv *priv)
s.tx_packets += sq_stats->packets;
s.tx_bytes += sq_stats->bytes;
s.tx_queue_dropped += sq_stats->dropped;
s.tx_dropped += sq_stats->dropped;
}
}
memcpy(&priv->stats.sw, &s, sizeof(s));
memset(&priv->stats.sw, 0, sizeof(s));
priv->stats.sw.rx_packets = s.rx_packets;
priv->stats.sw.rx_bytes = s.rx_bytes;
priv->stats.sw.tx_packets = s.tx_packets;
priv->stats.sw.tx_bytes = s.tx_bytes;
priv->stats.sw.tx_queue_dropped = s.tx_dropped;
}
void mlx5i_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats)
......
......@@ -1604,12 +1604,28 @@ static void remove_one(struct pci_dev *pdev)
mlx5_devlink_free(devlink);
}
#define mlx5_pci_trace(dev, fmt, ...) ({ \
struct mlx5_core_dev *__dev = (dev); \
mlx5_core_info(__dev, "%s Device state = %d health sensors: %d pci_status: %d. " fmt, \
__func__, __dev->state, mlx5_health_check_fatal_sensors(__dev), \
__dev->pci_status, ##__VA_ARGS__); \
})
static const char *result2str(enum pci_ers_result result)
{
return result == PCI_ERS_RESULT_NEED_RESET ? "need reset" :
result == PCI_ERS_RESULT_DISCONNECT ? "disconnect" :
result == PCI_ERS_RESULT_RECOVERED ? "recovered" :
"unknown";
}
static pci_ers_result_t mlx5_pci_err_detected(struct pci_dev *pdev,
pci_channel_state_t state)
{
struct mlx5_core_dev *dev = pci_get_drvdata(pdev);
enum pci_ers_result res;
mlx5_core_info(dev, "%s was called\n", __func__);
mlx5_pci_trace(dev, "Enter, pci channel state = %d\n", state);
mlx5_enter_error_state(dev, false);
mlx5_error_sw_reset(dev);
......@@ -1617,8 +1633,11 @@ static pci_ers_result_t mlx5_pci_err_detected(struct pci_dev *pdev,
mlx5_drain_health_wq(dev);
mlx5_pci_disable_device(dev);
return state == pci_channel_io_perm_failure ?
res = state == pci_channel_io_perm_failure ?
PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_NEED_RESET;
mlx5_pci_trace(dev, "Exit, result = %d, %s\n", res, result2str(res));
return res;
}
/* wait for the device to show vital signs by waiting
......@@ -1652,28 +1671,34 @@ static int wait_vital(struct pci_dev *pdev)
static pci_ers_result_t mlx5_pci_slot_reset(struct pci_dev *pdev)
{
enum pci_ers_result res = PCI_ERS_RESULT_DISCONNECT;
struct mlx5_core_dev *dev = pci_get_drvdata(pdev);
int err;
mlx5_core_info(dev, "%s was called\n", __func__);
mlx5_pci_trace(dev, "Enter\n");
err = mlx5_pci_enable_device(dev);
if (err) {
mlx5_core_err(dev, "%s: mlx5_pci_enable_device failed with error code: %d\n",
__func__, err);
return PCI_ERS_RESULT_DISCONNECT;
goto out;
}
pci_set_master(pdev);
pci_restore_state(pdev);
pci_save_state(pdev);
if (wait_vital(pdev)) {
mlx5_core_err(dev, "%s: wait_vital timed out\n", __func__);
return PCI_ERS_RESULT_DISCONNECT;
err = wait_vital(pdev);
if (err) {
mlx5_core_err(dev, "%s: wait vital failed with error code: %d\n",
__func__, err);
goto out;
}
return PCI_ERS_RESULT_RECOVERED;
res = PCI_ERS_RESULT_RECOVERED;
out:
mlx5_pci_trace(dev, "Exit, err = %d, result = %d, %s\n", err, res, result2str(res));
return res;
}
static void mlx5_pci_resume(struct pci_dev *pdev)
......@@ -1681,14 +1706,12 @@ static void mlx5_pci_resume(struct pci_dev *pdev)
struct mlx5_core_dev *dev = pci_get_drvdata(pdev);
int err;
mlx5_core_info(dev, "%s was called\n", __func__);
mlx5_pci_trace(dev, "Enter, loading driver..\n");
err = mlx5_load_one(dev);
if (err)
mlx5_core_err(dev, "%s: mlx5_load_one failed with error code: %d\n",
__func__, err);
else
mlx5_core_info(dev, "%s: device recovered\n", __func__);
mlx5_pci_trace(dev, "Done, err = %d, device %s\n", err,
!err ? "recovered" : "Failed");
}
static const struct pci_error_handlers mlx5_err_handler = {
......
......@@ -247,7 +247,7 @@ int mlx5_sf_hw_table_init(struct mlx5_core_dev *dev)
{
struct mlx5_sf_hw_table *table;
u16 max_ext_fn = 0;
u16 ext_base_id;
u16 ext_base_id = 0;
u16 max_fn = 0;
u16 base_id;
int err;
......
......@@ -478,6 +478,10 @@ struct mlx5_fc_stats {
unsigned long next_query;
unsigned long sampling_interval; /* jiffies */
u32 *bulk_query_out;
int bulk_query_len;
size_t num_counters;
bool bulk_query_alloc_failed;
unsigned long next_bulk_query_alloc;
struct mlx5_fc_pool fc_pool;
};
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment