Commit de749452 authored by Jakub Kicinski

Merge tag 'mlx5-updates-2023-03-20' of git://git.kernel.org/pub/scm/linux/kernel/git/saeed/linux

Saeed Mahameed says:

====================
mlx5-updates-2023-03-20

mlx5 dynamic msix

This patch series adds support for dynamic msix vectors allocation in mlx5.

Eli Cohen Says:

================

The following series of patches modifies mlx5_core to work with the
dynamic MSIX API. Currently, mlx5_core allocates all the interrupt
vectors it needs and distributes them amongst the consumers. With the
introduction of dynamic MSIX support, which allows for allocation of
interrupts more than once, we now allocate vectors as we need them.
This allows other drivers running on top of mlx5_core to allocate
interrupt vectors for their own use. An example for this is mlx5_vdpa,
which uses these vectors to propagate interrupts directly from the
hardware to the vCPU [1].

As a preparation for using this series, a use after free issue is fixed
in lib/cpu_rmap.c and the allocator for rmap entries has been modified.
A complementary API for irq_cpu_rmap_add() has also been introduced.

[1] https://git.kernel.org/pub/scm/linux/kernel/git/saeed/linux.git/patch/?id=0f2bf1fcae96a83b8c5581854713c9fc3407556e

================

* tag 'mlx5-updates-2023-03-20' of git://git.kernel.org/pub/scm/linux/kernel/git/saeed/linux:
  net/mlx5: Provide external API for allocating vectors
  net/mlx5: Use one completion vector if eth is disabled
  net/mlx5: Refactor calculation of required completion vectors
  net/mlx5: Move devlink registration before mlx5_load
  net/mlx5: Use dynamic msix vectors allocation
  net/mlx5: Refactor completion irq request/release code
  net/mlx5: Improve naming of pci function vectors
  net/mlx5: Use newer affinity descriptor
  net/mlx5: Modify struct mlx5_irq to use struct msi_map
  net/mlx5: Fix wrong comment
  net/mlx5e: Coding style fix, add empty line
  lib: cpu_rmap: Add irq_cpu_rmap_remove to complement irq_cpu_rmap_add
  lib: cpu_rmap: Use allocator for rmap entries
  lib: cpu_rmap: Avoid use after free on rmap->obj array entries
====================

Link: https://lore.kernel.org/r/20230324231341.29808-1-saeed@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
parents e70f94c6 fb0a6a26
...@@ -35,6 +35,7 @@ ...@@ -35,6 +35,7 @@
#include <linux/mlx5/mlx5_ifc_vdpa.h> #include <linux/mlx5/mlx5_ifc_vdpa.h>
#include <linux/mlx5/vport.h> #include <linux/mlx5/vport.h>
#include "mlx5_core.h" #include "mlx5_core.h"
#include "devlink.h"
/* intf dev list mutex */ /* intf dev list mutex */
static DEFINE_MUTEX(mlx5_intf_mutex); static DEFINE_MUTEX(mlx5_intf_mutex);
...@@ -109,17 +110,6 @@ bool mlx5_eth_supported(struct mlx5_core_dev *dev) ...@@ -109,17 +110,6 @@ bool mlx5_eth_supported(struct mlx5_core_dev *dev)
return true; return true;
} }
static bool is_eth_enabled(struct mlx5_core_dev *dev)
{
union devlink_param_value val;
int err;
err = devl_param_driverinit_value_get(priv_to_devlink(dev),
DEVLINK_PARAM_GENERIC_ID_ENABLE_ETH,
&val);
return err ? false : val.vbool;
}
bool mlx5_vnet_supported(struct mlx5_core_dev *dev) bool mlx5_vnet_supported(struct mlx5_core_dev *dev)
{ {
if (!IS_ENABLED(CONFIG_MLX5_VDPA_NET)) if (!IS_ENABLED(CONFIG_MLX5_VDPA_NET))
...@@ -251,7 +241,7 @@ static const struct mlx5_adev_device { ...@@ -251,7 +241,7 @@ static const struct mlx5_adev_device {
.is_enabled = &is_ib_enabled }, .is_enabled = &is_ib_enabled },
[MLX5_INTERFACE_PROTOCOL_ETH] = { .suffix = "eth", [MLX5_INTERFACE_PROTOCOL_ETH] = { .suffix = "eth",
.is_supported = &mlx5_eth_supported, .is_supported = &mlx5_eth_supported,
.is_enabled = &is_eth_enabled }, .is_enabled = &mlx5_core_is_eth_enabled },
[MLX5_INTERFACE_PROTOCOL_ETH_REP] = { .suffix = "eth-rep", [MLX5_INTERFACE_PROTOCOL_ETH_REP] = { .suffix = "eth-rep",
.is_supported = &is_eth_rep_supported }, .is_supported = &is_eth_rep_supported },
[MLX5_INTERFACE_PROTOCOL_IB_REP] = { .suffix = "rdma-rep", [MLX5_INTERFACE_PROTOCOL_IB_REP] = { .suffix = "rdma-rep",
......
...@@ -46,4 +46,15 @@ void mlx5_devlink_free(struct devlink *devlink); ...@@ -46,4 +46,15 @@ void mlx5_devlink_free(struct devlink *devlink);
int mlx5_devlink_params_register(struct devlink *devlink); int mlx5_devlink_params_register(struct devlink *devlink);
void mlx5_devlink_params_unregister(struct devlink *devlink); void mlx5_devlink_params_unregister(struct devlink *devlink);
static inline bool mlx5_core_is_eth_enabled(struct mlx5_core_dev *dev)
{
union devlink_param_value val;
int err;
err = devl_param_driverinit_value_get(priv_to_devlink(dev),
DEVLINK_PARAM_GENERIC_ID_ENABLE_ETH,
&val);
return err ? false : val.vbool;
}
#endif /* __MLX5_DEVLINK_H__ */ #endif /* __MLX5_DEVLINK_H__ */
...@@ -18,6 +18,7 @@ ...@@ -18,6 +18,7 @@
#include "lib/clock.h" #include "lib/clock.h"
#include "diag/fw_tracer.h" #include "diag/fw_tracer.h"
#include "mlx5_irq.h" #include "mlx5_irq.h"
#include "pci_irq.h"
#include "devlink.h" #include "devlink.h"
#include "en_accel/ipsec.h" #include "en_accel/ipsec.h"
...@@ -61,9 +62,7 @@ struct mlx5_eq_table { ...@@ -61,9 +62,7 @@ struct mlx5_eq_table {
struct mlx5_irq_table *irq_table; struct mlx5_irq_table *irq_table;
struct mlx5_irq **comp_irqs; struct mlx5_irq **comp_irqs;
struct mlx5_irq *ctrl_irq; struct mlx5_irq *ctrl_irq;
#ifdef CONFIG_RFS_ACCEL
struct cpu_rmap *rmap; struct cpu_rmap *rmap;
#endif
}; };
#define MLX5_ASYNC_EVENT_MASK ((1ull << MLX5_EVENT_TYPE_PATH_MIG) | \ #define MLX5_ASYNC_EVENT_MASK ((1ull << MLX5_EVENT_TYPE_PATH_MIG) | \
...@@ -637,6 +636,7 @@ static u16 async_eq_depth_devlink_param_get(struct mlx5_core_dev *dev) ...@@ -637,6 +636,7 @@ static u16 async_eq_depth_devlink_param_get(struct mlx5_core_dev *dev)
mlx5_core_dbg(dev, "Failed to get param. using default. err = %d\n", err); mlx5_core_dbg(dev, "Failed to get param. using default. err = %d\n", err);
return MLX5_NUM_ASYNC_EQE; return MLX5_NUM_ASYNC_EQE;
} }
static int create_async_eqs(struct mlx5_core_dev *dev) static int create_async_eqs(struct mlx5_core_dev *dev)
{ {
struct mlx5_eq_table *table = dev->priv.eq_table; struct mlx5_eq_table *table = dev->priv.eq_table;
...@@ -803,44 +803,28 @@ void mlx5_eq_update_ci(struct mlx5_eq *eq, u32 cc, bool arm) ...@@ -803,44 +803,28 @@ void mlx5_eq_update_ci(struct mlx5_eq *eq, u32 cc, bool arm)
} }
EXPORT_SYMBOL(mlx5_eq_update_ci); EXPORT_SYMBOL(mlx5_eq_update_ci);
static void comp_irqs_release(struct mlx5_core_dev *dev) static void comp_irqs_release_pci(struct mlx5_core_dev *dev)
{ {
struct mlx5_eq_table *table = dev->priv.eq_table; struct mlx5_eq_table *table = dev->priv.eq_table;
if (mlx5_core_is_sf(dev))
mlx5_irq_affinity_irqs_release(dev, table->comp_irqs, table->num_comp_eqs);
else
mlx5_irqs_release_vectors(table->comp_irqs, table->num_comp_eqs); mlx5_irqs_release_vectors(table->comp_irqs, table->num_comp_eqs);
kfree(table->comp_irqs);
} }
static int comp_irqs_request(struct mlx5_core_dev *dev) static int comp_irqs_request_pci(struct mlx5_core_dev *dev)
{ {
struct mlx5_eq_table *table = dev->priv.eq_table; struct mlx5_eq_table *table = dev->priv.eq_table;
const struct cpumask *prev = cpu_none_mask; const struct cpumask *prev = cpu_none_mask;
const struct cpumask *mask; const struct cpumask *mask;
int ncomp_eqs = table->num_comp_eqs; int ncomp_eqs;
u16 *cpus; u16 *cpus;
int ret; int ret;
int cpu; int cpu;
int i; int i;
ncomp_eqs = table->num_comp_eqs; ncomp_eqs = table->num_comp_eqs;
table->comp_irqs = kcalloc(ncomp_eqs, sizeof(*table->comp_irqs), GFP_KERNEL);
if (!table->comp_irqs)
return -ENOMEM;
if (mlx5_core_is_sf(dev)) {
ret = mlx5_irq_affinity_irqs_request_auto(dev, ncomp_eqs, table->comp_irqs);
if (ret < 0)
goto free_irqs;
return ret;
}
cpus = kcalloc(ncomp_eqs, sizeof(*cpus), GFP_KERNEL); cpus = kcalloc(ncomp_eqs, sizeof(*cpus), GFP_KERNEL);
if (!cpus) { if (!cpus)
ret = -ENOMEM; ret = -ENOMEM;
goto free_irqs;
}
i = 0; i = 0;
rcu_read_lock(); rcu_read_lock();
...@@ -854,17 +838,89 @@ static int comp_irqs_request(struct mlx5_core_dev *dev) ...@@ -854,17 +838,89 @@ static int comp_irqs_request(struct mlx5_core_dev *dev)
} }
spread_done: spread_done:
rcu_read_unlock(); rcu_read_unlock();
ret = mlx5_irqs_request_vectors(dev, cpus, ncomp_eqs, table->comp_irqs); ret = mlx5_irqs_request_vectors(dev, cpus, ncomp_eqs, table->comp_irqs, &table->rmap);
kfree(cpus); kfree(cpus);
if (ret < 0)
goto free_irqs;
return ret; return ret;
}
/* SF flavour of completion IRQ release: hand every entry of the EQ table's
 * comp_irqs array (num_comp_eqs of them) back through
 * mlx5_irq_affinity_irqs_release(). The array itself is not freed here.
 */
static void comp_irqs_release_sf(struct mlx5_core_dev *dev)
{
	struct mlx5_eq_table *eq_table = dev->priv.eq_table;
	struct mlx5_irq **irqs = eq_table->comp_irqs;
	int nirqs = eq_table->num_comp_eqs;

	mlx5_irq_affinity_irqs_release(dev, irqs, nirqs);
}
/* SF flavour of completion IRQ request: ask
 * mlx5_irq_affinity_irqs_request_auto() to fill the EQ table's comp_irqs
 * array with up to num_comp_eqs IRQs.
 *
 * The helper's return value is passed through unchanged.
 */
static int comp_irqs_request_sf(struct mlx5_core_dev *dev)
{
	struct mlx5_eq_table *eq_table = dev->priv.eq_table;

	return mlx5_irq_affinity_irqs_request_auto(dev, eq_table->num_comp_eqs,
						   eq_table->comp_irqs);
}
static void comp_irqs_release(struct mlx5_core_dev *dev)
{
struct mlx5_eq_table *table = dev->priv.eq_table;
mlx5_core_is_sf(dev) ? comp_irqs_release_sf(dev) :
comp_irqs_release_pci(dev);
free_irqs:
kfree(table->comp_irqs); kfree(table->comp_irqs);
}
static int comp_irqs_request(struct mlx5_core_dev *dev)
{
struct mlx5_eq_table *table = dev->priv.eq_table;
int ncomp_eqs;
int ret;
ncomp_eqs = table->num_comp_eqs;
table->comp_irqs = kcalloc(ncomp_eqs, sizeof(*table->comp_irqs), GFP_KERNEL);
if (!table->comp_irqs)
return -ENOMEM;
ret = mlx5_core_is_sf(dev) ? comp_irqs_request_sf(dev) :
comp_irqs_request_pci(dev);
if (ret < 0)
kfree(table->comp_irqs);
return ret; return ret;
} }
#ifdef CONFIG_RFS_ACCEL
/* Allocate the IRQ CPU reverse-map stored in the EQ table, sized for
 * num_comp_eqs entries.
 *
 * Returns 0 on success (including the SF case, where nothing is allocated)
 * or -ENOMEM if the allocation fails.
 */
static int alloc_rmap(struct mlx5_core_dev *mdev)
{
	struct mlx5_eq_table *table = mdev->priv.eq_table;

	/* rmap is a mapping between irq number and queue number.
	 * Each irq can be assigned only to a single rmap.
	 * Since SFs share IRQs, rmap mapping cannot function correctly
	 * for irqs that are shared between different core/netdev RX rings.
	 * Hence we don't allow netdev rmap for SFs.
	 */
	if (mlx5_core_is_sf(mdev))
		return 0;

	table->rmap = alloc_irq_cpu_rmap(table->num_comp_eqs);

	return table->rmap ? 0 : -ENOMEM;
}

/* Free the EQ table's reverse-map, if one is set, and reset the pointer to
 * NULL so that a second call is a no-op.
 */
static void free_rmap(struct mlx5_core_dev *mdev)
{
	struct mlx5_eq_table *table = mdev->priv.eq_table;

	if (!table->rmap)
		return;

	free_irq_cpu_rmap(table->rmap);
	table->rmap = NULL;
}
#else
/* Without CONFIG_RFS_ACCEL there is no rmap to manage: both helpers are
 * empty stubs.
 */
static int alloc_rmap(struct mlx5_core_dev *mdev) { return 0; }
static void free_rmap(struct mlx5_core_dev *mdev) {}
#endif
static void destroy_comp_eqs(struct mlx5_core_dev *dev) static void destroy_comp_eqs(struct mlx5_core_dev *dev)
{ {
struct mlx5_eq_table *table = dev->priv.eq_table; struct mlx5_eq_table *table = dev->priv.eq_table;
...@@ -880,6 +936,7 @@ static void destroy_comp_eqs(struct mlx5_core_dev *dev) ...@@ -880,6 +936,7 @@ static void destroy_comp_eqs(struct mlx5_core_dev *dev)
kfree(eq); kfree(eq);
} }
comp_irqs_release(dev); comp_irqs_release(dev);
free_rmap(dev);
} }
static u16 comp_eq_depth_devlink_param_get(struct mlx5_core_dev *dev) static u16 comp_eq_depth_devlink_param_get(struct mlx5_core_dev *dev)
...@@ -906,9 +963,16 @@ static int create_comp_eqs(struct mlx5_core_dev *dev) ...@@ -906,9 +963,16 @@ static int create_comp_eqs(struct mlx5_core_dev *dev)
int err; int err;
int i; int i;
err = alloc_rmap(dev);
if (err)
return err;
ncomp_eqs = comp_irqs_request(dev); ncomp_eqs = comp_irqs_request(dev);
if (ncomp_eqs < 0) if (ncomp_eqs < 0) {
return ncomp_eqs; err = ncomp_eqs;
goto err_irqs_req;
}
INIT_LIST_HEAD(&table->comp_eqs_list); INIT_LIST_HEAD(&table->comp_eqs_list);
nent = comp_eq_depth_devlink_param_get(dev); nent = comp_eq_depth_devlink_param_get(dev);
...@@ -953,6 +1017,8 @@ static int create_comp_eqs(struct mlx5_core_dev *dev) ...@@ -953,6 +1017,8 @@ static int create_comp_eqs(struct mlx5_core_dev *dev)
kfree(eq); kfree(eq);
clean: clean:
destroy_comp_eqs(dev); destroy_comp_eqs(dev);
err_irqs_req:
free_rmap(dev);
return err; return err;
} }
...@@ -1031,55 +1097,12 @@ struct mlx5_eq_comp *mlx5_eqn2comp_eq(struct mlx5_core_dev *dev, int eqn) ...@@ -1031,55 +1097,12 @@ struct mlx5_eq_comp *mlx5_eqn2comp_eq(struct mlx5_core_dev *dev, int eqn)
return ERR_PTR(-ENOENT); return ERR_PTR(-ENOENT);
} }
/* Free the EQ table's IRQ CPU reverse-map. Compiles to an empty function
 * when CONFIG_RFS_ACCEL is not set. The rmap pointer is left as is.
 */
static void clear_rmap(struct mlx5_core_dev *dev)
{
#ifdef CONFIG_RFS_ACCEL
	free_irq_cpu_rmap(dev->priv.eq_table->rmap);
#endif
}
/* Build the IRQ CPU reverse-map for the completion vectors of @mdev.
 *
 * With CONFIG_RFS_ACCEL set: allocates an rmap sized for num_comp_eqs and
 * adds the PCI IRQ of every vector index in [0, num_comp_eqs) to it. If
 * adding any IRQ fails, the rmap is released via clear_rmap() and the error
 * is returned. Without CONFIG_RFS_ACCEL this does nothing.
 *
 * Returns 0 on success, -ENOMEM if the rmap cannot be allocated, or the
 * error reported by irq_cpu_rmap_add().
 */
static int set_rmap(struct mlx5_core_dev *mdev)
{
	int err = 0;
#ifdef CONFIG_RFS_ACCEL
	struct mlx5_eq_table *table = mdev->priv.eq_table;
	int i;

	table->rmap = alloc_irq_cpu_rmap(table->num_comp_eqs);
	if (!table->rmap) {
		err = -ENOMEM;
		mlx5_core_err(mdev, "Failed to allocate cpu_rmap. err %d", err);
		return err;
	}

	for (i = 0; i < table->num_comp_eqs; i++) {
		err = irq_cpu_rmap_add(table->rmap,
				       pci_irq_vector(mdev->pdev, i));
		if (!err)
			continue;

		mlx5_core_err(mdev, "irq_cpu_rmap_add failed. err %d",
			      err);
		/* Drop the partially populated rmap before bailing out. */
		clear_rmap(mdev);
		return err;
	}
#endif
	return err;
}
/* This function should only be called after mlx5_cmd_force_teardown_hca */ /* This function should only be called after mlx5_cmd_force_teardown_hca */
void mlx5_core_eq_free_irqs(struct mlx5_core_dev *dev) void mlx5_core_eq_free_irqs(struct mlx5_core_dev *dev)
{ {
struct mlx5_eq_table *table = dev->priv.eq_table; struct mlx5_eq_table *table = dev->priv.eq_table;
mutex_lock(&table->lock); /* sync with create/destroy_async_eq */ mutex_lock(&table->lock); /* sync with create/destroy_async_eq */
if (!mlx5_core_is_sf(dev))
clear_rmap(dev);
mlx5_irq_table_destroy(dev); mlx5_irq_table_destroy(dev);
mutex_unlock(&table->lock); mutex_unlock(&table->lock);
} }
...@@ -1090,44 +1113,47 @@ void mlx5_core_eq_free_irqs(struct mlx5_core_dev *dev) ...@@ -1090,44 +1113,47 @@ void mlx5_core_eq_free_irqs(struct mlx5_core_dev *dev)
#define MLX5_MAX_ASYNC_EQS 3 #define MLX5_MAX_ASYNC_EQS 3
#endif #endif
int mlx5_eq_table_create(struct mlx5_core_dev *dev) static int get_num_eqs(struct mlx5_core_dev *dev)
{ {
struct mlx5_eq_table *eq_table = dev->priv.eq_table; struct mlx5_eq_table *eq_table = dev->priv.eq_table;
int num_eqs = MLX5_CAP_GEN(dev, max_num_eqs) ? int max_dev_eqs;
int max_eqs_sf;
int num_eqs;
/* If ethernet is disabled we use just a single completion vector to
* have the other vectors available for other drivers using mlx5_core. For
* example, mlx5_vdpa
*/
if (!mlx5_core_is_eth_enabled(dev) && mlx5_eth_supported(dev))
return 1;
max_dev_eqs = MLX5_CAP_GEN(dev, max_num_eqs) ?
MLX5_CAP_GEN(dev, max_num_eqs) : MLX5_CAP_GEN(dev, max_num_eqs) :
1 << MLX5_CAP_GEN(dev, log_max_eq); 1 << MLX5_CAP_GEN(dev, log_max_eq);
int max_eqs_sf;
int err;
eq_table->num_comp_eqs = num_eqs = min_t(int, mlx5_irq_table_get_num_comp(eq_table->irq_table),
min_t(int, max_dev_eqs - MLX5_MAX_ASYNC_EQS);
mlx5_irq_table_get_num_comp(eq_table->irq_table),
num_eqs - MLX5_MAX_ASYNC_EQS);
if (mlx5_core_is_sf(dev)) { if (mlx5_core_is_sf(dev)) {
max_eqs_sf = min_t(int, MLX5_COMP_EQS_PER_SF, max_eqs_sf = min_t(int, MLX5_COMP_EQS_PER_SF,
mlx5_irq_table_get_sfs_vec(eq_table->irq_table)); mlx5_irq_table_get_sfs_vec(eq_table->irq_table));
eq_table->num_comp_eqs = min_t(int, eq_table->num_comp_eqs, num_eqs = min_t(int, num_eqs, max_eqs_sf);
max_eqs_sf);
} }
return num_eqs;
}
int mlx5_eq_table_create(struct mlx5_core_dev *dev)
{
struct mlx5_eq_table *eq_table = dev->priv.eq_table;
int err;
eq_table->num_comp_eqs = get_num_eqs(dev);
err = create_async_eqs(dev); err = create_async_eqs(dev);
if (err) { if (err) {
mlx5_core_err(dev, "Failed to create async EQs\n"); mlx5_core_err(dev, "Failed to create async EQs\n");
goto err_async_eqs; goto err_async_eqs;
} }
if (!mlx5_core_is_sf(dev)) {
/* rmap is a mapping between irq number and queue number.
* each irq can be assign only to a single rmap.
* since SFs share IRQs, rmap mapping cannot function correctly
* for irqs that are shared for different core/netdev RX rings.
* Hence we don't allow netdev rmap for SFs
*/
err = set_rmap(dev);
if (err)
goto err_rmap;
}
err = create_comp_eqs(dev); err = create_comp_eqs(dev);
if (err) { if (err) {
mlx5_core_err(dev, "Failed to create completion EQs\n"); mlx5_core_err(dev, "Failed to create completion EQs\n");
...@@ -1135,10 +1161,8 @@ int mlx5_eq_table_create(struct mlx5_core_dev *dev) ...@@ -1135,10 +1161,8 @@ int mlx5_eq_table_create(struct mlx5_core_dev *dev)
} }
return 0; return 0;
err_comp_eqs: err_comp_eqs:
if (!mlx5_core_is_sf(dev))
clear_rmap(dev);
err_rmap:
destroy_async_eqs(dev); destroy_async_eqs(dev);
err_async_eqs: err_async_eqs:
return err; return err;
...@@ -1146,8 +1170,6 @@ int mlx5_eq_table_create(struct mlx5_core_dev *dev) ...@@ -1146,8 +1170,6 @@ int mlx5_eq_table_create(struct mlx5_core_dev *dev)
void mlx5_eq_table_destroy(struct mlx5_core_dev *dev) void mlx5_eq_table_destroy(struct mlx5_core_dev *dev)
{ {
if (!mlx5_core_is_sf(dev))
clear_rmap(dev);
destroy_comp_eqs(dev); destroy_comp_eqs(dev);
destroy_async_eqs(dev); destroy_async_eqs(dev);
} }
......
...@@ -45,30 +45,28 @@ static int cpu_get_least_loaded(struct mlx5_irq_pool *pool, ...@@ -45,30 +45,28 @@ static int cpu_get_least_loaded(struct mlx5_irq_pool *pool,
/* Creating an IRQ from irq_pool */ /* Creating an IRQ from irq_pool */
static struct mlx5_irq * static struct mlx5_irq *
irq_pool_request_irq(struct mlx5_irq_pool *pool, const struct cpumask *req_mask) irq_pool_request_irq(struct mlx5_irq_pool *pool, struct irq_affinity_desc *af_desc)
{ {
cpumask_var_t auto_mask; struct irq_affinity_desc auto_desc = {};
struct mlx5_irq *irq;
u32 irq_index; u32 irq_index;
int err; int err;
if (!zalloc_cpumask_var(&auto_mask, GFP_KERNEL))
return ERR_PTR(-ENOMEM);
err = xa_alloc(&pool->irqs, &irq_index, NULL, pool->xa_num_irqs, GFP_KERNEL); err = xa_alloc(&pool->irqs, &irq_index, NULL, pool->xa_num_irqs, GFP_KERNEL);
if (err) if (err)
return ERR_PTR(err); return ERR_PTR(err);
if (pool->irqs_per_cpu) { if (pool->irqs_per_cpu) {
if (cpumask_weight(req_mask) > 1) if (cpumask_weight(&af_desc->mask) > 1)
/* if req_mask contain more then one CPU, set the least loadad CPU /* if req_mask contain more then one CPU, set the least loadad CPU
* of req_mask * of req_mask
*/ */
cpumask_set_cpu(cpu_get_least_loaded(pool, req_mask), auto_mask); cpumask_set_cpu(cpu_get_least_loaded(pool, &af_desc->mask),
&auto_desc.mask);
else else
cpu_get(pool, cpumask_first(req_mask)); cpu_get(pool, cpumask_first(&af_desc->mask));
} }
irq = mlx5_irq_alloc(pool, irq_index, cpumask_empty(auto_mask) ? req_mask : auto_mask); return mlx5_irq_alloc(pool, irq_index,
free_cpumask_var(auto_mask); cpumask_empty(&auto_desc.mask) ? af_desc : &auto_desc,
return irq; NULL);
} }
/* Looking for the IRQ with the smallest refcount that fits req_mask. /* Looking for the IRQ with the smallest refcount that fits req_mask.
...@@ -115,22 +113,22 @@ irq_pool_find_least_loaded(struct mlx5_irq_pool *pool, const struct cpumask *req ...@@ -115,22 +113,22 @@ irq_pool_find_least_loaded(struct mlx5_irq_pool *pool, const struct cpumask *req
/** /**
* mlx5_irq_affinity_request - request an IRQ according to the given mask. * mlx5_irq_affinity_request - request an IRQ according to the given mask.
* @pool: IRQ pool to request from. * @pool: IRQ pool to request from.
* @req_mask: cpumask requested for this IRQ. * @af_desc: affinity descriptor for this IRQ.
* *
* This function returns a pointer to IRQ, or ERR_PTR in case of error. * This function returns a pointer to IRQ, or ERR_PTR in case of error.
*/ */
struct mlx5_irq * struct mlx5_irq *
mlx5_irq_affinity_request(struct mlx5_irq_pool *pool, const struct cpumask *req_mask) mlx5_irq_affinity_request(struct mlx5_irq_pool *pool, struct irq_affinity_desc *af_desc)
{ {
struct mlx5_irq *least_loaded_irq, *new_irq; struct mlx5_irq *least_loaded_irq, *new_irq;
mutex_lock(&pool->lock); mutex_lock(&pool->lock);
least_loaded_irq = irq_pool_find_least_loaded(pool, req_mask); least_loaded_irq = irq_pool_find_least_loaded(pool, &af_desc->mask);
if (least_loaded_irq && if (least_loaded_irq &&
mlx5_irq_read_locked(least_loaded_irq) < pool->min_threshold) mlx5_irq_read_locked(least_loaded_irq) < pool->min_threshold)
goto out; goto out;
/* We didn't find an IRQ with less than min_thres, try to allocate a new IRQ */ /* We didn't find an IRQ with less than min_thres, try to allocate a new IRQ */
new_irq = irq_pool_request_irq(pool, req_mask); new_irq = irq_pool_request_irq(pool, af_desc);
if (IS_ERR(new_irq)) { if (IS_ERR(new_irq)) {
if (!least_loaded_irq) { if (!least_loaded_irq) {
/* We failed to create an IRQ and we didn't find an IRQ */ /* We failed to create an IRQ and we didn't find an IRQ */
...@@ -194,32 +192,30 @@ int mlx5_irq_affinity_irqs_request_auto(struct mlx5_core_dev *dev, int nirqs, ...@@ -194,32 +192,30 @@ int mlx5_irq_affinity_irqs_request_auto(struct mlx5_core_dev *dev, int nirqs,
struct mlx5_irq **irqs) struct mlx5_irq **irqs)
{ {
struct mlx5_irq_pool *pool = mlx5_irq_pool_get(dev); struct mlx5_irq_pool *pool = mlx5_irq_pool_get(dev);
cpumask_var_t req_mask; struct irq_affinity_desc af_desc = {};
struct mlx5_irq *irq; struct mlx5_irq *irq;
int i = 0; int i = 0;
if (!zalloc_cpumask_var(&req_mask, GFP_KERNEL)) af_desc.is_managed = 1;
return -ENOMEM; cpumask_copy(&af_desc.mask, cpu_online_mask);
cpumask_copy(req_mask, cpu_online_mask);
for (i = 0; i < nirqs; i++) { for (i = 0; i < nirqs; i++) {
if (mlx5_irq_pool_is_sf_pool(pool)) if (mlx5_irq_pool_is_sf_pool(pool))
irq = mlx5_irq_affinity_request(pool, req_mask); irq = mlx5_irq_affinity_request(pool, &af_desc);
else else
/* In case SF pool doesn't exists, fallback to the PF IRQs. /* In case SF pool doesn't exists, fallback to the PF IRQs.
* The PF IRQs are already allocated and binded to CPU * The PF IRQs are already allocated and binded to CPU
* at this point. Hence, only an index is needed. * at this point. Hence, only an index is needed.
*/ */
irq = mlx5_irq_request(dev, i, NULL); irq = mlx5_irq_request(dev, i, NULL, NULL);
if (IS_ERR(irq)) if (IS_ERR(irq))
break; break;
irqs[i] = irq; irqs[i] = irq;
cpumask_clear_cpu(cpumask_first(mlx5_irq_get_affinity_mask(irq)), req_mask); cpumask_clear_cpu(cpumask_first(mlx5_irq_get_affinity_mask(irq)), &af_desc.mask);
mlx5_core_dbg(pool->dev, "IRQ %u mapped to cpu %*pbl, %u EQs on this irq\n", mlx5_core_dbg(pool->dev, "IRQ %u mapped to cpu %*pbl, %u EQs on this irq\n",
pci_irq_vector(dev->pdev, mlx5_irq_get_index(irq)), pci_irq_vector(dev->pdev, mlx5_irq_get_index(irq)),
cpumask_pr_args(mlx5_irq_get_affinity_mask(irq)), cpumask_pr_args(mlx5_irq_get_affinity_mask(irq)),
mlx5_irq_read_locked(irq) / MLX5_EQ_REFS_PER_IRQ); mlx5_irq_read_locked(irq) / MLX5_EQ_REFS_PER_IRQ);
} }
free_cpumask_var(req_mask);
if (!i) if (!i)
return PTR_ERR(irq); return PTR_ERR(irq);
return i; return i;
......
...@@ -46,9 +46,6 @@ ...@@ -46,9 +46,6 @@
#include <linux/kmod.h> #include <linux/kmod.h>
#include <linux/mlx5/mlx5_ifc.h> #include <linux/mlx5/mlx5_ifc.h>
#include <linux/mlx5/vport.h> #include <linux/mlx5/vport.h>
#ifdef CONFIG_RFS_ACCEL
#include <linux/cpu_rmap.h>
#endif
#include <linux/version.h> #include <linux/version.h>
#include <net/devlink.h> #include <net/devlink.h>
#include "mlx5_core.h" #include "mlx5_core.h"
...@@ -1401,16 +1398,16 @@ int mlx5_init_one(struct mlx5_core_dev *dev) ...@@ -1401,16 +1398,16 @@ int mlx5_init_one(struct mlx5_core_dev *dev)
goto function_teardown; goto function_teardown;
} }
err = mlx5_devlink_params_register(priv_to_devlink(dev));
if (err)
goto err_devlink_params_reg;
err = mlx5_load(dev); err = mlx5_load(dev);
if (err) if (err)
goto err_load; goto err_load;
set_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state); set_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state);
err = mlx5_devlink_params_register(priv_to_devlink(dev));
if (err)
goto err_devlink_params_reg;
err = mlx5_register_device(dev); err = mlx5_register_device(dev);
if (err) if (err)
goto err_register; goto err_register;
...@@ -1420,11 +1417,11 @@ int mlx5_init_one(struct mlx5_core_dev *dev) ...@@ -1420,11 +1417,11 @@ int mlx5_init_one(struct mlx5_core_dev *dev)
return 0; return 0;
err_register: err_register:
mlx5_devlink_params_unregister(priv_to_devlink(dev));
err_devlink_params_reg:
clear_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state); clear_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state);
mlx5_unload(dev); mlx5_unload(dev);
err_load: err_load:
mlx5_devlink_params_unregister(priv_to_devlink(dev));
err_devlink_params_reg:
mlx5_cleanup_once(dev); mlx5_cleanup_once(dev);
function_teardown: function_teardown:
mlx5_function_teardown(dev, true); mlx5_function_teardown(dev, true);
...@@ -1443,7 +1440,6 @@ void mlx5_uninit_one(struct mlx5_core_dev *dev) ...@@ -1443,7 +1440,6 @@ void mlx5_uninit_one(struct mlx5_core_dev *dev)
mutex_lock(&dev->intf_state_mutex); mutex_lock(&dev->intf_state_mutex);
mlx5_unregister_device(dev); mlx5_unregister_device(dev);
mlx5_devlink_params_unregister(priv_to_devlink(dev));
if (!test_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state)) { if (!test_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state)) {
mlx5_core_warn(dev, "%s: interface is down, NOP\n", mlx5_core_warn(dev, "%s: interface is down, NOP\n",
...@@ -1454,6 +1450,7 @@ void mlx5_uninit_one(struct mlx5_core_dev *dev) ...@@ -1454,6 +1450,7 @@ void mlx5_uninit_one(struct mlx5_core_dev *dev)
clear_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state); clear_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state);
mlx5_unload(dev); mlx5_unload(dev);
mlx5_devlink_params_unregister(priv_to_devlink(dev));
mlx5_cleanup_once(dev); mlx5_cleanup_once(dev);
mlx5_function_teardown(dev, true); mlx5_function_teardown(dev, true);
out: out:
......
...@@ -9,6 +9,7 @@ ...@@ -9,6 +9,7 @@
#define MLX5_COMP_EQS_PER_SF 8 #define MLX5_COMP_EQS_PER_SF 8
struct mlx5_irq; struct mlx5_irq;
struct cpu_rmap;
int mlx5_irq_table_init(struct mlx5_core_dev *dev); int mlx5_irq_table_init(struct mlx5_core_dev *dev);
void mlx5_irq_table_cleanup(struct mlx5_core_dev *dev); void mlx5_irq_table_cleanup(struct mlx5_core_dev *dev);
...@@ -25,9 +26,10 @@ int mlx5_get_default_msix_vec_count(struct mlx5_core_dev *dev, int num_vfs); ...@@ -25,9 +26,10 @@ int mlx5_get_default_msix_vec_count(struct mlx5_core_dev *dev, int num_vfs);
struct mlx5_irq *mlx5_ctrl_irq_request(struct mlx5_core_dev *dev); struct mlx5_irq *mlx5_ctrl_irq_request(struct mlx5_core_dev *dev);
void mlx5_ctrl_irq_release(struct mlx5_irq *ctrl_irq); void mlx5_ctrl_irq_release(struct mlx5_irq *ctrl_irq);
struct mlx5_irq *mlx5_irq_request(struct mlx5_core_dev *dev, u16 vecidx, struct mlx5_irq *mlx5_irq_request(struct mlx5_core_dev *dev, u16 vecidx,
struct cpumask *affinity); struct irq_affinity_desc *af_desc,
struct cpu_rmap **rmap);
int mlx5_irqs_request_vectors(struct mlx5_core_dev *dev, u16 *cpus, int nirqs, int mlx5_irqs_request_vectors(struct mlx5_core_dev *dev, u16 *cpus, int nirqs,
struct mlx5_irq **irqs); struct mlx5_irq **irqs, struct cpu_rmap **rmap);
void mlx5_irqs_release_vectors(struct mlx5_irq **irqs, int nirqs); void mlx5_irqs_release_vectors(struct mlx5_irq **irqs, int nirqs);
int mlx5_irq_attach_nb(struct mlx5_irq *irq, struct notifier_block *nb); int mlx5_irq_attach_nb(struct mlx5_irq *irq, struct notifier_block *nb);
int mlx5_irq_detach_nb(struct mlx5_irq *irq, struct notifier_block *nb); int mlx5_irq_detach_nb(struct mlx5_irq *irq, struct notifier_block *nb);
...@@ -39,7 +41,7 @@ struct mlx5_irq_pool; ...@@ -39,7 +41,7 @@ struct mlx5_irq_pool;
int mlx5_irq_affinity_irqs_request_auto(struct mlx5_core_dev *dev, int nirqs, int mlx5_irq_affinity_irqs_request_auto(struct mlx5_core_dev *dev, int nirqs,
struct mlx5_irq **irqs); struct mlx5_irq **irqs);
struct mlx5_irq *mlx5_irq_affinity_request(struct mlx5_irq_pool *pool, struct mlx5_irq *mlx5_irq_affinity_request(struct mlx5_irq_pool *pool,
const struct cpumask *req_mask); struct irq_affinity_desc *af_desc);
void mlx5_irq_affinity_irqs_release(struct mlx5_core_dev *dev, struct mlx5_irq **irqs, void mlx5_irq_affinity_irqs_release(struct mlx5_core_dev *dev, struct mlx5_irq **irqs,
int num_irqs); int num_irqs);
#else #else
...@@ -50,7 +52,7 @@ static inline int mlx5_irq_affinity_irqs_request_auto(struct mlx5_core_dev *dev, ...@@ -50,7 +52,7 @@ static inline int mlx5_irq_affinity_irqs_request_auto(struct mlx5_core_dev *dev,
} }
static inline struct mlx5_irq * static inline struct mlx5_irq *
mlx5_irq_affinity_request(struct mlx5_irq_pool *pool, const struct cpumask *req_mask) mlx5_irq_affinity_request(struct mlx5_irq_pool *pool, struct irq_affinity_desc *af_desc)
{ {
return ERR_PTR(-EOPNOTSUPP); return ERR_PTR(-EOPNOTSUPP);
} }
......
...@@ -12,6 +12,7 @@ ...@@ -12,6 +12,7 @@
#define MLX5_EQ_REFS_PER_IRQ (2) #define MLX5_EQ_REFS_PER_IRQ (2)
struct mlx5_irq; struct mlx5_irq;
struct cpu_rmap;
struct mlx5_irq_pool { struct mlx5_irq_pool {
char name[MLX5_MAX_IRQ_NAME - MLX5_MAX_IRQ_IDX_CHARS]; char name[MLX5_MAX_IRQ_NAME - MLX5_MAX_IRQ_IDX_CHARS];
...@@ -31,7 +32,8 @@ static inline bool mlx5_irq_pool_is_sf_pool(struct mlx5_irq_pool *pool) ...@@ -31,7 +32,8 @@ static inline bool mlx5_irq_pool_is_sf_pool(struct mlx5_irq_pool *pool)
} }
struct mlx5_irq *mlx5_irq_alloc(struct mlx5_irq_pool *pool, int i, struct mlx5_irq *mlx5_irq_alloc(struct mlx5_irq_pool *pool, int i,
const struct cpumask *affinity); struct irq_affinity_desc *af_desc,
struct cpu_rmap **rmap);
int mlx5_irq_get_locked(struct mlx5_irq *irq); int mlx5_irq_get_locked(struct mlx5_irq *irq);
int mlx5_irq_read_locked(struct mlx5_irq *irq); int mlx5_irq_read_locked(struct mlx5_irq *irq);
int mlx5_irq_put(struct mlx5_irq *irq); int mlx5_irq_put(struct mlx5_irq *irq);
......
...@@ -16,14 +16,13 @@ ...@@ -16,14 +16,13 @@
* struct cpu_rmap - CPU affinity reverse-map * struct cpu_rmap - CPU affinity reverse-map
* @refcount: kref for object * @refcount: kref for object
* @size: Number of objects to be reverse-mapped * @size: Number of objects to be reverse-mapped
* @used: Number of objects added
* @obj: Pointer to array of object pointers * @obj: Pointer to array of object pointers
* @near: For each CPU, the index and distance to the nearest object, * @near: For each CPU, the index and distance to the nearest object,
* based on affinity masks * based on affinity masks
*/ */
struct cpu_rmap { struct cpu_rmap {
struct kref refcount; struct kref refcount;
u16 size, used; u16 size;
void **obj; void **obj;
struct { struct {
u16 index; u16 index;
...@@ -61,6 +60,7 @@ static inline struct cpu_rmap *alloc_irq_cpu_rmap(unsigned int size) ...@@ -61,6 +60,7 @@ static inline struct cpu_rmap *alloc_irq_cpu_rmap(unsigned int size)
} }
extern void free_irq_cpu_rmap(struct cpu_rmap *rmap); extern void free_irq_cpu_rmap(struct cpu_rmap *rmap);
int irq_cpu_rmap_remove(struct cpu_rmap *rmap, int irq);
extern int irq_cpu_rmap_add(struct cpu_rmap *rmap, int irq); extern int irq_cpu_rmap_add(struct cpu_rmap *rmap, int irq);
#endif /* __LINUX_CPU_RMAP_H */ #endif /* __LINUX_CPU_RMAP_H */
...@@ -1311,4 +1311,10 @@ enum { ...@@ -1311,4 +1311,10 @@ enum {
MLX5_OCTWORD = 16, MLX5_OCTWORD = 16,
}; };
struct msi_map mlx5_msix_alloc(struct mlx5_core_dev *dev,
irqreturn_t (*handler)(int, void *),
const struct irq_affinity_desc *affdesc,
const char *name);
void mlx5_msix_free(struct mlx5_core_dev *dev, struct msi_map map);
#endif /* MLX5_DRIVER_H */ #endif /* MLX5_DRIVER_H */
...@@ -128,19 +128,31 @@ debug_print_rmap(const struct cpu_rmap *rmap, const char *prefix) ...@@ -128,19 +128,31 @@ debug_print_rmap(const struct cpu_rmap *rmap, const char *prefix)
} }
#endif #endif
/* Find the lowest index in @rmap whose obj[] entry is NULL.
 *
 * Returns that index, or -ENOSPC when all rmap->size entries are occupied.
 */
static int get_free_index(struct cpu_rmap *rmap)
{
	int slot = 0;

	while (slot < rmap->size) {
		if (!rmap->obj[slot])
			return slot;
		slot++;
	}

	return -ENOSPC;
}
/** /**
* cpu_rmap_add - add object to a rmap * cpu_rmap_add - add object to a rmap
* @rmap: CPU rmap allocated with alloc_cpu_rmap() * @rmap: CPU rmap allocated with alloc_cpu_rmap()
* @obj: Object to add to rmap * @obj: Object to add to rmap
* *
* Return index of object. * Return index of object or -ENOSPC if no free entry was found
*/ */
int cpu_rmap_add(struct cpu_rmap *rmap, void *obj) int cpu_rmap_add(struct cpu_rmap *rmap, void *obj)
{ {
u16 index; int index = get_free_index(rmap);
if (index < 0)
return index;
BUG_ON(rmap->used >= rmap->size);
index = rmap->used++;
rmap->obj[index] = obj; rmap->obj[index] = obj;
return index; return index;
} }
...@@ -230,8 +242,9 @@ void free_irq_cpu_rmap(struct cpu_rmap *rmap) ...@@ -230,8 +242,9 @@ void free_irq_cpu_rmap(struct cpu_rmap *rmap)
if (!rmap) if (!rmap)
return; return;
for (index = 0; index < rmap->used; index++) { for (index = 0; index < rmap->size; index++) {
glue = rmap->obj[index]; glue = rmap->obj[index];
if (glue)
irq_set_affinity_notifier(glue->notify.irq, NULL); irq_set_affinity_notifier(glue->notify.irq, NULL);
} }
...@@ -268,9 +281,21 @@ static void irq_cpu_rmap_release(struct kref *ref) ...@@ -268,9 +281,21 @@ static void irq_cpu_rmap_release(struct kref *ref)
container_of(ref, struct irq_glue, notify.kref); container_of(ref, struct irq_glue, notify.kref);
cpu_rmap_put(glue->rmap); cpu_rmap_put(glue->rmap);
glue->rmap->obj[glue->index] = NULL;
kfree(glue); kfree(glue);
} }
/**
 * irq_cpu_rmap_remove - remove an IRQ from a CPU affinity reverse-map
 * @rmap: The reverse-map (not consulted by this function)
 * @irq: The IRQ number
 *
 * Passes a NULL notifier to irq_set_affinity_notifier() for @irq, the same
 * call free_irq_cpu_rmap() issues for each populated entry.
 *
 * Return: the value returned by irq_set_affinity_notifier().
 */
int irq_cpu_rmap_remove(struct cpu_rmap *rmap, int irq)
{
	int rc = irq_set_affinity_notifier(irq, NULL);

	return rc;
}
EXPORT_SYMBOL(irq_cpu_rmap_remove);
/** /**
* irq_cpu_rmap_add - add an IRQ to a CPU affinity reverse-map * irq_cpu_rmap_add - add an IRQ to a CPU affinity reverse-map
* @rmap: The reverse-map * @rmap: The reverse-map
...@@ -293,12 +318,22 @@ int irq_cpu_rmap_add(struct cpu_rmap *rmap, int irq) ...@@ -293,12 +318,22 @@ int irq_cpu_rmap_add(struct cpu_rmap *rmap, int irq)
glue->notify.release = irq_cpu_rmap_release; glue->notify.release = irq_cpu_rmap_release;
glue->rmap = rmap; glue->rmap = rmap;
cpu_rmap_get(rmap); cpu_rmap_get(rmap);
glue->index = cpu_rmap_add(rmap, glue); rc = cpu_rmap_add(rmap, glue);
if (rc < 0)
goto err_add;
glue->index = rc;
rc = irq_set_affinity_notifier(irq, &glue->notify); rc = irq_set_affinity_notifier(irq, &glue->notify);
if (rc) { if (rc)
goto err_set;
return rc;
err_set:
rmap->obj[glue->index] = NULL;
err_add:
cpu_rmap_put(glue->rmap); cpu_rmap_put(glue->rmap);
kfree(glue); kfree(glue);
}
return rc; return rc;
} }
EXPORT_SYMBOL(irq_cpu_rmap_add); EXPORT_SYMBOL(irq_cpu_rmap_add);
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment