Commit 94c6825e authored by Matan Barak's avatar Matan Barak Committed by Doug Ledford

net/mlx5_core: Use tasklet for user-space CQ completion events

Previously, we've fired all our completion callbacks straight from
our ISR.

Some of those callbacks were lightweight (for example, mlx5 Ethernet
napi callbacks), but some of them did more work (for example,
the user-space RDMA stack uverbs' completion handler). Besides that,
doing more than the minimal work in ISR is generally considered wrong,
it could even lead to a hard lockup of the system. Since when a lot
of completion events are generated by the hardware, the loop over
those events could be so long, that we'll get into a hard lockup by
the system watchdog.

In order to avoid that, add a new way of invoking completion events
callbacks. In the interrupt itself, we add the CQs which receive
completion event to a per-EQ list and schedule a tasklet. In the
tasklet context we loop over all the CQs in the list and invoke the
user callback.
Signed-off-by: default avatarMatan Barak <matanb@mellanox.com>
Signed-off-by: default avatarDoug Ledford <dledford@redhat.com>
parent e3b6d8cf
...@@ -39,6 +39,53 @@ ...@@ -39,6 +39,53 @@
#include <linux/mlx5/cq.h> #include <linux/mlx5/cq.h>
#include "mlx5_core.h" #include "mlx5_core.h"
#define TASKLET_MAX_TIME 2
#define TASKLET_MAX_TIME_JIFFIES msecs_to_jiffies(TASKLET_MAX_TIME)
void mlx5_cq_tasklet_cb(unsigned long data)
{
unsigned long flags;
unsigned long end = jiffies + TASKLET_MAX_TIME_JIFFIES;
struct mlx5_eq_tasklet *ctx = (struct mlx5_eq_tasklet *)data;
struct mlx5_core_cq *mcq;
struct mlx5_core_cq *temp;
spin_lock_irqsave(&ctx->lock, flags);
list_splice_tail_init(&ctx->list, &ctx->process_list);
spin_unlock_irqrestore(&ctx->lock, flags);
list_for_each_entry_safe(mcq, temp, &ctx->process_list,
tasklet_ctx.list) {
list_del_init(&mcq->tasklet_ctx.list);
mcq->tasklet_ctx.comp(mcq);
if (atomic_dec_and_test(&mcq->refcount))
complete(&mcq->free);
if (time_after(jiffies, end))
break;
}
if (!list_empty(&ctx->process_list))
tasklet_schedule(&ctx->task);
}
static void mlx5_add_cq_to_tasklet(struct mlx5_core_cq *cq)
{
unsigned long flags;
struct mlx5_eq_tasklet *tasklet_ctx = cq->tasklet_ctx.priv;
spin_lock_irqsave(&tasklet_ctx->lock, flags);
/* When migrating CQs between EQs will be implemented, please note
* that you need to sync this point. It is possible that
* while migrating a CQ, completions on the old EQs could
* still arrive.
*/
if (list_empty_careful(&cq->tasklet_ctx.list)) {
atomic_inc(&cq->refcount);
list_add_tail(&cq->tasklet_ctx.list, &tasklet_ctx->list);
}
spin_unlock_irqrestore(&tasklet_ctx->lock, flags);
}
void mlx5_cq_completion(struct mlx5_core_dev *dev, u32 cqn) void mlx5_cq_completion(struct mlx5_core_dev *dev, u32 cqn)
{ {
struct mlx5_core_cq *cq; struct mlx5_core_cq *cq;
...@@ -96,6 +143,13 @@ int mlx5_core_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq, ...@@ -96,6 +143,13 @@ int mlx5_core_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq,
struct mlx5_create_cq_mbox_out out; struct mlx5_create_cq_mbox_out out;
struct mlx5_destroy_cq_mbox_in din; struct mlx5_destroy_cq_mbox_in din;
struct mlx5_destroy_cq_mbox_out dout; struct mlx5_destroy_cq_mbox_out dout;
int eqn = MLX5_GET(cqc, MLX5_ADDR_OF(create_cq_in, in, cq_context),
c_eqn);
struct mlx5_eq *eq;
eq = mlx5_eqn2eq(dev, eqn);
if (IS_ERR(eq))
return PTR_ERR(eq);
in->hdr.opcode = cpu_to_be16(MLX5_CMD_OP_CREATE_CQ); in->hdr.opcode = cpu_to_be16(MLX5_CMD_OP_CREATE_CQ);
memset(&out, 0, sizeof(out)); memset(&out, 0, sizeof(out));
...@@ -111,6 +165,11 @@ int mlx5_core_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq, ...@@ -111,6 +165,11 @@ int mlx5_core_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq,
cq->arm_sn = 0; cq->arm_sn = 0;
atomic_set(&cq->refcount, 1); atomic_set(&cq->refcount, 1);
init_completion(&cq->free); init_completion(&cq->free);
if (!cq->comp)
cq->comp = mlx5_add_cq_to_tasklet;
/* assuming CQ will be deleted before the EQ */
cq->tasklet_ctx.priv = &eq->tasklet_ctx;
INIT_LIST_HEAD(&cq->tasklet_ctx.list);
spin_lock_irq(&table->lock); spin_lock_irq(&table->lock);
err = radix_tree_insert(&table->tree, cq->cqn, cq); err = radix_tree_insert(&table->tree, cq->cqn, cq);
......
...@@ -202,7 +202,7 @@ static int mlx5_eq_int(struct mlx5_core_dev *dev, struct mlx5_eq *eq) ...@@ -202,7 +202,7 @@ static int mlx5_eq_int(struct mlx5_core_dev *dev, struct mlx5_eq *eq)
struct mlx5_eqe *eqe; struct mlx5_eqe *eqe;
int eqes_found = 0; int eqes_found = 0;
int set_ci = 0; int set_ci = 0;
u32 cqn; u32 cqn = -1;
u32 rsn; u32 rsn;
u8 port; u8 port;
...@@ -320,6 +320,9 @@ static int mlx5_eq_int(struct mlx5_core_dev *dev, struct mlx5_eq *eq) ...@@ -320,6 +320,9 @@ static int mlx5_eq_int(struct mlx5_core_dev *dev, struct mlx5_eq *eq)
eq_update_ci(eq, 1); eq_update_ci(eq, 1);
if (cqn != -1)
tasklet_schedule(&eq->tasklet_ctx.task);
return eqes_found; return eqes_found;
} }
...@@ -403,6 +406,12 @@ int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx, ...@@ -403,6 +406,12 @@ int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx,
if (err) if (err)
goto err_irq; goto err_irq;
INIT_LIST_HEAD(&eq->tasklet_ctx.list);
INIT_LIST_HEAD(&eq->tasklet_ctx.process_list);
spin_lock_init(&eq->tasklet_ctx.lock);
tasklet_init(&eq->tasklet_ctx.task, mlx5_cq_tasklet_cb,
(unsigned long)&eq->tasklet_ctx);
/* EQs are created in ARMED state /* EQs are created in ARMED state
*/ */
eq_update_ci(eq, 1); eq_update_ci(eq, 1);
...@@ -436,6 +445,7 @@ int mlx5_destroy_unmap_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq) ...@@ -436,6 +445,7 @@ int mlx5_destroy_unmap_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq)
mlx5_core_warn(dev, "failed to destroy a previously created eq: eqn %d\n", mlx5_core_warn(dev, "failed to destroy a previously created eq: eqn %d\n",
eq->eqn); eq->eqn);
synchronize_irq(eq->irqn); synchronize_irq(eq->irqn);
tasklet_disable(&eq->tasklet_ctx.task);
mlx5_buf_free(dev, &eq->buf); mlx5_buf_free(dev, &eq->buf);
return err; return err;
......
...@@ -660,6 +660,23 @@ int mlx5_vector2eqn(struct mlx5_core_dev *dev, int vector, int *eqn, ...@@ -660,6 +660,23 @@ int mlx5_vector2eqn(struct mlx5_core_dev *dev, int vector, int *eqn,
} }
EXPORT_SYMBOL(mlx5_vector2eqn); EXPORT_SYMBOL(mlx5_vector2eqn);
struct mlx5_eq *mlx5_eqn2eq(struct mlx5_core_dev *dev, int eqn)
{
struct mlx5_eq_table *table = &dev->priv.eq_table;
struct mlx5_eq *eq;
spin_lock(&table->lock);
list_for_each_entry(eq, &table->comp_eqs_list, list)
if (eq->eqn == eqn) {
spin_unlock(&table->lock);
return eq;
}
spin_unlock(&table->lock);
return ERR_PTR(-ENOENT);
}
static void free_comp_eqs(struct mlx5_core_dev *dev) static void free_comp_eqs(struct mlx5_core_dev *dev)
{ {
struct mlx5_eq_table *table = &dev->priv.eq_table; struct mlx5_eq_table *table = &dev->priv.eq_table;
......
...@@ -100,6 +100,8 @@ int mlx5_core_disable_hca(struct mlx5_core_dev *dev, u16 func_id); ...@@ -100,6 +100,8 @@ int mlx5_core_disable_hca(struct mlx5_core_dev *dev, u16 func_id);
int mlx5_wait_for_vf_pages(struct mlx5_core_dev *dev); int mlx5_wait_for_vf_pages(struct mlx5_core_dev *dev);
cycle_t mlx5_read_internal_timer(struct mlx5_core_dev *dev); cycle_t mlx5_read_internal_timer(struct mlx5_core_dev *dev);
u32 mlx5_get_msix_vec(struct mlx5_core_dev *dev, int vecidx); u32 mlx5_get_msix_vec(struct mlx5_core_dev *dev, int vecidx);
struct mlx5_eq *mlx5_eqn2eq(struct mlx5_core_dev *dev, int eqn);
void mlx5_cq_tasklet_cb(unsigned long data);
void mlx5e_init(void); void mlx5e_init(void);
void mlx5e_cleanup(void); void mlx5e_cleanup(void);
......
...@@ -53,6 +53,11 @@ struct mlx5_core_cq { ...@@ -53,6 +53,11 @@ struct mlx5_core_cq {
unsigned arm_sn; unsigned arm_sn;
struct mlx5_rsc_debug *dbg; struct mlx5_rsc_debug *dbg;
int pid; int pid;
struct {
struct list_head list;
void (*comp)(struct mlx5_core_cq *);
void *priv;
} tasklet_ctx;
}; };
......
...@@ -41,6 +41,7 @@ ...@@ -41,6 +41,7 @@
#include <linux/slab.h> #include <linux/slab.h>
#include <linux/vmalloc.h> #include <linux/vmalloc.h>
#include <linux/radix-tree.h> #include <linux/radix-tree.h>
#include <linux/interrupt.h>
#include <linux/mlx5/device.h> #include <linux/mlx5/device.h>
#include <linux/mlx5/doorbell.h> #include <linux/mlx5/doorbell.h>
...@@ -304,6 +305,14 @@ struct mlx5_buf { ...@@ -304,6 +305,14 @@ struct mlx5_buf {
u8 page_shift; u8 page_shift;
}; };
struct mlx5_eq_tasklet {
struct list_head list;
struct list_head process_list;
struct tasklet_struct task;
/* lock on completion tasklet list */
spinlock_t lock;
};
struct mlx5_eq { struct mlx5_eq {
struct mlx5_core_dev *dev; struct mlx5_core_dev *dev;
__be32 __iomem *doorbell; __be32 __iomem *doorbell;
...@@ -317,6 +326,7 @@ struct mlx5_eq { ...@@ -317,6 +326,7 @@ struct mlx5_eq {
struct list_head list; struct list_head list;
int index; int index;
struct mlx5_rsc_debug *dbg; struct mlx5_rsc_debug *dbg;
struct mlx5_eq_tasklet tasklet_ctx;
}; };
struct mlx5_core_psv { struct mlx5_core_psv {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment