Commit d5efe5c2 authored by Jack Morgenstein's avatar Jack Morgenstein Committed by Willy Tarreau

net/mlx4_core: Fix racy CQ (Completion Queue) free

commit 291c566a upstream.

In function mlx4_cq_completion() and mlx4_cq_event(), the
radix_tree_lookup requires a rcu_read_lock.
This is mandatory: if another core frees the CQ, it could
run the radix_tree_node_rcu_free() call_rcu() callback while
its being used by the radix tree lookup function.

Additionally, in function mlx4_cq_event(), since we are adding
the rcu lock around the radix-tree lookup, we no longer need to take
the spinlock. Also, the synchronize_irq() call for the async event
eliminates the need for incrementing the cq reference count in
mlx4_cq_event().

Other changes:
1. In function mlx4_cq_free(), replace spin_lock_irq with spin_lock:
   we no longer take this spinlock in the interrupt context.
   The spinlock here, therefore, simply protects against different
   threads simultaneously invoking mlx4_cq_free() for different cq's.

2. In function mlx4_cq_free(), we move the radix tree delete to before
   the synchronize_irq() calls. This guarantees that we will not
   access this cq during any subsequent interrupts, and therefore can
   safely free the CQ after the synchronize_irq calls. The rcu_read_lock
   in the interrupt handlers only needs to protect against corrupting the
   radix tree; the interrupt handlers may access the cq outside the
   rcu_read_lock due to the synchronize_irq calls which protect against
   premature freeing of the cq.

3. In function mlx4_cq_event(), we change the mlx_warn message to mlx4_dbg.

4. We leave the cq reference count mechanism in place, because it is
   still needed for the cq completion tasklet mechanism.

Fixes: 6d90aa5c ("net/mlx4_core: Make sure there are no pending async events when freeing CQ")
Fixes: 225c7b1f ("IB/mlx4: Add a driver Mellanox ConnectX InfiniBand adapters")
Signed-off-by: default avatarJack Morgenstein <jackm@dev.mellanox.co.il>
Signed-off-by: default avatarMatan Barak <matanb@mellanox.com>
Signed-off-by: default avatarTariq Toukan <tariqt@mellanox.com>
Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
Signed-off-by: default avatarSumit Semwal <sumit.semwal@linaro.org>
Signed-off-by: default avatarWilly Tarreau <w@1wt.eu>
parent 2b3d0c06
...@@ -57,13 +57,19 @@ void mlx4_cq_completion(struct mlx4_dev *dev, u32 cqn) ...@@ -57,13 +57,19 @@ void mlx4_cq_completion(struct mlx4_dev *dev, u32 cqn)
{ {
struct mlx4_cq *cq; struct mlx4_cq *cq;
rcu_read_lock();
cq = radix_tree_lookup(&mlx4_priv(dev)->cq_table.tree, cq = radix_tree_lookup(&mlx4_priv(dev)->cq_table.tree,
cqn & (dev->caps.num_cqs - 1)); cqn & (dev->caps.num_cqs - 1));
rcu_read_unlock();
if (!cq) { if (!cq) {
mlx4_dbg(dev, "Completion event for bogus CQ %08x\n", cqn); mlx4_dbg(dev, "Completion event for bogus CQ %08x\n", cqn);
return; return;
} }
/* Acessing the CQ outside of rcu_read_lock is safe, because
* the CQ is freed only after interrupt handling is completed.
*/
++cq->arm_sn; ++cq->arm_sn;
cq->comp(cq); cq->comp(cq);
...@@ -74,23 +80,19 @@ void mlx4_cq_event(struct mlx4_dev *dev, u32 cqn, int event_type) ...@@ -74,23 +80,19 @@ void mlx4_cq_event(struct mlx4_dev *dev, u32 cqn, int event_type)
struct mlx4_cq_table *cq_table = &mlx4_priv(dev)->cq_table; struct mlx4_cq_table *cq_table = &mlx4_priv(dev)->cq_table;
struct mlx4_cq *cq; struct mlx4_cq *cq;
spin_lock(&cq_table->lock); rcu_read_lock();
cq = radix_tree_lookup(&cq_table->tree, cqn & (dev->caps.num_cqs - 1)); cq = radix_tree_lookup(&cq_table->tree, cqn & (dev->caps.num_cqs - 1));
if (cq) rcu_read_unlock();
atomic_inc(&cq->refcount);
spin_unlock(&cq_table->lock);
if (!cq) { if (!cq) {
mlx4_warn(dev, "Async event for bogus CQ %08x\n", cqn); mlx4_dbg(dev, "Async event for bogus CQ %08x\n", cqn);
return; return;
} }
/* Acessing the CQ outside of rcu_read_lock is safe, because
* the CQ is freed only after interrupt handling is completed.
*/
cq->event(cq, event_type); cq->event(cq, event_type);
if (atomic_dec_and_test(&cq->refcount))
complete(&cq->free);
} }
static int mlx4_SW2HW_CQ(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox, static int mlx4_SW2HW_CQ(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox,
...@@ -261,9 +263,9 @@ int mlx4_cq_alloc(struct mlx4_dev *dev, int nent, ...@@ -261,9 +263,9 @@ int mlx4_cq_alloc(struct mlx4_dev *dev, int nent,
if (err) if (err)
return err; return err;
spin_lock_irq(&cq_table->lock); spin_lock(&cq_table->lock);
err = radix_tree_insert(&cq_table->tree, cq->cqn, cq); err = radix_tree_insert(&cq_table->tree, cq->cqn, cq);
spin_unlock_irq(&cq_table->lock); spin_unlock(&cq_table->lock);
if (err) if (err)
goto err_icm; goto err_icm;
...@@ -303,9 +305,9 @@ int mlx4_cq_alloc(struct mlx4_dev *dev, int nent, ...@@ -303,9 +305,9 @@ int mlx4_cq_alloc(struct mlx4_dev *dev, int nent,
return 0; return 0;
err_radix: err_radix:
spin_lock_irq(&cq_table->lock); spin_lock(&cq_table->lock);
radix_tree_delete(&cq_table->tree, cq->cqn); radix_tree_delete(&cq_table->tree, cq->cqn);
spin_unlock_irq(&cq_table->lock); spin_unlock(&cq_table->lock);
err_icm: err_icm:
mlx4_cq_free_icm(dev, cq->cqn); mlx4_cq_free_icm(dev, cq->cqn);
...@@ -324,11 +326,11 @@ void mlx4_cq_free(struct mlx4_dev *dev, struct mlx4_cq *cq) ...@@ -324,11 +326,11 @@ void mlx4_cq_free(struct mlx4_dev *dev, struct mlx4_cq *cq)
if (err) if (err)
mlx4_warn(dev, "HW2SW_CQ failed (%d) for CQN %06x\n", err, cq->cqn); mlx4_warn(dev, "HW2SW_CQ failed (%d) for CQN %06x\n", err, cq->cqn);
synchronize_irq(priv->eq_table.eq[cq->vector].irq); spin_lock(&cq_table->lock);
spin_lock_irq(&cq_table->lock);
radix_tree_delete(&cq_table->tree, cq->cqn); radix_tree_delete(&cq_table->tree, cq->cqn);
spin_unlock_irq(&cq_table->lock); spin_unlock(&cq_table->lock);
synchronize_irq(priv->eq_table.eq[cq->vector].irq);
if (atomic_dec_and_test(&cq->refcount)) if (atomic_dec_and_test(&cq->refcount))
complete(&cq->free); complete(&cq->free);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment