Commit de5ed007 authored by Artemy Kovalyov, committed by Jason Gunthorpe

IB/mlx5: Fix implicit ODP race

The following race may occur because of the call_srcu() and the placement
of the synchronize_srcu() vs. the xa_erase():

CPU0				   CPU1

mlx5_ib_free_implicit_mr:	   destroy_unused_implicit_child_mr:
 xa_erase(odp_mkeys)
 synchronize_srcu()
				    xa_lock(implicit_children)
				    if (still in xarray)
				       atomic_inc()
				       call_srcu()
				    xa_unlock(implicit_children)
 xa_erase(implicit_children):
   xa_lock(implicit_children)
   __xa_erase()
   xa_unlock(implicit_children)

 flush_workqueue()
				   [..]
				    free_implicit_child_mr_rcu:
				     (via call_srcu)
				      queue_work()

 WARN_ON(atomic_read())
				   [..]
				    free_implicit_child_mr_work:
				     (via wq)
				      free_implicit_child_mr()
 mlx5_mr_cache_invalidate()
				     mlx5_ib_update_xlt() <-- UMR QP fail
				     atomic_dec()

The wait_event() solves the race because it blocks until
free_implicit_child_mr_work() completes.
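
For illustration only, here is a minimal userspace sketch of the pattern the
fix relies on: a reference count of outstanding deferred work, a teardown path
that sleeps until the count reaches zero, and a wakeup from the last worker.
It uses C11 atomics and a pthread condition variable in place of the kernel's
atomic_t and waitqueue; the names deferred_ctx, worker_done() and
teardown_wait() are invented for this sketch and do not exist in the driver.

    #include <pthread.h>
    #include <stdatomic.h>

    /*
     * Userspace analogue of the fix: every deferred work item holds a
     * reference in num_deferred_work; teardown sleeps until the last
     * worker drops its reference and wakes it up.
     */
    struct deferred_ctx {
            atomic_int num_deferred_work;    /* like imr->num_deferred_work */
            pthread_mutex_t lock;            /* protects the wait below */
            pthread_cond_t q_deferred_work;  /* like imr->q_deferred_work */
    };

    /* Worker side: analogue of atomic_dec_and_test() + wake_up(). */
    static void worker_done(struct deferred_ctx *ctx)
    {
            if (atomic_fetch_sub(&ctx->num_deferred_work, 1) == 1) {
                    /* Last reference dropped; signal under the lock so the
                     * wakeup cannot be missed by a waiter about to sleep. */
                    pthread_mutex_lock(&ctx->lock);
                    pthread_cond_broadcast(&ctx->q_deferred_work);
                    pthread_mutex_unlock(&ctx->lock);
            }
    }

    /* Teardown side: analogue of
     * wait_event(q_deferred_work, !atomic_read(&num_deferred_work)). */
    static void teardown_wait(struct deferred_ctx *ctx)
    {
            pthread_mutex_lock(&ctx->lock);
            while (atomic_load(&ctx->num_deferred_work) != 0)
                    pthread_cond_wait(&ctx->q_deferred_work, &ctx->lock);
            pthread_mutex_unlock(&ctx->lock);
    }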

Fixes: 5256edcb ("RDMA/mlx5: Rework implicit ODP destroy")
Link: https://lore.kernel.org/r/20200227113918.94432-1-leon@kernel.org
Signed-off-by: Artemy Kovalyov <artemyko@mellanox.com>
Reviewed-by: Jason Gunthorpe <jgg@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
parent 817a68a6
@@ -636,6 +636,7 @@ struct mlx5_ib_mr {
 
 	/* For ODP and implicit */
 	atomic_t		num_deferred_work;
+	wait_queue_head_t	q_deferred_work;
 	struct xarray		implicit_children;
 	union {
 		struct rcu_head rcu;
...
@@ -235,7 +235,8 @@ static void free_implicit_child_mr(struct mlx5_ib_mr *mr, bool need_imr_xlt)
 	mr->parent = NULL;
 	mlx5_mr_cache_free(mr->dev, mr);
 	ib_umem_odp_release(odp);
-	atomic_dec(&imr->num_deferred_work);
+	if (atomic_dec_and_test(&imr->num_deferred_work))
+		wake_up(&imr->q_deferred_work);
 }
 
 static void free_implicit_child_mr_work(struct work_struct *work)
@@ -554,6 +555,7 @@ struct mlx5_ib_mr *mlx5_ib_alloc_implicit_mr(struct mlx5_ib_pd *pd,
 	imr->umem = &umem_odp->umem;
 	imr->is_odp_implicit = true;
 	atomic_set(&imr->num_deferred_work, 0);
+	init_waitqueue_head(&imr->q_deferred_work);
 	xa_init(&imr->implicit_children);
 
 	err = mlx5_ib_update_xlt(imr, 0,
@@ -611,10 +613,7 @@ void mlx5_ib_free_implicit_mr(struct mlx5_ib_mr *imr)
 	 * under xa_lock while the child is in the xarray. Thus at this point
 	 * it is only decreasing, and all work holding it is now on the wq.
 	 */
-	if (atomic_read(&imr->num_deferred_work)) {
-		flush_workqueue(system_unbound_wq);
-		WARN_ON(atomic_read(&imr->num_deferred_work));
-	}
+	wait_event(imr->q_deferred_work, !atomic_read(&imr->num_deferred_work));
 
 	/*
 	 * Fence the imr before we destroy the children. This allows us to
@@ -645,10 +644,7 @@ void mlx5_ib_fence_odp_mr(struct mlx5_ib_mr *mr)
 	/* Wait for all running page-fault handlers to finish. */
 	synchronize_srcu(&mr->dev->odp_srcu);
 
-	if (atomic_read(&mr->num_deferred_work)) {
-		flush_workqueue(system_unbound_wq);
-		WARN_ON(atomic_read(&mr->num_deferred_work));
-	}
+	wait_event(mr->q_deferred_work, !atomic_read(&mr->num_deferred_work));
 
 	dma_fence_odp_mr(mr);
 }
@@ -1720,7 +1716,8 @@ static void destroy_prefetch_work(struct prefetch_mr_work *work)
 	u32 i;
 
 	for (i = 0; i < work->num_sge; ++i)
-		atomic_dec(&work->frags[i].mr->num_deferred_work);
+		if (atomic_dec_and_test(&work->frags[i].mr->num_deferred_work))
+			wake_up(&work->frags[i].mr->q_deferred_work);
 	kvfree(work);
 }
...