Commit 3d5f3c54 authored by Jason Gunthorpe

RDMA/mlx5: Rework implicit_mr_get_data

This function is intended to loop across each MTT chunk in the implicit
parent that intersects the range [io_virt, io_virt+bcnt).  But it has a
confusing construction, so:

- Consistently use imr and odp_imr to refer to the implicit parent
  to avoid confusion with the normal mr and odp of the child
- Directly compute the inclusive start/end indexes by shifting. This
  makes the intent clearer and avoids any errors from unaligned
  values of addr
- Iterate directly over the range of MTT indexes, do not make a loop
  out of goto
- Follow 'success oriented flow', with goto error unwind
- Directly calculate the range of idx's that need update_xlt
- Ensure that any leaf MR added to the interval tree always results in an
  update to the XLT

Link: https://lore.kernel.org/r/20191009160934.3143-6-jgg@ziepe.ca
Reviewed-by: Artemy Kovalyov <artemyko@mellanox.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
parent 74bddb36
......@@ -479,78 +479,93 @@ static struct mlx5_ib_mr *implicit_mr_alloc(struct ib_pd *pd,
return ERR_PTR(err);
}
static struct ib_umem_odp *implicit_mr_get_data(struct mlx5_ib_mr *mr,
u64 io_virt, size_t bcnt)
static struct mlx5_ib_mr *implicit_get_child_mr(struct mlx5_ib_mr *imr,
unsigned long idx)
{
struct mlx5_ib_dev *dev = to_mdev(mr->ibmr.pd->device);
struct ib_umem_odp *odp, *result = NULL;
struct ib_umem_odp *odp_mr = to_ib_umem_odp(mr->umem);
u64 addr = io_virt & MLX5_IMR_MTT_MASK;
int nentries = 0, start_idx = 0, ret;
struct ib_umem_odp *odp;
struct mlx5_ib_mr *mtt;
mutex_lock(&odp_mr->umem_mutex);
odp = odp_lookup(addr, 1, mr);
mlx5_ib_dbg(dev, "io_virt:%llx bcnt:%zx addr:%llx odp:%p\n",
io_virt, bcnt, addr, odp);
next_mr:
if (likely(odp)) {
if (nentries)
nentries++;
} else {
odp = ib_umem_odp_alloc_child(odp_mr, addr, MLX5_IMR_MTT_SIZE);
if (IS_ERR(odp)) {
mutex_unlock(&odp_mr->umem_mutex);
odp = ib_umem_odp_alloc_child(to_ib_umem_odp(imr->umem),
idx * MLX5_IMR_MTT_SIZE,
MLX5_IMR_MTT_SIZE);
if (IS_ERR(odp))
return ERR_CAST(odp);
}
mtt = implicit_mr_alloc(mr->ibmr.pd, odp, 0,
mr->access_flags);
mtt = implicit_mr_alloc(imr->ibmr.pd, odp, 0, imr->access_flags);
if (IS_ERR(mtt)) {
mutex_unlock(&odp_mr->umem_mutex);
ib_umem_odp_release(odp);
return ERR_CAST(mtt);
return mtt;
}
odp->private = mtt;
mtt->umem = &odp->umem;
mtt->mmkey.iova = addr;
mtt->parent = mr;
mtt->mmkey.iova = idx * MLX5_IMR_MTT_SIZE;
mtt->parent = imr;
INIT_WORK(&odp->work, mr_leaf_free_action);
xa_store(&dev->odp_mkeys, mlx5_base_mkey(mtt->mmkey.key),
xa_store(&mtt->dev->odp_mkeys, mlx5_base_mkey(mtt->mmkey.key),
&mtt->mmkey, GFP_ATOMIC);
return mtt;
}
if (!nentries)
start_idx = addr >> MLX5_IMR_MTT_SHIFT;
nentries++;
static struct ib_umem_odp *implicit_mr_get_data(struct mlx5_ib_mr *imr,
u64 io_virt, size_t bcnt)
{
struct ib_umem_odp *odp_imr = to_ib_umem_odp(imr->umem);
unsigned long end_idx = (io_virt + bcnt - 1) >> MLX5_IMR_MTT_SHIFT;
unsigned long idx = io_virt >> MLX5_IMR_MTT_SHIFT;
unsigned long inv_start_idx = end_idx + 1;
unsigned long inv_len = 0;
struct ib_umem_odp *result = NULL;
struct ib_umem_odp *odp;
int ret;
mutex_lock(&odp_imr->umem_mutex);
odp = odp_lookup(idx * MLX5_IMR_MTT_SIZE, 1, imr);
for (idx = idx; idx <= end_idx; idx++) {
if (unlikely(!odp)) {
struct mlx5_ib_mr *mtt;
mtt = implicit_get_child_mr(imr, idx);
if (IS_ERR(mtt)) {
result = ERR_CAST(mtt);
goto out;
}
odp = to_ib_umem_odp(mtt->umem);
inv_start_idx = min(inv_start_idx, idx);
inv_len = idx - inv_start_idx + 1;
}
/* Return first odp if region not covered by single one */
if (likely(!result))
result = odp;
addr += MLX5_IMR_MTT_SIZE;
if (unlikely(addr < io_virt + bcnt)) {
odp = odp_next(odp);
if (odp && ib_umem_start(odp) != addr)
if (odp && ib_umem_start(odp) != idx * MLX5_IMR_MTT_SIZE)
odp = NULL;
goto next_mr;
}
if (unlikely(nentries)) {
ret = mlx5_ib_update_xlt(mr, start_idx, nentries, 0,
/*
* Any time the children in the interval tree are changed we must
* perform an update of the xlt before exiting to ensure the HW and
* the tree remains synchronized.
*/
out:
if (likely(!inv_len))
goto out_unlock;
ret = mlx5_ib_update_xlt(imr, inv_start_idx, inv_len, 0,
MLX5_IB_UPD_XLT_INDIRECT |
MLX5_IB_UPD_XLT_ATOMIC);
if (ret) {
mlx5_ib_err(dev, "Failed to update PAS\n");
mlx5_ib_err(to_mdev(imr->ibmr.pd->device),
"Failed to update PAS\n");
result = ERR_PTR(ret);
}
goto out_unlock;
}
mutex_unlock(&odp_mr->umem_mutex);
out_unlock:
mutex_unlock(&odp_imr->umem_mutex);
return result;
}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment