Commit 36f30e48, authored by Yishai Hadas, committed by Jason Gunthorpe

IB/core: Improve ODP to use hmm_range_fault()

Switch to hmm_range_fault() instead of get_user_pages_remote() to improve
performance in several respects:
- Dropping the need to allocate and free memory to hold its output

- There is no longer any need to call put_page() to unpin the pages

- The logic to detect contiguous pages is done based on the returned
  order, no need to run per page and evaluate.

In addition, moving to hmm_range_fault() makes it possible to reduce page
faults in the system by using its snapshot mode; this will be introduced in
the next patches of this series.

As part of this, clean up some flows and use the data structures required
to work with hmm_range_fault().

Link: https://lore.kernel.org/r/20200930163828.1336747-2-leon@kernel.org
Signed-off-by: Yishai Hadas <yishaih@nvidia.com>
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
parent 2ee9bf34
......@@ -48,6 +48,7 @@ config INFINIBAND_ON_DEMAND_PAGING
depends on INFINIBAND_USER_MEM
select MMU_NOTIFIER
select INTERVAL_TREE
select HMM_MIRROR
default y
help
On demand paging support for the InfiniBand subsystem.
......
This diff is collapsed.
......@@ -671,7 +671,6 @@ static int pagefault_real_mr(struct mlx5_ib_mr *mr, struct ib_umem_odp *odp,
{
int page_shift, ret, np;
bool downgrade = flags & MLX5_PF_FLAGS_DOWNGRADE;
unsigned long current_seq;
u64 access_mask;
u64 start_idx;
......@@ -682,25 +681,16 @@ static int pagefault_real_mr(struct mlx5_ib_mr *mr, struct ib_umem_odp *odp,
if (odp->umem.writable && !downgrade)
access_mask |= ODP_WRITE_ALLOWED_BIT;
current_seq = mmu_interval_read_begin(&odp->notifier);
np = ib_umem_odp_map_dma_pages(odp, user_va, bcnt, access_mask,
current_seq);
np = ib_umem_odp_map_dma_and_lock(odp, user_va, bcnt, access_mask);
if (np < 0)
return np;
mutex_lock(&odp->umem_mutex);
if (!mmu_interval_read_retry(&odp->notifier, current_seq)) {
/*
* No need to check whether the MTTs really belong to
* this MR, since ib_umem_odp_map_dma_pages already
* checks this.
*/
ret = mlx5_ib_update_xlt(mr, start_idx, np,
page_shift, MLX5_IB_UPD_XLT_ATOMIC);
} else {
ret = -EAGAIN;
}
/*
* No need to check whether the MTTs really belong to this MR, since
* ib_umem_odp_map_dma_and_lock already checks this.
*/
ret = mlx5_ib_update_xlt(mr, start_idx, np, page_shift,
MLX5_IB_UPD_XLT_ATOMIC);
mutex_unlock(&odp->umem_mutex);
if (ret < 0) {
......
......@@ -14,17 +14,13 @@ struct ib_umem_odp {
struct mmu_interval_notifier notifier;
struct pid *tgid;
/* An array of the pfns included in the on-demand paging umem. */
unsigned long *pfn_list;
/*
* An array of the pages included in the on-demand paging umem.
* Indices of pages that are currently not mapped into the device will
* contain NULL.
*/
struct page **page_list;
/*
* An array of the same size as page_list, with DMA addresses mapped
* for pages the pages in page_list. The lower two bits designate
* access permissions. See ODP_READ_ALLOWED_BIT and
* ODP_WRITE_ALLOWED_BIT.
* An array with DMA addresses mapped for pfns in pfn_list.
* The lower two bits designate access permissions.
* See ODP_READ_ALLOWED_BIT and ODP_WRITE_ALLOWED_BIT.
*/
dma_addr_t *dma_list;
/*
......@@ -97,9 +93,8 @@ ib_umem_odp_alloc_child(struct ib_umem_odp *root_umem, unsigned long addr,
const struct mmu_interval_notifier_ops *ops);
void ib_umem_odp_release(struct ib_umem_odp *umem_odp);
int ib_umem_odp_map_dma_pages(struct ib_umem_odp *umem_odp, u64 start_offset,
u64 bcnt, u64 access_mask,
unsigned long current_seq);
int ib_umem_odp_map_dma_and_lock(struct ib_umem_odp *umem_odp, u64 start_offset,
u64 bcnt, u64 access_mask);
void ib_umem_odp_unmap_dma_pages(struct ib_umem_odp *umem_odp, u64 start_offset,
u64 bound);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment