Commit 4d83de6d authored by Daniel Jordan's avatar Daniel Jordan Committed by Alex Williamson

vfio/type1: Batch page pinning

Pinning one 4K page at a time is inefficient, so do it in batches of 512
instead.  This is just an optimization with no functional change
intended, and in particular the driver still calls iommu_map() with the
largest physically contiguous range possible.

Add two fields in vfio_batch to remember where to start between calls to
vfio_pin_pages_remote(), and use vfio_batch_unpin() to handle remaining
pages in the batch in case of error.

qemu pins pages for guests around 8% faster on my test system, a
two-node Broadwell server with 128G memory per node.  The qemu process
was bound to one node with its allocations constrained there as well.

                             base               test
          guest              ----------------   ----------------
       mem (GB)   speedup    avg sec    (std)   avg sec    (std)
              1      7.4%       0.61   (0.00)      0.56   (0.00)
              2      8.3%       0.93   (0.00)      0.85   (0.00)
              4      8.4%       1.46   (0.00)      1.34   (0.00)
              8      8.6%       2.54   (0.01)      2.32   (0.00)
             16      8.3%       4.66   (0.00)      4.27   (0.01)
             32      8.3%       8.94   (0.01)      8.20   (0.01)
             64      8.2%      17.47   (0.01)     16.04   (0.03)
            120      8.5%      32.45   (0.13)     29.69   (0.01)

perf diff confirms less time spent in pup.  Here are the top ten
functions:

             Baseline  Delta Abs  Symbol

               78.63%     +6.64%  clear_page_erms
                1.50%     -1.50%  __gup_longterm_locked
                1.27%     -0.78%  __get_user_pages
                          +0.76%  kvm_zap_rmapp.constprop.0
                0.54%     -0.53%  vmacache_find
                0.55%     -0.51%  get_pfnblock_flags_mask
                0.48%     -0.48%  __get_user_pages_remote
                          +0.39%  slot_rmap_walk_next
                          +0.32%  vfio_pin_map_dma
                          +0.26%  kvm_handle_hva_range
                ...
Suggested-by: default avatarMatthew Wilcox (Oracle) <willy@infradead.org>
Signed-off-by: default avatarDaniel Jordan <daniel.m.jordan@oracle.com>
Signed-off-by: default avatarAlex Williamson <alex.williamson@redhat.com>
parent 4b6c33b3
...@@ -107,6 +107,8 @@ struct vfio_batch { ...@@ -107,6 +107,8 @@ struct vfio_batch {
struct page **pages; /* for pin_user_pages_remote */ struct page **pages; /* for pin_user_pages_remote */
struct page *fallback_page; /* if pages alloc fails */ struct page *fallback_page; /* if pages alloc fails */
int capacity; /* length of pages array */ int capacity; /* length of pages array */
int size; /* of batch currently */
int offset; /* of next entry in pages */
}; };
struct vfio_group { struct vfio_group {
...@@ -469,6 +471,9 @@ static int put_pfn(unsigned long pfn, int prot) ...@@ -469,6 +471,9 @@ static int put_pfn(unsigned long pfn, int prot)
static void vfio_batch_init(struct vfio_batch *batch) static void vfio_batch_init(struct vfio_batch *batch)
{ {
batch->size = 0;
batch->offset = 0;
if (unlikely(disable_hugepages)) if (unlikely(disable_hugepages))
goto fallback; goto fallback;
...@@ -484,6 +489,17 @@ static void vfio_batch_init(struct vfio_batch *batch) ...@@ -484,6 +489,17 @@ static void vfio_batch_init(struct vfio_batch *batch)
batch->capacity = 1; batch->capacity = 1;
} }
static void vfio_batch_unpin(struct vfio_batch *batch, struct vfio_dma *dma)
{
while (batch->size) {
unsigned long pfn = page_to_pfn(batch->pages[batch->offset]);
put_pfn(pfn, dma->prot);
batch->offset++;
batch->size--;
}
}
static void vfio_batch_fini(struct vfio_batch *batch) static void vfio_batch_fini(struct vfio_batch *batch)
{ {
if (batch->capacity == VFIO_BATCH_MAX_CAPACITY) if (batch->capacity == VFIO_BATCH_MAX_CAPACITY)
...@@ -635,65 +651,88 @@ static long vfio_pin_pages_remote(struct vfio_dma *dma, unsigned long vaddr, ...@@ -635,65 +651,88 @@ static long vfio_pin_pages_remote(struct vfio_dma *dma, unsigned long vaddr,
long npage, unsigned long *pfn_base, long npage, unsigned long *pfn_base,
unsigned long limit, struct vfio_batch *batch) unsigned long limit, struct vfio_batch *batch)
{ {
unsigned long pfn = 0; unsigned long pfn;
struct mm_struct *mm = current->mm;
long ret, pinned = 0, lock_acct = 0; long ret, pinned = 0, lock_acct = 0;
bool rsvd; bool rsvd;
dma_addr_t iova = vaddr - dma->vaddr + dma->iova; dma_addr_t iova = vaddr - dma->vaddr + dma->iova;
/* This code path is only user initiated */ /* This code path is only user initiated */
if (!current->mm) if (!mm)
return -ENODEV; return -ENODEV;
ret = vaddr_get_pfns(current->mm, vaddr, 1, dma->prot, pfn_base, if (batch->size) {
batch->pages); /* Leftover pages in batch from an earlier call. */
if (ret < 0) *pfn_base = page_to_pfn(batch->pages[batch->offset]);
return ret; pfn = *pfn_base;
rsvd = is_invalid_reserved_pfn(*pfn_base);
pinned++; } else {
rsvd = is_invalid_reserved_pfn(*pfn_base); *pfn_base = 0;
/*
* Reserved pages aren't counted against the user, externally pinned
* pages are already counted against the user.
*/
if (!rsvd && !vfio_find_vpfn(dma, iova)) {
if (!dma->lock_cap && current->mm->locked_vm + 1 > limit) {
put_pfn(*pfn_base, dma->prot);
pr_warn("%s: RLIMIT_MEMLOCK (%ld) exceeded\n", __func__,
limit << PAGE_SHIFT);
return -ENOMEM;
}
lock_acct++;
} }
if (unlikely(disable_hugepages)) while (npage) {
goto out; if (!batch->size) {
/* Empty batch, so refill it. */
long req_pages = min_t(long, npage, batch->capacity);
/* Lock all the consecutive pages from pfn_base */ ret = vaddr_get_pfns(mm, vaddr, req_pages, dma->prot,
for (vaddr += PAGE_SIZE, iova += PAGE_SIZE; pinned < npage; &pfn, batch->pages);
pinned++, vaddr += PAGE_SIZE, iova += PAGE_SIZE) { if (ret < 0)
ret = vaddr_get_pfns(current->mm, vaddr, 1, dma->prot, &pfn, goto unpin_out;
batch->pages);
if (ret < 0)
break;
if (pfn != *pfn_base + pinned || batch->size = ret;
rsvd != is_invalid_reserved_pfn(pfn)) { batch->offset = 0;
put_pfn(pfn, dma->prot);
break; if (!*pfn_base) {
*pfn_base = pfn;
rsvd = is_invalid_reserved_pfn(*pfn_base);
}
} }
if (!rsvd && !vfio_find_vpfn(dma, iova)) { /*
if (!dma->lock_cap && * pfn is preset for the first iteration of this inner loop and
current->mm->locked_vm + lock_acct + 1 > limit) { * updated at the end to handle a VM_PFNMAP pfn. In that case,
put_pfn(pfn, dma->prot); * batch->pages isn't valid (there's no struct page), so allow
pr_warn("%s: RLIMIT_MEMLOCK (%ld) exceeded\n", * batch->pages to be touched only when there's more than one
__func__, limit << PAGE_SHIFT); * pfn to check, which guarantees the pfns are from a
ret = -ENOMEM; * !VM_PFNMAP vma.
goto unpin_out; */
while (true) {
if (pfn != *pfn_base + pinned ||
rsvd != is_invalid_reserved_pfn(pfn))
goto out;
/*
* Reserved pages aren't counted against the user,
* externally pinned pages are already counted against
* the user.
*/
if (!rsvd && !vfio_find_vpfn(dma, iova)) {
if (!dma->lock_cap &&
mm->locked_vm + lock_acct + 1 > limit) {
pr_warn("%s: RLIMIT_MEMLOCK (%ld) exceeded\n",
__func__, limit << PAGE_SHIFT);
ret = -ENOMEM;
goto unpin_out;
}
lock_acct++;
} }
lock_acct++;
pinned++;
npage--;
vaddr += PAGE_SIZE;
iova += PAGE_SIZE;
batch->offset++;
batch->size--;
if (!batch->size)
break;
pfn = page_to_pfn(batch->pages[batch->offset]);
} }
if (unlikely(disable_hugepages))
break;
} }
out: out:
...@@ -701,10 +740,11 @@ static long vfio_pin_pages_remote(struct vfio_dma *dma, unsigned long vaddr, ...@@ -701,10 +740,11 @@ static long vfio_pin_pages_remote(struct vfio_dma *dma, unsigned long vaddr,
unpin_out: unpin_out:
if (ret < 0) { if (ret < 0) {
if (!rsvd) { if (pinned && !rsvd) {
for (pfn = *pfn_base ; pinned ; pfn++, pinned--) for (pfn = *pfn_base ; pinned ; pfn++, pinned--)
put_pfn(pfn, dma->prot); put_pfn(pfn, dma->prot);
} }
vfio_batch_unpin(batch, dma);
return ret; return ret;
} }
...@@ -1463,6 +1503,7 @@ static int vfio_pin_map_dma(struct vfio_iommu *iommu, struct vfio_dma *dma, ...@@ -1463,6 +1503,7 @@ static int vfio_pin_map_dma(struct vfio_iommu *iommu, struct vfio_dma *dma,
if (ret) { if (ret) {
vfio_unpin_pages_remote(dma, iova + dma->size, pfn, vfio_unpin_pages_remote(dma, iova + dma->size, pfn,
npage, true); npage, true);
vfio_batch_unpin(&batch, dma);
break; break;
} }
...@@ -1726,11 +1767,13 @@ static int vfio_iommu_replay(struct vfio_iommu *iommu, ...@@ -1726,11 +1767,13 @@ static int vfio_iommu_replay(struct vfio_iommu *iommu,
ret = iommu_map(domain->domain, iova, phys, ret = iommu_map(domain->domain, iova, phys,
size, dma->prot | domain->prot); size, dma->prot | domain->prot);
if (ret) { if (ret) {
if (!dma->iommu_mapped) if (!dma->iommu_mapped) {
vfio_unpin_pages_remote(dma, iova, vfio_unpin_pages_remote(dma, iova,
phys >> PAGE_SHIFT, phys >> PAGE_SHIFT,
size >> PAGE_SHIFT, size >> PAGE_SHIFT,
true); true);
vfio_batch_unpin(&batch, dma);
}
goto unwind; goto unwind;
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment