Commit 34a255e6 authored by Nicolin Chen, committed by Alex Williamson

vfio: Replace phys_pfn with pages for vfio_pin_pages()

Most of the callers of vfio_pin_pages() want "struct page *" and the
low-level mm code to pin pages returns a list of "struct page *" too.
So there's no gain in converting "struct page *" to PFN in between.

Replace the output parameter "phys_pfn" list with a "pages" list, to
simplify callers. This also allows us to replace the vfio_iommu_type1
implementation with a more efficient one.

And drop the pfn_valid check in the gvt code, as there is no need to
do such a check on a page-backed struct page pointer.

For now, also update vfio_iommu_type1 to fit this new parameter too.

Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Kirti Wankhede <kwankhede@nvidia.com>
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Reviewed-by: Kevin Tian <kevin.tian@intel.com>
Acked-by: Eric Farman <farman@linux.ibm.com>
Tested-by: Terrence Xu <terrence.xu@intel.com>
Tested-by: Eric Farman <farman@linux.ibm.com>
Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
Link: https://lore.kernel.org/r/20220723020256.30081-11-nicolinc@nvidia.com
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
parent c2863feb
...@@ -263,7 +263,7 @@ The following APIs are provided for translating user pfn to host pfn in a VFIO ...@@ -263,7 +263,7 @@ The following APIs are provided for translating user pfn to host pfn in a VFIO
driver:: driver::
int vfio_pin_pages(struct vfio_device *device, dma_addr_t iova, int vfio_pin_pages(struct vfio_device *device, dma_addr_t iova,
int npage, int prot, unsigned long *phys_pfn); int npage, int prot, struct page **pages);
void vfio_unpin_pages(struct vfio_device *device, dma_addr_t iova, void vfio_unpin_pages(struct vfio_device *device, dma_addr_t iova,
int npage); int npage);
......
...@@ -240,7 +240,7 @@ static int gvt_pin_guest_page(struct intel_vgpu *vgpu, unsigned long gfn, ...@@ -240,7 +240,7 @@ static int gvt_pin_guest_page(struct intel_vgpu *vgpu, unsigned long gfn,
unsigned long size, struct page **page) unsigned long size, struct page **page)
{ {
int total_pages = DIV_ROUND_UP(size, PAGE_SIZE); int total_pages = DIV_ROUND_UP(size, PAGE_SIZE);
unsigned long base_pfn = 0; struct page *base_page = NULL;
int npage; int npage;
int ret; int ret;
...@@ -250,26 +250,19 @@ static int gvt_pin_guest_page(struct intel_vgpu *vgpu, unsigned long gfn, ...@@ -250,26 +250,19 @@ static int gvt_pin_guest_page(struct intel_vgpu *vgpu, unsigned long gfn,
*/ */
for (npage = 0; npage < total_pages; npage++) { for (npage = 0; npage < total_pages; npage++) {
dma_addr_t cur_iova = (gfn + npage) << PAGE_SHIFT; dma_addr_t cur_iova = (gfn + npage) << PAGE_SHIFT;
unsigned long pfn; struct page *cur_page;
ret = vfio_pin_pages(&vgpu->vfio_device, cur_iova, 1, ret = vfio_pin_pages(&vgpu->vfio_device, cur_iova, 1,
IOMMU_READ | IOMMU_WRITE, &pfn); IOMMU_READ | IOMMU_WRITE, &cur_page);
if (ret != 1) { if (ret != 1) {
gvt_vgpu_err("vfio_pin_pages failed for iova %pad, ret %d\n", gvt_vgpu_err("vfio_pin_pages failed for iova %pad, ret %d\n",
&cur_iova, ret); &cur_iova, ret);
goto err; goto err;
} }
if (!pfn_valid(pfn)) {
gvt_vgpu_err("pfn 0x%lx is not mem backed\n", pfn);
npage++;
ret = -EFAULT;
goto err;
}
if (npage == 0) if (npage == 0)
base_pfn = pfn; base_page = cur_page;
else if (base_pfn + npage != pfn) { else if (base_page + npage != cur_page) {
gvt_vgpu_err("The pages are not continuous\n"); gvt_vgpu_err("The pages are not continuous\n");
ret = -EINVAL; ret = -EINVAL;
npage++; npage++;
...@@ -277,7 +270,7 @@ static int gvt_pin_guest_page(struct intel_vgpu *vgpu, unsigned long gfn, ...@@ -277,7 +270,7 @@ static int gvt_pin_guest_page(struct intel_vgpu *vgpu, unsigned long gfn,
} }
} }
*page = pfn_to_page(base_pfn); *page = base_page;
return 0; return 0;
err: err:
gvt_unpin_guest_page(vgpu, gfn, npage * PAGE_SIZE); gvt_unpin_guest_page(vgpu, gfn, npage * PAGE_SIZE);
......
...@@ -22,8 +22,8 @@ ...@@ -22,8 +22,8 @@
struct page_array { struct page_array {
/* Array that stores pages need to pin. */ /* Array that stores pages need to pin. */
dma_addr_t *pa_iova; dma_addr_t *pa_iova;
/* Array that receives PFNs of the pages pinned. */ /* Array that receives the pinned pages. */
unsigned long *pa_pfn; struct page **pa_page;
/* Number of pages pinned from @pa_iova. */ /* Number of pages pinned from @pa_iova. */
int pa_nr; int pa_nr;
}; };
...@@ -68,19 +68,19 @@ static int page_array_alloc(struct page_array *pa, u64 iova, unsigned int len) ...@@ -68,19 +68,19 @@ static int page_array_alloc(struct page_array *pa, u64 iova, unsigned int len)
return -EINVAL; return -EINVAL;
pa->pa_iova = kcalloc(pa->pa_nr, pa->pa_iova = kcalloc(pa->pa_nr,
sizeof(*pa->pa_iova) + sizeof(*pa->pa_pfn), sizeof(*pa->pa_iova) + sizeof(*pa->pa_page),
GFP_KERNEL); GFP_KERNEL);
if (unlikely(!pa->pa_iova)) { if (unlikely(!pa->pa_iova)) {
pa->pa_nr = 0; pa->pa_nr = 0;
return -ENOMEM; return -ENOMEM;
} }
pa->pa_pfn = (unsigned long *)&pa->pa_iova[pa->pa_nr]; pa->pa_page = (struct page **)&pa->pa_iova[pa->pa_nr];
pa->pa_iova[0] = iova; pa->pa_iova[0] = iova;
pa->pa_pfn[0] = -1ULL; pa->pa_page[0] = NULL;
for (i = 1; i < pa->pa_nr; i++) { for (i = 1; i < pa->pa_nr; i++) {
pa->pa_iova[i] = pa->pa_iova[i - 1] + PAGE_SIZE; pa->pa_iova[i] = pa->pa_iova[i - 1] + PAGE_SIZE;
pa->pa_pfn[i] = -1ULL; pa->pa_page[i] = NULL;
} }
return 0; return 0;
...@@ -144,7 +144,7 @@ static int page_array_pin(struct page_array *pa, struct vfio_device *vdev) ...@@ -144,7 +144,7 @@ static int page_array_pin(struct page_array *pa, struct vfio_device *vdev)
ret = vfio_pin_pages(vdev, *first, npage, ret = vfio_pin_pages(vdev, *first, npage,
IOMMU_READ | IOMMU_WRITE, IOMMU_READ | IOMMU_WRITE,
&pa->pa_pfn[pinned]); &pa->pa_page[pinned]);
if (ret < 0) { if (ret < 0) {
goto err_out; goto err_out;
} else if (ret > 0 && ret != npage) { } else if (ret > 0 && ret != npage) {
...@@ -195,7 +195,7 @@ static inline void page_array_idal_create_words(struct page_array *pa, ...@@ -195,7 +195,7 @@ static inline void page_array_idal_create_words(struct page_array *pa,
*/ */
for (i = 0; i < pa->pa_nr; i++) for (i = 0; i < pa->pa_nr; i++)
idaws[i] = pa->pa_pfn[i] << PAGE_SHIFT; idaws[i] = page_to_phys(pa->pa_page[i]);
/* Adjust the first IDAW, since it may not start on a page boundary */ /* Adjust the first IDAW, since it may not start on a page boundary */
idaws[0] += pa->pa_iova[0] & (PAGE_SIZE - 1); idaws[0] += pa->pa_iova[0] & (PAGE_SIZE - 1);
...@@ -246,8 +246,7 @@ static long copy_from_iova(struct vfio_device *vdev, void *to, u64 iova, ...@@ -246,8 +246,7 @@ static long copy_from_iova(struct vfio_device *vdev, void *to, u64 iova,
l = n; l = n;
for (i = 0; i < pa.pa_nr; i++) { for (i = 0; i < pa.pa_nr; i++) {
struct page *page = pfn_to_page(pa.pa_pfn[i]); void *from = kmap_local_page(pa.pa_page[i]);
void *from = kmap_local_page(page);
m = PAGE_SIZE; m = PAGE_SIZE;
if (i == 0) { if (i == 0) {
......
...@@ -234,9 +234,9 @@ static struct ap_queue_status vfio_ap_irq_enable(struct vfio_ap_queue *q, ...@@ -234,9 +234,9 @@ static struct ap_queue_status vfio_ap_irq_enable(struct vfio_ap_queue *q,
struct ap_qirq_ctrl aqic_gisa = {}; struct ap_qirq_ctrl aqic_gisa = {};
struct ap_queue_status status = {}; struct ap_queue_status status = {};
struct kvm_s390_gisa *gisa; struct kvm_s390_gisa *gisa;
struct page *h_page;
int nisc; int nisc;
struct kvm *kvm; struct kvm *kvm;
unsigned long h_pfn;
phys_addr_t h_nib; phys_addr_t h_nib;
dma_addr_t nib; dma_addr_t nib;
int ret; int ret;
...@@ -251,7 +251,7 @@ static struct ap_queue_status vfio_ap_irq_enable(struct vfio_ap_queue *q, ...@@ -251,7 +251,7 @@ static struct ap_queue_status vfio_ap_irq_enable(struct vfio_ap_queue *q,
} }
ret = vfio_pin_pages(&q->matrix_mdev->vdev, nib, 1, ret = vfio_pin_pages(&q->matrix_mdev->vdev, nib, 1,
IOMMU_READ | IOMMU_WRITE, &h_pfn); IOMMU_READ | IOMMU_WRITE, &h_page);
switch (ret) { switch (ret) {
case 1: case 1:
break; break;
...@@ -267,7 +267,7 @@ static struct ap_queue_status vfio_ap_irq_enable(struct vfio_ap_queue *q, ...@@ -267,7 +267,7 @@ static struct ap_queue_status vfio_ap_irq_enable(struct vfio_ap_queue *q,
kvm = q->matrix_mdev->kvm; kvm = q->matrix_mdev->kvm;
gisa = kvm->arch.gisa_int.origin; gisa = kvm->arch.gisa_int.origin;
h_nib = (h_pfn << PAGE_SHIFT) | (nib & ~PAGE_MASK); h_nib = page_to_phys(h_page) | (nib & ~PAGE_MASK);
aqic_gisa.gisc = isc; aqic_gisa.gisc = isc;
nisc = kvm_s390_gisc_register(kvm, isc); nisc = kvm_s390_gisc_register(kvm, isc);
......
...@@ -1941,18 +1941,18 @@ EXPORT_SYMBOL(vfio_set_irqs_validate_and_prepare); ...@@ -1941,18 +1941,18 @@ EXPORT_SYMBOL(vfio_set_irqs_validate_and_prepare);
* @npage [in] : count of pages to be pinned. This count should not * @npage [in] : count of pages to be pinned. This count should not
* be greater than VFIO_PIN_PAGES_MAX_ENTRIES. * be greater than VFIO_PIN_PAGES_MAX_ENTRIES.
* @prot [in] : protection flags * @prot [in] : protection flags
* @phys_pfn[out]: array of host PFNs * @pages[out] : array of host pages
* Return error or number of pages pinned. * Return error or number of pages pinned.
*/ */
int vfio_pin_pages(struct vfio_device *device, dma_addr_t iova, int vfio_pin_pages(struct vfio_device *device, dma_addr_t iova,
int npage, int prot, unsigned long *phys_pfn) int npage, int prot, struct page **pages)
{ {
struct vfio_container *container; struct vfio_container *container;
struct vfio_group *group = device->group; struct vfio_group *group = device->group;
struct vfio_iommu_driver *driver; struct vfio_iommu_driver *driver;
int ret; int ret;
if (!phys_pfn || !npage || !vfio_assert_device_open(device)) if (!pages || !npage || !vfio_assert_device_open(device))
return -EINVAL; return -EINVAL;
if (npage > VFIO_PIN_PAGES_MAX_ENTRIES) if (npage > VFIO_PIN_PAGES_MAX_ENTRIES)
...@@ -1967,7 +1967,7 @@ int vfio_pin_pages(struct vfio_device *device, dma_addr_t iova, ...@@ -1967,7 +1967,7 @@ int vfio_pin_pages(struct vfio_device *device, dma_addr_t iova,
if (likely(driver && driver->ops->pin_pages)) if (likely(driver && driver->ops->pin_pages))
ret = driver->ops->pin_pages(container->iommu_data, ret = driver->ops->pin_pages(container->iommu_data,
group->iommu_group, iova, group->iommu_group, iova,
npage, prot, phys_pfn); npage, prot, pages);
else else
ret = -ENOTTY; ret = -ENOTTY;
......
...@@ -52,7 +52,7 @@ struct vfio_iommu_driver_ops { ...@@ -52,7 +52,7 @@ struct vfio_iommu_driver_ops {
struct iommu_group *group, struct iommu_group *group,
dma_addr_t user_iova, dma_addr_t user_iova,
int npage, int prot, int npage, int prot,
unsigned long *phys_pfn); struct page **pages);
void (*unpin_pages)(void *iommu_data, void (*unpin_pages)(void *iommu_data,
dma_addr_t user_iova, int npage); dma_addr_t user_iova, int npage);
void (*register_device)(void *iommu_data, void (*register_device)(void *iommu_data,
......
...@@ -831,7 +831,7 @@ static int vfio_iommu_type1_pin_pages(void *iommu_data, ...@@ -831,7 +831,7 @@ static int vfio_iommu_type1_pin_pages(void *iommu_data,
struct iommu_group *iommu_group, struct iommu_group *iommu_group,
dma_addr_t user_iova, dma_addr_t user_iova,
int npage, int prot, int npage, int prot,
unsigned long *phys_pfn) struct page **pages)
{ {
struct vfio_iommu *iommu = iommu_data; struct vfio_iommu *iommu = iommu_data;
struct vfio_iommu_group *group; struct vfio_iommu_group *group;
...@@ -841,7 +841,7 @@ static int vfio_iommu_type1_pin_pages(void *iommu_data, ...@@ -841,7 +841,7 @@ static int vfio_iommu_type1_pin_pages(void *iommu_data,
bool do_accounting; bool do_accounting;
dma_addr_t iova; dma_addr_t iova;
if (!iommu || !phys_pfn) if (!iommu || !pages)
return -EINVAL; return -EINVAL;
/* Supported for v2 version only */ /* Supported for v2 version only */
...@@ -880,6 +880,7 @@ static int vfio_iommu_type1_pin_pages(void *iommu_data, ...@@ -880,6 +880,7 @@ static int vfio_iommu_type1_pin_pages(void *iommu_data,
do_accounting = list_empty(&iommu->domain_list); do_accounting = list_empty(&iommu->domain_list);
for (i = 0; i < npage; i++) { for (i = 0; i < npage; i++) {
unsigned long phys_pfn;
struct vfio_pfn *vpfn; struct vfio_pfn *vpfn;
iova = user_iova + PAGE_SIZE * i; iova = user_iova + PAGE_SIZE * i;
...@@ -896,23 +897,25 @@ static int vfio_iommu_type1_pin_pages(void *iommu_data, ...@@ -896,23 +897,25 @@ static int vfio_iommu_type1_pin_pages(void *iommu_data,
vpfn = vfio_iova_get_vfio_pfn(dma, iova); vpfn = vfio_iova_get_vfio_pfn(dma, iova);
if (vpfn) { if (vpfn) {
phys_pfn[i] = vpfn->pfn; pages[i] = pfn_to_page(vpfn->pfn);
continue; continue;
} }
remote_vaddr = dma->vaddr + (iova - dma->iova); remote_vaddr = dma->vaddr + (iova - dma->iova);
ret = vfio_pin_page_external(dma, remote_vaddr, &phys_pfn[i], ret = vfio_pin_page_external(dma, remote_vaddr, &phys_pfn,
do_accounting); do_accounting);
if (ret) if (ret)
goto pin_unwind; goto pin_unwind;
ret = vfio_add_to_pfn_list(dma, iova, phys_pfn[i]); ret = vfio_add_to_pfn_list(dma, iova, phys_pfn);
if (ret) { if (ret) {
if (put_pfn(phys_pfn[i], dma->prot) && do_accounting) if (put_pfn(phys_pfn, dma->prot) && do_accounting)
vfio_lock_acct(dma, -1, true); vfio_lock_acct(dma, -1, true);
goto pin_unwind; goto pin_unwind;
} }
pages[i] = pfn_to_page(phys_pfn);
if (iommu->dirty_page_tracking) { if (iommu->dirty_page_tracking) {
unsigned long pgshift = __ffs(iommu->pgsize_bitmap); unsigned long pgshift = __ffs(iommu->pgsize_bitmap);
...@@ -935,14 +938,14 @@ static int vfio_iommu_type1_pin_pages(void *iommu_data, ...@@ -935,14 +938,14 @@ static int vfio_iommu_type1_pin_pages(void *iommu_data,
goto pin_done; goto pin_done;
pin_unwind: pin_unwind:
phys_pfn[i] = 0; pages[i] = NULL;
for (j = 0; j < i; j++) { for (j = 0; j < i; j++) {
dma_addr_t iova; dma_addr_t iova;
iova = user_iova + PAGE_SIZE * j; iova = user_iova + PAGE_SIZE * j;
dma = vfio_find_dma(iommu, iova, PAGE_SIZE); dma = vfio_find_dma(iommu, iova, PAGE_SIZE);
vfio_unpin_page_external(dma, iova, do_accounting); vfio_unpin_page_external(dma, iova, do_accounting);
phys_pfn[j] = 0; pages[j] = NULL;
} }
pin_done: pin_done:
mutex_unlock(&iommu->lock); mutex_unlock(&iommu->lock);
......
...@@ -162,7 +162,7 @@ bool vfio_file_has_dev(struct file *file, struct vfio_device *device); ...@@ -162,7 +162,7 @@ bool vfio_file_has_dev(struct file *file, struct vfio_device *device);
#define VFIO_PIN_PAGES_MAX_ENTRIES (PAGE_SIZE/sizeof(unsigned long)) #define VFIO_PIN_PAGES_MAX_ENTRIES (PAGE_SIZE/sizeof(unsigned long))
int vfio_pin_pages(struct vfio_device *device, dma_addr_t iova, int vfio_pin_pages(struct vfio_device *device, dma_addr_t iova,
int npage, int prot, unsigned long *phys_pfn); int npage, int prot, struct page **pages);
void vfio_unpin_pages(struct vfio_device *device, dma_addr_t iova, int npage); void vfio_unpin_pages(struct vfio_device *device, dma_addr_t iova, int npage);
int vfio_dma_rw(struct vfio_device *device, dma_addr_t iova, int vfio_dma_rw(struct vfio_device *device, dma_addr_t iova,
void *data, size_t len, bool write); void *data, size_t len, bool write);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment