Commit 79e542f5, authored by Changbin Du, committed by Zhenyu Wang

drm/i915/kvmgt: Support setting dma map for huge pages

To support huge gtt, we need to support huge pages in kvmgt first.
This patch adds a 'size' param to the intel_gvt_mpt::dma_map_guest_page
API and implements it in kvmgt.

v2: rebase.
Signed-off-by: Changbin Du <changbin.du@intel.com>
Signed-off-by: Zhenyu Wang <zhenyuw@linux.intel.com>
parent eb3a3530
...@@ -1106,7 +1106,7 @@ static int split_64KB_gtt_entry(struct intel_vgpu *vgpu, ...@@ -1106,7 +1106,7 @@ static int split_64KB_gtt_entry(struct intel_vgpu *vgpu,
for (i = 0; i < GTT_64K_PTE_STRIDE; i++) { for (i = 0; i < GTT_64K_PTE_STRIDE; i++) {
ret = intel_gvt_hypervisor_dma_map_guest_page(vgpu, ret = intel_gvt_hypervisor_dma_map_guest_page(vgpu,
start_gfn + i, &dma_addr); start_gfn + i, PAGE_SIZE, &dma_addr);
if (ret) if (ret)
return ret; return ret;
...@@ -1152,7 +1152,7 @@ static int ppgtt_populate_shadow_entry(struct intel_vgpu *vgpu, ...@@ -1152,7 +1152,7 @@ static int ppgtt_populate_shadow_entry(struct intel_vgpu *vgpu,
}; };
/* direct shadow */ /* direct shadow */
ret = intel_gvt_hypervisor_dma_map_guest_page(vgpu, gfn, &dma_addr); ret = intel_gvt_hypervisor_dma_map_guest_page(vgpu, gfn, PAGE_SIZE, &dma_addr);
if (ret) if (ret)
return -ENXIO; return -ENXIO;
...@@ -2080,7 +2080,7 @@ static int emulate_ggtt_mmio_write(struct intel_vgpu *vgpu, unsigned int off, ...@@ -2080,7 +2080,7 @@ static int emulate_ggtt_mmio_write(struct intel_vgpu *vgpu, unsigned int off,
} }
ret = intel_gvt_hypervisor_dma_map_guest_page(vgpu, gfn, ret = intel_gvt_hypervisor_dma_map_guest_page(vgpu, gfn,
&dma_addr); PAGE_SIZE, &dma_addr);
if (ret) { if (ret) {
gvt_vgpu_err("fail to populate guest ggtt entry\n"); gvt_vgpu_err("fail to populate guest ggtt entry\n");
/* guest driver may read/write the entry when partial /* guest driver may read/write the entry when partial
......
...@@ -53,7 +53,7 @@ struct intel_gvt_mpt { ...@@ -53,7 +53,7 @@ struct intel_gvt_mpt {
unsigned long (*gfn_to_mfn)(unsigned long handle, unsigned long gfn); unsigned long (*gfn_to_mfn)(unsigned long handle, unsigned long gfn);
int (*dma_map_guest_page)(unsigned long handle, unsigned long gfn, int (*dma_map_guest_page)(unsigned long handle, unsigned long gfn,
dma_addr_t *dma_addr); unsigned long size, dma_addr_t *dma_addr);
void (*dma_unmap_guest_page)(unsigned long handle, dma_addr_t dma_addr); void (*dma_unmap_guest_page)(unsigned long handle, dma_addr_t dma_addr);
int (*map_gfn_to_mfn)(unsigned long handle, unsigned long gfn, int (*map_gfn_to_mfn)(unsigned long handle, unsigned long gfn,
......
...@@ -94,6 +94,7 @@ struct gvt_dma { ...@@ -94,6 +94,7 @@ struct gvt_dma {
struct rb_node dma_addr_node; struct rb_node dma_addr_node;
gfn_t gfn; gfn_t gfn;
dma_addr_t dma_addr; dma_addr_t dma_addr;
unsigned long size;
struct kref ref; struct kref ref;
}; };
...@@ -106,45 +107,103 @@ static int kvmgt_guest_init(struct mdev_device *mdev); ...@@ -106,45 +107,103 @@ static int kvmgt_guest_init(struct mdev_device *mdev);
static void intel_vgpu_release_work(struct work_struct *work); static void intel_vgpu_release_work(struct work_struct *work);
static bool kvmgt_guest_exit(struct kvmgt_guest_info *info); static bool kvmgt_guest_exit(struct kvmgt_guest_info *info);
static int gvt_dma_map_page(struct intel_vgpu *vgpu, unsigned long gfn, static void gvt_unpin_guest_page(struct intel_vgpu *vgpu, unsigned long gfn,
dma_addr_t *dma_addr) unsigned long size)
{ {
struct device *dev = &vgpu->gvt->dev_priv->drm.pdev->dev; int total_pages;
struct page *page; int npage;
unsigned long pfn;
int ret; int ret;
/* Pin the page first. */ total_pages = roundup(size, PAGE_SIZE) / PAGE_SIZE;
ret = vfio_pin_pages(mdev_dev(vgpu->vdev.mdev), &gfn, 1,
for (npage = 0; npage < total_pages; npage++) {
unsigned long cur_gfn = gfn + npage;
ret = vfio_unpin_pages(mdev_dev(vgpu->vdev.mdev), &cur_gfn, 1);
WARN_ON(ret != 1);
}
}
/* Pin a normal or compound guest page for dma. */
static int gvt_pin_guest_page(struct intel_vgpu *vgpu, unsigned long gfn,
unsigned long size, struct page **page)
{
unsigned long base_pfn = 0;
int total_pages;
int npage;
int ret;
total_pages = roundup(size, PAGE_SIZE) / PAGE_SIZE;
/*
* We pin the pages one-by-one to avoid allocating a big array
* on stack to hold pfns.
*/
for (npage = 0; npage < total_pages; npage++) {
unsigned long cur_gfn = gfn + npage;
unsigned long pfn;
ret = vfio_pin_pages(mdev_dev(vgpu->vdev.mdev), &cur_gfn, 1,
IOMMU_READ | IOMMU_WRITE, &pfn); IOMMU_READ | IOMMU_WRITE, &pfn);
if (ret != 1) { if (ret != 1) {
gvt_vgpu_err("vfio_pin_pages failed for gfn 0x%lx: %d\n", gvt_vgpu_err("vfio_pin_pages failed for gfn 0x%lx, ret %d\n",
gfn, ret); cur_gfn, ret);
return -EINVAL; goto err;
} }
/* Setup DMA mapping. */ if (!pfn_valid(pfn)) {
page = pfn_to_page(pfn); gvt_vgpu_err("pfn 0x%lx is not mem backed\n", pfn);
*dma_addr = dma_map_page(dev, page, 0, PAGE_SIZE, npage++;
PCI_DMA_BIDIRECTIONAL); ret = -EFAULT;
if (dma_mapping_error(dev, *dma_addr)) { goto err;
gvt_vgpu_err("DMA mapping failed for gfn 0x%lx\n", gfn); }
vfio_unpin_pages(mdev_dev(vgpu->vdev.mdev), &gfn, 1);
return -ENOMEM; if (npage == 0)
base_pfn = pfn;
else if (base_pfn + npage != pfn) {
gvt_vgpu_err("The pages are not continuous\n");
ret = -EINVAL;
npage++;
goto err;
}
} }
*page = pfn_to_page(base_pfn);
return 0; return 0;
err:
gvt_unpin_guest_page(vgpu, gfn, npage * PAGE_SIZE);
return ret;
} }
static void gvt_dma_unmap_page(struct intel_vgpu *vgpu, unsigned long gfn, static int gvt_dma_map_page(struct intel_vgpu *vgpu, unsigned long gfn,
dma_addr_t dma_addr) dma_addr_t *dma_addr, unsigned long size)
{ {
struct device *dev = &vgpu->gvt->dev_priv->drm.pdev->dev; struct device *dev = &vgpu->gvt->dev_priv->drm.pdev->dev;
struct page *page = NULL;
int ret; int ret;
dma_unmap_page(dev, dma_addr, PAGE_SIZE, PCI_DMA_BIDIRECTIONAL); ret = gvt_pin_guest_page(vgpu, gfn, size, &page);
ret = vfio_unpin_pages(mdev_dev(vgpu->vdev.mdev), &gfn, 1); if (ret)
WARN_ON(ret != 1); return ret;
/* Setup DMA mapping. */
*dma_addr = dma_map_page(dev, page, 0, size, PCI_DMA_BIDIRECTIONAL);
ret = dma_mapping_error(dev, *dma_addr);
if (ret) {
gvt_vgpu_err("DMA mapping failed for pfn 0x%lx, ret %d\n",
page_to_pfn(page), ret);
gvt_unpin_guest_page(vgpu, gfn, size);
}
return ret;
}
static void gvt_dma_unmap_page(struct intel_vgpu *vgpu, unsigned long gfn,
dma_addr_t dma_addr, unsigned long size)
{
struct device *dev = &vgpu->gvt->dev_priv->drm.pdev->dev;
dma_unmap_page(dev, dma_addr, size, PCI_DMA_BIDIRECTIONAL);
gvt_unpin_guest_page(vgpu, gfn, size);
} }
static struct gvt_dma *__gvt_cache_find_dma_addr(struct intel_vgpu *vgpu, static struct gvt_dma *__gvt_cache_find_dma_addr(struct intel_vgpu *vgpu,
...@@ -185,7 +244,7 @@ static struct gvt_dma *__gvt_cache_find_gfn(struct intel_vgpu *vgpu, gfn_t gfn) ...@@ -185,7 +244,7 @@ static struct gvt_dma *__gvt_cache_find_gfn(struct intel_vgpu *vgpu, gfn_t gfn)
} }
static int __gvt_cache_add(struct intel_vgpu *vgpu, gfn_t gfn, static int __gvt_cache_add(struct intel_vgpu *vgpu, gfn_t gfn,
dma_addr_t dma_addr) dma_addr_t dma_addr, unsigned long size)
{ {
struct gvt_dma *new, *itr; struct gvt_dma *new, *itr;
struct rb_node **link, *parent = NULL; struct rb_node **link, *parent = NULL;
...@@ -197,6 +256,7 @@ static int __gvt_cache_add(struct intel_vgpu *vgpu, gfn_t gfn, ...@@ -197,6 +256,7 @@ static int __gvt_cache_add(struct intel_vgpu *vgpu, gfn_t gfn,
new->vgpu = vgpu; new->vgpu = vgpu;
new->gfn = gfn; new->gfn = gfn;
new->dma_addr = dma_addr; new->dma_addr = dma_addr;
new->size = size;
kref_init(&new->ref); kref_init(&new->ref);
/* gfn_cache maps gfn to struct gvt_dma. */ /* gfn_cache maps gfn to struct gvt_dma. */
...@@ -254,7 +314,7 @@ static void gvt_cache_destroy(struct intel_vgpu *vgpu) ...@@ -254,7 +314,7 @@ static void gvt_cache_destroy(struct intel_vgpu *vgpu)
break; break;
} }
dma = rb_entry(node, struct gvt_dma, gfn_node); dma = rb_entry(node, struct gvt_dma, gfn_node);
gvt_dma_unmap_page(vgpu, dma->gfn, dma->dma_addr); gvt_dma_unmap_page(vgpu, dma->gfn, dma->dma_addr, dma->size);
__gvt_cache_remove_entry(vgpu, dma); __gvt_cache_remove_entry(vgpu, dma);
mutex_unlock(&vgpu->vdev.cache_lock); mutex_unlock(&vgpu->vdev.cache_lock);
} }
...@@ -509,7 +569,8 @@ static int intel_vgpu_iommu_notifier(struct notifier_block *nb, ...@@ -509,7 +569,8 @@ static int intel_vgpu_iommu_notifier(struct notifier_block *nb,
if (!entry) if (!entry)
continue; continue;
gvt_dma_unmap_page(vgpu, entry->gfn, entry->dma_addr); gvt_dma_unmap_page(vgpu, entry->gfn, entry->dma_addr,
entry->size);
__gvt_cache_remove_entry(vgpu, entry); __gvt_cache_remove_entry(vgpu, entry);
} }
mutex_unlock(&vgpu->vdev.cache_lock); mutex_unlock(&vgpu->vdev.cache_lock);
...@@ -1616,7 +1677,7 @@ static unsigned long kvmgt_gfn_to_pfn(unsigned long handle, unsigned long gfn) ...@@ -1616,7 +1677,7 @@ static unsigned long kvmgt_gfn_to_pfn(unsigned long handle, unsigned long gfn)
} }
int kvmgt_dma_map_guest_page(unsigned long handle, unsigned long gfn, int kvmgt_dma_map_guest_page(unsigned long handle, unsigned long gfn,
dma_addr_t *dma_addr) unsigned long size, dma_addr_t *dma_addr)
{ {
struct kvmgt_guest_info *info; struct kvmgt_guest_info *info;
struct intel_vgpu *vgpu; struct intel_vgpu *vgpu;
...@@ -1633,11 +1694,11 @@ int kvmgt_dma_map_guest_page(unsigned long handle, unsigned long gfn, ...@@ -1633,11 +1694,11 @@ int kvmgt_dma_map_guest_page(unsigned long handle, unsigned long gfn,
entry = __gvt_cache_find_gfn(info->vgpu, gfn); entry = __gvt_cache_find_gfn(info->vgpu, gfn);
if (!entry) { if (!entry) {
ret = gvt_dma_map_page(vgpu, gfn, dma_addr); ret = gvt_dma_map_page(vgpu, gfn, dma_addr, size);
if (ret) if (ret)
goto err_unlock; goto err_unlock;
ret = __gvt_cache_add(info->vgpu, gfn, *dma_addr); ret = __gvt_cache_add(info->vgpu, gfn, *dma_addr, size);
if (ret) if (ret)
goto err_unmap; goto err_unmap;
} else { } else {
...@@ -1649,7 +1710,7 @@ int kvmgt_dma_map_guest_page(unsigned long handle, unsigned long gfn, ...@@ -1649,7 +1710,7 @@ int kvmgt_dma_map_guest_page(unsigned long handle, unsigned long gfn,
return 0; return 0;
err_unmap: err_unmap:
gvt_dma_unmap_page(vgpu, gfn, *dma_addr); gvt_dma_unmap_page(vgpu, gfn, *dma_addr, size);
err_unlock: err_unlock:
mutex_unlock(&info->vgpu->vdev.cache_lock); mutex_unlock(&info->vgpu->vdev.cache_lock);
return ret; return ret;
...@@ -1659,7 +1720,8 @@ static void __gvt_dma_release(struct kref *ref) ...@@ -1659,7 +1720,8 @@ static void __gvt_dma_release(struct kref *ref)
{ {
struct gvt_dma *entry = container_of(ref, typeof(*entry), ref); struct gvt_dma *entry = container_of(ref, typeof(*entry), ref);
gvt_dma_unmap_page(entry->vgpu, entry->gfn, entry->dma_addr); gvt_dma_unmap_page(entry->vgpu, entry->gfn, entry->dma_addr,
entry->size);
__gvt_cache_remove_entry(entry->vgpu, entry); __gvt_cache_remove_entry(entry->vgpu, entry);
} }
......
...@@ -230,17 +230,18 @@ static inline unsigned long intel_gvt_hypervisor_gfn_to_mfn( ...@@ -230,17 +230,18 @@ static inline unsigned long intel_gvt_hypervisor_gfn_to_mfn(
/** /**
* intel_gvt_hypervisor_dma_map_guest_page - setup dma map for guest page * intel_gvt_hypervisor_dma_map_guest_page - setup dma map for guest page
* @vgpu: a vGPU * @vgpu: a vGPU
* @gpfn: guest pfn * @gfn: guest pfn
* @size: page size
* @dma_addr: retrieve allocated dma addr * @dma_addr: retrieve allocated dma addr
* *
* Returns: * Returns:
* 0 on success, negative error code if failed. * 0 on success, negative error code if failed.
*/ */
static inline int intel_gvt_hypervisor_dma_map_guest_page( static inline int intel_gvt_hypervisor_dma_map_guest_page(
struct intel_vgpu *vgpu, unsigned long gfn, struct intel_vgpu *vgpu, unsigned long gfn, unsigned long size,
dma_addr_t *dma_addr) dma_addr_t *dma_addr)
{ {
return intel_gvt_host.mpt->dma_map_guest_page(vgpu->handle, gfn, return intel_gvt_host.mpt->dma_map_guest_page(vgpu->handle, gfn, size,
dma_addr); dma_addr);
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment