Commit 05f0c03f authored by Yongji Xie's avatar Yongji Xie Committed by Alex Williamson

vfio-pci: Allow to mmap sub-page MMIO BARs if the mmio page is exclusive

Current vfio-pci implementation disallows to mmap
sub-page(size < PAGE_SIZE) MMIO BARs because these BARs' mmio
page may be shared with other BARs. This will cause some
performance issues when we passthrough a PCI device with
this kind of BARs. Guest will be not able to handle the mmio
accesses to the BARs which leads to mmio emulations in host.

However, not all sub-page BARs will share page with other BARs.
We should allow to mmap the sub-page MMIO BARs which we can
make sure will not share page with other BARs.

This patch adds support for this case. And we try to add a
dummy resource to reserve the remainder of the page which
hot-add device's BAR might be assigned into. But it's not
necessary to handle the case when the BAR is not page aligned.
Because we can't expect the BAR will be assigned into the same
location in a page in guest when we passthrough the BAR. And
it's hard to access this BAR in userspace because we have
no way to get the BAR's location in a page.
Signed-off-by: default avatarYongji Xie <xyjxie@linux.vnet.ibm.com>
Signed-off-by: default avatarAlex Williamson <alex.williamson@redhat.com>
parent 9698cbf0
...@@ -110,6 +110,74 @@ static inline bool vfio_pci_is_vga(struct pci_dev *pdev) ...@@ -110,6 +110,74 @@ static inline bool vfio_pci_is_vga(struct pci_dev *pdev)
return (pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA; return (pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA;
} }
static void vfio_pci_probe_mmaps(struct vfio_pci_device *vdev)
{
struct resource *res;
int bar;
struct vfio_pci_dummy_resource *dummy_res;
INIT_LIST_HEAD(&vdev->dummy_resources_list);
for (bar = PCI_STD_RESOURCES; bar <= PCI_STD_RESOURCE_END; bar++) {
res = vdev->pdev->resource + bar;
if (!IS_ENABLED(CONFIG_VFIO_PCI_MMAP))
goto no_mmap;
if (!(res->flags & IORESOURCE_MEM))
goto no_mmap;
/*
* The PCI core shouldn't set up a resource with a
* type but zero size. But there may be bugs that
* cause us to do that.
*/
if (!resource_size(res))
goto no_mmap;
if (resource_size(res) >= PAGE_SIZE) {
vdev->bar_mmap_supported[bar] = true;
continue;
}
if (!(res->start & ~PAGE_MASK)) {
/*
* Add a dummy resource to reserve the remainder
* of the exclusive page in case that hot-add
* device's bar is assigned into it.
*/
dummy_res = kzalloc(sizeof(*dummy_res), GFP_KERNEL);
if (dummy_res == NULL)
goto no_mmap;
dummy_res->resource.name = "vfio sub-page reserved";
dummy_res->resource.start = res->end + 1;
dummy_res->resource.end = res->start + PAGE_SIZE - 1;
dummy_res->resource.flags = res->flags;
if (request_resource(res->parent,
&dummy_res->resource)) {
kfree(dummy_res);
goto no_mmap;
}
dummy_res->index = bar;
list_add(&dummy_res->res_next,
&vdev->dummy_resources_list);
vdev->bar_mmap_supported[bar] = true;
continue;
}
/*
* Here we don't handle the case when the BAR is not page
* aligned because we can't expect the BAR will be
* assigned into the same location in a page in guest
* when we passthrough the BAR. And it's hard to access
* this BAR in userspace because we have no way to get
* the BAR's location in a page.
*/
no_mmap:
vdev->bar_mmap_supported[bar] = false;
}
}
static void vfio_pci_try_bus_reset(struct vfio_pci_device *vdev); static void vfio_pci_try_bus_reset(struct vfio_pci_device *vdev);
static void vfio_pci_disable(struct vfio_pci_device *vdev); static void vfio_pci_disable(struct vfio_pci_device *vdev);
...@@ -218,12 +286,15 @@ static int vfio_pci_enable(struct vfio_pci_device *vdev) ...@@ -218,12 +286,15 @@ static int vfio_pci_enable(struct vfio_pci_device *vdev)
} }
} }
vfio_pci_probe_mmaps(vdev);
return 0; return 0;
} }
static void vfio_pci_disable(struct vfio_pci_device *vdev) static void vfio_pci_disable(struct vfio_pci_device *vdev)
{ {
struct pci_dev *pdev = vdev->pdev; struct pci_dev *pdev = vdev->pdev;
struct vfio_pci_dummy_resource *dummy_res, *tmp;
int i, bar; int i, bar;
/* Stop the device from further DMA */ /* Stop the device from further DMA */
...@@ -252,6 +323,13 @@ static void vfio_pci_disable(struct vfio_pci_device *vdev) ...@@ -252,6 +323,13 @@ static void vfio_pci_disable(struct vfio_pci_device *vdev)
vdev->barmap[bar] = NULL; vdev->barmap[bar] = NULL;
} }
list_for_each_entry_safe(dummy_res, tmp,
&vdev->dummy_resources_list, res_next) {
list_del(&dummy_res->res_next);
release_resource(&dummy_res->resource);
kfree(dummy_res);
}
vdev->needs_reset = true; vdev->needs_reset = true;
/* /*
...@@ -623,9 +701,7 @@ static long vfio_pci_ioctl(void *device_data, ...@@ -623,9 +701,7 @@ static long vfio_pci_ioctl(void *device_data,
info.flags = VFIO_REGION_INFO_FLAG_READ | info.flags = VFIO_REGION_INFO_FLAG_READ |
VFIO_REGION_INFO_FLAG_WRITE; VFIO_REGION_INFO_FLAG_WRITE;
if (IS_ENABLED(CONFIG_VFIO_PCI_MMAP) && if (vdev->bar_mmap_supported[info.index]) {
pci_resource_flags(pdev, info.index) &
IORESOURCE_MEM && info.size >= PAGE_SIZE) {
info.flags |= VFIO_REGION_INFO_FLAG_MMAP; info.flags |= VFIO_REGION_INFO_FLAG_MMAP;
if (info.index == vdev->msix_bar) { if (info.index == vdev->msix_bar) {
ret = msix_sparse_mmap_cap(vdev, &caps); ret = msix_sparse_mmap_cap(vdev, &caps);
...@@ -1049,16 +1125,16 @@ static int vfio_pci_mmap(void *device_data, struct vm_area_struct *vma) ...@@ -1049,16 +1125,16 @@ static int vfio_pci_mmap(void *device_data, struct vm_area_struct *vma)
return -EINVAL; return -EINVAL;
if (index >= VFIO_PCI_ROM_REGION_INDEX) if (index >= VFIO_PCI_ROM_REGION_INDEX)
return -EINVAL; return -EINVAL;
if (!(pci_resource_flags(pdev, index) & IORESOURCE_MEM)) if (!vdev->bar_mmap_supported[index])
return -EINVAL; return -EINVAL;
phys_len = pci_resource_len(pdev, index); phys_len = PAGE_ALIGN(pci_resource_len(pdev, index));
req_len = vma->vm_end - vma->vm_start; req_len = vma->vm_end - vma->vm_start;
pgoff = vma->vm_pgoff & pgoff = vma->vm_pgoff &
((1U << (VFIO_PCI_OFFSET_SHIFT - PAGE_SHIFT)) - 1); ((1U << (VFIO_PCI_OFFSET_SHIFT - PAGE_SHIFT)) - 1);
req_start = pgoff << PAGE_SHIFT; req_start = pgoff << PAGE_SHIFT;
if (phys_len < PAGE_SIZE || req_start + req_len > phys_len) if (req_start + req_len > phys_len)
return -EINVAL; return -EINVAL;
if (index == vdev->msix_bar) { if (index == vdev->msix_bar) {
......
...@@ -57,9 +57,16 @@ struct vfio_pci_region { ...@@ -57,9 +57,16 @@ struct vfio_pci_region {
u32 flags; u32 flags;
}; };
struct vfio_pci_dummy_resource {
struct resource resource;
int index;
struct list_head res_next;
};
struct vfio_pci_device { struct vfio_pci_device {
struct pci_dev *pdev; struct pci_dev *pdev;
void __iomem *barmap[PCI_STD_RESOURCE_END + 1]; void __iomem *barmap[PCI_STD_RESOURCE_END + 1];
bool bar_mmap_supported[PCI_STD_RESOURCE_END + 1];
u8 *pci_config_map; u8 *pci_config_map;
u8 *vconfig; u8 *vconfig;
struct perm_bits *msi_perm; struct perm_bits *msi_perm;
...@@ -88,6 +95,7 @@ struct vfio_pci_device { ...@@ -88,6 +95,7 @@ struct vfio_pci_device {
int refcnt; int refcnt;
struct eventfd_ctx *err_trigger; struct eventfd_ctx *err_trigger;
struct eventfd_ctx *req_trigger; struct eventfd_ctx *req_trigger;
struct list_head dummy_resources_list;
}; };
#define is_intx(vdev) (vdev->irq_type == VFIO_PCI_INTX_IRQ_INDEX) #define is_intx(vdev) (vdev->irq_type == VFIO_PCI_INTX_IRQ_INDEX)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment