Commit b25f62cc authored by Linus Torvalds

Merge tag 'vfio-v6.5-rc1' of https://github.com/awilliam/linux-vfio

Pull VFIO updates from Alex Williamson:

 - Adjust log levels for common messages (Oleksandr Natalenko, Alex
   Williamson)

 - Support for dynamic MSI-X allocation (Reinette Chatre)

 - Enable and report PCIe AtomicOp Completer capabilities (Alex
   Williamson)

 - Cleanup Kconfigs for vfio bus drivers (Alex Williamson)

 - Add support for CDX bus based devices (Nipun Gupta)

 - Fix race with concurrent mdev initialization (Eric Farman)

* tag 'vfio-v6.5-rc1' of https://github.com/awilliam/linux-vfio:
  vfio/mdev: Move the compat_class initialization to module init
  vfio/cdx: add support for CDX bus
  vfio/fsl: Create Kconfig sub-menu
  vfio/platform: Cleanup Kconfig
  vfio/pci: Cleanup Kconfig
  vfio/pci-core: Add capability for AtomicOp completer support
  vfio/pci: Also demote hiding standard cap messages
  vfio/pci: Clear VFIO_IRQ_INFO_NORESIZE for MSI-X
  vfio/pci: Support dynamic MSI-X
  vfio/pci: Probe and store ability to support dynamic MSI-X
  vfio/pci: Use bitfield for struct vfio_pci_core_device flags
  vfio/pci: Update stale comment
  vfio/pci: Remove interrupt context counter
  vfio/pci: Use xarray for interrupt context storage
  vfio/pci: Move to single error path
  vfio/pci: Prepare for dynamic interrupt context storage
  vfio/pci: Remove negative check on unsigned vector
  vfio/pci: Consolidate irq cleanup on MSI/MSI-X disable
  vfio/pci: demote hiding ecap messages to debug level
parents 9070577a ff598081
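
To illustrate the dynamic MSI-X change from the userspace side: a minimal
sketch (editorial illustration against the uapi touched below, not code from
this merge; assumes an already-open VFIO device fd) that detects dynamic
allocation support by the absence of VFIO_IRQ_INFO_NORESIZE on the MSI-X
index:

#include <string.h>
#include <sys/ioctl.h>
#include <linux/vfio.h>

/*
 * Returns 1 if MSI-X vectors can be allocated dynamically, 0 if not,
 * -1 on ioctl failure. device_fd is an already-open VFIO device fd.
 */
static int msix_is_dynamic(int device_fd)
{
	struct vfio_irq_info info;

	memset(&info, 0, sizeof(info));
	info.argsz = sizeof(info);
	info.index = VFIO_PCI_MSIX_IRQ_INDEX;

	if (ioctl(device_fd, VFIO_DEVICE_GET_IRQ_INFO, &info))
		return -1;

	/* After this merge, no NORESIZE on MSI-X means dynamic allocation. */
	return !(info.flags & VFIO_IRQ_INFO_NORESIZE);
}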
@@ -22254,6 +22254,13 @@ F: Documentation/filesystems/vfat.rst
 F: fs/fat/
 F: tools/testing/selftests/filesystems/fat/

+VFIO CDX DRIVER
+M: Nipun Gupta <nipun.gupta@amd.com>
+M: Nikhil Agarwal <nikhil.agarwal@amd.com>
+L: kvm@vger.kernel.org
+S: Maintained
+F: drivers/vfio/cdx/*
+
 VFIO DRIVER
 M: Alex Williamson <alex.williamson@redhat.com>
 L: kvm@vger.kernel.org
......
@@ -57,6 +57,7 @@ source "drivers/vfio/pci/Kconfig"
 source "drivers/vfio/platform/Kconfig"
 source "drivers/vfio/mdev/Kconfig"
 source "drivers/vfio/fsl-mc/Kconfig"
+source "drivers/vfio/cdx/Kconfig"
 endif

 source "virt/lib/Kconfig"
@@ -10,7 +10,8 @@ vfio-$(CONFIG_VFIO_VIRQFD) += virqfd.o
 obj-$(CONFIG_VFIO_IOMMU_TYPE1) += vfio_iommu_type1.o
 obj-$(CONFIG_VFIO_IOMMU_SPAPR_TCE) += vfio_iommu_spapr_tce.o
-obj-$(CONFIG_VFIO_PCI) += pci/
-obj-$(CONFIG_VFIO_PLATFORM) += platform/
+obj-$(CONFIG_VFIO_PCI_CORE) += pci/
+obj-$(CONFIG_VFIO_PLATFORM_BASE) += platform/
 obj-$(CONFIG_VFIO_MDEV) += mdev/
 obj-$(CONFIG_VFIO_FSL_MC) += fsl-mc/
+obj-$(CONFIG_VFIO_CDX) += cdx/
# SPDX-License-Identifier: GPL-2.0
#
# VFIO CDX configuration
#
# Copyright (C) 2022-2023, Advanced Micro Devices, Inc.
#

config VFIO_CDX
	tristate "VFIO support for CDX bus devices"
	depends on CDX_BUS
	select EVENTFD
	help
	  Driver to enable VFIO support for the devices on CDX bus.
	  This is required to make use of CDX devices present in
	  the system using the VFIO framework.

	  If you don't know what to do here, say N.
# SPDX-License-Identifier: GPL-2.0
#
# Copyright (C) 2022-2023, Advanced Micro Devices, Inc.
#

obj-$(CONFIG_VFIO_CDX) += vfio-cdx.o
vfio-cdx-objs := main.o
// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2022-2023, Advanced Micro Devices, Inc.
 */

#include <linux/vfio.h>
#include <linux/cdx/cdx_bus.h>

#include "private.h"

static int vfio_cdx_open_device(struct vfio_device *core_vdev)
{
	struct vfio_cdx_device *vdev =
		container_of(core_vdev, struct vfio_cdx_device, vdev);
	struct cdx_device *cdx_dev = to_cdx_device(core_vdev->dev);
	int count = cdx_dev->res_count;
	int i;

	vdev->regions = kcalloc(count, sizeof(struct vfio_cdx_region),
				GFP_KERNEL_ACCOUNT);
	if (!vdev->regions)
		return -ENOMEM;

	for (i = 0; i < count; i++) {
		struct resource *res = &cdx_dev->res[i];

		vdev->regions[i].addr = res->start;
		vdev->regions[i].size = resource_size(res);
		vdev->regions[i].type = res->flags;
		/*
		 * Only regions addressed with PAGE granularity may be
		 * MMAP'ed securely.
		 */
		if (!(vdev->regions[i].addr & ~PAGE_MASK) &&
		    !(vdev->regions[i].size & ~PAGE_MASK))
			vdev->regions[i].flags |=
					VFIO_REGION_INFO_FLAG_MMAP;
		vdev->regions[i].flags |= VFIO_REGION_INFO_FLAG_READ;
		if (!(cdx_dev->res[i].flags & IORESOURCE_READONLY))
			vdev->regions[i].flags |= VFIO_REGION_INFO_FLAG_WRITE;
	}

	return 0;
}

static void vfio_cdx_close_device(struct vfio_device *core_vdev)
{
	struct vfio_cdx_device *vdev =
		container_of(core_vdev, struct vfio_cdx_device, vdev);

	kfree(vdev->regions);
	cdx_dev_reset(core_vdev->dev);
}
static int vfio_cdx_ioctl_get_info(struct vfio_cdx_device *vdev,
				   struct vfio_device_info __user *arg)
{
	unsigned long minsz = offsetofend(struct vfio_device_info, num_irqs);
	struct cdx_device *cdx_dev = to_cdx_device(vdev->vdev.dev);
	struct vfio_device_info info;

	if (copy_from_user(&info, arg, minsz))
		return -EFAULT;

	if (info.argsz < minsz)
		return -EINVAL;

	info.flags = VFIO_DEVICE_FLAGS_CDX;
	info.flags |= VFIO_DEVICE_FLAGS_RESET;

	info.num_regions = cdx_dev->res_count;
	info.num_irqs = 0;

	return copy_to_user(arg, &info, minsz) ? -EFAULT : 0;
}

static int vfio_cdx_ioctl_get_region_info(struct vfio_cdx_device *vdev,
					  struct vfio_region_info __user *arg)
{
	unsigned long minsz = offsetofend(struct vfio_region_info, offset);
	struct cdx_device *cdx_dev = to_cdx_device(vdev->vdev.dev);
	struct vfio_region_info info;

	if (copy_from_user(&info, arg, minsz))
		return -EFAULT;

	if (info.argsz < minsz)
		return -EINVAL;

	if (info.index >= cdx_dev->res_count)
		return -EINVAL;

	/* map offset to the physical address */
	info.offset = vfio_cdx_index_to_offset(info.index);
	info.size = vdev->regions[info.index].size;
	info.flags = vdev->regions[info.index].flags;

	return copy_to_user(arg, &info, minsz) ? -EFAULT : 0;
}

static long vfio_cdx_ioctl(struct vfio_device *core_vdev,
			   unsigned int cmd, unsigned long arg)
{
	struct vfio_cdx_device *vdev =
		container_of(core_vdev, struct vfio_cdx_device, vdev);
	void __user *uarg = (void __user *)arg;

	switch (cmd) {
	case VFIO_DEVICE_GET_INFO:
		return vfio_cdx_ioctl_get_info(vdev, uarg);
	case VFIO_DEVICE_GET_REGION_INFO:
		return vfio_cdx_ioctl_get_region_info(vdev, uarg);
	case VFIO_DEVICE_RESET:
		return cdx_dev_reset(core_vdev->dev);
	default:
		return -ENOTTY;
	}
}

static int vfio_cdx_mmap_mmio(struct vfio_cdx_region region,
			      struct vm_area_struct *vma)
{
	u64 size = vma->vm_end - vma->vm_start;
	u64 pgoff, base;

	pgoff = vma->vm_pgoff &
		((1U << (VFIO_CDX_OFFSET_SHIFT - PAGE_SHIFT)) - 1);
	base = pgoff << PAGE_SHIFT;

	if (base + size > region.size)
		return -EINVAL;

	vma->vm_pgoff = (region.addr >> PAGE_SHIFT) + pgoff;
	vma->vm_page_prot = pgprot_device(vma->vm_page_prot);

	return io_remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff,
				  size, vma->vm_page_prot);
}
static int vfio_cdx_mmap(struct vfio_device *core_vdev,
			 struct vm_area_struct *vma)
{
	struct vfio_cdx_device *vdev =
		container_of(core_vdev, struct vfio_cdx_device, vdev);
	struct cdx_device *cdx_dev = to_cdx_device(core_vdev->dev);
	unsigned int index;

	index = vma->vm_pgoff >> (VFIO_CDX_OFFSET_SHIFT - PAGE_SHIFT);

	if (index >= cdx_dev->res_count)
		return -EINVAL;

	if (!(vdev->regions[index].flags & VFIO_REGION_INFO_FLAG_MMAP))
		return -EINVAL;

	if (!(vdev->regions[index].flags & VFIO_REGION_INFO_FLAG_READ) &&
	    (vma->vm_flags & VM_READ))
		return -EPERM;

	if (!(vdev->regions[index].flags & VFIO_REGION_INFO_FLAG_WRITE) &&
	    (vma->vm_flags & VM_WRITE))
		return -EPERM;

	return vfio_cdx_mmap_mmio(vdev->regions[index], vma);
}

static const struct vfio_device_ops vfio_cdx_ops = {
	.name		= "vfio-cdx",
	.open_device	= vfio_cdx_open_device,
	.close_device	= vfio_cdx_close_device,
	.ioctl		= vfio_cdx_ioctl,
	.mmap		= vfio_cdx_mmap,
	.bind_iommufd	= vfio_iommufd_physical_bind,
	.unbind_iommufd	= vfio_iommufd_physical_unbind,
	.attach_ioas	= vfio_iommufd_physical_attach_ioas,
};

static int vfio_cdx_probe(struct cdx_device *cdx_dev)
{
	struct vfio_cdx_device *vdev;
	struct device *dev = &cdx_dev->dev;
	int ret;

	vdev = vfio_alloc_device(vfio_cdx_device, vdev, dev,
				 &vfio_cdx_ops);
	if (IS_ERR(vdev))
		return PTR_ERR(vdev);

	ret = vfio_register_group_dev(&vdev->vdev);
	if (ret)
		goto out_uninit;

	dev_set_drvdata(dev, vdev);
	return 0;

out_uninit:
	vfio_put_device(&vdev->vdev);
	return ret;
}

static int vfio_cdx_remove(struct cdx_device *cdx_dev)
{
	struct device *dev = &cdx_dev->dev;
	struct vfio_cdx_device *vdev = dev_get_drvdata(dev);

	vfio_unregister_group_dev(&vdev->vdev);
	vfio_put_device(&vdev->vdev);

	return 0;
}
static const struct cdx_device_id vfio_cdx_table[] = {
	{ CDX_DEVICE_DRIVER_OVERRIDE(CDX_ANY_ID, CDX_ANY_ID,
				     CDX_ID_F_VFIO_DRIVER_OVERRIDE) }, /* match all by default */
	{}
};

MODULE_DEVICE_TABLE(cdx, vfio_cdx_table);

static struct cdx_driver vfio_cdx_driver = {
	.probe		= vfio_cdx_probe,
	.remove		= vfio_cdx_remove,
	.match_id_table	= vfio_cdx_table,
	.driver = {
		.name	= "vfio-cdx",
		.owner	= THIS_MODULE,
	},
	.driver_managed_dma = true,
};

module_driver(vfio_cdx_driver, cdx_driver_register, cdx_driver_unregister);

MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("VFIO for CDX devices - User Level meta-driver");
/* SPDX-License-Identifier: GPL-2.0 */
/*
 * Copyright (C) 2022-2023, Advanced Micro Devices, Inc.
 */

#ifndef VFIO_CDX_PRIVATE_H
#define VFIO_CDX_PRIVATE_H

#define VFIO_CDX_OFFSET_SHIFT	40

static inline u64 vfio_cdx_index_to_offset(u32 index)
{
	return ((u64)(index) << VFIO_CDX_OFFSET_SHIFT);
}

struct vfio_cdx_region {
	u32			flags;
	u32			type;
	u64			addr;
	resource_size_t		size;
};

struct vfio_cdx_device {
	struct vfio_device	vdev;
	struct vfio_cdx_region	*regions;
};

#endif /* VFIO_CDX_PRIVATE_H */
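
For orientation, vfio_cdx_index_to_offset() above encodes each region index
into the upper bits of the mmap offset (index << 40), the same scheme
vfio-pci uses. A hedged userspace sketch of consuming it; map_cdx_region()
is a hypothetical helper assuming an open vfio-cdx device fd, and is not
part of this merge:

#include <stddef.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <linux/vfio.h>

/* Query region `index` and mmap it; returns NULL or MAP_FAILED on error. */
static void *map_cdx_region(int device_fd, unsigned int index)
{
	struct vfio_region_info info;

	memset(&info, 0, sizeof(info));
	info.argsz = sizeof(info);
	info.index = index;

	if (ioctl(device_fd, VFIO_DEVICE_GET_REGION_INFO, &info))
		return NULL;
	if (!(info.flags & VFIO_REGION_INFO_FLAG_MMAP))
		return NULL;

	/* info.offset comes back as index << VFIO_CDX_OFFSET_SHIFT (40). */
	return mmap(NULL, info.size, PROT_READ | PROT_WRITE, MAP_SHARED,
		    device_fd, info.offset);
}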
menu "VFIO support for FSL_MC bus devices"
depends on FSL_MC_BUS
config VFIO_FSL_MC config VFIO_FSL_MC
tristate "VFIO support for QorIQ DPAA2 fsl-mc bus devices" tristate "VFIO support for QorIQ DPAA2 fsl-mc bus devices"
depends on FSL_MC_BUS
select EVENTFD select EVENTFD
help help
Driver to enable support for the VFIO QorIQ DPAA2 fsl-mc Driver to enable support for the VFIO QorIQ DPAA2 fsl-mc
...@@ -8,3 +10,5 @@ config VFIO_FSL_MC ...@@ -8,3 +10,5 @@ config VFIO_FSL_MC
fsl-mc bus devices using the VFIO framework. fsl-mc bus devices using the VFIO framework.
If you don't know what to do here, say N. If you don't know what to do here, say N.
endmenu
@@ -72,12 +72,6 @@ int mdev_register_parent(struct mdev_parent *parent, struct device *dev,
 	parent->nr_types = nr_types;
 	atomic_set(&parent->available_instances, mdev_driver->max_instances);

-	if (!mdev_bus_compat_class) {
-		mdev_bus_compat_class = class_compat_register("mdev_bus");
-		if (!mdev_bus_compat_class)
-			return -ENOMEM;
-	}
-
 	ret = parent_create_sysfs_files(parent);
 	if (ret)
 		return ret;
@@ -251,13 +245,24 @@ int mdev_device_remove(struct mdev_device *mdev)

 static int __init mdev_init(void)
 {
-	return bus_register(&mdev_bus_type);
+	int ret;
+
+	ret = bus_register(&mdev_bus_type);
+	if (ret)
+		return ret;
+
+	mdev_bus_compat_class = class_compat_register("mdev_bus");
+	if (!mdev_bus_compat_class) {
+		bus_unregister(&mdev_bus_type);
+		return -ENOMEM;
+	}
+
+	return 0;
 }

 static void __exit mdev_exit(void)
 {
-	if (mdev_bus_compat_class)
-		class_compat_unregister(mdev_bus_compat_class);
+	class_compat_unregister(mdev_bus_compat_class);

 	bus_unregister(&mdev_bus_type);
 }
......
 # SPDX-License-Identifier: GPL-2.0-only
-if PCI && MMU
+menu "VFIO support for PCI devices"
+	depends on PCI && MMU
+
 config VFIO_PCI_CORE
 	tristate
 	select VFIO_VIRQFD
@@ -7,9 +9,11 @@ config VFIO_PCI_CORE

 config VFIO_PCI_MMAP
 	def_bool y if !S390
+	depends on VFIO_PCI_CORE

 config VFIO_PCI_INTX
 	def_bool y if !S390
+	depends on VFIO_PCI_CORE

 config VFIO_PCI
 	tristate "Generic VFIO support for any PCI device"
@@ -59,4 +63,4 @@ source "drivers/vfio/pci/mlx5/Kconfig"
 source "drivers/vfio/pci/hisilicon/Kconfig"

-endif
+endmenu
 # SPDX-License-Identifier: GPL-2.0-only
 config HISI_ACC_VFIO_PCI
-	tristate "VFIO PCI support for HiSilicon ACC devices"
+	tristate "VFIO support for HiSilicon ACC PCI devices"
 	depends on ARM64 || (COMPILE_TEST && 64BIT)
-	depends on VFIO_PCI_CORE
 	depends on PCI_MSI
 	depends on CRYPTO_DEV_HISI_QM
 	depends on CRYPTO_DEV_HISI_HPRE
 	depends on CRYPTO_DEV_HISI_SEC2
 	depends on CRYPTO_DEV_HISI_ZIP
+	select VFIO_PCI_CORE
 	help
 	  This provides generic PCI support for HiSilicon ACC devices
 	  using the VFIO framework.
......
@@ -2,7 +2,7 @@
 config MLX5_VFIO_PCI
 	tristate "VFIO support for MLX5 PCI devices"
 	depends on MLX5_CORE
-	depends on VFIO_PCI_CORE
+	select VFIO_PCI_CORE
 	help
 	  This provides migration support for MLX5 devices using the VFIO
 	  framework.
......
@@ -1566,8 +1566,8 @@ static int vfio_cap_init(struct vfio_pci_core_device *vdev)
 	}

 	if (!len) {
-		pci_info(pdev, "%s: hiding cap %#x@%#x\n", __func__,
-			 cap, pos);
+		pci_dbg(pdev, "%s: hiding cap %#x@%#x\n", __func__,
+			cap, pos);
 		*prev = next;
 		pos = next;
 		continue;
@@ -1643,8 +1643,8 @@ static int vfio_ecap_init(struct vfio_pci_core_device *vdev)
 	}

 	if (!len) {
-		pci_info(pdev, "%s: hiding ecap %#x@%#x\n",
-			 __func__, ecap, epos);
+		pci_dbg(pdev, "%s: hiding ecap %#x@%#x\n",
+			__func__, ecap, epos);
 		/* If not the first in the chain, we can skip over it */
 		if (prev) {
......
@@ -530,8 +530,11 @@ int vfio_pci_core_enable(struct vfio_pci_core_device *vdev)
 		vdev->msix_bar = table & PCI_MSIX_TABLE_BIR;
 		vdev->msix_offset = table & PCI_MSIX_TABLE_OFFSET;
 		vdev->msix_size = ((flags & PCI_MSIX_FLAGS_QSIZE) + 1) * 16;
-	} else
+		vdev->has_dyn_msix = pci_msix_can_alloc_dyn(pdev);
+	} else {
 		vdev->msix_bar = 0xFF;
+		vdev->has_dyn_msix = false;
+	}

 	if (!vfio_vga_disabled() && vfio_pci_is_vga(pdev))
 		vdev->has_vga = true;
@@ -882,6 +885,37 @@ int vfio_pci_core_register_dev_region(struct vfio_pci_core_device *vdev,
 }
 EXPORT_SYMBOL_GPL(vfio_pci_core_register_dev_region);

+static int vfio_pci_info_atomic_cap(struct vfio_pci_core_device *vdev,
+				    struct vfio_info_cap *caps)
+{
+	struct vfio_device_info_cap_pci_atomic_comp cap = {
+		.header.id = VFIO_DEVICE_INFO_CAP_PCI_ATOMIC_COMP,
+		.header.version = 1
+	};
+	struct pci_dev *pdev = pci_physfn(vdev->pdev);
+	u32 devcap2;
+
+	pcie_capability_read_dword(pdev, PCI_EXP_DEVCAP2, &devcap2);
+
+	if ((devcap2 & PCI_EXP_DEVCAP2_ATOMIC_COMP32) &&
+	    !pci_enable_atomic_ops_to_root(pdev, PCI_EXP_DEVCAP2_ATOMIC_COMP32))
+		cap.flags |= VFIO_PCI_ATOMIC_COMP32;
+
+	if ((devcap2 & PCI_EXP_DEVCAP2_ATOMIC_COMP64) &&
+	    !pci_enable_atomic_ops_to_root(pdev, PCI_EXP_DEVCAP2_ATOMIC_COMP64))
+		cap.flags |= VFIO_PCI_ATOMIC_COMP64;
+
+	if ((devcap2 & PCI_EXP_DEVCAP2_ATOMIC_COMP128) &&
+	    !pci_enable_atomic_ops_to_root(pdev,
+					   PCI_EXP_DEVCAP2_ATOMIC_COMP128))
+		cap.flags |= VFIO_PCI_ATOMIC_COMP128;
+
+	if (!cap.flags)
+		return -ENODEV;
+
+	return vfio_info_add_capability(caps, &cap.header, sizeof(cap));
+}
+
 static int vfio_pci_ioctl_get_info(struct vfio_pci_core_device *vdev,
 				   struct vfio_device_info __user *arg)
 {
@@ -920,6 +954,13 @@ static int vfio_pci_ioctl_get_info(struct vfio_pci_core_device *vdev,
 			return ret;
 	}

+	ret = vfio_pci_info_atomic_cap(vdev, &caps);
+	if (ret && ret != -ENODEV) {
+		pci_warn(vdev->pdev,
+			 "Failed to setup AtomicOps info capability\n");
+		return ret;
+	}
+
 	if (caps.size) {
 		info.flags |= VFIO_DEVICE_FLAGS_CAPS;
 		if (info.argsz < sizeof(info) + caps.size) {
@@ -1111,7 +1152,7 @@ static int vfio_pci_ioctl_get_irq_info(struct vfio_pci_core_device *vdev,
 	if (info.index == VFIO_PCI_INTX_IRQ_INDEX)
 		info.flags |=
 			(VFIO_IRQ_INFO_MASKABLE | VFIO_IRQ_INFO_AUTOMASKED);
-	else
+	else if (info.index != VFIO_PCI_MSIX_IRQ_INDEX || !vdev->has_dyn_msix)
 		info.flags |= VFIO_IRQ_INFO_NORESIZE;

 	return copy_to_user(arg, &info, minsz) ? -EFAULT : 0;
@@ -2102,6 +2143,7 @@ int vfio_pci_core_init_dev(struct vfio_device *core_vdev)
 	INIT_LIST_HEAD(&vdev->vma_list);
 	INIT_LIST_HEAD(&vdev->sriov_pfs_item);
 	init_rwsem(&vdev->memory_lock);
+	xa_init(&vdev->ctx);

 	return 0;
 }
......
@@ -48,6 +48,39 @@ static bool is_irq_none(struct vfio_pci_core_device *vdev)
 		vdev->irq_type == VFIO_PCI_MSIX_IRQ_INDEX);
 }

+static
+struct vfio_pci_irq_ctx *vfio_irq_ctx_get(struct vfio_pci_core_device *vdev,
+					  unsigned long index)
+{
+	return xa_load(&vdev->ctx, index);
+}
+
+static void vfio_irq_ctx_free(struct vfio_pci_core_device *vdev,
+			      struct vfio_pci_irq_ctx *ctx, unsigned long index)
+{
+	xa_erase(&vdev->ctx, index);
+	kfree(ctx);
+}
+
+static struct vfio_pci_irq_ctx *
+vfio_irq_ctx_alloc(struct vfio_pci_core_device *vdev, unsigned long index)
+{
+	struct vfio_pci_irq_ctx *ctx;
+	int ret;
+
+	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL_ACCOUNT);
+	if (!ctx)
+		return NULL;
+
+	ret = xa_insert(&vdev->ctx, index, ctx, GFP_KERNEL_ACCOUNT);
+	if (ret) {
+		kfree(ctx);
+		return NULL;
+	}
+
+	return ctx;
+}
+
 /*
  * INTx
  */
@@ -55,14 +88,21 @@ static void vfio_send_intx_eventfd(void *opaque, void *unused)
 {
 	struct vfio_pci_core_device *vdev = opaque;

-	if (likely(is_intx(vdev) && !vdev->virq_disabled))
-		eventfd_signal(vdev->ctx[0].trigger, 1);
+	if (likely(is_intx(vdev) && !vdev->virq_disabled)) {
+		struct vfio_pci_irq_ctx *ctx;
+
+		ctx = vfio_irq_ctx_get(vdev, 0);
+		if (WARN_ON_ONCE(!ctx))
+			return;
+		eventfd_signal(ctx->trigger, 1);
+	}
 }

 /* Returns true if the INTx vfio_pci_irq_ctx.masked value is changed. */
 bool vfio_pci_intx_mask(struct vfio_pci_core_device *vdev)
 {
 	struct pci_dev *pdev = vdev->pdev;
+	struct vfio_pci_irq_ctx *ctx;
 	unsigned long flags;
 	bool masked_changed = false;
@@ -77,7 +117,14 @@ bool vfio_pci_intx_mask(struct vfio_pci_core_device *vdev)
 	if (unlikely(!is_intx(vdev))) {
 		if (vdev->pci_2_3)
 			pci_intx(pdev, 0);
-	} else if (!vdev->ctx[0].masked) {
+		goto out_unlock;
+	}
+
+	ctx = vfio_irq_ctx_get(vdev, 0);
+	if (WARN_ON_ONCE(!ctx))
+		goto out_unlock;
+
+	if (!ctx->masked) {
 		/*
 		 * Can't use check_and_mask here because we always want to
 		 * mask, not just when something is pending.
@@ -87,10 +134,11 @@ bool vfio_pci_intx_mask(struct vfio_pci_core_device *vdev)
 		else
 			disable_irq_nosync(pdev->irq);

-		vdev->ctx[0].masked = true;
+		ctx->masked = true;
 		masked_changed = true;
 	}

+out_unlock:
 	spin_unlock_irqrestore(&vdev->irqlock, flags);
 	return masked_changed;
 }
@@ -105,6 +153,7 @@ static int vfio_pci_intx_unmask_handler(void *opaque, void *unused)
 {
 	struct vfio_pci_core_device *vdev = opaque;
 	struct pci_dev *pdev = vdev->pdev;
+	struct vfio_pci_irq_ctx *ctx;
 	unsigned long flags;
 	int ret = 0;

@@ -117,7 +166,14 @@ static int vfio_pci_intx_unmask_handler(void *opaque, void *unused)
 	if (unlikely(!is_intx(vdev))) {
 		if (vdev->pci_2_3)
 			pci_intx(pdev, 1);
-	} else if (vdev->ctx[0].masked && !vdev->virq_disabled) {
+		goto out_unlock;
+	}
+
+	ctx = vfio_irq_ctx_get(vdev, 0);
+	if (WARN_ON_ONCE(!ctx))
+		goto out_unlock;
+
+	if (ctx->masked && !vdev->virq_disabled) {
 		/*
 		 * A pending interrupt here would immediately trigger,
 		 * but we can avoid that overhead by just re-sending
@@ -129,9 +185,10 @@ static int vfio_pci_intx_unmask_handler(void *opaque, void *unused)
 		} else
 			enable_irq(pdev->irq);

-		vdev->ctx[0].masked = (ret > 0);
+		ctx->masked = (ret > 0);
 	}

+out_unlock:
 	spin_unlock_irqrestore(&vdev->irqlock, flags);

 	return ret;
@@ -146,18 +203,23 @@ void vfio_pci_intx_unmask(struct vfio_pci_core_device *vdev)
 static irqreturn_t vfio_intx_handler(int irq, void *dev_id)
 {
 	struct vfio_pci_core_device *vdev = dev_id;
+	struct vfio_pci_irq_ctx *ctx;
 	unsigned long flags;
 	int ret = IRQ_NONE;

+	ctx = vfio_irq_ctx_get(vdev, 0);
+	if (WARN_ON_ONCE(!ctx))
+		return ret;
+
 	spin_lock_irqsave(&vdev->irqlock, flags);

 	if (!vdev->pci_2_3) {
 		disable_irq_nosync(vdev->pdev->irq);
-		vdev->ctx[0].masked = true;
+		ctx->masked = true;
 		ret = IRQ_HANDLED;
-	} else if (!vdev->ctx[0].masked &&	/* may be shared */
+	} else if (!ctx->masked &&	/* may be shared */
 		   pci_check_and_mask_intx(vdev->pdev)) {
-		vdev->ctx[0].masked = true;
+		ctx->masked = true;
 		ret = IRQ_HANDLED;
 	}
@@ -171,27 +233,27 @@ static irqreturn_t vfio_intx_handler(int irq, void *dev_id)

 static int vfio_intx_enable(struct vfio_pci_core_device *vdev)
 {
+	struct vfio_pci_irq_ctx *ctx;
+
 	if (!is_irq_none(vdev))
 		return -EINVAL;

 	if (!vdev->pdev->irq)
 		return -ENODEV;

-	vdev->ctx = kzalloc(sizeof(struct vfio_pci_irq_ctx), GFP_KERNEL_ACCOUNT);
-	if (!vdev->ctx)
+	ctx = vfio_irq_ctx_alloc(vdev, 0);
+	if (!ctx)
 		return -ENOMEM;

-	vdev->num_ctx = 1;
-
 	/*
 	 * If the virtual interrupt is masked, restore it. Devices
 	 * supporting DisINTx can be masked at the hardware level
 	 * here, non-PCI-2.3 devices will have to wait until the
 	 * interrupt is enabled.
 	 */
-	vdev->ctx[0].masked = vdev->virq_disabled;
+	ctx->masked = vdev->virq_disabled;
 	if (vdev->pci_2_3)
-		pci_intx(vdev->pdev, !vdev->ctx[0].masked);
+		pci_intx(vdev->pdev, !ctx->masked);

 	vdev->irq_type = VFIO_PCI_INTX_IRQ_INDEX;
@@ -202,41 +264,46 @@ static int vfio_intx_set_signal(struct vfio_pci_core_device *vdev, int fd)
 {
 	struct pci_dev *pdev = vdev->pdev;
 	unsigned long irqflags = IRQF_SHARED;
+	struct vfio_pci_irq_ctx *ctx;
 	struct eventfd_ctx *trigger;
 	unsigned long flags;
 	int ret;

-	if (vdev->ctx[0].trigger) {
+	ctx = vfio_irq_ctx_get(vdev, 0);
+	if (WARN_ON_ONCE(!ctx))
+		return -EINVAL;
+
+	if (ctx->trigger) {
 		free_irq(pdev->irq, vdev);
-		kfree(vdev->ctx[0].name);
-		eventfd_ctx_put(vdev->ctx[0].trigger);
-		vdev->ctx[0].trigger = NULL;
+		kfree(ctx->name);
+		eventfd_ctx_put(ctx->trigger);
+		ctx->trigger = NULL;
 	}

 	if (fd < 0) /* Disable only */
 		return 0;

-	vdev->ctx[0].name = kasprintf(GFP_KERNEL_ACCOUNT, "vfio-intx(%s)",
-				      pci_name(pdev));
-	if (!vdev->ctx[0].name)
+	ctx->name = kasprintf(GFP_KERNEL_ACCOUNT, "vfio-intx(%s)",
+			      pci_name(pdev));
+	if (!ctx->name)
 		return -ENOMEM;

 	trigger = eventfd_ctx_fdget(fd);
 	if (IS_ERR(trigger)) {
-		kfree(vdev->ctx[0].name);
+		kfree(ctx->name);
 		return PTR_ERR(trigger);
 	}

-	vdev->ctx[0].trigger = trigger;
+	ctx->trigger = trigger;

 	if (!vdev->pci_2_3)
 		irqflags = 0;

 	ret = request_irq(pdev->irq, vfio_intx_handler,
-			  irqflags, vdev->ctx[0].name, vdev);
+			  irqflags, ctx->name, vdev);
 	if (ret) {
-		vdev->ctx[0].trigger = NULL;
-		kfree(vdev->ctx[0].name);
+		ctx->trigger = NULL;
+		kfree(ctx->name);
 		eventfd_ctx_put(trigger);
 		return ret;
 	}
@@ -246,7 +313,7 @@ static int vfio_intx_set_signal(struct vfio_pci_core_device *vdev, int fd)
 	 * disable_irq won't.
 	 */
 	spin_lock_irqsave(&vdev->irqlock, flags);
-	if (!vdev->pci_2_3 && vdev->ctx[0].masked)
+	if (!vdev->pci_2_3 && ctx->masked)
 		disable_irq_nosync(pdev->irq);
 	spin_unlock_irqrestore(&vdev->irqlock, flags);
@@ -255,12 +322,17 @@ static int vfio_intx_set_signal(struct vfio_pci_core_device *vdev, int fd)
 static void vfio_intx_disable(struct vfio_pci_core_device *vdev)
 {
-	vfio_virqfd_disable(&vdev->ctx[0].unmask);
-	vfio_virqfd_disable(&vdev->ctx[0].mask);
+	struct vfio_pci_irq_ctx *ctx;
+
+	ctx = vfio_irq_ctx_get(vdev, 0);
+	WARN_ON_ONCE(!ctx);
+	if (ctx) {
+		vfio_virqfd_disable(&ctx->unmask);
+		vfio_virqfd_disable(&ctx->mask);
+	}
 	vfio_intx_set_signal(vdev, -1);
 	vdev->irq_type = VFIO_PCI_NUM_IRQS;
-	vdev->num_ctx = 0;
-	kfree(vdev->ctx);
+	vfio_irq_ctx_free(vdev, ctx, 0);
 }

 /*
@@ -284,11 +356,6 @@ static int vfio_msi_enable(struct vfio_pci_core_device *vdev, int nvec, bool msi
 	if (!is_irq_none(vdev))
 		return -EINVAL;

-	vdev->ctx = kcalloc(nvec, sizeof(struct vfio_pci_irq_ctx),
-			    GFP_KERNEL_ACCOUNT);
-	if (!vdev->ctx)
-		return -ENOMEM;
-
 	/* return the number of supported vectors if we can't get all: */
 	cmd = vfio_pci_memory_lock_and_enable(vdev);
 	ret = pci_alloc_irq_vectors(pdev, 1, nvec, flag);
@@ -296,12 +363,10 @@ static int vfio_msi_enable(struct vfio_pci_core_device *vdev, int nvec, bool msi
 		if (ret > 0)
 			pci_free_irq_vectors(pdev);
 		vfio_pci_memory_unlock_and_restore(vdev, cmd);
-		kfree(vdev->ctx);
 		return ret;
 	}
 	vfio_pci_memory_unlock_and_restore(vdev, cmd);

-	vdev->num_ctx = nvec;
 	vdev->irq_type = msix ? VFIO_PCI_MSIX_IRQ_INDEX :
 				VFIO_PCI_MSI_IRQ_INDEX;
@@ -316,53 +381,91 @@ static int vfio_msi_enable(struct vfio_pci_core_device *vdev, int nvec, bool msi
 	return 0;
 }

-static int vfio_msi_set_vector_signal(struct vfio_pci_core_device *vdev,
-				      int vector, int fd, bool msix)
+/*
+ * vfio_msi_alloc_irq() returns the Linux IRQ number of an MSI or MSI-X device
+ * interrupt vector. If a Linux IRQ number is not available then a new
+ * interrupt is allocated if dynamic MSI-X is supported.
+ *
+ * Where is vfio_msi_free_irq()? Allocated interrupts are maintained,
+ * essentially forming a cache that subsequent allocations can draw from.
+ * Interrupts are freed using pci_free_irq_vectors() when MSI/MSI-X is
+ * disabled.
+ */
+static int vfio_msi_alloc_irq(struct vfio_pci_core_device *vdev,
+			      unsigned int vector, bool msix)
 {
 	struct pci_dev *pdev = vdev->pdev;
-	struct eventfd_ctx *trigger;
-	int irq, ret;
+	struct msi_map map;
+	int irq;
 	u16 cmd;

-	if (vector < 0 || vector >= vdev->num_ctx)
+	irq = pci_irq_vector(pdev, vector);
+	if (WARN_ON_ONCE(irq == 0))
 		return -EINVAL;
+	if (irq > 0 || !msix || !vdev->has_dyn_msix)
+		return irq;

-	irq = pci_irq_vector(pdev, vector);
+	cmd = vfio_pci_memory_lock_and_enable(vdev);
+	map = pci_msix_alloc_irq_at(pdev, vector, NULL);
+	vfio_pci_memory_unlock_and_restore(vdev, cmd);

-	if (vdev->ctx[vector].trigger) {
-		irq_bypass_unregister_producer(&vdev->ctx[vector].producer);
+	return map.index < 0 ? map.index : map.virq;
+}
+
+static int vfio_msi_set_vector_signal(struct vfio_pci_core_device *vdev,
+				      unsigned int vector, int fd, bool msix)
+{
+	struct pci_dev *pdev = vdev->pdev;
+	struct vfio_pci_irq_ctx *ctx;
+	struct eventfd_ctx *trigger;
+	int irq = -EINVAL, ret;
+	u16 cmd;
+
+	ctx = vfio_irq_ctx_get(vdev, vector);
+
+	if (ctx) {
+		irq_bypass_unregister_producer(&ctx->producer);
+		irq = pci_irq_vector(pdev, vector);
 		cmd = vfio_pci_memory_lock_and_enable(vdev);
-		free_irq(irq, vdev->ctx[vector].trigger);
+		free_irq(irq, ctx->trigger);
 		vfio_pci_memory_unlock_and_restore(vdev, cmd);
-		kfree(vdev->ctx[vector].name);
-		eventfd_ctx_put(vdev->ctx[vector].trigger);
-		vdev->ctx[vector].trigger = NULL;
+		/* Interrupt stays allocated, will be freed at MSI-X disable. */
+		kfree(ctx->name);
+		eventfd_ctx_put(ctx->trigger);
+		vfio_irq_ctx_free(vdev, ctx, vector);
 	}

 	if (fd < 0)
 		return 0;

-	vdev->ctx[vector].name = kasprintf(GFP_KERNEL_ACCOUNT,
-					   "vfio-msi%s[%d](%s)",
-					   msix ? "x" : "", vector,
-					   pci_name(pdev));
-	if (!vdev->ctx[vector].name)
+	if (irq == -EINVAL) {
+		/* Interrupt stays allocated, will be freed at MSI-X disable. */
+		irq = vfio_msi_alloc_irq(vdev, vector, msix);
+		if (irq < 0)
+			return irq;
+	}
+
+	ctx = vfio_irq_ctx_alloc(vdev, vector);
+	if (!ctx)
 		return -ENOMEM;

+	ctx->name = kasprintf(GFP_KERNEL_ACCOUNT, "vfio-msi%s[%d](%s)",
+			      msix ? "x" : "", vector, pci_name(pdev));
+	if (!ctx->name) {
+		ret = -ENOMEM;
+		goto out_free_ctx;
+	}
+
 	trigger = eventfd_ctx_fdget(fd);
 	if (IS_ERR(trigger)) {
-		kfree(vdev->ctx[vector].name);
-		return PTR_ERR(trigger);
+		ret = PTR_ERR(trigger);
+		goto out_free_name;
 	}

 	/*
-	 * The MSIx vector table resides in device memory which may be cleared
-	 * via backdoor resets. We don't allow direct access to the vector
-	 * table so even if a userspace driver attempts to save/restore around
-	 * such a reset it would be unsuccessful. To avoid this, restore the
-	 * cached value of the message prior to enabling.
+	 * If the vector was previously allocated, refresh the on-device
+	 * message data before enabling in case it had been cleared or
+	 * corrupted (e.g. due to backdoor resets) since writing.
 	 */
 	cmd = vfio_pci_memory_lock_and_enable(vdev);
 	if (msix) {
@@ -372,37 +475,39 @@ static int vfio_msi_set_vector_signal(struct vfio_pci_core_device *vdev,
 		pci_write_msi_msg(irq, &msg);
 	}

-	ret = request_irq(irq, vfio_msihandler, 0,
-			  vdev->ctx[vector].name, trigger);
+	ret = request_irq(irq, vfio_msihandler, 0, ctx->name, trigger);
 	vfio_pci_memory_unlock_and_restore(vdev, cmd);
-	if (ret) {
-		kfree(vdev->ctx[vector].name);
-		eventfd_ctx_put(trigger);
-		return ret;
-	}
+	if (ret)
+		goto out_put_eventfd_ctx;

-	vdev->ctx[vector].producer.token = trigger;
-	vdev->ctx[vector].producer.irq = irq;
-	ret = irq_bypass_register_producer(&vdev->ctx[vector].producer);
+	ctx->producer.token = trigger;
+	ctx->producer.irq = irq;
+	ret = irq_bypass_register_producer(&ctx->producer);
 	if (unlikely(ret)) {
 		dev_info(&pdev->dev,
 		"irq bypass producer (token %p) registration fails: %d\n",
-		vdev->ctx[vector].producer.token, ret);
+		ctx->producer.token, ret);

-		vdev->ctx[vector].producer.token = NULL;
+		ctx->producer.token = NULL;
 	}
-	vdev->ctx[vector].trigger = trigger;
+	ctx->trigger = trigger;

 	return 0;
+
+out_put_eventfd_ctx:
+	eventfd_ctx_put(trigger);
+out_free_name:
+	kfree(ctx->name);
+out_free_ctx:
+	vfio_irq_ctx_free(vdev, ctx, vector);
+	return ret;
 }
 static int vfio_msi_set_block(struct vfio_pci_core_device *vdev, unsigned start,
 			      unsigned count, int32_t *fds, bool msix)
 {
-	int i, j, ret = 0;
+	unsigned int i, j;
+	int ret = 0;

-	if (start >= vdev->num_ctx || start + count > vdev->num_ctx)
-		return -EINVAL;
-
 	for (i = 0, j = start; i < count && !ret; i++, j++) {
 		int fd = fds ? fds[i] : -1;
@@ -410,8 +515,8 @@ static int vfio_msi_set_block(struct vfio_pci_core_device *vdev, unsigned start,
 	}

 	if (ret) {
-		for (--j; j >= (int)start; j--)
-			vfio_msi_set_vector_signal(vdev, j, -1, msix);
+		for (i = start; i < j; i++)
+			vfio_msi_set_vector_signal(vdev, i, -1, msix);
 	}

 	return ret;
@@ -420,16 +525,16 @@ static int vfio_msi_set_block(struct vfio_pci_core_device *vdev, unsigned start,
 static void vfio_msi_disable(struct vfio_pci_core_device *vdev, bool msix)
 {
 	struct pci_dev *pdev = vdev->pdev;
-	int i;
+	struct vfio_pci_irq_ctx *ctx;
+	unsigned long i;
 	u16 cmd;

-	for (i = 0; i < vdev->num_ctx; i++) {
-		vfio_virqfd_disable(&vdev->ctx[i].unmask);
-		vfio_virqfd_disable(&vdev->ctx[i].mask);
+	xa_for_each(&vdev->ctx, i, ctx) {
+		vfio_virqfd_disable(&ctx->unmask);
+		vfio_virqfd_disable(&ctx->mask);
+		vfio_msi_set_vector_signal(vdev, i, -1, msix);
 	}

-	vfio_msi_set_block(vdev, 0, vdev->num_ctx, NULL, msix);
-
 	cmd = vfio_pci_memory_lock_and_enable(vdev);
 	pci_free_irq_vectors(pdev);
 	vfio_pci_memory_unlock_and_restore(vdev, cmd);
@@ -442,8 +547,6 @@ static void vfio_msi_disable(struct vfio_pci_core_device *vdev, bool msix)
 		pci_intx(pdev, 0);

 	vdev->irq_type = VFIO_PCI_NUM_IRQS;
-	vdev->num_ctx = 0;
-	kfree(vdev->ctx);
 }

 /*
@@ -463,14 +566,18 @@ static int vfio_pci_set_intx_unmask(struct vfio_pci_core_device *vdev,
 		if (unmask)
 			vfio_pci_intx_unmask(vdev);
 	} else if (flags & VFIO_IRQ_SET_DATA_EVENTFD) {
+		struct vfio_pci_irq_ctx *ctx = vfio_irq_ctx_get(vdev, 0);
 		int32_t fd = *(int32_t *)data;

+		if (WARN_ON_ONCE(!ctx))
+			return -EINVAL;
 		if (fd >= 0)
 			return vfio_virqfd_enable((void *) vdev,
 						  vfio_pci_intx_unmask_handler,
 						  vfio_send_intx_eventfd, NULL,
-						  &vdev->ctx[0].unmask, fd);
+						  &ctx->unmask, fd);

-		vfio_virqfd_disable(&vdev->ctx[0].unmask);
+		vfio_virqfd_disable(&ctx->unmask);
 	}

 	return 0;
@@ -543,7 +650,8 @@ static int vfio_pci_set_msi_trigger(struct vfio_pci_core_device *vdev,
 				    unsigned index, unsigned start,
 				    unsigned count, uint32_t flags, void *data)
 {
-	int i;
+	struct vfio_pci_irq_ctx *ctx;
+	unsigned int i;
 	bool msix = (index == VFIO_PCI_MSIX_IRQ_INDEX) ? true : false;

 	if (irq_is(vdev, index) && !count && (flags & VFIO_IRQ_SET_DATA_NONE)) {
@@ -573,18 +681,19 @@ static int vfio_pci_set_msi_trigger(struct vfio_pci_core_device *vdev,
 		return ret;
 	}

-	if (!irq_is(vdev, index) || start + count > vdev->num_ctx)
+	if (!irq_is(vdev, index))
 		return -EINVAL;

 	for (i = start; i < start + count; i++) {
-		if (!vdev->ctx[i].trigger)
+		ctx = vfio_irq_ctx_get(vdev, i);
+		if (!ctx)
 			continue;

 		if (flags & VFIO_IRQ_SET_DATA_NONE) {
-			eventfd_signal(vdev->ctx[i].trigger, 1);
+			eventfd_signal(ctx->trigger, 1);
 		} else if (flags & VFIO_IRQ_SET_DATA_BOOL) {
 			uint8_t *bools = data;

 			if (bools[i - start])
-				eventfd_signal(vdev->ctx[i].trigger, 1);
+				eventfd_signal(ctx->trigger, 1);
 		}
 	}

 	return 0;
......
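
A companion sketch to the interrupt rework above (editorial, not from the
merge): with dynamic MSI-X, userspace can attach an eventfd to one new
vector without tearing down the rest. Assumes device_fd is an open VFIO
device with MSI-X already enabled and efd an existing eventfd:

#include <string.h>
#include <sys/ioctl.h>
#include <linux/vfio.h>

static int enable_one_msix_vector(int device_fd, unsigned int vector, int efd)
{
	/* Room for the header plus a single int32 eventfd payload. */
	char buf[sizeof(struct vfio_irq_set) + sizeof(int)];
	struct vfio_irq_set *set = (struct vfio_irq_set *)buf;

	memset(buf, 0, sizeof(buf));
	set->argsz = sizeof(buf);
	set->flags = VFIO_IRQ_SET_DATA_EVENTFD | VFIO_IRQ_SET_ACTION_TRIGGER;
	set->index = VFIO_PCI_MSIX_IRQ_INDEX;
	set->start = vector;
	set->count = 1;
	memcpy(set->data, &efd, sizeof(int));

	return ioctl(device_fd, VFIO_DEVICE_SET_IRQS, set);
}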
 # SPDX-License-Identifier: GPL-2.0-only
-config VFIO_PLATFORM
-	tristate "VFIO support for platform devices"
-	depends on ARM || ARM64 || COMPILE_TEST
-	select VFIO_VIRQFD
+menu "VFIO support for platform devices"
+	depends on ARM || ARM64 || COMPILE_TEST
+
+config VFIO_PLATFORM_BASE
+	tristate
+	select VFIO_VIRQFD
+
+config VFIO_PLATFORM
+	tristate "Generic VFIO support for any platform device"
+	select VFIO_PLATFORM_BASE
 	help
 	  Support for platform devices with VFIO. This is required to make
 	  use of platform devices present on the system using the VFIO
@@ -10,10 +16,10 @@ config VFIO_PLATFORM
 	  If you don't know what to do here, say N.

-if VFIO_PLATFORM
-
 config VFIO_AMBA
 	tristate "VFIO support for AMBA devices"
 	depends on ARM_AMBA || COMPILE_TEST
+	select VFIO_PLATFORM_BASE
 	help
 	  Support for ARM AMBA devices with VFIO. This is required to make
 	  use of ARM AMBA devices present on the system using the VFIO
@@ -21,5 +27,9 @@ config VFIO_AMBA
 	  If you don't know what to do here, say N.

+menu "VFIO platform reset drivers"
+	depends on VFIO_PLATFORM_BASE
+
 source "drivers/vfio/platform/reset/Kconfig"

-endif
+endmenu
+
+endmenu
 # SPDX-License-Identifier: GPL-2.0
 vfio-platform-base-y := vfio_platform_common.o vfio_platform_irq.o
-vfio-platform-y := vfio_platform.o
+obj-$(CONFIG_VFIO_PLATFORM_BASE) += vfio-platform-base.o
+obj-$(CONFIG_VFIO_PLATFORM_BASE) += reset/

+vfio-platform-y := vfio_platform.o
 obj-$(CONFIG_VFIO_PLATFORM) += vfio-platform.o
-obj-$(CONFIG_VFIO_PLATFORM) += vfio-platform-base.o
-obj-$(CONFIG_VFIO_PLATFORM) += reset/

 vfio-amba-y := vfio_amba.o
 obj-$(CONFIG_VFIO_AMBA) += vfio-amba.o
-obj-$(CONFIG_VFIO_AMBA) += vfio-platform-base.o
-obj-$(CONFIG_VFIO_AMBA) += reset/
 # SPDX-License-Identifier: GPL-2.0-only
+if VFIO_PLATFORM
 config VFIO_PLATFORM_CALXEDAXGMAC_RESET
 	tristate "VFIO support for calxeda xgmac reset"
 	help
@@ -21,3 +22,4 @@ config VFIO_PLATFORM_BCMFLEXRM_RESET
 	  Enables the VFIO platform driver to handle reset for Broadcom FlexRM

 	  If you don't know what to do here, say N.
+endif
@@ -14,7 +14,6 @@
 #include <linux/mod_devicetable.h>

 #define MAX_CDX_DEV_RESOURCES	4
-#define CDX_ANY_ID (0xFFFF)
 #define CDX_CONTROLLER_ID_SHIFT 4
 #define CDX_BUS_NUM_MASK 0xF
......
@@ -912,6 +912,12 @@ struct ishtp_device_id {
 	kernel_ulong_t driver_data;
 };

+#define CDX_ANY_ID (0xFFFF)
+
+enum {
+	CDX_ID_F_VFIO_DRIVER_OVERRIDE = 1,
+};
+
 /**
  * struct cdx_device_id - CDX device identifier
  * @vendor: Vendor ID
......
@@ -59,8 +59,7 @@ struct vfio_pci_core_device {
 	struct perm_bits	*msi_perm;
 	spinlock_t		irqlock;
 	struct mutex		igate;
-	struct vfio_pci_irq_ctx	*ctx;
-	int			num_ctx;
+	struct xarray		ctx;
 	int			irq_type;
 	int			num_regions;
 	struct vfio_pci_region	*region;
@@ -69,17 +68,18 @@ struct vfio_pci_core_device {
 	u16			msix_size;
 	u32			msix_offset;
 	u32			rbar[7];
-	bool			pci_2_3;
-	bool			virq_disabled;
-	bool			reset_works;
-	bool			extended_caps;
-	bool			bardirty;
-	bool			has_vga;
-	bool			needs_reset;
-	bool			nointx;
-	bool			needs_pm_restore;
-	bool			pm_intx_masked;
-	bool			pm_runtime_engaged;
+	bool			has_dyn_msix:1;
+	bool			pci_2_3:1;
+	bool			virq_disabled:1;
+	bool			reset_works:1;
+	bool			extended_caps:1;
+	bool			bardirty:1;
+	bool			has_vga:1;
+	bool			needs_reset:1;
+	bool			nointx:1;
+	bool			needs_pm_restore:1;
+	bool			pm_intx_masked:1;
+	bool			pm_runtime_engaged:1;
 	struct pci_saved_state	*pci_saved_state;
 	struct pci_saved_state	*pm_save;
 	int			ioeventfds_nr;
......
@@ -213,6 +213,7 @@ struct vfio_device_info {
 #define VFIO_DEVICE_FLAGS_AP	(1 << 5)	/* vfio-ap device */
 #define VFIO_DEVICE_FLAGS_FSL_MC (1 << 6)	/* vfio-fsl-mc device */
 #define VFIO_DEVICE_FLAGS_CAPS	(1 << 7)	/* Info supports caps */
+#define VFIO_DEVICE_FLAGS_CDX	(1 << 8)	/* vfio-cdx device */
 	__u32	num_regions;	/* Max region index + 1 */
 	__u32	num_irqs;	/* Max IRQ index + 1 */
 	__u32	cap_offset;	/* Offset within info struct of first cap */
@@ -240,6 +241,20 @@ struct vfio_device_info {
 #define VFIO_DEVICE_INFO_CAP_ZPCI_UTIL		3
 #define VFIO_DEVICE_INFO_CAP_ZPCI_PFIP		4

+/*
+ * The following VFIO_DEVICE_INFO capability reports support for PCIe AtomicOp
+ * completion to the root bus with supported widths provided via flags.
+ */
+#define VFIO_DEVICE_INFO_CAP_PCI_ATOMIC_COMP	5
+struct vfio_device_info_cap_pci_atomic_comp {
+	struct vfio_info_cap_header header;
+	__u32 flags;
+#define VFIO_PCI_ATOMIC_COMP32	(1 << 0)
+#define VFIO_PCI_ATOMIC_COMP64	(1 << 1)
+#define VFIO_PCI_ATOMIC_COMP128	(1 << 2)
+	__u32 reserved;
+};
+
 /**
  * VFIO_DEVICE_GET_REGION_INFO - _IOWR(VFIO_TYPE, VFIO_BASE + 8,
  *				       struct vfio_region_info)
@@ -511,6 +526,9 @@ struct vfio_region_info_cap_nvlink2_lnkspd {
  * then add and unmask vectors, it's up to userspace to make the decision
  * whether to allocate the maximum supported number of vectors or tear
  * down setup and incrementally increase the vectors as each is enabled.
+ * Absence of the NORESIZE flag indicates that vectors can be enabled
+ * and disabled dynamically without impacting other vectors within the
+ * index.
  */
 struct vfio_irq_info {
 	__u32	argsz;
......
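
A usage sketch for the two uapi additions above (editorial, not from the
merge): the AtomicOp completer capability is discovered by walking the
vfio_device_info capability chain. atomic_comp_flags() is a hypothetical
helper; it assumes `info` points to a buffer returned by
VFIO_DEVICE_GET_INFO with VFIO_DEVICE_FLAGS_CAPS set and argsz large enough
to hold the capability chain:

#include <linux/vfio.h>

static __u32 atomic_comp_flags(struct vfio_device_info *info)
{
	struct vfio_info_cap_header *hdr;
	__u32 off = info->cap_offset;

	/* Each header's `next` is an offset from the start of `info`. */
	while (off) {
		hdr = (struct vfio_info_cap_header *)((char *)info + off);
		if (hdr->id == VFIO_DEVICE_INFO_CAP_PCI_ATOMIC_COMP) {
			struct vfio_device_info_cap_pci_atomic_comp *cap =
				(struct vfio_device_info_cap_pci_atomic_comp *)hdr;
			return cap->flags; /* COMP32 | COMP64 | COMP128 */
		}
		off = hdr->next;
	}
	return 0;
}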
@@ -265,6 +265,7 @@ int main(void)
 	DEVID(cdx_device_id);
 	DEVID_FIELD(cdx_device_id, vendor);
 	DEVID_FIELD(cdx_device_id, device);
+	DEVID_FIELD(cdx_device_id, override_only);

 	return 0;
 }
@@ -1458,8 +1458,23 @@ static int do_cdx_entry(const char *filename, void *symval,
 {
 	DEF_FIELD(symval, cdx_device_id, vendor);
 	DEF_FIELD(symval, cdx_device_id, device);
+	DEF_FIELD(symval, cdx_device_id, override_only);

-	sprintf(alias, "cdx:v%08Xd%08Xd", vendor, device);
+	switch (override_only) {
+	case 0:
+		strcpy(alias, "cdx:");
+		break;
+	case CDX_ID_F_VFIO_DRIVER_OVERRIDE:
+		strcpy(alias, "vfio_cdx:");
+		break;
+	default:
+		warn("Unknown CDX driver_override alias %08X\n",
+		     override_only);
+		return 0;
+	}
+
+	ADD(alias, "v", vendor != CDX_ANY_ID, vendor);
+	ADD(alias, "d", device != CDX_ANY_ID, device);
+
 	return 1;
 }
......