Commit 30aec6e1 authored by Linus Torvalds

Merge tag 'vfio-v6.10-rc1' of https://github.com/awilliam/linux-vfio

Pull vfio updates from Alex Williamson:

 - The vfio fsl-mc bus driver has become orphaned. We'll consider
   removing it in future releases if a new maintainer isn't found (Alex
   Williamson)

 - Improved usage of opaque data in vfio-pci INTx handling, avoiding
   lookups of the eventfd through the interrupt and irqfd runtime paths
   (Alex Williamson)

 - Resolve an error path memory leak introduced in vfio-pci interrupt
   code (Ye Bin)

 - Addition of interrupt support for vfio devices exposed on the CDX
   bus, including a new MSI allocation helper and export of existing
   helpers for MSI alloc and free (Nipun Gupta)

 - A new vfio-pci variant driver supporting migration of Intel QAT VF
   devices for the GEN4 PFs (Xin Zeng & Yahui Cao)

 - Resolve a possibly circular locking dependency in vfio-pci by
   avoiding copy_to_user() from a PCI bus walk callback (Alex
   Williamson)

 - Trivial docs update to remove a duplicate semicolon (Foryun Ma)

* tag 'vfio-v6.10-rc1' of https://github.com/awilliam/linux-vfio:
  vfio/pci: Restore zero affected bus reset devices warning
  vfio: remove an extra semicolon
  vfio/pci: Collect hot-reset devices to local buffer
  vfio/qat: Add vfio_pci driver for Intel QAT SR-IOV VF devices
  vfio/cdx: add interrupt support
  genirq/msi: Add MSI allocation helper and export MSI functions
  vfio/pci: fix potential memory leak in vfio_intx_enable()
  vfio/pci: Pass eventfd context object through irqfd
  vfio/pci: Pass eventfd context to IRQ handler
  MAINTAINERS: Orphan vfio fsl-mc bus driver
parents 70ec81c2 cbb325e7
...@@ -364,7 +364,7 @@ IOMMUFD IOAS/HWPT to enable userspace DMA:: ...@@ -364,7 +364,7 @@ IOMMUFD IOAS/HWPT to enable userspace DMA::
MAP_PRIVATE | MAP_ANONYMOUS, 0, 0); MAP_PRIVATE | MAP_ANONYMOUS, 0, 0);
map.iova = 0; /* 1MB starting at 0x0 from device view */ map.iova = 0; /* 1MB starting at 0x0 from device view */
map.length = 1024 * 1024; map.length = 1024 * 1024;
map.ioas_id = alloc_data.out_ioas_id;; map.ioas_id = alloc_data.out_ioas_id;
ioctl(iommufd, IOMMU_IOAS_MAP, &map); ioctl(iommufd, IOMMU_IOAS_MAP, &map);
......
...@@ -23512,9 +23512,8 @@ F: include/linux/vfio_pci_core.h ...@@ -23512,9 +23512,8 @@ F: include/linux/vfio_pci_core.h
F: include/uapi/linux/vfio.h F: include/uapi/linux/vfio.h
VFIO FSL-MC DRIVER VFIO FSL-MC DRIVER
M: Diana Craciun <diana.craciun@oss.nxp.com>
L: kvm@vger.kernel.org L: kvm@vger.kernel.org
S: Maintained S: Orphan
F: drivers/vfio/fsl-mc/ F: drivers/vfio/fsl-mc/
VFIO HISILICON PCI DRIVER VFIO HISILICON PCI DRIVER
...@@ -23568,6 +23567,14 @@ L: kvm@vger.kernel.org ...@@ -23568,6 +23567,14 @@ L: kvm@vger.kernel.org
S: Maintained S: Maintained
F: drivers/vfio/platform/ F: drivers/vfio/platform/
VFIO QAT PCI DRIVER
M: Xin Zeng <xin.zeng@intel.com>
M: Giovanni Cabiddu <giovanni.cabiddu@intel.com>
L: kvm@vger.kernel.org
L: qat-linux@intel.com
S: Supported
F: drivers/vfio/pci/qat/
VFIO VIRTIO PCI DRIVER VFIO VIRTIO PCI DRIVER
M: Yishai Hadas <yishaih@nvidia.com> M: Yishai Hadas <yishaih@nvidia.com>
L: kvm@vger.kernel.org L: kvm@vger.kernel.org
......
...@@ -5,4 +5,4 @@ ...@@ -5,4 +5,4 @@
obj-$(CONFIG_VFIO_CDX) += vfio-cdx.o obj-$(CONFIG_VFIO_CDX) += vfio-cdx.o
vfio-cdx-objs := main.o vfio-cdx-objs := main.o intr.o
// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (C) 2022-2023, Advanced Micro Devices, Inc.
*/
#include <linux/vfio.h>
#include <linux/slab.h>
#include <linux/types.h>
#include <linux/eventfd.h>
#include <linux/msi.h>
#include <linux/interrupt.h>
#include "linux/cdx/cdx_bus.h"
#include "private.h"
/*
 * MSI interrupt handler. The cookie registered with request_irq() is the
 * eventfd context bound to the vector; signalling it forwards the interrupt
 * to whoever is waiting on that eventfd.
 */
static irqreturn_t vfio_cdx_msihandler(int irq_no, void *arg)
{
	eventfd_signal((struct eventfd_ctx *)arg);

	return IRQ_HANDLED;
}
static int vfio_cdx_msi_enable(struct vfio_cdx_device *vdev, int nvec)
{
struct cdx_device *cdx_dev = to_cdx_device(vdev->vdev.dev);
struct device *dev = vdev->vdev.dev;
int msi_idx, ret;
vdev->cdx_irqs = kcalloc(nvec, sizeof(struct vfio_cdx_irq), GFP_KERNEL);
if (!vdev->cdx_irqs)
return -ENOMEM;
ret = cdx_enable_msi(cdx_dev);
if (ret) {
kfree(vdev->cdx_irqs);
return ret;
}
/* Allocate cdx MSIs */
ret = msi_domain_alloc_irqs(dev, MSI_DEFAULT_DOMAIN, nvec);
if (ret) {
cdx_disable_msi(cdx_dev);
kfree(vdev->cdx_irqs);
return ret;
}
for (msi_idx = 0; msi_idx < nvec; msi_idx++)
vdev->cdx_irqs[msi_idx].irq_no = msi_get_virq(dev, msi_idx);
vdev->msi_count = nvec;
vdev->config_msi = 1;
return 0;
}
/*
 * (Re)bind a single MSI vector to an eventfd.
 *
 * Any trigger currently attached to @vector is torn down first. If @fd is
 * negative the vector is simply left unbound; otherwise the eventfd behind
 * @fd is looked up and installed as the cookie of a freshly requested IRQ.
 *
 * Returns 0 on success, -EINVAL for an out-of-range vector, -ENOMEM if the
 * IRQ name cannot be allocated, or the error reported by
 * eventfd_ctx_fdget() / request_irq().
 */
static int vfio_cdx_msi_set_vector_signal(struct vfio_cdx_device *vdev,
					  int vector, int fd)
{
	struct eventfd_ctx *trigger;
	int irq_no, ret;

	if (vector < 0 || vector >= vdev->msi_count)
		return -EINVAL;

	irq_no = vdev->cdx_irqs[vector].irq_no;

	/* Release whatever was previously bound to this vector. */
	if (vdev->cdx_irqs[vector].trigger) {
		free_irq(irq_no, vdev->cdx_irqs[vector].trigger);
		kfree(vdev->cdx_irqs[vector].name);
		eventfd_ctx_put(vdev->cdx_irqs[vector].trigger);
		vdev->cdx_irqs[vector].trigger = NULL;
	}

	/* A negative fd is a pure "unbind" request. */
	if (fd < 0)
		return 0;

	vdev->cdx_irqs[vector].name = kasprintf(GFP_KERNEL, "vfio-msi[%d](%s)",
						vector, dev_name(vdev->vdev.dev));
	if (!vdev->cdx_irqs[vector].name)
		return -ENOMEM;

	trigger = eventfd_ctx_fdget(fd);
	if (IS_ERR(trigger)) {
		ret = PTR_ERR(trigger);
		goto err_free_name;
	}

	ret = request_irq(irq_no, vfio_cdx_msihandler, 0,
			  vdev->cdx_irqs[vector].name, trigger);
	if (ret)
		goto err_put_trigger;

	vdev->cdx_irqs[vector].trigger = trigger;

	return 0;

err_put_trigger:
	eventfd_ctx_put(trigger);
err_free_name:
	kfree(vdev->cdx_irqs[vector].name);
	return ret;
}
/*
 * Bind (or, when @fds is NULL, unbind) the eventfds of @count consecutive
 * vectors starting at @start.
 *
 * If binding any vector fails, that vector and every vector of this request
 * before it are unbound again and the error is returned.
 */
static int vfio_cdx_msi_set_block(struct vfio_cdx_device *vdev,
				  unsigned int start, unsigned int count,
				  int32_t *fds)
{
	int i, j, ret = 0;

	if (start >= vdev->msi_count || start + count > vdev->msi_count)
		return -EINVAL;

	for (i = 0; i < count; i++) {
		int fd = fds ? fds[i] : -1;

		ret = vfio_cdx_msi_set_vector_signal(vdev, start + i, fd);
		if (ret)
			break;
	}

	if (!ret)
		return 0;

	/* Roll back from the failing vector down to the first one. */
	for (j = start + i; j >= (int)start; j--)
		vfio_cdx_msi_set_vector_signal(vdev, j, -1);

	return ret;
}
/*
 * Counterpart of vfio_cdx_msi_enable(): unbind every vector and, when MSI
 * had been configured, free the MSI interrupts, disable MSI on the device
 * and release the per-vector array.
 */
static void vfio_cdx_msi_disable(struct vfio_cdx_device *vdev)
{
	/* No fd array means "unbind" for every vector in the range. */
	vfio_cdx_msi_set_block(vdev, 0, vdev->msi_count, NULL);

	if (vdev->config_msi) {
		msi_domain_free_irqs_all(vdev->vdev.dev, MSI_DEFAULT_DOMAIN);
		cdx_disable_msi(to_cdx_device(vdev->vdev.dev));

		kfree(vdev->cdx_irqs);
		vdev->cdx_irqs = NULL;
		vdev->msi_count = 0;
		vdev->config_msi = 0;
	}
}
/*
 * Handle a trigger action on the MSI index.
 *
 *  - @count == 0 together with VFIO_IRQ_SET_DATA_NONE tears MSI down.
 *  - VFIO_IRQ_SET_DATA_EVENTFD binds/unbinds the eventfds in @data for the
 *    vectors [@start, @start + @count), enabling MSI on first use.
 *  - VFIO_IRQ_SET_DATA_NONE / VFIO_IRQ_SET_DATA_BOOL signal the eventfds that
 *    are already bound to the selected vectors (for BOOL only those whose
 *    entry in @data is non-zero).
 *
 * Returns 0 on success or a negative errno; -EINVAL if the range exceeds the
 * device's MSI count or if signalling is requested while MSI is not
 * configured.
 */
static int vfio_cdx_set_msi_trigger(struct vfio_cdx_device *vdev,
				    unsigned int index, unsigned int start,
				    unsigned int count, u32 flags,
				    void *data)
{
	struct cdx_device *cdx_dev = to_cdx_device(vdev->vdev.dev);
	int i;

	if (start + count > cdx_dev->num_msi)
		return -EINVAL;

	if (!count && (flags & VFIO_IRQ_SET_DATA_NONE)) {
		vfio_cdx_msi_disable(vdev);
		return 0;
	}

	if (flags & VFIO_IRQ_SET_DATA_EVENTFD) {
		s32 *fds = data;
		int ret;

		if (vdev->config_msi)
			return vfio_cdx_msi_set_block(vdev, start, count,
						      fds);
		ret = vfio_cdx_msi_enable(vdev, cdx_dev->num_msi);
		if (ret)
			return ret;

		ret = vfio_cdx_msi_set_block(vdev, start, count, fds);
		if (ret)
			vfio_cdx_msi_disable(vdev);

		return ret;
	}

	/*
	 * The loop below walks vdev->cdx_irqs, which only exists while MSI is
	 * configured. Without this check user space could request a
	 * DATA_NONE/DATA_BOOL trigger before ever setting up eventfds and
	 * make the kernel dereference a NULL (or stale) array pointer.
	 */
	if (!vdev->config_msi)
		return -EINVAL;

	for (i = start; i < start + count; i++) {
		if (!vdev->cdx_irqs[i].trigger)
			continue;
		if (flags & VFIO_IRQ_SET_DATA_NONE) {
			eventfd_signal(vdev->cdx_irqs[i].trigger);
		} else if (flags & VFIO_IRQ_SET_DATA_BOOL) {
			u8 *bools = data;

			if (bools[i - start])
				eventfd_signal(vdev->cdx_irqs[i].trigger);
		}
	}

	return 0;
}
/*
 * Dispatch a SET_IRQS request. Only the trigger action is implemented; any
 * other action is rejected with -EINVAL.
 */
int vfio_cdx_set_irqs_ioctl(struct vfio_cdx_device *vdev,
			    u32 flags, unsigned int index,
			    unsigned int start, unsigned int count,
			    void *data)
{
	if (!(flags & VFIO_IRQ_SET_ACTION_TRIGGER))
		return -EINVAL;

	return vfio_cdx_set_msi_trigger(vdev, index, start, count, flags,
					data);
}
/*
 * Free all IRQs of the given device.
 *
 * Nothing is done when the per-vector array was never allocated, i.e. the
 * device does not support interrupts or none were configured. Otherwise a
 * zero-count DATA_NONE trigger request is issued, which disables MSI.
 */
void vfio_cdx_irqs_cleanup(struct vfio_cdx_device *vdev)
{
	if (vdev->cdx_irqs)
		vfio_cdx_set_msi_trigger(vdev, 0, 0, 0,
					 VFIO_IRQ_SET_DATA_NONE, NULL);
}
...@@ -61,6 +61,7 @@ static void vfio_cdx_close_device(struct vfio_device *core_vdev) ...@@ -61,6 +61,7 @@ static void vfio_cdx_close_device(struct vfio_device *core_vdev)
kfree(vdev->regions); kfree(vdev->regions);
cdx_dev_reset(core_vdev->dev); cdx_dev_reset(core_vdev->dev);
vfio_cdx_irqs_cleanup(vdev);
} }
static int vfio_cdx_bm_ctrl(struct vfio_device *core_vdev, u32 flags, static int vfio_cdx_bm_ctrl(struct vfio_device *core_vdev, u32 flags,
...@@ -123,7 +124,7 @@ static int vfio_cdx_ioctl_get_info(struct vfio_cdx_device *vdev, ...@@ -123,7 +124,7 @@ static int vfio_cdx_ioctl_get_info(struct vfio_cdx_device *vdev,
info.flags |= VFIO_DEVICE_FLAGS_RESET; info.flags |= VFIO_DEVICE_FLAGS_RESET;
info.num_regions = cdx_dev->res_count; info.num_regions = cdx_dev->res_count;
info.num_irqs = 0; info.num_irqs = cdx_dev->num_msi ? 1 : 0;
return copy_to_user(arg, &info, minsz) ? -EFAULT : 0; return copy_to_user(arg, &info, minsz) ? -EFAULT : 0;
} }
...@@ -152,6 +153,62 @@ static int vfio_cdx_ioctl_get_region_info(struct vfio_cdx_device *vdev, ...@@ -152,6 +153,62 @@ static int vfio_cdx_ioctl_get_region_info(struct vfio_cdx_device *vdev,
return copy_to_user(arg, &info, minsz) ? -EFAULT : 0; return copy_to_user(arg, &info, minsz) ? -EFAULT : 0;
} }
/*
 * VFIO_DEVICE_GET_IRQ_INFO: report the single MSI index of the device.
 *
 * The request is rejected with -EINVAL when the user structure is too small,
 * when an index other than 0 is asked for, or when the device has no MSIs.
 * Returns -EFAULT if user memory cannot be read or written.
 */
static int vfio_cdx_ioctl_get_irq_info(struct vfio_cdx_device *vdev,
				       struct vfio_irq_info __user *arg)
{
	struct cdx_device *cdx_dev = to_cdx_device(vdev->vdev.dev);
	unsigned long minsz = offsetofend(struct vfio_irq_info, count);
	struct vfio_irq_info info;

	if (copy_from_user(&info, arg, minsz))
		return -EFAULT;

	if (info.argsz < minsz || info.index >= 1 || !cdx_dev->num_msi)
		return -EINVAL;

	info.count = cdx_dev->num_msi;
	info.flags = VFIO_IRQ_INFO_EVENTFD | VFIO_IRQ_INFO_NORESIZE;

	if (copy_to_user(arg, &info, minsz))
		return -EFAULT;

	return 0;
}
/*
 * VFIO_DEVICE_SET_IRQS: validate the user header, copy in the optional
 * payload that follows it and hand the request to vfio_cdx_set_irqs_ioctl().
 */
static int vfio_cdx_ioctl_set_irqs(struct vfio_cdx_device *vdev,
				   struct vfio_irq_set __user *arg)
{
	struct cdx_device *cdx_dev = to_cdx_device(vdev->vdev.dev);
	unsigned long minsz = offsetofend(struct vfio_irq_set, count);
	struct vfio_irq_set hdr;
	size_t data_size = 0;
	u8 *data = NULL;
	int ret;

	if (copy_from_user(&hdr, arg, minsz))
		return -EFAULT;

	/* One IRQ index, with as many vectors as the device has MSIs. */
	ret = vfio_set_irqs_validate_and_prepare(&hdr, cdx_dev->num_msi,
						 1, &data_size);
	if (ret)
		return ret;

	/* The payload is only present for some data types. */
	if (data_size != 0) {
		data = memdup_user(arg->data, data_size);
		if (IS_ERR(data))
			return PTR_ERR(data);
	}

	ret = vfio_cdx_set_irqs_ioctl(vdev, hdr.flags, hdr.index,
				      hdr.start, hdr.count, data);

	/* data is still NULL here when no payload was copied in. */
	kfree(data);

	return ret;
}
static long vfio_cdx_ioctl(struct vfio_device *core_vdev, static long vfio_cdx_ioctl(struct vfio_device *core_vdev,
unsigned int cmd, unsigned long arg) unsigned int cmd, unsigned long arg)
{ {
...@@ -164,6 +221,10 @@ static long vfio_cdx_ioctl(struct vfio_device *core_vdev, ...@@ -164,6 +221,10 @@ static long vfio_cdx_ioctl(struct vfio_device *core_vdev,
return vfio_cdx_ioctl_get_info(vdev, uarg); return vfio_cdx_ioctl_get_info(vdev, uarg);
case VFIO_DEVICE_GET_REGION_INFO: case VFIO_DEVICE_GET_REGION_INFO:
return vfio_cdx_ioctl_get_region_info(vdev, uarg); return vfio_cdx_ioctl_get_region_info(vdev, uarg);
case VFIO_DEVICE_GET_IRQ_INFO:
return vfio_cdx_ioctl_get_irq_info(vdev, uarg);
case VFIO_DEVICE_SET_IRQS:
return vfio_cdx_ioctl_set_irqs(vdev, uarg);
case VFIO_DEVICE_RESET: case VFIO_DEVICE_RESET:
return cdx_dev_reset(core_vdev->dev); return cdx_dev_reset(core_vdev->dev);
default: default:
......
...@@ -13,6 +13,14 @@ static inline u64 vfio_cdx_index_to_offset(u32 index) ...@@ -13,6 +13,14 @@ static inline u64 vfio_cdx_index_to_offset(u32 index)
return ((u64)(index) << VFIO_CDX_OFFSET_SHIFT); return ((u64)(index) << VFIO_CDX_OFFSET_SHIFT);
} }
/* Per-vector MSI bookkeeping for a vfio-cdx device. */
struct vfio_cdx_irq {
/* NOTE(review): flags and count are not referenced by the interrupt code shown with this struct - confirm whether they are needed */
u32 flags;
u32 count;
/* Linux IRQ number of the vector, as returned by msi_get_virq() */
int irq_no;
/* eventfd context signalled by the IRQ handler; NULL while the vector is unbound */
struct eventfd_ctx *trigger;
/* kasprintf()-allocated name passed to request_irq() */
char *name;
};
struct vfio_cdx_region { struct vfio_cdx_region {
u32 flags; u32 flags;
u32 type; u32 type;
...@@ -23,8 +31,18 @@ struct vfio_cdx_region { ...@@ -23,8 +31,18 @@ struct vfio_cdx_region {
struct vfio_cdx_device { struct vfio_cdx_device {
struct vfio_device vdev; struct vfio_device vdev;
struct vfio_cdx_region *regions; struct vfio_cdx_region *regions;
struct vfio_cdx_irq *cdx_irqs;
u32 flags; u32 flags;
#define BME_SUPPORT BIT(0) #define BME_SUPPORT BIT(0)
u32 msi_count;
u8 config_msi;
}; };
int vfio_cdx_set_irqs_ioctl(struct vfio_cdx_device *vdev,
u32 flags, unsigned int index,
unsigned int start, unsigned int count,
void *data);
void vfio_cdx_irqs_cleanup(struct vfio_cdx_device *vdev);
#endif /* VFIO_CDX_PRIVATE_H */ #endif /* VFIO_CDX_PRIVATE_H */
...@@ -69,4 +69,6 @@ source "drivers/vfio/pci/virtio/Kconfig" ...@@ -69,4 +69,6 @@ source "drivers/vfio/pci/virtio/Kconfig"
source "drivers/vfio/pci/nvgrace-gpu/Kconfig" source "drivers/vfio/pci/nvgrace-gpu/Kconfig"
source "drivers/vfio/pci/qat/Kconfig"
endmenu endmenu
...@@ -17,3 +17,5 @@ obj-$(CONFIG_PDS_VFIO_PCI) += pds/ ...@@ -17,3 +17,5 @@ obj-$(CONFIG_PDS_VFIO_PCI) += pds/
obj-$(CONFIG_VIRTIO_VFIO_PCI) += virtio/ obj-$(CONFIG_VIRTIO_VFIO_PCI) += virtio/
obj-$(CONFIG_NVGRACE_GPU_VFIO_PCI) += nvgrace-gpu/ obj-$(CONFIG_NVGRACE_GPU_VFIO_PCI) += nvgrace-gpu/
obj-$(CONFIG_QAT_VFIO_PCI) += qat/
# SPDX-License-Identifier: GPL-2.0-only
config QAT_VFIO_PCI
tristate "VFIO support for QAT VF PCI devices"
select VFIO_PCI_CORE
depends on CRYPTO_DEV_QAT_4XXX
help
This provides migration support for Intel(R) QAT Virtual Function
using the VFIO framework.
To compile this as a module, choose M here: the module
will be called qat_vfio_pci. If you don't know what to do here,
say N.
# SPDX-License-Identifier: GPL-2.0-only
obj-$(CONFIG_QAT_VFIO_PCI) += qat_vfio_pci.o
qat_vfio_pci-y := main.o
// SPDX-License-Identifier: GPL-2.0-only
/* Copyright(c) 2024 Intel Corporation */
#include <linux/anon_inodes.h>
#include <linux/container_of.h>
#include <linux/device.h>
#include <linux/file.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/mutex.h>
#include <linux/pci.h>
#include <linux/sizes.h>
#include <linux/types.h>
#include <linux/uaccess.h>
#include <linux/vfio_pci_core.h>
#include <linux/qat/qat_mig_dev.h>
/*
* The migration data of each Intel QAT VF device is encapsulated into a
* 4096 bytes block. The data consists of two parts.
* The first is a pre-configured set of attributes of the VF being migrated,
* which are only set when it is created. This can be migrated during pre-copy
* stage and used for a device compatibility check.
* The second is the VF state. This includes the required MMIO regions and
* the shadow states maintained by the QAT PF driver. This part can only be
* saved when the VF is fully quiesced and be migrated during stop-copy stage.
* Both these 2 parts of data are saved in hierarchical structures including
* a preamble section and several raw state sections.
* When the pre-configured part of the migration data is fully retrieved from
* user space, the preamble section is used to validate the correctness of
* the data blocks and check the version compatibility. The raw state sections
* are then used to do a device compatibility check.
* When the device transits from RESUMING state, the VF states are extracted
* from the raw state sections of the VF state part of the migration data and
* then loaded into the device.
*/
/* Context behind one migration data file handed to user space. */
struct qat_vf_migration_file {
/* anonymous-inode file created for this stream */
struct file *filp;
/* protects migration region context */
struct mutex lock;
/* set once the stream has been shut down; read/write/ioctl then fail with -ENODEV */
bool disabled;
/* device this stream belongs to */
struct qat_vf_core_device *qat_vdev;
/* bytes available for reading (save) or bytes written so far (resume) */
ssize_t filled_size;
};
/* vfio-pci core device extended with QAT VF migration state. */
struct qat_vf_core_device {
struct vfio_pci_core_device core_device;
/* migration handle obtained from qat_vfmig_create() */
struct qat_mig_dev *mdev;
/* protects migration state */
struct mutex state_mutex;
/* current VFIO migration state of the device */
enum vfio_device_mig_state mig_state;
/* stream currently used to load data into the device, or NULL */
struct qat_vf_migration_file *resuming_migf;
/* stream currently used to read data out of the device, or NULL */
struct qat_vf_migration_file *saving_migf;
};
/*
 * open_device callback: enable the PCI core device, open the migration
 * handle and start out in the RUNNING migration state.
 *
 * If the migration handle cannot be opened the PCI core device is disabled
 * again and the error is returned.
 */
static int qat_vf_pci_open_device(struct vfio_device *core_vdev)
{
	struct qat_vf_core_device *qat_vdev;
	struct vfio_pci_core_device *vdev;
	int ret;

	qat_vdev = container_of(core_vdev, struct qat_vf_core_device,
				core_device.vdev);
	vdev = &qat_vdev->core_device;

	ret = vfio_pci_core_enable(vdev);
	if (ret)
		return ret;

	ret = qat_vfmig_open(qat_vdev->mdev);
	if (ret)
		goto err_disable;

	qat_vdev->mig_state = VFIO_DEVICE_STATE_RUNNING;
	vfio_pci_core_finish_enable(vdev);

	return 0;

err_disable:
	vfio_pci_core_disable(vdev);
	return ret;
}
/*
 * Shut a migration stream down: mark it disabled and rewind both the file
 * position and the amount of valid data, all under the stream lock.
 */
static void qat_vf_disable_fd(struct qat_vf_migration_file *migf)
{
	mutex_lock(&migf->lock);

	migf->filled_size = 0;
	migf->filp->f_pos = 0;
	migf->disabled = true;

	mutex_unlock(&migf->lock);
}
/*
 * Disable and release the resuming and saving streams of the device, if
 * present. Each stream is disabled, forgotten by the device and then has the
 * device's file reference dropped.
 */
static void qat_vf_disable_fds(struct qat_vf_core_device *qat_vdev)
{
	struct file *filp;

	if (qat_vdev->resuming_migf != NULL) {
		filp = qat_vdev->resuming_migf->filp;
		qat_vf_disable_fd(qat_vdev->resuming_migf);
		qat_vdev->resuming_migf = NULL;
		fput(filp);
	}

	if (qat_vdev->saving_migf != NULL) {
		filp = qat_vdev->saving_migf->filp;
		qat_vf_disable_fd(qat_vdev->saving_migf);
		qat_vdev->saving_migf = NULL;
		fput(filp);
	}
}
/*
 * close_device callback: close the migration handle, shut down any
 * outstanding migration streams and then close the PCI core device.
 */
static void qat_vf_pci_close_device(struct vfio_device *core_vdev)
{
	struct qat_vf_core_device *qat_vdev;

	qat_vdev = container_of(core_vdev, struct qat_vf_core_device,
				core_device.vdev);

	qat_vfmig_close(qat_vdev->mdev);
	qat_vf_disable_fds(qat_vdev);
	vfio_pci_core_close_device(core_vdev);
}
/*
 * ioctl handler of the saving stream; only VFIO_MIG_GET_PRECOPY_INFO is
 * supported.
 *
 * Reports how much of the pre-copy (setup) data is still unread as
 * initial_bytes and always reports zero dirty_bytes. The request fails with
 * -EINVAL outside the PRE_COPY/PRE_COPY_P2P states or when the file position
 * is beyond the setup data, and with -ENODEV on a disabled stream.
 */
static long qat_vf_precopy_ioctl(struct file *filp, unsigned int cmd,
				 unsigned long arg)
{
	struct qat_vf_migration_file *migf = filp->private_data;
	struct qat_vf_core_device *qat_vdev = migf->qat_vdev;
	struct qat_mig_dev *mig_dev = qat_vdev->mdev;
	struct vfio_precopy_info info;
	loff_t *pos = &filp->f_pos;
	unsigned long minsz;
	int ret = 0;

	if (cmd != VFIO_MIG_GET_PRECOPY_INFO)
		return -ENOTTY;

	minsz = offsetofend(struct vfio_precopy_info, dirty_bytes);

	if (copy_from_user(&info, (void __user *)arg, minsz))
		return -EFAULT;
	if (info.argsz < minsz)
		return -EINVAL;

	mutex_lock(&qat_vdev->state_mutex);
	switch (qat_vdev->mig_state) {
	case VFIO_DEVICE_STATE_PRE_COPY:
	case VFIO_DEVICE_STATE_PRE_COPY_P2P:
		break;
	default:
		mutex_unlock(&qat_vdev->state_mutex);
		return -EINVAL;
	}

	mutex_lock(&migf->lock);
	if (migf->disabled) {
		ret = -ENODEV;
	} else if (*pos > mig_dev->setup_size) {
		ret = -EINVAL;
	} else {
		info.dirty_bytes = 0;
		info.initial_bytes = mig_dev->setup_size - *pos;
	}
	mutex_unlock(&migf->lock);
	mutex_unlock(&qat_vdev->state_mutex);

	if (ret)
		return ret;

	/* Copy the result out only after both locks have been dropped. */
	return copy_to_user((void __user *)arg, &info, minsz) ? -EFAULT : 0;
}
/*
 * read handler of the saving stream.
 *
 * Copies up to @len bytes of saved migration data, starting at the current
 * file position, to user space and advances the position. Positional reads
 * are refused with -ESPIPE. Returns the number of bytes copied (0 once all
 * filled data has been consumed) or a negative errno.
 */
static ssize_t qat_vf_save_read(struct file *filp, char __user *buf,
				size_t len, loff_t *pos)
{
	struct qat_vf_migration_file *migf = filp->private_data;
	struct qat_mig_dev *mig_dev = migf->qat_vdev->mdev;
	loff_t *offs = &filp->f_pos;
	ssize_t done = 0;
	int ret;

	if (pos)
		return -ESPIPE;

	mutex_lock(&migf->lock);

	if (*offs > migf->filled_size || *offs < 0) {
		done = -EINVAL;
	} else if (migf->disabled) {
		done = -ENODEV;
	} else {
		/* Never hand out more than what has been filled in. */
		len = min_t(size_t, migf->filled_size - *offs, len);
		if (len) {
			ret = copy_to_user(buf, mig_dev->state + *offs, len);
			if (ret) {
				done = -EFAULT;
			} else {
				*offs += len;
				done = len;
			}
		}
	}

	mutex_unlock(&migf->lock);

	return done;
}
/*
 * release handler shared by the save and resume streams: disable the stream
 * one last time, then destroy its lock and free its context.
 */
static int qat_vf_release_file(struct inode *inode, struct file *filp)
{
	struct qat_vf_migration_file *ctx = filp->private_data;

	qat_vf_disable_fd(ctx);

	mutex_destroy(&ctx->lock);
	kfree(ctx);

	return 0;
}
/*
 * File operations of the saving stream: readable, with the pre-copy info
 * ioctl, and not seekable.
 */
static const struct file_operations qat_vf_save_fops = {
.owner = THIS_MODULE,
.read = qat_vf_save_read,
.unlocked_ioctl = qat_vf_precopy_ioctl,
.compat_ioctl = compat_ptr_ioctl,
.release = qat_vf_release_file,
.llseek = no_llseek,
};
/*
 * Save the device state and, on success, make state_size bytes of it
 * available for reading through @migf.
 */
static int qat_vf_save_state(struct qat_vf_core_device *qat_vdev,
			     struct qat_vf_migration_file *migf)
{
	int ret = qat_vfmig_save_state(qat_vdev->mdev);

	if (!ret)
		migf->filled_size = qat_vdev->mdev->state_size;

	return ret;
}
/*
 * Save the pre-configured (setup) device data and, on success, make
 * setup_size bytes of it available for reading through @migf.
 */
static int qat_vf_save_setup(struct qat_vf_core_device *qat_vdev,
			     struct qat_vf_migration_file *migf)
{
	int ret = qat_vfmig_save_setup(qat_vdev->mdev);

	if (!ret)
		migf->filled_size = qat_vdev->mdev->setup_size;

	return ret;
}
/*
 * Create a read-only migration file for user space and fill it with data of
 * the device being migrated: the pre-configured device data when @pre_copy is
 * true, the device state otherwise. In both cases the amount of readable data
 * is recorded in the returned context.
 *
 * Returns the new context or an ERR_PTR() on failure.
 */
static struct qat_vf_migration_file *
qat_vf_save_device_data(struct qat_vf_core_device *qat_vdev, bool pre_copy)
{
	struct qat_vf_migration_file *migf;
	int ret;

	migf = kzalloc(sizeof(*migf), GFP_KERNEL);
	if (!migf)
		return ERR_PTR(-ENOMEM);

	migf->filp = anon_inode_getfile("qat_vf_mig", &qat_vf_save_fops,
					migf, O_RDONLY);
	ret = PTR_ERR_OR_ZERO(migf->filp);
	if (ret)
		goto err_free;

	stream_open(migf->filp->f_inode, migf->filp);
	mutex_init(&migf->lock);

	ret = pre_copy ? qat_vf_save_setup(qat_vdev, migf) :
			 qat_vf_save_state(qat_vdev, migf);
	if (ret) {
		/*
		 * The file owns migf from here on: its release handler frees
		 * the context, so only the file reference is dropped.
		 */
		fput(migf->filp);
		return ERR_PTR(ret);
	}

	migf->qat_vdev = qat_vdev;

	return migf;

err_free:
	kfree(migf);
	return ERR_PTR(ret);
}
/*
 * write handler of the resuming stream.
 *
 * Appends @len bytes from user space to the migration state buffer at the
 * current file position, then asks the QAT migration layer to load the
 * pre-configured (setup) part of what has been received so far; -EAGAIN from
 * that step is not treated as an error.
 *
 * Positional writes are refused with -ESPIPE. Returns @len on success,
 * -EOVERFLOW if position + length overflows, -ENOMEM if the data would not
 * fit into the state buffer, -ENODEV on a disabled stream, -EFAULT on a
 * faulting user buffer, or the error reported by qat_vfmig_load_setup().
 */
static ssize_t qat_vf_resume_write(struct file *filp, const char __user *buf,
				   size_t len, loff_t *pos)
{
	struct qat_vf_migration_file *migf = filp->private_data;
	struct qat_mig_dev *mig_dev = migf->qat_vdev->mdev;
	loff_t end, *offs;
	ssize_t done = 0;
	int ret;

	if (pos)
		return -ESPIPE;
	offs = &filp->f_pos;

	/*
	 * Hand len to the overflow check in its native unsigned type. Casting
	 * it to loff_t first would turn a length above LLONG_MAX into a
	 * negative value, and the sum could then pass the check unnoticed.
	 */
	if (*offs < 0 ||
	    check_add_overflow(len, *offs, &end))
		return -EOVERFLOW;

	if (end > mig_dev->state_size)
		return -ENOMEM;

	mutex_lock(&migf->lock);
	if (migf->disabled) {
		done = -ENODEV;
		goto out_unlock;
	}

	ret = copy_from_user(mig_dev->state + *offs, buf, len);
	if (ret) {
		done = -EFAULT;
		goto out_unlock;
	}
	*offs += len;
	migf->filled_size += len;

	/*
	 * Load the pre-configured device data first to check if the target
	 * device is compatible with the source device.
	 */
	ret = qat_vfmig_load_setup(mig_dev, migf->filled_size);
	if (ret && ret != -EAGAIN) {
		done = ret;
		goto out_unlock;
	}
	done = len;

out_unlock:
	mutex_unlock(&migf->lock);
	return done;
}
/* File operations of the resuming stream: writable and not seekable. */
static const struct file_operations qat_vf_resume_fops = {
.owner = THIS_MODULE,
.write = qat_vf_resume_write,
.release = qat_vf_release_file,
.llseek = no_llseek,
};
/*
 * Create an empty, write-only migration file through which user space can
 * feed migration data back into the device.
 *
 * Returns the new context or an ERR_PTR() on failure.
 */
static struct qat_vf_migration_file *
qat_vf_resume_device_data(struct qat_vf_core_device *qat_vdev)
{
	struct qat_vf_migration_file *migf;
	int ret;

	migf = kzalloc(sizeof(*migf), GFP_KERNEL);
	if (!migf)
		return ERR_PTR(-ENOMEM);

	migf->filp = anon_inode_getfile("qat_vf_mig", &qat_vf_resume_fops,
					migf, O_WRONLY);
	ret = PTR_ERR_OR_ZERO(migf->filp);
	if (ret)
		goto err_free;

	/* Nothing has been written yet. */
	migf->filled_size = 0;
	migf->qat_vdev = qat_vdev;

	stream_open(migf->filp->f_inode, migf->filp);
	mutex_init(&migf->lock);

	return migf;

err_free:
	kfree(migf);
	return ERR_PTR(ret);
}
/* Load the received device state into the VF via the QAT migration layer. */
static int qat_vf_load_device_data(struct qat_vf_core_device *qat_vdev)
{
	int ret = qat_vfmig_load_state(qat_vdev->mdev);

	return ret;
}
/*
 * Perform a single migration state transition from the current state of
 * @qat_vdev to @new.
 *
 * Returns a file for transitions that create a new migration stream, NULL
 * for transitions that succeed without one, and an ERR_PTR() on failure.
 * The current state is only read here; the caller records the new state.
 */
static struct file *qat_vf_pci_step_device_state(struct qat_vf_core_device *qat_vdev, u32 new)
{
u32 cur = qat_vdev->mig_state;
int ret;
/*
* As the device is not capable of just stopping P2P DMAs, suspend the
* device completely once any of the P2P states are reached.
* When it is suspended, all its MMIO registers can still be operated
* correctly, jobs submitted through ring are queued while no jobs are
* processed by the device. The MMIO states can be safely migrated to
* the target VF during stop-copy stage and restored correctly in the
* target VF. All queued jobs can be resumed then.
*/
if ((cur == VFIO_DEVICE_STATE_RUNNING && new == VFIO_DEVICE_STATE_RUNNING_P2P) ||
(cur == VFIO_DEVICE_STATE_PRE_COPY && new == VFIO_DEVICE_STATE_PRE_COPY_P2P)) {
ret = qat_vfmig_suspend(qat_vdev->mdev);
if (ret)
return ERR_PTR(ret);
return NULL;
}
/* Leaving a P2P state for its non-P2P counterpart resumes the device. */
if ((cur == VFIO_DEVICE_STATE_RUNNING_P2P && new == VFIO_DEVICE_STATE_RUNNING) ||
(cur == VFIO_DEVICE_STATE_PRE_COPY_P2P && new == VFIO_DEVICE_STATE_PRE_COPY)) {
qat_vfmig_resume(qat_vdev->mdev);
return NULL;
}
/* RUNNING_P2P <-> STOP requires no action here. */
if ((cur == VFIO_DEVICE_STATE_RUNNING_P2P && new == VFIO_DEVICE_STATE_STOP) ||
(cur == VFIO_DEVICE_STATE_STOP && new == VFIO_DEVICE_STATE_RUNNING_P2P))
return NULL;
/* STOP -> STOP_COPY: save the device state into a new saving stream. */
if (cur == VFIO_DEVICE_STATE_STOP && new == VFIO_DEVICE_STATE_STOP_COPY) {
struct qat_vf_migration_file *migf;
migf = qat_vf_save_device_data(qat_vdev, false);
if (IS_ERR(migf))
return ERR_CAST(migf);
/* extra reference: one is kept in saving_migf, one is returned */
get_file(migf->filp);
qat_vdev->saving_migf = migf;
return migf->filp;
}
/* STOP -> RESUMING: create an empty stream for user space to write to. */
if (cur == VFIO_DEVICE_STATE_STOP && new == VFIO_DEVICE_STATE_RESUMING) {
struct qat_vf_migration_file *migf;
migf = qat_vf_resume_device_data(qat_vdev);
if (IS_ERR(migf))
return ERR_CAST(migf);
/* extra reference: one is kept in resuming_migf, one is returned */
get_file(migf->filp);
qat_vdev->resuming_migf = migf;
return migf->filp;
}
/* Transitions that end a save phase shut the migration streams down. */
if ((cur == VFIO_DEVICE_STATE_STOP_COPY && new == VFIO_DEVICE_STATE_STOP) ||
(cur == VFIO_DEVICE_STATE_PRE_COPY && new == VFIO_DEVICE_STATE_RUNNING) ||
(cur == VFIO_DEVICE_STATE_PRE_COPY_P2P && new == VFIO_DEVICE_STATE_RUNNING_P2P)) {
qat_vf_disable_fds(qat_vdev);
return NULL;
}
/* Entering pre-copy: save the pre-configured data into a new saving stream. */
if ((cur == VFIO_DEVICE_STATE_RUNNING && new == VFIO_DEVICE_STATE_PRE_COPY) ||
(cur == VFIO_DEVICE_STATE_RUNNING_P2P && new == VFIO_DEVICE_STATE_PRE_COPY_P2P)) {
struct qat_vf_migration_file *migf;
migf = qat_vf_save_device_data(qat_vdev, true);
if (IS_ERR(migf))
return ERR_CAST(migf);
get_file(migf->filp);
qat_vdev->saving_migf = migf;
return migf->filp;
}
/* PRE_COPY_P2P -> STOP_COPY: reuse the existing saving stream for the device state. */
if (cur == VFIO_DEVICE_STATE_PRE_COPY_P2P && new == VFIO_DEVICE_STATE_STOP_COPY) {
struct qat_vf_migration_file *migf = qat_vdev->saving_migf;
if (!migf)
return ERR_PTR(-EINVAL);
ret = qat_vf_save_state(qat_vdev, migf);
if (ret)
return ERR_PTR(ret);
return NULL;
}
/* RESUMING -> STOP: load the received state, then shut the streams down. */
if (cur == VFIO_DEVICE_STATE_RESUMING && new == VFIO_DEVICE_STATE_STOP) {
ret = qat_vf_load_device_data(qat_vdev);
if (ret)
return ERR_PTR(ret);
qat_vf_disable_fds(qat_vdev);
return NULL;
}
/* vfio_mig_get_next_state() does not use arcs other than the above */
WARN_ON(true);
return ERR_PTR(-EINVAL);
}
/*
 * Bring the migration bookkeeping back to its initial condition after a
 * device reset: the state becomes RUNNING, the QAT migration layer is reset
 * and any outstanding migration streams are shut down.
 * The AER reset_done handler calls this with state_mutex held.
 */
static void qat_vf_reset_done(struct qat_vf_core_device *qat_vdev)
{
qat_vdev->mig_state = VFIO_DEVICE_STATE_RUNNING;
qat_vfmig_reset(qat_vdev->mdev);
qat_vf_disable_fds(qat_vdev);
}
/*
 * migration_set_state callback.
 *
 * Walks the device from its current migration state to @new_state one arc at
 * a time, using vfio_mig_get_next_state() to pick each intermediate state.
 * Returns the file produced by the final step (if any), NULL when no file is
 * involved, or an ERR_PTR() as soon as a step fails. A file produced by a
 * step that is not the final one is unexpected: it is dropped and -EINVAL is
 * returned.
 */
static struct file *qat_vf_pci_set_device_state(struct vfio_device *vdev,
						enum vfio_device_mig_state new_state)
{
	struct qat_vf_core_device *qat_vdev;
	enum vfio_device_mig_state next_state;
	struct file *res = NULL;
	int ret;

	qat_vdev = container_of(vdev, struct qat_vf_core_device,
				core_device.vdev);

	mutex_lock(&qat_vdev->state_mutex);
	for (;;) {
		if (qat_vdev->mig_state == new_state)
			break;

		ret = vfio_mig_get_next_state(vdev, qat_vdev->mig_state,
					      new_state, &next_state);
		if (ret) {
			res = ERR_PTR(ret);
			break;
		}

		res = qat_vf_pci_step_device_state(qat_vdev, next_state);
		if (IS_ERR(res))
			break;

		qat_vdev->mig_state = next_state;

		if (WARN_ON(res && new_state != qat_vdev->mig_state)) {
			fput(res);
			res = ERR_PTR(-EINVAL);
			break;
		}
	}
	mutex_unlock(&qat_vdev->state_mutex);

	return res;
}
/*
 * migration_get_state callback: report the current migration state, read
 * under state_mutex. Always returns 0.
 */
static int qat_vf_pci_get_device_state(struct vfio_device *vdev,
				       enum vfio_device_mig_state *curr_state)
{
	struct qat_vf_core_device *qat_vdev;

	qat_vdev = container_of(vdev, struct qat_vf_core_device,
				core_device.vdev);

	mutex_lock(&qat_vdev->state_mutex);
	*curr_state = qat_vdev->mig_state;
	mutex_unlock(&qat_vdev->state_mutex);

	return 0;
}
/*
 * migration_get_data_size callback: report the size of the device state
 * buffer as the stop-copy length, read under state_mutex. Always returns 0.
 */
static int qat_vf_pci_get_data_size(struct vfio_device *vdev,
				    unsigned long *stop_copy_length)
{
	struct qat_vf_core_device *qat_vdev;

	qat_vdev = container_of(vdev, struct qat_vf_core_device,
				core_device.vdev);

	mutex_lock(&qat_vdev->state_mutex);
	*stop_copy_length = qat_vdev->mdev->state_size;
	mutex_unlock(&qat_vdev->state_mutex);

	return 0;
}
/* Migration callbacks installed on the vfio device by qat_vf_pci_init_dev(). */
static const struct vfio_migration_ops qat_vf_pci_mig_ops = {
.migration_set_state = qat_vf_pci_set_device_state,
.migration_get_state = qat_vf_pci_get_device_state,
.migration_get_data_size = qat_vf_pci_get_data_size,
};
/*
 * release callback: clean up and destroy the migration handle, destroy the
 * state lock and finally release the PCI core device.
 */
static void qat_vf_pci_release_dev(struct vfio_device *core_vdev)
{
	struct qat_vf_core_device *qat_vdev;

	qat_vdev = container_of(core_vdev, struct qat_vf_core_device,
				core_device.vdev);

	qat_vfmig_cleanup(qat_vdev->mdev);
	qat_vfmig_destroy(qat_vdev->mdev);

	mutex_destroy(&qat_vdev->state_mutex);

	vfio_pci_core_release_dev(core_vdev);
}
/*
 * init callback: advertise the supported migration features, initialise the
 * PCI core device and create and initialise the migration handle for this VF
 * on its parent PF.
 *
 * Returns 0 on success. On failure after the core device was initialised it
 * is released again and the error is returned; -ENODEV is used when the VF
 * index cannot be determined.
 */
static int qat_vf_pci_init_dev(struct vfio_device *core_vdev)
{
	struct qat_vf_core_device *qat_vdev = container_of(core_vdev,
			struct qat_vf_core_device, core_device.vdev);
	struct qat_mig_dev *mdev;
	struct pci_dev *parent;
	int ret, vf_id;

	core_vdev->mig_ops = &qat_vf_pci_mig_ops;
	core_vdev->migration_flags = VFIO_MIGRATION_STOP_COPY |
				     VFIO_MIGRATION_P2P |
				     VFIO_MIGRATION_PRE_COPY;

	ret = vfio_pci_core_init_dev(core_vdev);
	if (ret)
		return ret;

	mutex_init(&qat_vdev->state_mutex);

	parent = pci_physfn(qat_vdev->core_device.pdev);
	vf_id = pci_iov_vf_id(qat_vdev->core_device.pdev);
	if (vf_id < 0) {
		ret = -ENODEV;
		goto err_rel;
	}

	mdev = qat_vfmig_create(parent, vf_id);
	if (IS_ERR(mdev)) {
		ret = PTR_ERR(mdev);
		goto err_rel;
	}

	ret = qat_vfmig_init(mdev);
	if (ret) {
		/* The handle was created but could not be initialised. */
		qat_vfmig_destroy(mdev);
		goto err_rel;
	}

	qat_vdev->mdev = mdev;

	return 0;

err_rel:
	vfio_pci_core_release_dev(core_vdev);
	return ret;
}
/*
 * vfio device operations of the QAT VF variant driver. Init/release and
 * open/close are driver specific; everything else is delegated to the
 * vfio-pci core and the vfio iommufd helpers.
 */
static const struct vfio_device_ops qat_vf_pci_ops = {
.name = "qat-vf-vfio-pci",
.init = qat_vf_pci_init_dev,
.release = qat_vf_pci_release_dev,
.open_device = qat_vf_pci_open_device,
.close_device = qat_vf_pci_close_device,
.ioctl = vfio_pci_core_ioctl,
.read = vfio_pci_core_read,
.write = vfio_pci_core_write,
.mmap = vfio_pci_core_mmap,
.request = vfio_pci_core_request,
.match = vfio_pci_core_match,
.bind_iommufd = vfio_iommufd_physical_bind,
.unbind_iommufd = vfio_iommufd_physical_unbind,
.attach_ioas = vfio_iommufd_physical_attach_ioas,
.detach_ioas = vfio_iommufd_physical_detach_ioas,
};
/*
 * Map a PCI device back to the driver's device structure. The PCI driver
 * data is the embedded vfio-pci core device, as set up at probe time.
 */
static struct qat_vf_core_device *qat_vf_drvdata(struct pci_dev *pdev)
{
	struct vfio_pci_core_device *core_device;

	core_device = pci_get_drvdata(pdev);

	return container_of(core_device, struct qat_vf_core_device,
			    core_device);
}
/*
 * PCI error-handler reset_done callback: reset the migration bookkeeping
 * under state_mutex. Nothing is done for a device without a migration
 * handle.
 */
static void qat_vf_pci_aer_reset_done(struct pci_dev *pdev)
{
	struct qat_vf_core_device *qat_vdev = qat_vf_drvdata(pdev);

	if (qat_vdev->mdev) {
		mutex_lock(&qat_vdev->state_mutex);
		qat_vf_reset_done(qat_vdev);
		mutex_unlock(&qat_vdev->state_mutex);
	}
}
/*
 * PCI probe: allocate the vfio device, publish its core device as PCI driver
 * data and register it with the vfio-pci core. The device reference is
 * dropped again if registration fails.
 */
static int
qat_vf_vfio_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
{
	struct qat_vf_core_device *qat_vdev;
	struct device *dev = &pdev->dev;
	int ret;

	qat_vdev = vfio_alloc_device(qat_vf_core_device, core_device.vdev,
				     dev, &qat_vf_pci_ops);
	if (IS_ERR(qat_vdev))
		return PTR_ERR(qat_vdev);

	pci_set_drvdata(pdev, &qat_vdev->core_device);

	ret = vfio_pci_core_register_device(&qat_vdev->core_device);
	if (ret)
		vfio_put_device(&qat_vdev->core_device.vdev);

	return ret;
}
/*
 * PCI remove: unregister the device from the vfio-pci core and drop the
 * reference taken at probe time.
 */
static void qat_vf_vfio_pci_remove(struct pci_dev *pdev)
{
	struct qat_vf_core_device *qat_vdev;

	qat_vdev = qat_vf_drvdata(pdev);

	vfio_pci_core_unregister_device(&qat_vdev->core_device);
	vfio_put_device(&qat_vdev->core_device.vdev);
}
/*
 * PCI IDs handled by this driver. The entries use the vfio driver-override
 * match macro and the table is terminated by an empty entry.
 */
static const struct pci_device_id qat_vf_vfio_pci_table[] = {
/* Intel QAT GEN4 4xxx VF device */
{ PCI_DRIVER_OVERRIDE_DEVICE_VFIO(PCI_VENDOR_ID_INTEL, 0x4941) },
{ PCI_DRIVER_OVERRIDE_DEVICE_VFIO(PCI_VENDOR_ID_INTEL, 0x4943) },
{ PCI_DRIVER_OVERRIDE_DEVICE_VFIO(PCI_VENDOR_ID_INTEL, 0x4945) },
{}
};
MODULE_DEVICE_TABLE(pci, qat_vf_vfio_pci_table);
/*
 * PCI error handlers: reset completion is handled by this driver so the
 * migration state can be reset; error detection is left to the vfio-pci core.
 */
static const struct pci_error_handlers qat_vf_err_handlers = {
.reset_done = qat_vf_pci_aer_reset_done,
.error_detected = vfio_pci_core_aer_err_detected,
};
/* PCI driver definition tying together the ID table, probe/remove and error handlers. */
static struct pci_driver qat_vf_vfio_pci_driver = {
.name = "qat_vfio_pci",
.id_table = qat_vf_vfio_pci_table,
.probe = qat_vf_vfio_pci_probe,
.remove = qat_vf_vfio_pci_remove,
.err_handler = &qat_vf_err_handlers,
.driver_managed_dma = true,
};
module_pci_driver(qat_vf_vfio_pci_driver);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Xin Zeng <xin.zeng@intel.com>");
MODULE_DESCRIPTION("QAT VFIO PCI - VFIO PCI driver with live migration support for Intel(R) QAT GEN4 device family");
MODULE_IMPORT_NS(CRYPTO_QAT);
...@@ -778,25 +778,26 @@ static int vfio_pci_count_devs(struct pci_dev *pdev, void *data) ...@@ -778,25 +778,26 @@ static int vfio_pci_count_devs(struct pci_dev *pdev, void *data)
} }
struct vfio_pci_fill_info { struct vfio_pci_fill_info {
struct vfio_pci_dependent_device __user *devices;
struct vfio_pci_dependent_device __user *devices_end;
struct vfio_device *vdev; struct vfio_device *vdev;
struct vfio_pci_dependent_device *devices;
int nr_devices;
u32 count; u32 count;
u32 flags; u32 flags;
}; };
static int vfio_pci_fill_devs(struct pci_dev *pdev, void *data) static int vfio_pci_fill_devs(struct pci_dev *pdev, void *data)
{ {
struct vfio_pci_dependent_device info = { struct vfio_pci_dependent_device *info;
.segment = pci_domain_nr(pdev->bus),
.bus = pdev->bus->number,
.devfn = pdev->devfn,
};
struct vfio_pci_fill_info *fill = data; struct vfio_pci_fill_info *fill = data;
fill->count++; /* The topology changed since we counted devices */
if (fill->devices >= fill->devices_end) if (fill->count >= fill->nr_devices)
return 0; return -EAGAIN;
info = &fill->devices[fill->count++];
info->segment = pci_domain_nr(pdev->bus);
info->bus = pdev->bus->number;
info->devfn = pdev->devfn;
if (fill->flags & VFIO_PCI_HOT_RESET_FLAG_DEV_ID) { if (fill->flags & VFIO_PCI_HOT_RESET_FLAG_DEV_ID) {
struct iommufd_ctx *iommufd = vfio_iommufd_device_ictx(fill->vdev); struct iommufd_ctx *iommufd = vfio_iommufd_device_ictx(fill->vdev);
...@@ -809,19 +810,19 @@ static int vfio_pci_fill_devs(struct pci_dev *pdev, void *data) ...@@ -809,19 +810,19 @@ static int vfio_pci_fill_devs(struct pci_dev *pdev, void *data)
*/ */
vdev = vfio_find_device_in_devset(dev_set, &pdev->dev); vdev = vfio_find_device_in_devset(dev_set, &pdev->dev);
if (!vdev) { if (!vdev) {
info.devid = VFIO_PCI_DEVID_NOT_OWNED; info->devid = VFIO_PCI_DEVID_NOT_OWNED;
} else { } else {
int id = vfio_iommufd_get_dev_id(vdev, iommufd); int id = vfio_iommufd_get_dev_id(vdev, iommufd);
if (id > 0) if (id > 0)
info.devid = id; info->devid = id;
else if (id == -ENOENT) else if (id == -ENOENT)
info.devid = VFIO_PCI_DEVID_OWNED; info->devid = VFIO_PCI_DEVID_OWNED;
else else
info.devid = VFIO_PCI_DEVID_NOT_OWNED; info->devid = VFIO_PCI_DEVID_NOT_OWNED;
} }
/* If devid is VFIO_PCI_DEVID_NOT_OWNED, clear owned flag. */ /* If devid is VFIO_PCI_DEVID_NOT_OWNED, clear owned flag. */
if (info.devid == VFIO_PCI_DEVID_NOT_OWNED) if (info->devid == VFIO_PCI_DEVID_NOT_OWNED)
fill->flags &= ~VFIO_PCI_HOT_RESET_FLAG_DEV_ID_OWNED; fill->flags &= ~VFIO_PCI_HOT_RESET_FLAG_DEV_ID_OWNED;
} else { } else {
struct iommu_group *iommu_group; struct iommu_group *iommu_group;
...@@ -830,13 +831,10 @@ static int vfio_pci_fill_devs(struct pci_dev *pdev, void *data) ...@@ -830,13 +831,10 @@ static int vfio_pci_fill_devs(struct pci_dev *pdev, void *data)
if (!iommu_group) if (!iommu_group)
return -EPERM; /* Cannot reset non-isolated devices */ return -EPERM; /* Cannot reset non-isolated devices */
info.group_id = iommu_group_id(iommu_group); info->group_id = iommu_group_id(iommu_group);
iommu_group_put(iommu_group); iommu_group_put(iommu_group);
} }
if (copy_to_user(fill->devices, &info, sizeof(info)))
return -EFAULT;
fill->devices++;
return 0; return 0;
} }
...@@ -1258,10 +1256,11 @@ static int vfio_pci_ioctl_get_pci_hot_reset_info( ...@@ -1258,10 +1256,11 @@ static int vfio_pci_ioctl_get_pci_hot_reset_info(
{ {
unsigned long minsz = unsigned long minsz =
offsetofend(struct vfio_pci_hot_reset_info, count); offsetofend(struct vfio_pci_hot_reset_info, count);
struct vfio_pci_dependent_device *devices = NULL;
struct vfio_pci_hot_reset_info hdr; struct vfio_pci_hot_reset_info hdr;
struct vfio_pci_fill_info fill = {}; struct vfio_pci_fill_info fill = {};
bool slot = false; bool slot = false;
int ret = 0; int ret, count;
if (copy_from_user(&hdr, arg, minsz)) if (copy_from_user(&hdr, arg, minsz))
return -EFAULT; return -EFAULT;
...@@ -1277,9 +1276,26 @@ static int vfio_pci_ioctl_get_pci_hot_reset_info( ...@@ -1277,9 +1276,26 @@ static int vfio_pci_ioctl_get_pci_hot_reset_info(
else if (pci_probe_reset_bus(vdev->pdev->bus)) else if (pci_probe_reset_bus(vdev->pdev->bus))
return -ENODEV; return -ENODEV;
fill.devices = arg->devices; ret = vfio_pci_for_each_slot_or_bus(vdev->pdev, vfio_pci_count_devs,
fill.devices_end = arg->devices + &count, slot);
(hdr.argsz - sizeof(hdr)) / sizeof(arg->devices[0]); if (ret)
return ret;
if (WARN_ON(!count)) /* Should always be at least one */
return -ERANGE;
if (count > (hdr.argsz - sizeof(hdr)) / sizeof(*devices)) {
hdr.count = count;
ret = -ENOSPC;
goto header;
}
devices = kcalloc(count, sizeof(*devices), GFP_KERNEL);
if (!devices)
return -ENOMEM;
fill.devices = devices;
fill.nr_devices = count;
fill.vdev = &vdev->vdev; fill.vdev = &vdev->vdev;
if (vfio_device_cdev_opened(&vdev->vdev)) if (vfio_device_cdev_opened(&vdev->vdev))
...@@ -1291,16 +1307,23 @@ static int vfio_pci_ioctl_get_pci_hot_reset_info( ...@@ -1291,16 +1307,23 @@ static int vfio_pci_ioctl_get_pci_hot_reset_info(
&fill, slot); &fill, slot);
mutex_unlock(&vdev->vdev.dev_set->lock); mutex_unlock(&vdev->vdev.dev_set->lock);
if (ret) if (ret)
return ret; goto out;
if (copy_to_user(arg->devices, devices,
sizeof(*devices) * fill.count)) {
ret = -EFAULT;
goto out;
}
hdr.count = fill.count; hdr.count = fill.count;
hdr.flags = fill.flags; hdr.flags = fill.flags;
if (copy_to_user(arg, &hdr, minsz))
return -EFAULT;
if (fill.count > fill.devices - arg->devices) header:
return -ENOSPC; if (copy_to_user(arg, &hdr, minsz))
return 0; ret = -EFAULT;
out:
kfree(devices);
return ret;
} }
static int static int
......
...@@ -23,11 +23,12 @@ ...@@ -23,11 +23,12 @@
#include "vfio_pci_priv.h" #include "vfio_pci_priv.h"
struct vfio_pci_irq_ctx { struct vfio_pci_irq_ctx {
struct eventfd_ctx *trigger; struct vfio_pci_core_device *vdev;
struct virqfd *unmask; struct eventfd_ctx *trigger;
struct virqfd *mask; struct virqfd *unmask;
char *name; struct virqfd *mask;
bool masked; char *name;
bool masked;
struct irq_bypass_producer producer; struct irq_bypass_producer producer;
}; };
...@@ -84,19 +85,14 @@ vfio_irq_ctx_alloc(struct vfio_pci_core_device *vdev, unsigned long index) ...@@ -84,19 +85,14 @@ vfio_irq_ctx_alloc(struct vfio_pci_core_device *vdev, unsigned long index)
/* /*
* INTx * INTx
*/ */
static void vfio_send_intx_eventfd(void *opaque, void *unused) static void vfio_send_intx_eventfd(void *opaque, void *data)
{ {
struct vfio_pci_core_device *vdev = opaque; struct vfio_pci_core_device *vdev = opaque;
if (likely(is_intx(vdev) && !vdev->virq_disabled)) { if (likely(is_intx(vdev) && !vdev->virq_disabled)) {
struct vfio_pci_irq_ctx *ctx; struct vfio_pci_irq_ctx *ctx = data;
struct eventfd_ctx *trigger; struct eventfd_ctx *trigger = READ_ONCE(ctx->trigger);
ctx = vfio_irq_ctx_get(vdev, 0);
if (WARN_ON_ONCE(!ctx))
return;
trigger = READ_ONCE(ctx->trigger);
if (likely(trigger)) if (likely(trigger))
eventfd_signal(trigger); eventfd_signal(trigger);
} }
...@@ -166,11 +162,11 @@ bool vfio_pci_intx_mask(struct vfio_pci_core_device *vdev) ...@@ -166,11 +162,11 @@ bool vfio_pci_intx_mask(struct vfio_pci_core_device *vdev)
* a signal is necessary, which can then be handled via a work queue * a signal is necessary, which can then be handled via a work queue
* or directly depending on the caller. * or directly depending on the caller.
*/ */
static int vfio_pci_intx_unmask_handler(void *opaque, void *unused) static int vfio_pci_intx_unmask_handler(void *opaque, void *data)
{ {
struct vfio_pci_core_device *vdev = opaque; struct vfio_pci_core_device *vdev = opaque;
struct pci_dev *pdev = vdev->pdev; struct pci_dev *pdev = vdev->pdev;
struct vfio_pci_irq_ctx *ctx; struct vfio_pci_irq_ctx *ctx = data;
unsigned long flags; unsigned long flags;
int ret = 0; int ret = 0;
...@@ -186,10 +182,6 @@ static int vfio_pci_intx_unmask_handler(void *opaque, void *unused) ...@@ -186,10 +182,6 @@ static int vfio_pci_intx_unmask_handler(void *opaque, void *unused)
goto out_unlock; goto out_unlock;
} }
ctx = vfio_irq_ctx_get(vdev, 0);
if (WARN_ON_ONCE(!ctx))
goto out_unlock;
if (ctx->masked && !vdev->virq_disabled) { if (ctx->masked && !vdev->virq_disabled) {
/* /*
* A pending interrupt here would immediately trigger, * A pending interrupt here would immediately trigger,
...@@ -213,10 +205,12 @@ static int vfio_pci_intx_unmask_handler(void *opaque, void *unused) ...@@ -213,10 +205,12 @@ static int vfio_pci_intx_unmask_handler(void *opaque, void *unused)
static void __vfio_pci_intx_unmask(struct vfio_pci_core_device *vdev) static void __vfio_pci_intx_unmask(struct vfio_pci_core_device *vdev)
{ {
struct vfio_pci_irq_ctx *ctx = vfio_irq_ctx_get(vdev, 0);
lockdep_assert_held(&vdev->igate); lockdep_assert_held(&vdev->igate);
if (vfio_pci_intx_unmask_handler(vdev, NULL) > 0) if (vfio_pci_intx_unmask_handler(vdev, ctx) > 0)
vfio_send_intx_eventfd(vdev, NULL); vfio_send_intx_eventfd(vdev, ctx);
} }
void vfio_pci_intx_unmask(struct vfio_pci_core_device *vdev) void vfio_pci_intx_unmask(struct vfio_pci_core_device *vdev)
...@@ -228,15 +222,11 @@ void vfio_pci_intx_unmask(struct vfio_pci_core_device *vdev) ...@@ -228,15 +222,11 @@ void vfio_pci_intx_unmask(struct vfio_pci_core_device *vdev)
static irqreturn_t vfio_intx_handler(int irq, void *dev_id) static irqreturn_t vfio_intx_handler(int irq, void *dev_id)
{ {
struct vfio_pci_core_device *vdev = dev_id; struct vfio_pci_irq_ctx *ctx = dev_id;
struct vfio_pci_irq_ctx *ctx; struct vfio_pci_core_device *vdev = ctx->vdev;
unsigned long flags; unsigned long flags;
int ret = IRQ_NONE; int ret = IRQ_NONE;
ctx = vfio_irq_ctx_get(vdev, 0);
if (WARN_ON_ONCE(!ctx))
return ret;
spin_lock_irqsave(&vdev->irqlock, flags); spin_lock_irqsave(&vdev->irqlock, flags);
if (!vdev->pci_2_3) { if (!vdev->pci_2_3) {
...@@ -252,7 +242,7 @@ static irqreturn_t vfio_intx_handler(int irq, void *dev_id) ...@@ -252,7 +242,7 @@ static irqreturn_t vfio_intx_handler(int irq, void *dev_id)
spin_unlock_irqrestore(&vdev->irqlock, flags); spin_unlock_irqrestore(&vdev->irqlock, flags);
if (ret == IRQ_HANDLED) if (ret == IRQ_HANDLED)
vfio_send_intx_eventfd(vdev, NULL); vfio_send_intx_eventfd(vdev, ctx);
return ret; return ret;
} }
...@@ -277,11 +267,14 @@ static int vfio_intx_enable(struct vfio_pci_core_device *vdev, ...@@ -277,11 +267,14 @@ static int vfio_intx_enable(struct vfio_pci_core_device *vdev,
return -ENOMEM; return -ENOMEM;
ctx = vfio_irq_ctx_alloc(vdev, 0); ctx = vfio_irq_ctx_alloc(vdev, 0);
if (!ctx) if (!ctx) {
kfree(name);
return -ENOMEM; return -ENOMEM;
}
ctx->name = name; ctx->name = name;
ctx->trigger = trigger; ctx->trigger = trigger;
ctx->vdev = vdev;
/* /*
* Fill the initial masked state based on virq_disabled. After * Fill the initial masked state based on virq_disabled. After
...@@ -312,7 +305,7 @@ static int vfio_intx_enable(struct vfio_pci_core_device *vdev, ...@@ -312,7 +305,7 @@ static int vfio_intx_enable(struct vfio_pci_core_device *vdev,
vdev->irq_type = VFIO_PCI_INTX_IRQ_INDEX; vdev->irq_type = VFIO_PCI_INTX_IRQ_INDEX;
ret = request_irq(pdev->irq, vfio_intx_handler, ret = request_irq(pdev->irq, vfio_intx_handler,
irqflags, ctx->name, vdev); irqflags, ctx->name, ctx);
if (ret) { if (ret) {
vdev->irq_type = VFIO_PCI_NUM_IRQS; vdev->irq_type = VFIO_PCI_NUM_IRQS;
kfree(name); kfree(name);
...@@ -358,7 +351,7 @@ static void vfio_intx_disable(struct vfio_pci_core_device *vdev) ...@@ -358,7 +351,7 @@ static void vfio_intx_disable(struct vfio_pci_core_device *vdev)
if (ctx) { if (ctx) {
vfio_virqfd_disable(&ctx->unmask); vfio_virqfd_disable(&ctx->unmask);
vfio_virqfd_disable(&ctx->mask); vfio_virqfd_disable(&ctx->mask);
free_irq(pdev->irq, vdev); free_irq(pdev->irq, ctx);
if (ctx->trigger) if (ctx->trigger)
eventfd_ctx_put(ctx->trigger); eventfd_ctx_put(ctx->trigger);
kfree(ctx->name); kfree(ctx->name);
...@@ -606,7 +599,7 @@ static int vfio_pci_set_intx_unmask(struct vfio_pci_core_device *vdev, ...@@ -606,7 +599,7 @@ static int vfio_pci_set_intx_unmask(struct vfio_pci_core_device *vdev,
if (fd >= 0) if (fd >= 0)
return vfio_virqfd_enable((void *) vdev, return vfio_virqfd_enable((void *) vdev,
vfio_pci_intx_unmask_handler, vfio_pci_intx_unmask_handler,
vfio_send_intx_eventfd, NULL, vfio_send_intx_eventfd, ctx,
&ctx->unmask, fd); &ctx->unmask, fd);
vfio_virqfd_disable(&ctx->unmask); vfio_virqfd_disable(&ctx->unmask);
...@@ -673,11 +666,11 @@ static int vfio_pci_set_intx_trigger(struct vfio_pci_core_device *vdev, ...@@ -673,11 +666,11 @@ static int vfio_pci_set_intx_trigger(struct vfio_pci_core_device *vdev,
return -EINVAL; return -EINVAL;
if (flags & VFIO_IRQ_SET_DATA_NONE) { if (flags & VFIO_IRQ_SET_DATA_NONE) {
vfio_send_intx_eventfd(vdev, NULL); vfio_send_intx_eventfd(vdev, vfio_irq_ctx_get(vdev, 0));
} else if (flags & VFIO_IRQ_SET_DATA_BOOL) { } else if (flags & VFIO_IRQ_SET_DATA_BOOL) {
uint8_t trigger = *(uint8_t *)data; uint8_t trigger = *(uint8_t *)data;
if (trigger) if (trigger)
vfio_send_intx_eventfd(vdev, NULL); vfio_send_intx_eventfd(vdev, vfio_irq_ctx_get(vdev, 0));
} }
return 0; return 0;
} }
......
...@@ -676,6 +676,12 @@ int platform_device_msi_init_and_alloc_irqs(struct device *dev, unsigned int nve ...@@ -676,6 +676,12 @@ int platform_device_msi_init_and_alloc_irqs(struct device *dev, unsigned int nve
void platform_device_msi_free_irqs_all(struct device *dev); void platform_device_msi_free_irqs_all(struct device *dev);
bool msi_device_has_isolated_msi(struct device *dev); bool msi_device_has_isolated_msi(struct device *dev);
static inline int msi_domain_alloc_irqs(struct device *dev, unsigned int domid, int nirqs)
{
return msi_domain_alloc_irqs_range(dev, domid, 0, nirqs - 1);
}
#else /* CONFIG_GENERIC_MSI_IRQ */ #else /* CONFIG_GENERIC_MSI_IRQ */
static inline bool msi_device_has_isolated_msi(struct device *dev) static inline bool msi_device_has_isolated_msi(struct device *dev)
{ {
......
...@@ -1434,6 +1434,7 @@ int msi_domain_alloc_irqs_range(struct device *dev, unsigned int domid, ...@@ -1434,6 +1434,7 @@ int msi_domain_alloc_irqs_range(struct device *dev, unsigned int domid,
msi_unlock_descs(dev); msi_unlock_descs(dev);
return ret; return ret;
} }
EXPORT_SYMBOL_GPL(msi_domain_alloc_irqs_range);
/** /**
* msi_domain_alloc_irqs_all_locked - Allocate all interrupts from a MSI interrupt domain * msi_domain_alloc_irqs_all_locked - Allocate all interrupts from a MSI interrupt domain
...@@ -1680,6 +1681,7 @@ void msi_domain_free_irqs_range(struct device *dev, unsigned int domid, ...@@ -1680,6 +1681,7 @@ void msi_domain_free_irqs_range(struct device *dev, unsigned int domid,
msi_domain_free_irqs_range_locked(dev, domid, first, last); msi_domain_free_irqs_range_locked(dev, domid, first, last);
msi_unlock_descs(dev); msi_unlock_descs(dev);
} }
EXPORT_SYMBOL_GPL(msi_domain_free_irqs_all);
/** /**
* msi_domain_free_irqs_all_locked - Free all interrupts from a MSI interrupt domain * msi_domain_free_irqs_all_locked - Free all interrupts from a MSI interrupt domain
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment