Commit 719bbd4a authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'vfio-v5.12-rc1' of git://github.com/awilliam/linux-vfio

Pull VFIO updatesfrom Alex Williamson:

 - Virtual address update handling (Steve Sistare)

 - s390/zpci fixes and cleanups (Max Gurtovoy)

 - Fixes for dirty bitmap handling, non-mdev page pinning, and improved
   pinned dirty scope tracking (Keqian Zhu)

 - Batched page pinning enhancement (Daniel Jordan)

 - Page access permission fix (Alex Williamson)

* tag 'vfio-v5.12-rc1' of git://github.com/awilliam/linux-vfio: (21 commits)
  vfio/type1: Batch page pinning
  vfio/type1: Prepare for batched pinning with struct vfio_batch
  vfio/type1: Change success value of vaddr_get_pfn()
  vfio/type1: Use follow_pte()
  vfio/pci: remove CONFIG_VFIO_PCI_ZDEV from Kconfig
  vfio/iommu_type1: Fix duplicate included kthread.h
  vfio-pci/zdev: fix possible segmentation fault issue
  vfio-pci/zdev: remove unused vdev argument
  vfio/pci: Fix handling of pci use accessor return codes
  vfio/iommu_type1: Mantain a counter for non_pinned_groups
  vfio/iommu_type1: Fix some sanity checks in detach group
  vfio/iommu_type1: Populate full dirty when detach non-pinned group
  vfio/type1: block on invalid vaddr
  vfio/type1: implement notify callback
  vfio: iommu driver notify callback
  vfio/type1: implement interfaces to update vaddr
  vfio/type1: massage unmap iteration
  vfio: interfaces to update vaddr
  vfio/type1: implement unmap all
  vfio/type1: unmap cleanup
  ...
parents c4fbde84 4d83de6d
......@@ -45,15 +45,3 @@ config VFIO_PCI_NVLINK2
depends on VFIO_PCI && PPC_POWERNV
help
VFIO PCI support for P9 Witherspoon machine with NVIDIA V100 GPUs
config VFIO_PCI_ZDEV
bool "VFIO PCI ZPCI device CLP support"
depends on VFIO_PCI && S390
default y
help
Enabling this option exposes VFIO capabilities containing hardware
configuration for zPCI devices. This enables userspace (e.g. QEMU)
to supply proper configuration values instead of hard-coded defaults
for zPCI devices passed through via VFIO on s390.
Say Y here.
......@@ -3,6 +3,6 @@
vfio-pci-y := vfio_pci.o vfio_pci_intrs.o vfio_pci_rdwr.o vfio_pci_config.o
vfio-pci-$(CONFIG_VFIO_PCI_IGD) += vfio_pci_igd.o
vfio-pci-$(CONFIG_VFIO_PCI_NVLINK2) += vfio_pci_nvlink2.o
vfio-pci-$(CONFIG_VFIO_PCI_ZDEV) += vfio_pci_zdev.o
vfio-pci-$(CONFIG_S390) += vfio_pci_zdev.o
obj-$(CONFIG_VFIO_PCI) += vfio-pci.o
......@@ -807,6 +807,7 @@ static long vfio_pci_ioctl(void *device_data,
struct vfio_device_info info;
struct vfio_info_cap caps = { .buf = NULL, .size = 0 };
unsigned long capsz;
int ret;
minsz = offsetofend(struct vfio_device_info, num_irqs);
......@@ -832,13 +833,10 @@ static long vfio_pci_ioctl(void *device_data,
info.num_regions = VFIO_PCI_NUM_REGIONS + vdev->num_regions;
info.num_irqs = VFIO_PCI_NUM_IRQS;
if (IS_ENABLED(CONFIG_VFIO_PCI_ZDEV)) {
int ret = vfio_pci_info_zdev_add_caps(vdev, &caps);
if (ret && ret != -ENODEV) {
pci_warn(vdev->pdev, "Failed to setup zPCI info capabilities\n");
return ret;
}
ret = vfio_pci_info_zdev_add_caps(vdev, &caps);
if (ret && ret != -ENODEV) {
pci_warn(vdev->pdev, "Failed to setup zPCI info capabilities\n");
return ret;
}
if (caps.size) {
......
......@@ -127,7 +127,7 @@ static size_t vfio_pci_igd_cfg_rw(struct vfio_pci_device *vdev,
ret = pci_user_read_config_byte(pdev, pos, &val);
if (ret)
return pcibios_err_to_errno(ret);
return ret;
if (copy_to_user(buf + count - size, &val, 1))
return -EFAULT;
......@@ -141,7 +141,7 @@ static size_t vfio_pci_igd_cfg_rw(struct vfio_pci_device *vdev,
ret = pci_user_read_config_word(pdev, pos, &val);
if (ret)
return pcibios_err_to_errno(ret);
return ret;
val = cpu_to_le16(val);
if (copy_to_user(buf + count - size, &val, 2))
......@@ -156,7 +156,7 @@ static size_t vfio_pci_igd_cfg_rw(struct vfio_pci_device *vdev,
ret = pci_user_read_config_dword(pdev, pos, &val);
if (ret)
return pcibios_err_to_errno(ret);
return ret;
val = cpu_to_le32(val);
if (copy_to_user(buf + count - size, &val, 4))
......@@ -171,7 +171,7 @@ static size_t vfio_pci_igd_cfg_rw(struct vfio_pci_device *vdev,
ret = pci_user_read_config_word(pdev, pos, &val);
if (ret)
return pcibios_err_to_errno(ret);
return ret;
val = cpu_to_le16(val);
if (copy_to_user(buf + count - size, &val, 2))
......@@ -186,7 +186,7 @@ static size_t vfio_pci_igd_cfg_rw(struct vfio_pci_device *vdev,
ret = pci_user_read_config_byte(pdev, pos, &val);
if (ret)
return pcibios_err_to_errno(ret);
return ret;
if (copy_to_user(buf + count - size, &val, 1))
return -EFAULT;
......
......@@ -214,7 +214,7 @@ static inline int vfio_pci_ibm_npu2_init(struct vfio_pci_device *vdev)
}
#endif
#ifdef CONFIG_VFIO_PCI_ZDEV
#ifdef CONFIG_S390
extern int vfio_pci_info_zdev_add_caps(struct vfio_pci_device *vdev,
struct vfio_info_cap *caps);
#else
......
......@@ -24,8 +24,7 @@
/*
* Add the Base PCI Function information to the device info region.
*/
static int zpci_base_cap(struct zpci_dev *zdev, struct vfio_pci_device *vdev,
struct vfio_info_cap *caps)
static int zpci_base_cap(struct zpci_dev *zdev, struct vfio_info_cap *caps)
{
struct vfio_device_info_cap_zpci_base cap = {
.header.id = VFIO_DEVICE_INFO_CAP_ZPCI_BASE,
......@@ -45,8 +44,7 @@ static int zpci_base_cap(struct zpci_dev *zdev, struct vfio_pci_device *vdev,
/*
* Add the Base PCI Function Group information to the device info region.
*/
static int zpci_group_cap(struct zpci_dev *zdev, struct vfio_pci_device *vdev,
struct vfio_info_cap *caps)
static int zpci_group_cap(struct zpci_dev *zdev, struct vfio_info_cap *caps)
{
struct vfio_device_info_cap_zpci_group cap = {
.header.id = VFIO_DEVICE_INFO_CAP_ZPCI_GROUP,
......@@ -66,14 +64,15 @@ static int zpci_group_cap(struct zpci_dev *zdev, struct vfio_pci_device *vdev,
/*
* Add the device utility string to the device info region.
*/
static int zpci_util_cap(struct zpci_dev *zdev, struct vfio_pci_device *vdev,
struct vfio_info_cap *caps)
static int zpci_util_cap(struct zpci_dev *zdev, struct vfio_info_cap *caps)
{
struct vfio_device_info_cap_zpci_util *cap;
int cap_size = sizeof(*cap) + CLP_UTIL_STR_LEN;
int ret;
cap = kmalloc(cap_size, GFP_KERNEL);
if (!cap)
return -ENOMEM;
cap->header.id = VFIO_DEVICE_INFO_CAP_ZPCI_UTIL;
cap->header.version = 1;
......@@ -90,14 +89,15 @@ static int zpci_util_cap(struct zpci_dev *zdev, struct vfio_pci_device *vdev,
/*
* Add the function path string to the device info region.
*/
static int zpci_pfip_cap(struct zpci_dev *zdev, struct vfio_pci_device *vdev,
struct vfio_info_cap *caps)
static int zpci_pfip_cap(struct zpci_dev *zdev, struct vfio_info_cap *caps)
{
struct vfio_device_info_cap_zpci_pfip *cap;
int cap_size = sizeof(*cap) + CLP_PFIP_NR_SEGMENTS;
int ret;
cap = kmalloc(cap_size, GFP_KERNEL);
if (!cap)
return -ENOMEM;
cap->header.id = VFIO_DEVICE_INFO_CAP_ZPCI_PFIP;
cap->header.version = 1;
......@@ -123,21 +123,21 @@ int vfio_pci_info_zdev_add_caps(struct vfio_pci_device *vdev,
if (!zdev)
return -ENODEV;
ret = zpci_base_cap(zdev, vdev, caps);
ret = zpci_base_cap(zdev, caps);
if (ret)
return ret;
ret = zpci_group_cap(zdev, vdev, caps);
ret = zpci_group_cap(zdev, caps);
if (ret)
return ret;
if (zdev->util_str_avail) {
ret = zpci_util_cap(zdev, vdev, caps);
ret = zpci_util_cap(zdev, caps);
if (ret)
return ret;
}
ret = zpci_pfip_cap(zdev, vdev, caps);
ret = zpci_pfip_cap(zdev, caps);
return ret;
}
......@@ -1220,6 +1220,11 @@ static int vfio_fops_open(struct inode *inode, struct file *filep)
static int vfio_fops_release(struct inode *inode, struct file *filep)
{
struct vfio_container *container = filep->private_data;
struct vfio_iommu_driver *driver = container->iommu_driver;
if (driver && driver->ops->notify)
driver->ops->notify(container->iommu_data,
VFIO_IOMMU_CONTAINER_CLOSE);
filep->private_data = NULL;
......
This diff is collapsed.
......@@ -57,6 +57,11 @@ extern struct vfio_device *vfio_device_get_from_dev(struct device *dev);
extern void vfio_device_put(struct vfio_device *device);
extern void *vfio_device_data(struct vfio_device *device);
/* events for the backend driver notify callback */
enum vfio_iommu_notify_type {
VFIO_IOMMU_CONTAINER_CLOSE = 0,
};
/**
* struct vfio_iommu_driver_ops - VFIO IOMMU driver callbacks
*/
......@@ -92,6 +97,8 @@ struct vfio_iommu_driver_ops {
void *data, size_t count, bool write);
struct iommu_domain *(*group_iommu_domain)(void *iommu_data,
struct iommu_group *group);
void (*notify)(void *iommu_data,
enum vfio_iommu_notify_type event);
};
extern int vfio_register_iommu_driver(const struct vfio_iommu_driver_ops *ops);
......
......@@ -46,6 +46,12 @@
*/
#define VFIO_NOIOMMU_IOMMU 8
/* Supports VFIO_DMA_UNMAP_FLAG_ALL */
#define VFIO_UNMAP_ALL 9
/* Supports the vaddr flag for DMA map and unmap */
#define VFIO_UPDATE_VADDR 10
/*
* The IOCTL interface is designed for extensibility by embedding the
* structure length (argsz) and flags into structures passed between
......@@ -1074,12 +1080,22 @@ struct vfio_iommu_type1_info_dma_avail {
*
* Map process virtual addresses to IO virtual addresses using the
* provided struct vfio_dma_map. Caller sets argsz. READ &/ WRITE required.
*
* If flags & VFIO_DMA_MAP_FLAG_VADDR, update the base vaddr for iova, and
* unblock translation of host virtual addresses in the iova range. The vaddr
* must have previously been invalidated with VFIO_DMA_UNMAP_FLAG_VADDR. To
* maintain memory consistency within the user application, the updated vaddr
* must address the same memory object as originally mapped. Failure to do so
* will result in user memory corruption and/or device misbehavior. iova and
* size must match those in the original MAP_DMA call. Protection is not
* changed, and the READ & WRITE flags must be 0.
*/
struct vfio_iommu_type1_dma_map {
__u32 argsz;
__u32 flags;
#define VFIO_DMA_MAP_FLAG_READ (1 << 0) /* readable from device */
#define VFIO_DMA_MAP_FLAG_WRITE (1 << 1) /* writable from device */
#define VFIO_DMA_MAP_FLAG_VADDR (1 << 2)
__u64 vaddr; /* Process virtual address */
__u64 iova; /* IO virtual address */
__u64 size; /* Size of mapping (bytes) */
......@@ -1102,6 +1118,7 @@ struct vfio_bitmap {
* field. No guarantee is made to the user that arbitrary unmaps of iova
* or size different from those used in the original mapping call will
* succeed.
*
* VFIO_DMA_UNMAP_FLAG_GET_DIRTY_BITMAP should be set to get the dirty bitmap
* before unmapping IO virtual addresses. When this flag is set, the user must
* provide a struct vfio_bitmap in data[]. User must provide zero-allocated
......@@ -1111,11 +1128,21 @@ struct vfio_bitmap {
* indicates that the page at that offset from iova is dirty. A Bitmap of the
* pages in the range of unmapped size is returned in the user-provided
* vfio_bitmap.data.
*
* If flags & VFIO_DMA_UNMAP_FLAG_ALL, unmap all addresses. iova and size
* must be 0. This cannot be combined with the get-dirty-bitmap flag.
*
* If flags & VFIO_DMA_UNMAP_FLAG_VADDR, do not unmap, but invalidate host
* virtual addresses in the iova range. Tasks that attempt to translate an
* iova's vaddr will block. DMA to already-mapped pages continues. This
* cannot be combined with the get-dirty-bitmap flag.
*/
struct vfio_iommu_type1_dma_unmap {
__u32 argsz;
__u32 flags;
#define VFIO_DMA_UNMAP_FLAG_GET_DIRTY_BITMAP (1 << 0)
#define VFIO_DMA_UNMAP_FLAG_ALL (1 << 1)
#define VFIO_DMA_UNMAP_FLAG_VADDR (1 << 2)
__u64 iova; /* IO virtual address */
__u64 size; /* Size of mapping (bytes) */
__u8 data[];
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment