Commit 719bbd4a authored by Linus Torvalds

Merge tag 'vfio-v5.12-rc1' of git://github.com/awilliam/linux-vfio

Pull VFIO updates from Alex Williamson:

 - Virtual address update handling (Steve Sistare)

 - s390/zpci fixes and cleanups (Max Gurtovoy)

 - Fixes for dirty bitmap handling, non-mdev page pinning, and improved
   pinned dirty scope tracking (Keqian Zhu)

 - Batched page pinning enhancement (Daniel Jordan)

 - Page access permission fix (Alex Williamson)

* tag 'vfio-v5.12-rc1' of git://github.com/awilliam/linux-vfio: (21 commits)
  vfio/type1: Batch page pinning
  vfio/type1: Prepare for batched pinning with struct vfio_batch
  vfio/type1: Change success value of vaddr_get_pfn()
  vfio/type1: Use follow_pte()
  vfio/pci: remove CONFIG_VFIO_PCI_ZDEV from Kconfig
  vfio/iommu_type1: Fix duplicate included kthread.h
  vfio-pci/zdev: fix possible segmentation fault issue
  vfio-pci/zdev: remove unused vdev argument
  vfio/pci: Fix handling of pci use accessor return codes
  vfio/iommu_type1: Mantain a counter for non_pinned_groups
  vfio/iommu_type1: Fix some sanity checks in detach group
  vfio/iommu_type1: Populate full dirty when detach non-pinned group
  vfio/type1: block on invalid vaddr
  vfio/type1: implement notify callback
  vfio: iommu driver notify callback
  vfio/type1: implement interfaces to update vaddr
  vfio/type1: massage unmap iteration
  vfio: interfaces to update vaddr
  vfio/type1: implement unmap all
  vfio/type1: unmap cleanup
  ...
parents c4fbde84 4d83de6d
...@@ -45,15 +45,3 @@ config VFIO_PCI_NVLINK2 ...@@ -45,15 +45,3 @@ config VFIO_PCI_NVLINK2
depends on VFIO_PCI && PPC_POWERNV depends on VFIO_PCI && PPC_POWERNV
help help
VFIO PCI support for P9 Witherspoon machine with NVIDIA V100 GPUs VFIO PCI support for P9 Witherspoon machine with NVIDIA V100 GPUs
config VFIO_PCI_ZDEV
bool "VFIO PCI ZPCI device CLP support"
depends on VFIO_PCI && S390
default y
help
Enabling this option exposes VFIO capabilities containing hardware
configuration for zPCI devices. This enables userspace (e.g. QEMU)
to supply proper configuration values instead of hard-coded defaults
for zPCI devices passed through via VFIO on s390.
Say Y here.
...@@ -3,6 +3,6 @@ ...@@ -3,6 +3,6 @@
vfio-pci-y := vfio_pci.o vfio_pci_intrs.o vfio_pci_rdwr.o vfio_pci_config.o vfio-pci-y := vfio_pci.o vfio_pci_intrs.o vfio_pci_rdwr.o vfio_pci_config.o
vfio-pci-$(CONFIG_VFIO_PCI_IGD) += vfio_pci_igd.o vfio-pci-$(CONFIG_VFIO_PCI_IGD) += vfio_pci_igd.o
vfio-pci-$(CONFIG_VFIO_PCI_NVLINK2) += vfio_pci_nvlink2.o vfio-pci-$(CONFIG_VFIO_PCI_NVLINK2) += vfio_pci_nvlink2.o
vfio-pci-$(CONFIG_VFIO_PCI_ZDEV) += vfio_pci_zdev.o vfio-pci-$(CONFIG_S390) += vfio_pci_zdev.o
obj-$(CONFIG_VFIO_PCI) += vfio-pci.o obj-$(CONFIG_VFIO_PCI) += vfio-pci.o
...@@ -807,6 +807,7 @@ static long vfio_pci_ioctl(void *device_data, ...@@ -807,6 +807,7 @@ static long vfio_pci_ioctl(void *device_data,
struct vfio_device_info info; struct vfio_device_info info;
struct vfio_info_cap caps = { .buf = NULL, .size = 0 }; struct vfio_info_cap caps = { .buf = NULL, .size = 0 };
unsigned long capsz; unsigned long capsz;
int ret;
minsz = offsetofend(struct vfio_device_info, num_irqs); minsz = offsetofend(struct vfio_device_info, num_irqs);
...@@ -832,14 +833,11 @@ static long vfio_pci_ioctl(void *device_data, ...@@ -832,14 +833,11 @@ static long vfio_pci_ioctl(void *device_data,
info.num_regions = VFIO_PCI_NUM_REGIONS + vdev->num_regions; info.num_regions = VFIO_PCI_NUM_REGIONS + vdev->num_regions;
info.num_irqs = VFIO_PCI_NUM_IRQS; info.num_irqs = VFIO_PCI_NUM_IRQS;
if (IS_ENABLED(CONFIG_VFIO_PCI_ZDEV)) { ret = vfio_pci_info_zdev_add_caps(vdev, &caps);
int ret = vfio_pci_info_zdev_add_caps(vdev, &caps);
if (ret && ret != -ENODEV) { if (ret && ret != -ENODEV) {
pci_warn(vdev->pdev, "Failed to setup zPCI info capabilities\n"); pci_warn(vdev->pdev, "Failed to setup zPCI info capabilities\n");
return ret; return ret;
} }
}
if (caps.size) { if (caps.size) {
info.flags |= VFIO_DEVICE_FLAGS_CAPS; info.flags |= VFIO_DEVICE_FLAGS_CAPS;
......
...@@ -127,7 +127,7 @@ static size_t vfio_pci_igd_cfg_rw(struct vfio_pci_device *vdev, ...@@ -127,7 +127,7 @@ static size_t vfio_pci_igd_cfg_rw(struct vfio_pci_device *vdev,
ret = pci_user_read_config_byte(pdev, pos, &val); ret = pci_user_read_config_byte(pdev, pos, &val);
if (ret) if (ret)
return pcibios_err_to_errno(ret); return ret;
if (copy_to_user(buf + count - size, &val, 1)) if (copy_to_user(buf + count - size, &val, 1))
return -EFAULT; return -EFAULT;
...@@ -141,7 +141,7 @@ static size_t vfio_pci_igd_cfg_rw(struct vfio_pci_device *vdev, ...@@ -141,7 +141,7 @@ static size_t vfio_pci_igd_cfg_rw(struct vfio_pci_device *vdev,
ret = pci_user_read_config_word(pdev, pos, &val); ret = pci_user_read_config_word(pdev, pos, &val);
if (ret) if (ret)
return pcibios_err_to_errno(ret); return ret;
val = cpu_to_le16(val); val = cpu_to_le16(val);
if (copy_to_user(buf + count - size, &val, 2)) if (copy_to_user(buf + count - size, &val, 2))
...@@ -156,7 +156,7 @@ static size_t vfio_pci_igd_cfg_rw(struct vfio_pci_device *vdev, ...@@ -156,7 +156,7 @@ static size_t vfio_pci_igd_cfg_rw(struct vfio_pci_device *vdev,
ret = pci_user_read_config_dword(pdev, pos, &val); ret = pci_user_read_config_dword(pdev, pos, &val);
if (ret) if (ret)
return pcibios_err_to_errno(ret); return ret;
val = cpu_to_le32(val); val = cpu_to_le32(val);
if (copy_to_user(buf + count - size, &val, 4)) if (copy_to_user(buf + count - size, &val, 4))
...@@ -171,7 +171,7 @@ static size_t vfio_pci_igd_cfg_rw(struct vfio_pci_device *vdev, ...@@ -171,7 +171,7 @@ static size_t vfio_pci_igd_cfg_rw(struct vfio_pci_device *vdev,
ret = pci_user_read_config_word(pdev, pos, &val); ret = pci_user_read_config_word(pdev, pos, &val);
if (ret) if (ret)
return pcibios_err_to_errno(ret); return ret;
val = cpu_to_le16(val); val = cpu_to_le16(val);
if (copy_to_user(buf + count - size, &val, 2)) if (copy_to_user(buf + count - size, &val, 2))
...@@ -186,7 +186,7 @@ static size_t vfio_pci_igd_cfg_rw(struct vfio_pci_device *vdev, ...@@ -186,7 +186,7 @@ static size_t vfio_pci_igd_cfg_rw(struct vfio_pci_device *vdev,
ret = pci_user_read_config_byte(pdev, pos, &val); ret = pci_user_read_config_byte(pdev, pos, &val);
if (ret) if (ret)
return pcibios_err_to_errno(ret); return ret;
if (copy_to_user(buf + count - size, &val, 1)) if (copy_to_user(buf + count - size, &val, 1))
return -EFAULT; return -EFAULT;
......
...@@ -214,7 +214,7 @@ static inline int vfio_pci_ibm_npu2_init(struct vfio_pci_device *vdev) ...@@ -214,7 +214,7 @@ static inline int vfio_pci_ibm_npu2_init(struct vfio_pci_device *vdev)
} }
#endif #endif
#ifdef CONFIG_VFIO_PCI_ZDEV #ifdef CONFIG_S390
extern int vfio_pci_info_zdev_add_caps(struct vfio_pci_device *vdev, extern int vfio_pci_info_zdev_add_caps(struct vfio_pci_device *vdev,
struct vfio_info_cap *caps); struct vfio_info_cap *caps);
#else #else
......
...@@ -24,8 +24,7 @@ ...@@ -24,8 +24,7 @@
/* /*
* Add the Base PCI Function information to the device info region. * Add the Base PCI Function information to the device info region.
*/ */
static int zpci_base_cap(struct zpci_dev *zdev, struct vfio_pci_device *vdev, static int zpci_base_cap(struct zpci_dev *zdev, struct vfio_info_cap *caps)
struct vfio_info_cap *caps)
{ {
struct vfio_device_info_cap_zpci_base cap = { struct vfio_device_info_cap_zpci_base cap = {
.header.id = VFIO_DEVICE_INFO_CAP_ZPCI_BASE, .header.id = VFIO_DEVICE_INFO_CAP_ZPCI_BASE,
...@@ -45,8 +44,7 @@ static int zpci_base_cap(struct zpci_dev *zdev, struct vfio_pci_device *vdev, ...@@ -45,8 +44,7 @@ static int zpci_base_cap(struct zpci_dev *zdev, struct vfio_pci_device *vdev,
/* /*
* Add the Base PCI Function Group information to the device info region. * Add the Base PCI Function Group information to the device info region.
*/ */
static int zpci_group_cap(struct zpci_dev *zdev, struct vfio_pci_device *vdev, static int zpci_group_cap(struct zpci_dev *zdev, struct vfio_info_cap *caps)
struct vfio_info_cap *caps)
{ {
struct vfio_device_info_cap_zpci_group cap = { struct vfio_device_info_cap_zpci_group cap = {
.header.id = VFIO_DEVICE_INFO_CAP_ZPCI_GROUP, .header.id = VFIO_DEVICE_INFO_CAP_ZPCI_GROUP,
...@@ -66,14 +64,15 @@ static int zpci_group_cap(struct zpci_dev *zdev, struct vfio_pci_device *vdev, ...@@ -66,14 +64,15 @@ static int zpci_group_cap(struct zpci_dev *zdev, struct vfio_pci_device *vdev,
/* /*
* Add the device utility string to the device info region. * Add the device utility string to the device info region.
*/ */
static int zpci_util_cap(struct zpci_dev *zdev, struct vfio_pci_device *vdev, static int zpci_util_cap(struct zpci_dev *zdev, struct vfio_info_cap *caps)
struct vfio_info_cap *caps)
{ {
struct vfio_device_info_cap_zpci_util *cap; struct vfio_device_info_cap_zpci_util *cap;
int cap_size = sizeof(*cap) + CLP_UTIL_STR_LEN; int cap_size = sizeof(*cap) + CLP_UTIL_STR_LEN;
int ret; int ret;
cap = kmalloc(cap_size, GFP_KERNEL); cap = kmalloc(cap_size, GFP_KERNEL);
if (!cap)
return -ENOMEM;
cap->header.id = VFIO_DEVICE_INFO_CAP_ZPCI_UTIL; cap->header.id = VFIO_DEVICE_INFO_CAP_ZPCI_UTIL;
cap->header.version = 1; cap->header.version = 1;
...@@ -90,14 +89,15 @@ static int zpci_util_cap(struct zpci_dev *zdev, struct vfio_pci_device *vdev, ...@@ -90,14 +89,15 @@ static int zpci_util_cap(struct zpci_dev *zdev, struct vfio_pci_device *vdev,
/* /*
* Add the function path string to the device info region. * Add the function path string to the device info region.
*/ */
static int zpci_pfip_cap(struct zpci_dev *zdev, struct vfio_pci_device *vdev, static int zpci_pfip_cap(struct zpci_dev *zdev, struct vfio_info_cap *caps)
struct vfio_info_cap *caps)
{ {
struct vfio_device_info_cap_zpci_pfip *cap; struct vfio_device_info_cap_zpci_pfip *cap;
int cap_size = sizeof(*cap) + CLP_PFIP_NR_SEGMENTS; int cap_size = sizeof(*cap) + CLP_PFIP_NR_SEGMENTS;
int ret; int ret;
cap = kmalloc(cap_size, GFP_KERNEL); cap = kmalloc(cap_size, GFP_KERNEL);
if (!cap)
return -ENOMEM;
cap->header.id = VFIO_DEVICE_INFO_CAP_ZPCI_PFIP; cap->header.id = VFIO_DEVICE_INFO_CAP_ZPCI_PFIP;
cap->header.version = 1; cap->header.version = 1;
...@@ -123,21 +123,21 @@ int vfio_pci_info_zdev_add_caps(struct vfio_pci_device *vdev, ...@@ -123,21 +123,21 @@ int vfio_pci_info_zdev_add_caps(struct vfio_pci_device *vdev,
if (!zdev) if (!zdev)
return -ENODEV; return -ENODEV;
ret = zpci_base_cap(zdev, vdev, caps); ret = zpci_base_cap(zdev, caps);
if (ret) if (ret)
return ret; return ret;
ret = zpci_group_cap(zdev, vdev, caps); ret = zpci_group_cap(zdev, caps);
if (ret) if (ret)
return ret; return ret;
if (zdev->util_str_avail) { if (zdev->util_str_avail) {
ret = zpci_util_cap(zdev, vdev, caps); ret = zpci_util_cap(zdev, caps);
if (ret) if (ret)
return ret; return ret;
} }
ret = zpci_pfip_cap(zdev, vdev, caps); ret = zpci_pfip_cap(zdev, caps);
return ret; return ret;
} }
...@@ -1220,6 +1220,11 @@ static int vfio_fops_open(struct inode *inode, struct file *filep) ...@@ -1220,6 +1220,11 @@ static int vfio_fops_open(struct inode *inode, struct file *filep)
static int vfio_fops_release(struct inode *inode, struct file *filep) static int vfio_fops_release(struct inode *inode, struct file *filep)
{ {
struct vfio_container *container = filep->private_data; struct vfio_container *container = filep->private_data;
struct vfio_iommu_driver *driver = container->iommu_driver;
if (driver && driver->ops->notify)
driver->ops->notify(container->iommu_data,
VFIO_IOMMU_CONTAINER_CLOSE);
filep->private_data = NULL; filep->private_data = NULL;
......
This diff is collapsed.
...@@ -57,6 +57,11 @@ extern struct vfio_device *vfio_device_get_from_dev(struct device *dev); ...@@ -57,6 +57,11 @@ extern struct vfio_device *vfio_device_get_from_dev(struct device *dev);
extern void vfio_device_put(struct vfio_device *device); extern void vfio_device_put(struct vfio_device *device);
extern void *vfio_device_data(struct vfio_device *device); extern void *vfio_device_data(struct vfio_device *device);
/* events for the backend driver notify callback */
enum vfio_iommu_notify_type {
VFIO_IOMMU_CONTAINER_CLOSE = 0,
};
/** /**
* struct vfio_iommu_driver_ops - VFIO IOMMU driver callbacks * struct vfio_iommu_driver_ops - VFIO IOMMU driver callbacks
*/ */
...@@ -92,6 +97,8 @@ struct vfio_iommu_driver_ops { ...@@ -92,6 +97,8 @@ struct vfio_iommu_driver_ops {
void *data, size_t count, bool write); void *data, size_t count, bool write);
struct iommu_domain *(*group_iommu_domain)(void *iommu_data, struct iommu_domain *(*group_iommu_domain)(void *iommu_data,
struct iommu_group *group); struct iommu_group *group);
void (*notify)(void *iommu_data,
enum vfio_iommu_notify_type event);
}; };
extern int vfio_register_iommu_driver(const struct vfio_iommu_driver_ops *ops); extern int vfio_register_iommu_driver(const struct vfio_iommu_driver_ops *ops);
......
...@@ -46,6 +46,12 @@ ...@@ -46,6 +46,12 @@
*/ */
#define VFIO_NOIOMMU_IOMMU 8 #define VFIO_NOIOMMU_IOMMU 8
/* Supports VFIO_DMA_UNMAP_FLAG_ALL */
#define VFIO_UNMAP_ALL 9
/* Supports the vaddr flag for DMA map and unmap */
#define VFIO_UPDATE_VADDR 10
/* /*
* The IOCTL interface is designed for extensibility by embedding the * The IOCTL interface is designed for extensibility by embedding the
* structure length (argsz) and flags into structures passed between * structure length (argsz) and flags into structures passed between
...@@ -1074,12 +1080,22 @@ struct vfio_iommu_type1_info_dma_avail { ...@@ -1074,12 +1080,22 @@ struct vfio_iommu_type1_info_dma_avail {
* *
* Map process virtual addresses to IO virtual addresses using the * Map process virtual addresses to IO virtual addresses using the
* provided struct vfio_dma_map. Caller sets argsz. READ &/ WRITE required. * provided struct vfio_dma_map. Caller sets argsz. READ &/ WRITE required.
*
* If flags & VFIO_DMA_MAP_FLAG_VADDR, update the base vaddr for iova, and
* unblock translation of host virtual addresses in the iova range. The vaddr
* must have previously been invalidated with VFIO_DMA_UNMAP_FLAG_VADDR. To
* maintain memory consistency within the user application, the updated vaddr
* must address the same memory object as originally mapped. Failure to do so
* will result in user memory corruption and/or device misbehavior. iova and
* size must match those in the original MAP_DMA call. Protection is not
* changed, and the READ & WRITE flags must be 0.
*/ */
struct vfio_iommu_type1_dma_map { struct vfio_iommu_type1_dma_map {
__u32 argsz; __u32 argsz;
__u32 flags; __u32 flags;
#define VFIO_DMA_MAP_FLAG_READ (1 << 0) /* readable from device */ #define VFIO_DMA_MAP_FLAG_READ (1 << 0) /* readable from device */
#define VFIO_DMA_MAP_FLAG_WRITE (1 << 1) /* writable from device */ #define VFIO_DMA_MAP_FLAG_WRITE (1 << 1) /* writable from device */
#define VFIO_DMA_MAP_FLAG_VADDR (1 << 2)
__u64 vaddr; /* Process virtual address */ __u64 vaddr; /* Process virtual address */
__u64 iova; /* IO virtual address */ __u64 iova; /* IO virtual address */
__u64 size; /* Size of mapping (bytes) */ __u64 size; /* Size of mapping (bytes) */
...@@ -1102,6 +1118,7 @@ struct vfio_bitmap { ...@@ -1102,6 +1118,7 @@ struct vfio_bitmap {
* field. No guarantee is made to the user that arbitrary unmaps of iova * field. No guarantee is made to the user that arbitrary unmaps of iova
* or size different from those used in the original mapping call will * or size different from those used in the original mapping call will
* succeed. * succeed.
*
* VFIO_DMA_UNMAP_FLAG_GET_DIRTY_BITMAP should be set to get the dirty bitmap * VFIO_DMA_UNMAP_FLAG_GET_DIRTY_BITMAP should be set to get the dirty bitmap
* before unmapping IO virtual addresses. When this flag is set, the user must * before unmapping IO virtual addresses. When this flag is set, the user must
* provide a struct vfio_bitmap in data[]. User must provide zero-allocated * provide a struct vfio_bitmap in data[]. User must provide zero-allocated
...@@ -1111,11 +1128,21 @@ struct vfio_bitmap { ...@@ -1111,11 +1128,21 @@ struct vfio_bitmap {
* indicates that the page at that offset from iova is dirty. A Bitmap of the * indicates that the page at that offset from iova is dirty. A Bitmap of the
* pages in the range of unmapped size is returned in the user-provided * pages in the range of unmapped size is returned in the user-provided
* vfio_bitmap.data. * vfio_bitmap.data.
*
* If flags & VFIO_DMA_UNMAP_FLAG_ALL, unmap all addresses. iova and size
* must be 0. This cannot be combined with the get-dirty-bitmap flag.
*
* If flags & VFIO_DMA_UNMAP_FLAG_VADDR, do not unmap, but invalidate host
* virtual addresses in the iova range. Tasks that attempt to translate an
* iova's vaddr will block. DMA to already-mapped pages continues. This
* cannot be combined with the get-dirty-bitmap flag.
*/ */
struct vfio_iommu_type1_dma_unmap { struct vfio_iommu_type1_dma_unmap {
__u32 argsz; __u32 argsz;
__u32 flags; __u32 flags;
#define VFIO_DMA_UNMAP_FLAG_GET_DIRTY_BITMAP (1 << 0) #define VFIO_DMA_UNMAP_FLAG_GET_DIRTY_BITMAP (1 << 0)
#define VFIO_DMA_UNMAP_FLAG_ALL (1 << 1)
#define VFIO_DMA_UNMAP_FLAG_VADDR (1 << 2)
__u64 iova; /* IO virtual address */ __u64 iova; /* IO virtual address */
__u64 size; /* Size of mapping (bytes) */ __u64 size; /* Size of mapping (bytes) */
__u8 data[]; __u8 data[];
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment