Commit 89e1f7d4 authored by Alex Williamson

vfio: Add PCI device driver

Add PCI device support for VFIO.  PCI devices expose regions
for accessing config space, I/O port space, and MMIO areas
of the device.  PCI config access is virtualized in the kernel,
allowing us to ensure the integrity of the system, by preventing
various accesses while reducing duplicate support across various
userspace drivers.  I/O port supports read/write access while
MMIO also supports mmap of sufficiently sized regions.  Support
for INTx, MSI, and MSI-X interrupts is provided using eventfds to
userspace.
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
parent 73fa0d10
...@@ -12,3 +12,5 @@ menuconfig VFIO ...@@ -12,3 +12,5 @@ menuconfig VFIO
See Documentation/vfio.txt for more details. See Documentation/vfio.txt for more details.
If you don't know what to do here, say N. If you don't know what to do here, say N.
source "drivers/vfio/pci/Kconfig"
# VFIO bus driver for PCI devices; can be built in or as a module (tristate)
# and needs the VFIO core, PCI and eventfd support to be enabled.
config VFIO_PCI
tristate "VFIO support for PCI devices"
depends on VFIO && PCI && EVENTFD
help
Support for the PCI VFIO bus driver. This is required to make
use of PCI drivers using the VFIO framework.
If you don't know what to do here, say N.
# vfio-pci is a composite object linked from the four objects listed below;
# it is only built when CONFIG_VFIO_PCI is set (y or m).
vfio-pci-y := vfio_pci.o vfio_pci_intrs.o vfio_pci_rdwr.o vfio_pci_config.o
obj-$(CONFIG_VFIO_PCI) += vfio-pci.o
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
/*
* Copyright (C) 2012 Red Hat, Inc. All rights reserved.
* Author: Alex Williamson <alex.williamson@redhat.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*
* Derived from original vfio:
* Copyright 2010 Cisco Systems, Inc. All rights reserved.
* Author: Tom Lyon, pugs@cisco.com
*/
#include <linux/mutex.h>
#include <linux/pci.h>
#ifndef VFIO_PCI_PRIVATE_H
#define VFIO_PCI_PRIVATE_H
/*
 * A device file offset encodes both the region and the position inside it:
 * the bits at and above VFIO_PCI_OFFSET_SHIFT select the region index, the
 * bits below it (VFIO_PCI_OFFSET_MASK) are the byte offset in that region.
 *
 * Macro arguments are fully parenthesized so that expressions such as
 * "a | b" can be passed without being re-associated by operator precedence.
 */
#define VFIO_PCI_OFFSET_SHIFT   40

#define VFIO_PCI_OFFSET_TO_INDEX(off)	((off) >> VFIO_PCI_OFFSET_SHIFT)
#define VFIO_PCI_INDEX_TO_OFFSET(index)	((u64)(index) << VFIO_PCI_OFFSET_SHIFT)
#define VFIO_PCI_OFFSET_MASK	(((u64)(1) << VFIO_PCI_OFFSET_SHIFT) - 1)
/*
 * Per-interrupt bookkeeping.  struct vfio_pci_device holds a pointer to
 * these in its ->ctx field.  The users of this structure are not part of
 * this header, so the field notes below are assumptions to be confirmed
 * against vfio_pci_intrs.c.
 */
struct vfio_pci_irq_ctx {
/* NOTE(review): presumably the eventfd signalled to userspace when the interrupt fires - confirm */
struct eventfd_ctx *trigger;
/* NOTE(review): presumably eventfd-driven unmask/mask hooks - confirm */
struct virqfd *unmask;
struct virqfd *mask;
/* NOTE(review): presumably the name the IRQ is registered under - confirm */
char *name;
/* NOTE(review): presumably true while the interrupt is masked - confirm */
bool masked;
};
/*
 * Per-device state of the vfio-pci driver.
 *
 * Only the fields used by vfio_pci_io_readwrite()/vfio_pci_mem_readwrite()
 * are described as fact; everything else is consumed by code that is not
 * visible from this header and is marked NOTE(review) where a guess is made.
 */
struct vfio_pci_device {
/* Underlying PCI device; the source of resource start/length queries */
struct pci_dev *pdev;
/*
 * iomap of each standard BAR.  An entry stays NULL until the first
 * read/write access to that BAR requests the region and maps it.
 */
void __iomem *barmap[PCI_STD_RESOURCE_END + 1];
/* NOTE(review): presumably config-space virtualization state - confirm in vfio_pci_config.c */
u8 *pci_config_map;
u8 *vconfig;
struct perm_bits *msi_perm;
spinlock_t irqlock;
struct mutex igate;
struct msix_entry *msix;
/* Array of per-interrupt contexts; NOTE(review): presumably num_ctx entries - confirm */
struct vfio_pci_irq_ctx *ctx;
int num_ctx;
/* Compared against the VFIO_PCI_*_IRQ_INDEX constants by is_intx()/is_msi()/is_msix() */
int irq_type;
u8 msi_qmax;
/*
 * msix_bar/msix_offset/msix_size: BAR index, byte offset and byte length
 * of the range vfio_pci_mem_readwrite() hides from the user (reads return
 * 0xFF, writes are dropped) - the MSI-X table region.
 */
u8 msix_bar;
u16 msix_size;
u32 msix_offset;
u32 rbar[7];
bool pci_2_3;
bool virq_disabled;
bool reset_works;
bool extended_caps;
bool bardirty;
struct pci_saved_state *pci_saved_state;
/* NOTE(review): presumably a count of open users of the device - confirm */
atomic_t refcnt;
};
/*
 * Predicates on vdev->irq_type, which is compared against the
 * VFIO_PCI_*_IRQ_INDEX constants.  is_irq_none() is true when the type
 * matches none of INTx, MSI or MSI-X.
 *
 * Arguments are parenthesized so that callers may pass expressions such as
 * "&dev" or "a | b" without precedence surprises.
 */
#define is_intx(vdev)	((vdev)->irq_type == VFIO_PCI_INTX_IRQ_INDEX)
#define is_msi(vdev)	((vdev)->irq_type == VFIO_PCI_MSI_IRQ_INDEX)
#define is_msix(vdev)	((vdev)->irq_type == VFIO_PCI_MSIX_IRQ_INDEX)
#define is_irq_none(vdev) (!(is_intx(vdev) || is_msi(vdev) || is_msix(vdev)))
#define irq_is(vdev, type) ((vdev)->irq_type == (type))
/* INTx mask/unmask helpers */
void vfio_pci_intx_mask(struct vfio_pci_device *vdev);
void vfio_pci_intx_unmask(struct vfio_pci_device *vdev);

/* Back end for configuring interrupts on behalf of an ioctl */
int vfio_pci_set_irqs_ioctl(struct vfio_pci_device *vdev, uint32_t flags,
			    unsigned int index, unsigned int start,
			    unsigned int count, void *data);

/*
 * Region accessors.  They share one signature: @buf is the user buffer,
 * @count the number of bytes requested, *@ppos the file offset and
 * @iswrite selects the direction.  One each for PCI config space, MMIO
 * BARs and I/O port BARs.
 */
ssize_t vfio_pci_config_readwrite(struct vfio_pci_device *vdev,
				  char __user *buf, size_t count,
				  loff_t *ppos, bool iswrite);
ssize_t vfio_pci_mem_readwrite(struct vfio_pci_device *vdev,
			       char __user *buf, size_t count,
			       loff_t *ppos, bool iswrite);
ssize_t vfio_pci_io_readwrite(struct vfio_pci_device *vdev,
			      char __user *buf, size_t count,
			      loff_t *ppos, bool iswrite);

/* Init/teardown pairs that take no device argument */
int vfio_pci_init_perm_bits(void);
void vfio_pci_uninit_perm_bits(void);

int vfio_pci_virqfd_init(void);
void vfio_pci_virqfd_exit(void);

/* Per-device config state setup and release */
int vfio_config_init(struct vfio_pci_device *vdev);
void vfio_config_free(struct vfio_pci_device *vdev);
#endif /* VFIO_PCI_PRIVATE_H */
/*
* VFIO PCI I/O Port & MMIO access
*
* Copyright (C) 2012 Red Hat, Inc. All rights reserved.
* Author: Alex Williamson <alex.williamson@redhat.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*
* Derived from original vfio:
* Copyright 2010 Cisco Systems, Inc. All rights reserved.
* Author: Tom Lyon, pugs@cisco.com
*/
#include <linux/fs.h>
#include <linux/pci.h>
#include <linux/uaccess.h>
#include <linux/io.h>
#include "vfio_pci_private.h"
/*
 * I/O Port BAR access
 *
 * Transfer @count bytes between the user buffer @buf and an I/O port BAR.
 * The BAR index is taken from the upper bits of *@ppos
 * (VFIO_PCI_OFFSET_TO_INDEX) and the starting offset inside the BAR from
 * the bits covered by VFIO_PCI_OFFSET_MASK.  The BAR is requested and
 * iomapped on first use and the mapping is cached in vdev->barmap[].
 *
 * Data is moved with the widest naturally aligned access (4, 2 or 1 bytes)
 * that still fits in the remaining byte count; multi-byte values in the
 * user buffer are little-endian.
 *
 * Returns the number of bytes transferred and advances *@ppos by the same
 * amount.  Returns -EINVAL if the BAR has no resource, the request runs
 * past the end of the BAR or the BAR cannot be mapped, the error from
 * pci_request_selected_regions() if the region cannot be claimed, and
 * -EFAULT if a user copy fails (*@ppos is then left untouched).
 */
ssize_t vfio_pci_io_readwrite(struct vfio_pci_device *vdev, char __user *buf,
			      size_t count, loff_t *ppos, bool iswrite)
{
	struct pci_dev *pdev = vdev->pdev;
	loff_t pos = *ppos & VFIO_PCI_OFFSET_MASK;
	int bar = VFIO_PCI_OFFSET_TO_INDEX(*ppos);
	void __iomem *io;
	size_t done = 0;

	if (!pci_resource_start(pdev, bar))
		return -EINVAL;

	if (pos + count > pci_resource_len(pdev, bar))
		return -EINVAL;

	/* Claim and map the BAR the first time it is touched */
	if (!vdev->barmap[bar]) {
		int ret;

		ret = pci_request_selected_regions(pdev, 1 << bar, "vfio");
		if (ret)
			return ret;

		vdev->barmap[bar] = pci_iomap(pdev, bar, 0);

		if (!vdev->barmap[bar]) {
			pci_release_selected_regions(pdev, 1 << bar);
			return -EINVAL;
		}
	}

	io = vdev->barmap[bar];

	while (count) {
		size_t filled;

		/*
		 * A 32-bit access needs at least 4 bytes left.  Accepting 3
		 * would copy one byte beyond the caller's buffer and wrap
		 * the unsigned count below zero, running the loop past the
		 * range that was validated above.
		 */
		if (count >= 4 && !(pos % 4)) {
			__le32 val;

			if (iswrite) {
				if (copy_from_user(&val, buf, 4))
					return -EFAULT;

				iowrite32(le32_to_cpu(val), io + pos);
			} else {
				val = cpu_to_le32(ioread32(io + pos));

				if (copy_to_user(buf, &val, 4))
					return -EFAULT;
			}

			filled = 4;

		} else if (count >= 2 && !(pos % 2)) {
			__le16 val;

			if (iswrite) {
				if (copy_from_user(&val, buf, 2))
					return -EFAULT;

				iowrite16(le16_to_cpu(val), io + pos);
			} else {
				val = cpu_to_le16(ioread16(io + pos));

				if (copy_to_user(buf, &val, 2))
					return -EFAULT;
			}

			filled = 2;

		} else {
			u8 val;

			if (iswrite) {
				if (copy_from_user(&val, buf, 1))
					return -EFAULT;

				iowrite8(val, io + pos);
			} else {
				val = ioread8(io + pos);

				if (copy_to_user(buf, &val, 1))
					return -EFAULT;
			}

			filled = 1;
		}

		count -= filled;
		done += filled;
		buf += filled;
		pos += filled;
	}

	*ppos += done;

	return done;
}
/*
 * MMIO BAR access
 *
 * Transfer up to @count bytes between the user buffer @buf and an MMIO BAR
 * or the expansion ROM.  The region index comes from the upper bits of
 * *@ppos (VFIO_PCI_OFFSET_TO_INDEX), the starting offset from the bits
 * covered by VFIO_PCI_OFFSET_MASK.  Requests that extend past the end of
 * the resource are truncated; a request starting exactly at the end
 * returns 0.
 *
 * We handle two excluded ranges here as well: if the user tries to access
 * the ROM beyond what pci_map_rom() reports as available, or the MSI-X
 * table region of the MSI-X BAR, reads return 0xFF and writes are dropped.
 * The excluded range is [x_start, x_end).
 *
 * Outside the excluded range data is moved with the widest naturally
 * aligned access (4, 2 or 1 bytes) that fits both the remaining byte count
 * and the distance to the next boundary; multi-byte values in the user
 * buffer are little-endian.
 *
 * Returns the number of bytes transferred and advances *@ppos by the same
 * amount.  Returns -EINVAL if the resource is absent, the offset is past
 * its end or it cannot be mapped, the error from
 * pci_request_selected_regions() if the BAR cannot be claimed, and -EFAULT
 * if a user copy fails (*@ppos is then left untouched).
 */
ssize_t vfio_pci_mem_readwrite(struct vfio_pci_device *vdev, char __user *buf,
			       size_t count, loff_t *ppos, bool iswrite)
{
	struct pci_dev *pdev = vdev->pdev;
	loff_t pos = *ppos & VFIO_PCI_OFFSET_MASK;
	int bar = VFIO_PCI_OFFSET_TO_INDEX(*ppos);
	void __iomem *io;
	resource_size_t end;
	size_t done = 0;
	size_t x_start = 0, x_end = 0; /* excluded range */

	if (!pci_resource_start(pdev, bar))
		return -EINVAL;

	end = pci_resource_len(pdev, bar);

	if (pos > end)
		return -EINVAL;

	if (pos == end)
		return 0;

	if (pos + count > end)
		count = end - pos;

	if (bar == PCI_ROM_RESOURCE) {
		/*
		 * The ROM is mapped per call (and unmapped at "out").
		 * pci_map_rom() stores the usable ROM size in x_start, so
		 * everything from there to the end of the resource is
		 * excluded.
		 */
		io = pci_map_rom(pdev, &x_start);
		x_end = end;
	} else {
		/* Claim and map the BAR the first time it is touched */
		if (!vdev->barmap[bar]) {
			int ret;

			ret = pci_request_selected_regions(pdev, 1 << bar,
							   "vfio");
			if (ret)
				return ret;

			vdev->barmap[bar] = pci_iomap(pdev, bar, 0);

			if (!vdev->barmap[bar]) {
				pci_release_selected_regions(pdev, 1 << bar);
				return -EINVAL;
			}
		}

		io = vdev->barmap[bar];

		if (bar == vdev->msix_bar) {
			x_start = vdev->msix_offset;
			x_end = vdev->msix_offset + vdev->msix_size;
		}
	}

	if (!io)
		return -EINVAL;

	while (count) {
		size_t fillable, filled;

		/* Bytes that may touch the device before the next boundary */
		if (pos < x_start)
			fillable = x_start - pos;
		else if (pos >= x_end)
			fillable = end - pos;
		else
			fillable = 0;

		if (fillable >= 4 && !(pos % 4) && (count >= 4)) {
			__le32 val;

			if (iswrite) {
				if (copy_from_user(&val, buf, 4))
					goto out;

				iowrite32(le32_to_cpu(val), io + pos);
			} else {
				val = cpu_to_le32(ioread32(io + pos));

				if (copy_to_user(buf, &val, 4))
					goto out;
			}

			filled = 4;
		} else if (fillable >= 2 && !(pos % 2) && (count >= 2)) {
			__le16 val;

			if (iswrite) {
				if (copy_from_user(&val, buf, 2))
					goto out;

				iowrite16(le16_to_cpu(val), io + pos);
			} else {
				val = cpu_to_le16(ioread16(io + pos));

				if (copy_to_user(buf, &val, 2))
					goto out;
			}

			filled = 2;
		} else if (fillable) {
			u8 val;

			if (iswrite) {
				if (copy_from_user(&val, buf, 1))
					goto out;

				iowrite8(val, io + pos);
			} else {
				val = ioread8(io + pos);

				if (copy_to_user(buf, &val, 1))
					goto out;
			}

			filled = 1;
		} else {
			/*
			 * Inside the excluded range: drop writes, fill reads
			 * with FF.  Never handle more than the caller asked
			 * for; going to the end of the excluded range
			 * regardless of count would write past the user's
			 * buffer and wrap the unsigned count below zero.
			 */
			filled = x_end - pos;
			if (filled > count)
				filled = count;

			if (!iswrite) {
				char val = 0xFF;
				size_t i;

				for (i = 0; i < filled; i++) {
					if (put_user(val, buf + i))
						goto out;
				}
			}
		}

		count -= filled;
		done += filled;
		buf += filled;
		pos += filled;
	}

	*ppos += done;

out:
	if (bar == PCI_ROM_RESOURCE)
		pci_unmap_rom(pdev, io);

	/* A non-zero count here means a user copy failed part-way through */
	return count ? -EFAULT : done;
}
...@@ -223,6 +223,7 @@ struct vfio_device_info { ...@@ -223,6 +223,7 @@ struct vfio_device_info {
__u32 argsz; __u32 argsz;
__u32 flags; __u32 flags;
#define VFIO_DEVICE_FLAGS_RESET (1 << 0) /* Device supports reset */ #define VFIO_DEVICE_FLAGS_RESET (1 << 0) /* Device supports reset */
#define VFIO_DEVICE_FLAGS_PCI (1 << 1) /* vfio-pci device */
__u32 num_regions; /* Max region index + 1 */ __u32 num_regions; /* Max region index + 1 */
__u32 num_irqs; /* Max IRQ index + 1 */ __u32 num_irqs; /* Max IRQ index + 1 */
}; };
...@@ -364,6 +365,31 @@ struct vfio_irq_set { ...@@ -364,6 +365,31 @@ struct vfio_irq_set {
*/ */
#define VFIO_DEVICE_RESET _IO(VFIO_TYPE, VFIO_BASE + 11) #define VFIO_DEVICE_RESET _IO(VFIO_TYPE, VFIO_BASE + 11)
/*
* The VFIO-PCI bus driver makes use of the following fixed region and
* IRQ index mapping. Unimplemented regions return a size of zero.
* Unimplemented IRQ types return a count of zero.
*/
/* Fixed region indexes of a vfio-pci device */
enum {
	VFIO_PCI_BAR0_REGION_INDEX = 0,		/* BAR 0 */
	VFIO_PCI_BAR1_REGION_INDEX = 1,		/* BAR 1 */
	VFIO_PCI_BAR2_REGION_INDEX = 2,		/* BAR 2 */
	VFIO_PCI_BAR3_REGION_INDEX = 3,		/* BAR 3 */
	VFIO_PCI_BAR4_REGION_INDEX = 4,		/* BAR 4 */
	VFIO_PCI_BAR5_REGION_INDEX = 5,		/* BAR 5 */
	VFIO_PCI_ROM_REGION_INDEX = 6,		/* ROM */
	VFIO_PCI_CONFIG_REGION_INDEX = 7,	/* config space */
	VFIO_PCI_NUM_REGIONS			/* number of regions; keep last */
};
/* Fixed IRQ indexes of a vfio-pci device */
enum {
	VFIO_PCI_INTX_IRQ_INDEX = 0,	/* INTx */
	VFIO_PCI_MSI_IRQ_INDEX = 1,	/* MSI */
	VFIO_PCI_MSIX_IRQ_INDEX = 2,	/* MSI-X */
	VFIO_PCI_NUM_IRQS		/* number of IRQ types; keep last */
};
/* -------- API for Type1 VFIO IOMMU -------- */ /* -------- API for Type1 VFIO IOMMU -------- */
/** /**
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment