Commit 1c810652 authored by Linus Torvalds's avatar Linus Torvalds

Merge branch 'next' of git://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu

* 'next' of git://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu: (53 commits)
  iommu/amd: Set IOTLB invalidation timeout
  iommu/amd: Init stats for iommu=pt
  iommu/amd: Remove unnecessary cache flushes in amd_iommu_resume
  iommu/amd: Add invalidate-context call-back
  iommu/amd: Add amd_iommu_device_info() function
  iommu/amd: Adapt IOMMU driver to PCI register name changes
  iommu/amd: Add invalid_ppr callback
  iommu/amd: Implement notifiers for IOMMUv2
  iommu/amd: Implement IO page-fault handler
  iommu/amd: Add routines to bind/unbind a pasid
  iommu/amd: Implement device aquisition code for IOMMUv2
  iommu/amd: Add driver stub for AMD IOMMUv2 support
  iommu/amd: Add stat counter for IOMMUv2 events
  iommu/amd: Add device errata handling
  iommu/amd: Add function to get IOMMUv2 domain for pdev
  iommu/amd: Implement function to send PPR completions
  iommu/amd: Implement functions to manage GCR3 table
  iommu/amd: Implement IOMMUv2 TLB flushing routines
  iommu/amd: Add support for IOMMUv2 domain mode
  iommu/amd: Add amd_iommu_domain_direct_map function
  ...
parents 1a464cbb f93ea733
......@@ -66,6 +66,24 @@ Description:
re-discover previously removed devices.
Depends on CONFIG_HOTPLUG.
What: /sys/bus/pci/devices/.../msi_irqs/
Date: September, 2011
Contact: Neil Horman <nhorman@tuxdriver.com>
Description:
The /sys/devices/.../msi_irqs directory contains a variable set
of sub-directories, with each sub-directory being named after a
corresponding msi irq vector allocated to that device. Each
numbered sub-directory N contains attributes of that irq.
Note that this directory is not created for device drivers which
do not support msi irqs
What: /sys/bus/pci/devices/.../msi_irqs/<N>/mode
Date: September 2011
Contact: Neil Horman <nhorman@tuxdriver.com>
Description:
This attribute indicates the mode that the irq vector named by
the parent directory is in (msi vs. msix)
What: /sys/bus/pci/devices/.../remove
Date: January 2009
Contact: Linux PCI developers <linux-pci@vger.kernel.org>
......
......@@ -329,6 +329,11 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
is a lot of faster
off - do not initialize any AMD IOMMU found in
the system
force_isolation - Force device isolation for all
devices. The IOMMU driver is not
allowed anymore to lift isolation
requirements as needed. This option
does not override iommu=pt
amijoy.map= [HW,JOY] Amiga joystick support
Map of devices attached to JOY0DAT and JOY1DAT
......@@ -1059,7 +1064,9 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
nomerge
forcesac
soft
pt [x86, IA-64]
pt [x86, IA-64]
group_mf [x86, IA-64]
io7= [HW] IO7 for Marvel based alpha systems
See comment before marvel_specify_io7 in
......
......@@ -28,6 +28,7 @@
#include <plat/board.h>
#include <plat/mcbsp.h>
#include <plat/mmc.h>
#include <plat/iommu.h>
#include <plat/dma.h>
#include <plat/omap_hwmod.h>
#include <plat/omap_device.h>
......@@ -211,9 +212,15 @@ static struct platform_device omap3isp_device = {
.resource = omap3isp_resources,
};
static struct omap_iommu_arch_data omap3_isp_iommu = {
.name = "isp",
};
int omap3_init_camera(struct isp_platform_data *pdata)
{
omap3isp_device.dev.platform_data = pdata;
omap3isp_device.dev.archdata.iommu = &omap3_isp_iommu;
return platform_device_register(&omap3isp_device);
}
......
......@@ -111,6 +111,32 @@ struct iommu_platform_data {
u32 da_end;
};
/**
* struct iommu_arch_data - omap iommu private data
* @name: name of the iommu device
* @iommu_dev: handle of the iommu device
*
* This is an omap iommu private data object, which binds an iommu user
* to its iommu device. This object should be placed at the iommu user's
* dev_archdata so generic IOMMU API can be used without having to
* utilize omap-specific plumbing anymore.
*/
struct omap_iommu_arch_data {
const char *name;
struct omap_iommu *iommu_dev;
};
/**
* dev_to_omap_iommu() - retrieves an omap iommu object from a user device
* @dev: iommu client device
*/
static inline struct omap_iommu *dev_to_omap_iommu(struct device *dev)
{
struct omap_iommu_arch_data *arch_data = dev->archdata.iommu;
return arch_data->iommu_dev;
}
/* IOMMU errors */
#define OMAP_IOMMU_ERR_TLB_MISS (1 << 0)
#define OMAP_IOMMU_ERR_TRANS_FAULT (1 << 1)
......@@ -163,8 +189,8 @@ extern int omap_iommu_set_isr(const char *name,
void *priv),
void *isr_priv);
extern void omap_iommu_save_ctx(struct omap_iommu *obj);
extern void omap_iommu_restore_ctx(struct omap_iommu *obj);
extern void omap_iommu_save_ctx(struct device *dev);
extern void omap_iommu_restore_ctx(struct device *dev);
extern int omap_install_iommu_arch(const struct iommu_functions *ops);
extern void omap_uninstall_iommu_arch(const struct iommu_functions *ops);
......@@ -176,6 +202,5 @@ extern ssize_t
omap_iommu_dump_ctx(struct omap_iommu *obj, char *buf, ssize_t len);
extern size_t
omap_dump_tlb_entries(struct omap_iommu *obj, char *buf, ssize_t len);
struct device *omap_find_iommu_device(const char *name);
#endif /* __MACH_IOMMU_H */
......@@ -72,18 +72,18 @@ struct iovm_struct {
#define IOVMF_DA_FIXED (1 << (4 + IOVMF_SW_SHIFT))
extern struct iovm_struct *omap_find_iovm_area(struct omap_iommu *obj, u32 da);
extern struct iovm_struct *omap_find_iovm_area(struct device *dev, u32 da);
extern u32
omap_iommu_vmap(struct iommu_domain *domain, struct omap_iommu *obj, u32 da,
omap_iommu_vmap(struct iommu_domain *domain, struct device *dev, u32 da,
const struct sg_table *sgt, u32 flags);
extern struct sg_table *omap_iommu_vunmap(struct iommu_domain *domain,
struct omap_iommu *obj, u32 da);
struct device *dev, u32 da);
extern u32
omap_iommu_vmalloc(struct iommu_domain *domain, struct omap_iommu *obj,
omap_iommu_vmalloc(struct iommu_domain *domain, struct device *dev,
u32 da, size_t bytes, u32 flags);
extern void
omap_iommu_vfree(struct iommu_domain *domain, struct omap_iommu *obj,
omap_iommu_vfree(struct iommu_domain *domain, struct device *dev,
const u32 da);
extern void *omap_da_to_va(struct omap_iommu *obj, u32 da);
extern void *omap_da_to_va(struct device *dev, u32 da);
#endif /* __IOMMU_MMAP_H */
......@@ -11,10 +11,12 @@ extern void no_iommu_init(void);
extern int force_iommu, no_iommu;
extern int iommu_pass_through;
extern int iommu_detected;
extern int iommu_group_mf;
#else
#define iommu_pass_through (0)
#define no_iommu (1)
#define iommu_detected (0)
#define iommu_group_mf (0)
#endif
extern void iommu_dma_init(void);
extern void machvec_init(const char *name);
......
......@@ -33,6 +33,7 @@ int force_iommu __read_mostly;
#endif
int iommu_pass_through;
int iommu_group_mf;
/* Dummy device used for NULL arguments (normally ISA). Better would
be probably a smaller DMA mask, but this is bug-to-bug compatible
......
......@@ -5,6 +5,7 @@ extern struct dma_map_ops nommu_dma_ops;
extern int force_iommu, no_iommu;
extern int iommu_detected;
extern int iommu_pass_through;
extern int iommu_group_mf;
/* 10 seconds */
#define DMAR_OPERATION_TIMEOUT ((cycles_t) tsc_khz*10*1000)
......
......@@ -45,6 +45,15 @@ int iommu_detected __read_mostly = 0;
*/
int iommu_pass_through __read_mostly;
/*
* Group multi-function PCI devices into a single device-group for the
* iommu_device_group interface. This tells the iommu driver to pretend
* it cannot distinguish between functions of a device, exposing only one
* group for the device. Useful for disallowing use of individual PCI
* functions from userspace drivers.
*/
int iommu_group_mf __read_mostly;
extern struct iommu_table_entry __iommu_table[], __iommu_table_end[];
/* Dummy device used for NULL arguments (normally ISA). */
......@@ -169,6 +178,8 @@ static __init int iommu_setup(char *p)
#endif
if (!strncmp(p, "pt", 2))
iommu_pass_through = 1;
if (!strncmp(p, "group_mf", 8))
iommu_group_mf = 1;
gart_parse_options(p);
......
......@@ -596,6 +596,13 @@ static int __devinit acpi_pci_root_add(struct acpi_device *device)
if (ACPI_SUCCESS(status)) {
dev_info(root->bus->bridge,
"ACPI _OSC control (0x%02x) granted\n", flags);
if (acpi_gbl_FADT.boot_flags & ACPI_FADT_NO_ASPM) {
/*
* We have ASPM control, but the FADT indicates
* that it's unsupported. Clear it.
*/
pcie_clear_aspm(root->bus);
}
} else {
dev_info(root->bus->bridge,
"ACPI _OSC request failed (%s), "
......
......@@ -34,7 +34,9 @@ config AMD_IOMMU
bool "AMD IOMMU support"
select SWIOTLB
select PCI_MSI
select PCI_IOV
select PCI_ATS
select PCI_PRI
select PCI_PASID
select IOMMU_API
depends on X86_64 && PCI && ACPI
---help---
......@@ -58,6 +60,15 @@ config AMD_IOMMU_STATS
information to userspace via debugfs.
If unsure, say N.
config AMD_IOMMU_V2
tristate "AMD IOMMU Version 2 driver (EXPERIMENTAL)"
depends on AMD_IOMMU && PROFILING && EXPERIMENTAL
select MMU_NOTIFIER
---help---
This option enables support for the AMD IOMMUv2 features of the IOMMU
hardware. Select this option if you want to use devices that support
the the PCI PRI and PASID interface.
# Intel IOMMU support
config DMAR_TABLE
bool
......
obj-$(CONFIG_IOMMU_API) += iommu.o
obj-$(CONFIG_MSM_IOMMU) += msm_iommu.o msm_iommu_dev.o
obj-$(CONFIG_AMD_IOMMU) += amd_iommu.o amd_iommu_init.o
obj-$(CONFIG_AMD_IOMMU_V2) += amd_iommu_v2.o
obj-$(CONFIG_DMAR_TABLE) += dmar.o
obj-$(CONFIG_INTEL_IOMMU) += iova.o intel-iommu.o
obj-$(CONFIG_IRQ_REMAP) += intr_remapping.o
......
......@@ -17,6 +17,7 @@
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#include <linux/ratelimit.h>
#include <linux/pci.h>
#include <linux/pci-ats.h>
#include <linux/bitmap.h>
......@@ -28,6 +29,8 @@
#include <linux/iommu.h>
#include <linux/delay.h>
#include <linux/amd-iommu.h>
#include <linux/notifier.h>
#include <linux/export.h>
#include <asm/msidef.h>
#include <asm/proto.h>
#include <asm/iommu.h>
......@@ -41,6 +44,24 @@
#define LOOP_TIMEOUT 100000
/*
* This bitmap is used to advertise the page sizes our hardware support
* to the IOMMU core, which will then use this information to split
* physically contiguous memory regions it is mapping into page sizes
* that we support.
*
* Traditionally the IOMMU core just handed us the mappings directly,
* after making sure the size is an order of a 4KiB page and that the
* mapping has natural alignment.
*
* To retain this behavior, we currently advertise that we support
* all page sizes that are an order of 4KiB.
*
* If at some point we'd like to utilize the IOMMU core's new behavior,
* we could change this to advertise the real page sizes we support.
*/
#define AMD_IOMMU_PGSIZES (~0xFFFUL)
static DEFINE_RWLOCK(amd_iommu_devtable_lock);
/* A list of preallocated protection domains */
......@@ -59,6 +80,9 @@ static struct protection_domain *pt_domain;
static struct iommu_ops amd_iommu_ops;
static ATOMIC_NOTIFIER_HEAD(ppr_notifier);
int amd_iommu_max_glx_val = -1;
/*
* general struct to manage commands send to an IOMMU
*/
......@@ -67,6 +91,7 @@ struct iommu_cmd {
};
static void update_domain(struct protection_domain *domain);
static int __init alloc_passthrough_domain(void);
/****************************************************************************
*
......@@ -147,6 +172,33 @@ static struct iommu_dev_data *get_dev_data(struct device *dev)
return dev->archdata.iommu;
}
static bool pci_iommuv2_capable(struct pci_dev *pdev)
{
static const int caps[] = {
PCI_EXT_CAP_ID_ATS,
PCI_EXT_CAP_ID_PRI,
PCI_EXT_CAP_ID_PASID,
};
int i, pos;
for (i = 0; i < 3; ++i) {
pos = pci_find_ext_capability(pdev, caps[i]);
if (pos == 0)
return false;
}
return true;
}
static bool pdev_pri_erratum(struct pci_dev *pdev, u32 erratum)
{
struct iommu_dev_data *dev_data;
dev_data = get_dev_data(&pdev->dev);
return dev_data->errata & (1 << erratum) ? true : false;
}
/*
* In this function the list of preallocated protection domains is traversed to
* find the domain for a specific device
......@@ -204,6 +256,7 @@ static bool check_device(struct device *dev)
static int iommu_init_device(struct device *dev)
{
struct pci_dev *pdev = to_pci_dev(dev);
struct iommu_dev_data *dev_data;
u16 alias;
......@@ -228,6 +281,13 @@ static int iommu_init_device(struct device *dev)
dev_data->alias_data = alias_data;
}
if (pci_iommuv2_capable(pdev)) {
struct amd_iommu *iommu;
iommu = amd_iommu_rlookup_table[dev_data->devid];
dev_data->iommu_v2 = iommu->is_iommu_v2;
}
dev->archdata.iommu = dev_data;
return 0;
......@@ -317,6 +377,11 @@ DECLARE_STATS_COUNTER(domain_flush_single);
DECLARE_STATS_COUNTER(domain_flush_all);
DECLARE_STATS_COUNTER(alloced_io_mem);
DECLARE_STATS_COUNTER(total_map_requests);
DECLARE_STATS_COUNTER(complete_ppr);
DECLARE_STATS_COUNTER(invalidate_iotlb);
DECLARE_STATS_COUNTER(invalidate_iotlb_all);
DECLARE_STATS_COUNTER(pri_requests);
static struct dentry *stats_dir;
static struct dentry *de_fflush;
......@@ -351,6 +416,10 @@ static void amd_iommu_stats_init(void)
amd_iommu_stats_add(&domain_flush_all);
amd_iommu_stats_add(&alloced_io_mem);
amd_iommu_stats_add(&total_map_requests);
amd_iommu_stats_add(&complete_ppr);
amd_iommu_stats_add(&invalidate_iotlb);
amd_iommu_stats_add(&invalidate_iotlb_all);
amd_iommu_stats_add(&pri_requests);
}
#endif
......@@ -365,8 +434,8 @@ static void dump_dte_entry(u16 devid)
{
int i;
for (i = 0; i < 8; ++i)
pr_err("AMD-Vi: DTE[%d]: %08x\n", i,
for (i = 0; i < 4; ++i)
pr_err("AMD-Vi: DTE[%d]: %016llx\n", i,
amd_iommu_dev_table[devid].data[i]);
}
......@@ -461,12 +530,84 @@ static void iommu_poll_events(struct amd_iommu *iommu)
spin_unlock_irqrestore(&iommu->lock, flags);
}
static void iommu_handle_ppr_entry(struct amd_iommu *iommu, u32 head)
{
struct amd_iommu_fault fault;
volatile u64 *raw;
int i;
INC_STATS_COUNTER(pri_requests);
raw = (u64 *)(iommu->ppr_log + head);
/*
* Hardware bug: Interrupt may arrive before the entry is written to
* memory. If this happens we need to wait for the entry to arrive.
*/
for (i = 0; i < LOOP_TIMEOUT; ++i) {
if (PPR_REQ_TYPE(raw[0]) != 0)
break;
udelay(1);
}
if (PPR_REQ_TYPE(raw[0]) != PPR_REQ_FAULT) {
pr_err_ratelimited("AMD-Vi: Unknown PPR request received\n");
return;
}
fault.address = raw[1];
fault.pasid = PPR_PASID(raw[0]);
fault.device_id = PPR_DEVID(raw[0]);
fault.tag = PPR_TAG(raw[0]);
fault.flags = PPR_FLAGS(raw[0]);
/*
* To detect the hardware bug we need to clear the entry
* to back to zero.
*/
raw[0] = raw[1] = 0;
atomic_notifier_call_chain(&ppr_notifier, 0, &fault);
}
static void iommu_poll_ppr_log(struct amd_iommu *iommu)
{
unsigned long flags;
u32 head, tail;
if (iommu->ppr_log == NULL)
return;
spin_lock_irqsave(&iommu->lock, flags);
head = readl(iommu->mmio_base + MMIO_PPR_HEAD_OFFSET);
tail = readl(iommu->mmio_base + MMIO_PPR_TAIL_OFFSET);
while (head != tail) {
/* Handle PPR entry */
iommu_handle_ppr_entry(iommu, head);
/* Update and refresh ring-buffer state*/
head = (head + PPR_ENTRY_SIZE) % PPR_LOG_SIZE;
writel(head, iommu->mmio_base + MMIO_PPR_HEAD_OFFSET);
tail = readl(iommu->mmio_base + MMIO_PPR_TAIL_OFFSET);
}
/* enable ppr interrupts again */
writel(MMIO_STATUS_PPR_INT_MASK, iommu->mmio_base + MMIO_STATUS_OFFSET);
spin_unlock_irqrestore(&iommu->lock, flags);
}
irqreturn_t amd_iommu_int_thread(int irq, void *data)
{
struct amd_iommu *iommu;
for_each_iommu(iommu)
for_each_iommu(iommu) {
iommu_poll_events(iommu);
iommu_poll_ppr_log(iommu);
}
return IRQ_HANDLED;
}
......@@ -595,6 +736,60 @@ static void build_inv_iotlb_pages(struct iommu_cmd *cmd, u16 devid, int qdep,
cmd->data[2] |= CMD_INV_IOMMU_PAGES_SIZE_MASK;
}
static void build_inv_iommu_pasid(struct iommu_cmd *cmd, u16 domid, int pasid,
u64 address, bool size)
{
memset(cmd, 0, sizeof(*cmd));
address &= ~(0xfffULL);
cmd->data[0] = pasid & PASID_MASK;
cmd->data[1] = domid;
cmd->data[2] = lower_32_bits(address);
cmd->data[3] = upper_32_bits(address);
cmd->data[2] |= CMD_INV_IOMMU_PAGES_PDE_MASK;
cmd->data[2] |= CMD_INV_IOMMU_PAGES_GN_MASK;
if (size)
cmd->data[2] |= CMD_INV_IOMMU_PAGES_SIZE_MASK;
CMD_SET_TYPE(cmd, CMD_INV_IOMMU_PAGES);
}
static void build_inv_iotlb_pasid(struct iommu_cmd *cmd, u16 devid, int pasid,
int qdep, u64 address, bool size)
{
memset(cmd, 0, sizeof(*cmd));
address &= ~(0xfffULL);
cmd->data[0] = devid;
cmd->data[0] |= (pasid & 0xff) << 16;
cmd->data[0] |= (qdep & 0xff) << 24;
cmd->data[1] = devid;
cmd->data[1] |= ((pasid >> 8) & 0xfff) << 16;
cmd->data[2] = lower_32_bits(address);
cmd->data[2] |= CMD_INV_IOMMU_PAGES_GN_MASK;
cmd->data[3] = upper_32_bits(address);
if (size)
cmd->data[2] |= CMD_INV_IOMMU_PAGES_SIZE_MASK;
CMD_SET_TYPE(cmd, CMD_INV_IOTLB_PAGES);
}
static void build_complete_ppr(struct iommu_cmd *cmd, u16 devid, int pasid,
int status, int tag, bool gn)
{
memset(cmd, 0, sizeof(*cmd));
cmd->data[0] = devid;
if (gn) {
cmd->data[1] = pasid & PASID_MASK;
cmd->data[2] = CMD_INV_IOMMU_PAGES_GN_MASK;
}
cmd->data[3] = tag & 0x1ff;
cmd->data[3] |= (status & PPR_STATUS_MASK) << PPR_STATUS_SHIFT;
CMD_SET_TYPE(cmd, CMD_COMPLETE_PPR);
}
static void build_inv_all(struct iommu_cmd *cmd)
{
memset(cmd, 0, sizeof(*cmd));
......@@ -1496,6 +1691,48 @@ static void free_pagetable(struct protection_domain *domain)
domain->pt_root = NULL;
}
static void free_gcr3_tbl_level1(u64 *tbl)
{
u64 *ptr;
int i;
for (i = 0; i < 512; ++i) {
if (!(tbl[i] & GCR3_VALID))
continue;
ptr = __va(tbl[i] & PAGE_MASK);
free_page((unsigned long)ptr);
}
}
static void free_gcr3_tbl_level2(u64 *tbl)
{
u64 *ptr;
int i;
for (i = 0; i < 512; ++i) {
if (!(tbl[i] & GCR3_VALID))
continue;
ptr = __va(tbl[i] & PAGE_MASK);
free_gcr3_tbl_level1(ptr);
}
}
static void free_gcr3_table(struct protection_domain *domain)
{
if (domain->glx == 2)
free_gcr3_tbl_level2(domain->gcr3_tbl);
else if (domain->glx == 1)
free_gcr3_tbl_level1(domain->gcr3_tbl);
else if (domain->glx != 0)
BUG();
free_page((unsigned long)domain->gcr3_tbl);
}
/*
* Free a domain, only used if something went wrong in the
* allocation path and we need to free an already allocated page table
......@@ -1582,20 +1819,52 @@ static bool dma_ops_domain(struct protection_domain *domain)
static void set_dte_entry(u16 devid, struct protection_domain *domain, bool ats)
{
u64 pte_root = virt_to_phys(domain->pt_root);
u32 flags = 0;
u64 pte_root = 0;
u64 flags = 0;
if (domain->mode != PAGE_MODE_NONE)
pte_root = virt_to_phys(domain->pt_root);
pte_root |= (domain->mode & DEV_ENTRY_MODE_MASK)
<< DEV_ENTRY_MODE_SHIFT;
pte_root |= IOMMU_PTE_IR | IOMMU_PTE_IW | IOMMU_PTE_P | IOMMU_PTE_TV;
flags = amd_iommu_dev_table[devid].data[1];
if (ats)
flags |= DTE_FLAG_IOTLB;
amd_iommu_dev_table[devid].data[3] |= flags;
amd_iommu_dev_table[devid].data[2] = domain->id;
amd_iommu_dev_table[devid].data[1] = upper_32_bits(pte_root);
amd_iommu_dev_table[devid].data[0] = lower_32_bits(pte_root);
if (domain->flags & PD_IOMMUV2_MASK) {
u64 gcr3 = __pa(domain->gcr3_tbl);
u64 glx = domain->glx;
u64 tmp;
pte_root |= DTE_FLAG_GV;
pte_root |= (glx & DTE_GLX_MASK) << DTE_GLX_SHIFT;
/* First mask out possible old values for GCR3 table */
tmp = DTE_GCR3_VAL_B(~0ULL) << DTE_GCR3_SHIFT_B;
flags &= ~tmp;
tmp = DTE_GCR3_VAL_C(~0ULL) << DTE_GCR3_SHIFT_C;
flags &= ~tmp;
/* Encode GCR3 table into DTE */
tmp = DTE_GCR3_VAL_A(gcr3) << DTE_GCR3_SHIFT_A;
pte_root |= tmp;
tmp = DTE_GCR3_VAL_B(gcr3) << DTE_GCR3_SHIFT_B;
flags |= tmp;
tmp = DTE_GCR3_VAL_C(gcr3) << DTE_GCR3_SHIFT_C;
flags |= tmp;
}
flags &= ~(0xffffUL);
flags |= domain->id;
amd_iommu_dev_table[devid].data[1] = flags;
amd_iommu_dev_table[devid].data[0] = pte_root;
}
static void clear_dte_entry(u16 devid)
......@@ -1603,7 +1872,6 @@ static void clear_dte_entry(u16 devid)
/* remove entry from the device table seen by the hardware */
amd_iommu_dev_table[devid].data[0] = IOMMU_PTE_P | IOMMU_PTE_TV;
amd_iommu_dev_table[devid].data[1] = 0;
amd_iommu_dev_table[devid].data[2] = 0;
amd_iommu_apply_erratum_63(devid);
}
......@@ -1696,6 +1964,93 @@ static int __attach_device(struct iommu_dev_data *dev_data,
return ret;
}
static void pdev_iommuv2_disable(struct pci_dev *pdev)
{
pci_disable_ats(pdev);
pci_disable_pri(pdev);
pci_disable_pasid(pdev);
}
/* FIXME: Change generic reset-function to do the same */
static int pri_reset_while_enabled(struct pci_dev *pdev)
{
u16 control;
int pos;
pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PRI);
if (!pos)
return -EINVAL;
pci_read_config_word(pdev, pos + PCI_PRI_CTRL, &control);
control |= PCI_PRI_CTRL_RESET;
pci_write_config_word(pdev, pos + PCI_PRI_CTRL, control);
return 0;
}
static int pdev_iommuv2_enable(struct pci_dev *pdev)
{
bool reset_enable;
int reqs, ret;
/* FIXME: Hardcode number of outstanding requests for now */
reqs = 32;
if (pdev_pri_erratum(pdev, AMD_PRI_DEV_ERRATUM_LIMIT_REQ_ONE))
reqs = 1;
reset_enable = pdev_pri_erratum(pdev, AMD_PRI_DEV_ERRATUM_ENABLE_RESET);
/* Only allow access to user-accessible pages */
ret = pci_enable_pasid(pdev, 0);
if (ret)
goto out_err;
/* First reset the PRI state of the device */
ret = pci_reset_pri(pdev);
if (ret)
goto out_err;
/* Enable PRI */
ret = pci_enable_pri(pdev, reqs);
if (ret)
goto out_err;
if (reset_enable) {
ret = pri_reset_while_enabled(pdev);
if (ret)
goto out_err;
}
ret = pci_enable_ats(pdev, PAGE_SHIFT);
if (ret)
goto out_err;
return 0;
out_err:
pci_disable_pri(pdev);
pci_disable_pasid(pdev);
return ret;
}
/* FIXME: Move this to PCI code */
#define PCI_PRI_TLP_OFF (1 << 2)
bool pci_pri_tlp_required(struct pci_dev *pdev)
{
u16 control;
int pos;
pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PRI);
if (!pos)
return false;
pci_read_config_word(pdev, pos + PCI_PRI_CTRL, &control);
return (control & PCI_PRI_TLP_OFF) ? true : false;
}
/*
* If a device is not yet associated with a domain, this function does
* assigns it visible for the hardware
......@@ -1710,7 +2065,18 @@ static int attach_device(struct device *dev,
dev_data = get_dev_data(dev);
if (amd_iommu_iotlb_sup && pci_enable_ats(pdev, PAGE_SHIFT) == 0) {
if (domain->flags & PD_IOMMUV2_MASK) {
if (!dev_data->iommu_v2 || !dev_data->passthrough)
return -EINVAL;
if (pdev_iommuv2_enable(pdev) != 0)
return -EINVAL;
dev_data->ats.enabled = true;
dev_data->ats.qdep = pci_ats_queue_depth(pdev);
dev_data->pri_tlp = pci_pri_tlp_required(pdev);
} else if (amd_iommu_iotlb_sup &&
pci_enable_ats(pdev, PAGE_SHIFT) == 0) {
dev_data->ats.enabled = true;
dev_data->ats.qdep = pci_ats_queue_depth(pdev);
}
......@@ -1760,7 +2126,7 @@ static void __detach_device(struct iommu_dev_data *dev_data)
* passthrough domain if it is detached from any other domain.
* Make sure we can deassign from the pt_domain itself.
*/
if (iommu_pass_through &&
if (dev_data->passthrough &&
(dev_data->domain == NULL && domain != pt_domain))
__attach_device(dev_data, pt_domain);
}
......@@ -1770,20 +2136,24 @@ static void __detach_device(struct iommu_dev_data *dev_data)
*/
static void detach_device(struct device *dev)
{
struct protection_domain *domain;
struct iommu_dev_data *dev_data;
unsigned long flags;
dev_data = get_dev_data(dev);
domain = dev_data->domain;
/* lock device table */
write_lock_irqsave(&amd_iommu_devtable_lock, flags);
__detach_device(dev_data);
write_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
if (dev_data->ats.enabled) {
if (domain->flags & PD_IOMMUV2_MASK)
pdev_iommuv2_disable(to_pci_dev(dev));
else if (dev_data->ats.enabled)
pci_disable_ats(to_pci_dev(dev));
dev_data->ats.enabled = false;
}
dev_data->ats.enabled = false;
}
/*
......@@ -1818,18 +2188,20 @@ static struct protection_domain *domain_for_device(struct device *dev)
static int device_change_notifier(struct notifier_block *nb,
unsigned long action, void *data)
{
struct device *dev = data;
u16 devid;
struct protection_domain *domain;
struct dma_ops_domain *dma_domain;
struct protection_domain *domain;
struct iommu_dev_data *dev_data;
struct device *dev = data;
struct amd_iommu *iommu;
unsigned long flags;
u16 devid;
if (!check_device(dev))
return 0;
devid = get_device_id(dev);
iommu = amd_iommu_rlookup_table[devid];
devid = get_device_id(dev);
iommu = amd_iommu_rlookup_table[devid];
dev_data = get_dev_data(dev);
switch (action) {
case BUS_NOTIFY_UNBOUND_DRIVER:
......@@ -1838,7 +2210,7 @@ static int device_change_notifier(struct notifier_block *nb,
if (!domain)
goto out;
if (iommu_pass_through)
if (dev_data->passthrough)
break;
detach_device(dev);
break;
......@@ -2434,8 +2806,9 @@ static int amd_iommu_dma_supported(struct device *dev, u64 mask)
*/
static void prealloc_protection_domains(void)
{
struct pci_dev *dev = NULL;
struct iommu_dev_data *dev_data;
struct dma_ops_domain *dma_dom;
struct pci_dev *dev = NULL;
u16 devid;
for_each_pci_dev(dev) {
......@@ -2444,6 +2817,16 @@ static void prealloc_protection_domains(void)
if (!check_device(&dev->dev))
continue;
dev_data = get_dev_data(&dev->dev);
if (!amd_iommu_force_isolation && dev_data->iommu_v2) {
/* Make sure passthrough domain is allocated */
alloc_passthrough_domain();
dev_data->passthrough = true;
attach_device(&dev->dev, pt_domain);
pr_info("AMD-Vi: Using passthough domain for device %s\n",
dev_name(&dev->dev));
}
/* Is there already any domain for it? */
if (domain_for_device(&dev->dev))
continue;
......@@ -2474,6 +2857,7 @@ static struct dma_map_ops amd_iommu_dma_ops = {
static unsigned device_dma_ops_init(void)
{
struct iommu_dev_data *dev_data;
struct pci_dev *pdev = NULL;
unsigned unhandled = 0;
......@@ -2483,7 +2867,12 @@ static unsigned device_dma_ops_init(void)
continue;
}
pdev->dev.archdata.dma_ops = &amd_iommu_dma_ops;
dev_data = get_dev_data(&pdev->dev);
if (!dev_data->passthrough)
pdev->dev.archdata.dma_ops = &amd_iommu_dma_ops;
else
pdev->dev.archdata.dma_ops = &nommu_dma_ops;
}
return unhandled;
......@@ -2610,6 +2999,20 @@ static struct protection_domain *protection_domain_alloc(void)
return NULL;
}
static int __init alloc_passthrough_domain(void)
{
if (pt_domain != NULL)
return 0;
/* allocate passthrough domain */
pt_domain = protection_domain_alloc();
if (!pt_domain)
return -ENOMEM;
pt_domain->mode = PAGE_MODE_NONE;
return 0;
}
static int amd_iommu_domain_init(struct iommu_domain *dom)
{
struct protection_domain *domain;
......@@ -2623,6 +3026,8 @@ static int amd_iommu_domain_init(struct iommu_domain *dom)
if (!domain->pt_root)
goto out_free;
domain->iommu_domain = dom;
dom->priv = domain;
return 0;
......@@ -2645,7 +3050,11 @@ static void amd_iommu_domain_destroy(struct iommu_domain *dom)
BUG_ON(domain->dev_cnt != 0);
free_pagetable(domain);
if (domain->mode != PAGE_MODE_NONE)
free_pagetable(domain);
if (domain->flags & PD_IOMMUV2_MASK)
free_gcr3_table(domain);
protection_domain_free(domain);
......@@ -2702,13 +3111,15 @@ static int amd_iommu_attach_device(struct iommu_domain *dom,
}
static int amd_iommu_map(struct iommu_domain *dom, unsigned long iova,
phys_addr_t paddr, int gfp_order, int iommu_prot)
phys_addr_t paddr, size_t page_size, int iommu_prot)
{
unsigned long page_size = 0x1000UL << gfp_order;
struct protection_domain *domain = dom->priv;
int prot = 0;
int ret;
if (domain->mode == PAGE_MODE_NONE)
return -EINVAL;
if (iommu_prot & IOMMU_READ)
prot |= IOMMU_PROT_IR;
if (iommu_prot & IOMMU_WRITE)
......@@ -2721,13 +3132,14 @@ static int amd_iommu_map(struct iommu_domain *dom, unsigned long iova,
return ret;
}
static int amd_iommu_unmap(struct iommu_domain *dom, unsigned long iova,
int gfp_order)
static size_t amd_iommu_unmap(struct iommu_domain *dom, unsigned long iova,
size_t page_size)
{
struct protection_domain *domain = dom->priv;
unsigned long page_size, unmap_size;
size_t unmap_size;
page_size = 0x1000UL << gfp_order;
if (domain->mode == PAGE_MODE_NONE)
return -EINVAL;
mutex_lock(&domain->api_lock);
unmap_size = iommu_unmap_page(domain, iova, page_size);
......@@ -2735,7 +3147,7 @@ static int amd_iommu_unmap(struct iommu_domain *dom, unsigned long iova,
domain_flush_tlb_pde(domain);
return get_order(unmap_size);
return unmap_size;
}
static phys_addr_t amd_iommu_iova_to_phys(struct iommu_domain *dom,
......@@ -2746,6 +3158,9 @@ static phys_addr_t amd_iommu_iova_to_phys(struct iommu_domain *dom,
phys_addr_t paddr;
u64 *pte, __pte;
if (domain->mode == PAGE_MODE_NONE)
return iova;
pte = fetch_pte(domain, iova);
if (!pte || !IOMMU_PTE_PRESENT(*pte))
......@@ -2773,6 +3188,26 @@ static int amd_iommu_domain_has_cap(struct iommu_domain *domain,
return 0;
}
static int amd_iommu_device_group(struct device *dev, unsigned int *groupid)
{
struct iommu_dev_data *dev_data = dev->archdata.iommu;
struct pci_dev *pdev = to_pci_dev(dev);
u16 devid;
if (!dev_data)
return -ENODEV;
if (pdev->is_virtfn || !iommu_group_mf)
devid = dev_data->devid;
else
devid = calc_devid(pdev->bus->number,
PCI_DEVFN(PCI_SLOT(pdev->devfn), 0));
*groupid = amd_iommu_alias_table[devid];
return 0;
}
static struct iommu_ops amd_iommu_ops = {
.domain_init = amd_iommu_domain_init,
.domain_destroy = amd_iommu_domain_destroy,
......@@ -2782,6 +3217,8 @@ static struct iommu_ops amd_iommu_ops = {
.unmap = amd_iommu_unmap,
.iova_to_phys = amd_iommu_iova_to_phys,
.domain_has_cap = amd_iommu_domain_has_cap,
.device_group = amd_iommu_device_group,
.pgsize_bitmap = AMD_IOMMU_PGSIZES,
};
/*****************************************************************************
......@@ -2796,21 +3233,23 @@ static struct iommu_ops amd_iommu_ops = {
int __init amd_iommu_init_passthrough(void)
{
struct amd_iommu *iommu;
struct iommu_dev_data *dev_data;
struct pci_dev *dev = NULL;
struct amd_iommu *iommu;
u16 devid;
int ret;
/* allocate passthrough domain */
pt_domain = protection_domain_alloc();
if (!pt_domain)
return -ENOMEM;
pt_domain->mode |= PAGE_MODE_NONE;
ret = alloc_passthrough_domain();
if (ret)
return ret;
for_each_pci_dev(dev) {
if (!check_device(&dev->dev))
continue;
dev_data = get_dev_data(&dev->dev);
dev_data->passthrough = true;
devid = get_device_id(&dev->dev);
iommu = amd_iommu_rlookup_table[devid];
......@@ -2820,7 +3259,375 @@ int __init amd_iommu_init_passthrough(void)
attach_device(&dev->dev, pt_domain);
}
amd_iommu_stats_init();
pr_info("AMD-Vi: Initialized for Passthrough Mode\n");
return 0;
}
/* IOMMUv2 specific functions */
int amd_iommu_register_ppr_notifier(struct notifier_block *nb)
{
return atomic_notifier_chain_register(&ppr_notifier, nb);
}
EXPORT_SYMBOL(amd_iommu_register_ppr_notifier);
int amd_iommu_unregister_ppr_notifier(struct notifier_block *nb)
{
return atomic_notifier_chain_unregister(&ppr_notifier, nb);
}
EXPORT_SYMBOL(amd_iommu_unregister_ppr_notifier);
void amd_iommu_domain_direct_map(struct iommu_domain *dom)
{
struct protection_domain *domain = dom->priv;
unsigned long flags;
spin_lock_irqsave(&domain->lock, flags);
/* Update data structure */
domain->mode = PAGE_MODE_NONE;
domain->updated = true;
/* Make changes visible to IOMMUs */
update_domain(domain);
/* Page-table is not visible to IOMMU anymore, so free it */
free_pagetable(domain);
spin_unlock_irqrestore(&domain->lock, flags);
}
EXPORT_SYMBOL(amd_iommu_domain_direct_map);
int amd_iommu_domain_enable_v2(struct iommu_domain *dom, int pasids)
{
struct protection_domain *domain = dom->priv;
unsigned long flags;
int levels, ret;
if (pasids <= 0 || pasids > (PASID_MASK + 1))
return -EINVAL;
/* Number of GCR3 table levels required */
for (levels = 0; (pasids - 1) & ~0x1ff; pasids >>= 9)
levels += 1;
if (levels > amd_iommu_max_glx_val)
return -EINVAL;
spin_lock_irqsave(&domain->lock, flags);
/*
* Save us all sanity checks whether devices already in the
* domain support IOMMUv2. Just force that the domain has no
* devices attached when it is switched into IOMMUv2 mode.
*/
ret = -EBUSY;
if (domain->dev_cnt > 0 || domain->flags & PD_IOMMUV2_MASK)
goto out;
ret = -ENOMEM;
domain->gcr3_tbl = (void *)get_zeroed_page(GFP_ATOMIC);
if (domain->gcr3_tbl == NULL)
goto out;
domain->glx = levels;
domain->flags |= PD_IOMMUV2_MASK;
domain->updated = true;
update_domain(domain);
ret = 0;
out:
spin_unlock_irqrestore(&domain->lock, flags);
return ret;
}
EXPORT_SYMBOL(amd_iommu_domain_enable_v2);
static int __flush_pasid(struct protection_domain *domain, int pasid,
u64 address, bool size)
{
struct iommu_dev_data *dev_data;
struct iommu_cmd cmd;
int i, ret;
if (!(domain->flags & PD_IOMMUV2_MASK))
return -EINVAL;
build_inv_iommu_pasid(&cmd, domain->id, pasid, address, size);
/*
* IOMMU TLB needs to be flushed before Device TLB to
* prevent device TLB refill from IOMMU TLB
*/
for (i = 0; i < amd_iommus_present; ++i) {
if (domain->dev_iommu[i] == 0)
continue;
ret = iommu_queue_command(amd_iommus[i], &cmd);
if (ret != 0)
goto out;
}
/* Wait until IOMMU TLB flushes are complete */
domain_flush_complete(domain);
/* Now flush device TLBs */
list_for_each_entry(dev_data, &domain->dev_list, list) {
struct amd_iommu *iommu;
int qdep;
BUG_ON(!dev_data->ats.enabled);
qdep = dev_data->ats.qdep;
iommu = amd_iommu_rlookup_table[dev_data->devid];
build_inv_iotlb_pasid(&cmd, dev_data->devid, pasid,
qdep, address, size);
ret = iommu_queue_command(iommu, &cmd);
if (ret != 0)
goto out;
}
/* Wait until all device TLBs are flushed */
domain_flush_complete(domain);
ret = 0;
out:
return ret;
}
static int __amd_iommu_flush_page(struct protection_domain *domain, int pasid,
u64 address)
{
INC_STATS_COUNTER(invalidate_iotlb);
return __flush_pasid(domain, pasid, address, false);
}
int amd_iommu_flush_page(struct iommu_domain *dom, int pasid,
u64 address)
{
struct protection_domain *domain = dom->priv;
unsigned long flags;
int ret;
spin_lock_irqsave(&domain->lock, flags);
ret = __amd_iommu_flush_page(domain, pasid, address);
spin_unlock_irqrestore(&domain->lock, flags);
return ret;
}
EXPORT_SYMBOL(amd_iommu_flush_page);
static int __amd_iommu_flush_tlb(struct protection_domain *domain, int pasid)
{
INC_STATS_COUNTER(invalidate_iotlb_all);
return __flush_pasid(domain, pasid, CMD_INV_IOMMU_ALL_PAGES_ADDRESS,
true);
}
int amd_iommu_flush_tlb(struct iommu_domain *dom, int pasid)
{
struct protection_domain *domain = dom->priv;
unsigned long flags;
int ret;
spin_lock_irqsave(&domain->lock, flags);
ret = __amd_iommu_flush_tlb(domain, pasid);
spin_unlock_irqrestore(&domain->lock, flags);
return ret;
}
EXPORT_SYMBOL(amd_iommu_flush_tlb);
static u64 *__get_gcr3_pte(u64 *root, int level, int pasid, bool alloc)
{
int index;
u64 *pte;
while (true) {
index = (pasid >> (9 * level)) & 0x1ff;
pte = &root[index];
if (level == 0)
break;
if (!(*pte & GCR3_VALID)) {
if (!alloc)
return NULL;
root = (void *)get_zeroed_page(GFP_ATOMIC);
if (root == NULL)
return NULL;
*pte = __pa(root) | GCR3_VALID;
}
root = __va(*pte & PAGE_MASK);
level -= 1;
}
return pte;
}
static int __set_gcr3(struct protection_domain *domain, int pasid,
unsigned long cr3)
{
u64 *pte;
if (domain->mode != PAGE_MODE_NONE)
return -EINVAL;
pte = __get_gcr3_pte(domain->gcr3_tbl, domain->glx, pasid, true);
if (pte == NULL)
return -ENOMEM;
*pte = (cr3 & PAGE_MASK) | GCR3_VALID;
return __amd_iommu_flush_tlb(domain, pasid);
}
static int __clear_gcr3(struct protection_domain *domain, int pasid)
{
u64 *pte;
if (domain->mode != PAGE_MODE_NONE)
return -EINVAL;
pte = __get_gcr3_pte(domain->gcr3_tbl, domain->glx, pasid, false);
if (pte == NULL)
return 0;
*pte = 0;
return __amd_iommu_flush_tlb(domain, pasid);
}
int amd_iommu_domain_set_gcr3(struct iommu_domain *dom, int pasid,
unsigned long cr3)
{
struct protection_domain *domain = dom->priv;
unsigned long flags;
int ret;
spin_lock_irqsave(&domain->lock, flags);
ret = __set_gcr3(domain, pasid, cr3);
spin_unlock_irqrestore(&domain->lock, flags);
return ret;
}
EXPORT_SYMBOL(amd_iommu_domain_set_gcr3);
int amd_iommu_domain_clear_gcr3(struct iommu_domain *dom, int pasid)
{
struct protection_domain *domain = dom->priv;
unsigned long flags;
int ret;
spin_lock_irqsave(&domain->lock, flags);
ret = __clear_gcr3(domain, pasid);
spin_unlock_irqrestore(&domain->lock, flags);
return ret;
}
EXPORT_SYMBOL(amd_iommu_domain_clear_gcr3);
int amd_iommu_complete_ppr(struct pci_dev *pdev, int pasid,
int status, int tag)
{
struct iommu_dev_data *dev_data;
struct amd_iommu *iommu;
struct iommu_cmd cmd;
INC_STATS_COUNTER(complete_ppr);
dev_data = get_dev_data(&pdev->dev);
iommu = amd_iommu_rlookup_table[dev_data->devid];
build_complete_ppr(&cmd, dev_data->devid, pasid, status,
tag, dev_data->pri_tlp);
return iommu_queue_command(iommu, &cmd);
}
EXPORT_SYMBOL(amd_iommu_complete_ppr);
struct iommu_domain *amd_iommu_get_v2_domain(struct pci_dev *pdev)
{
struct protection_domain *domain;
domain = get_domain(&pdev->dev);
if (IS_ERR(domain))
return NULL;
/* Only return IOMMUv2 domains */
if (!(domain->flags & PD_IOMMUV2_MASK))
return NULL;
return domain->iommu_domain;
}
EXPORT_SYMBOL(amd_iommu_get_v2_domain);
void amd_iommu_enable_device_erratum(struct pci_dev *pdev, u32 erratum)
{
struct iommu_dev_data *dev_data;
if (!amd_iommu_v2_supported())
return;
dev_data = get_dev_data(&pdev->dev);
dev_data->errata |= (1 << erratum);
}
EXPORT_SYMBOL(amd_iommu_enable_device_erratum);
int amd_iommu_device_info(struct pci_dev *pdev,
struct amd_iommu_device_info *info)
{
int max_pasids;
int pos;
if (pdev == NULL || info == NULL)
return -EINVAL;
if (!amd_iommu_v2_supported())
return -EINVAL;
memset(info, 0, sizeof(*info));
pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_ATS);
if (pos)
info->flags |= AMD_IOMMU_DEVICE_FLAG_ATS_SUP;
pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PRI);
if (pos)
info->flags |= AMD_IOMMU_DEVICE_FLAG_PRI_SUP;
pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PASID);
if (pos) {
int features;
max_pasids = 1 << (9 * (amd_iommu_max_glx_val + 1));
max_pasids = min(max_pasids, (1 << 20));
info->flags |= AMD_IOMMU_DEVICE_FLAG_PASID_SUP;
info->max_pasids = min(pci_max_pasids(pdev), max_pasids);
features = pci_pasid_features(pdev);
if (features & PCI_PASID_CAP_EXEC)
info->flags |= AMD_IOMMU_DEVICE_FLAG_EXEC_SUP;
if (features & PCI_PASID_CAP_PRIV)
info->flags |= AMD_IOMMU_DEVICE_FLAG_PRIV_SUP;
}
return 0;
}
EXPORT_SYMBOL(amd_iommu_device_info);
......@@ -25,6 +25,7 @@
#include <linux/interrupt.h>
#include <linux/msi.h>
#include <linux/amd-iommu.h>
#include <linux/export.h>
#include <asm/pci-direct.h>
#include <asm/iommu.h>
#include <asm/gart.h>
......@@ -141,6 +142,12 @@ int amd_iommus_present;
bool amd_iommu_np_cache __read_mostly;
bool amd_iommu_iotlb_sup __read_mostly = true;
u32 amd_iommu_max_pasids __read_mostly = ~0;
bool amd_iommu_v2_present __read_mostly;
bool amd_iommu_force_isolation __read_mostly;
/*
* The ACPI table parsing functions set this variable on an error
*/
......@@ -299,6 +306,16 @@ static void iommu_feature_disable(struct amd_iommu *iommu, u8 bit)
writel(ctrl, iommu->mmio_base + MMIO_CONTROL_OFFSET);
}
static void iommu_set_inv_tlb_timeout(struct amd_iommu *iommu, int timeout)
{
u32 ctrl;
ctrl = readl(iommu->mmio_base + MMIO_CONTROL_OFFSET);
ctrl &= ~CTRL_INV_TO_MASK;
ctrl |= (timeout << CONTROL_INV_TIMEOUT) & CTRL_INV_TO_MASK;
writel(ctrl, iommu->mmio_base + MMIO_CONTROL_OFFSET);
}
/* Function to enable the hardware */
static void iommu_enable(struct amd_iommu *iommu)
{
......@@ -581,21 +598,69 @@ static void __init free_event_buffer(struct amd_iommu *iommu)
free_pages((unsigned long)iommu->evt_buf, get_order(EVT_BUFFER_SIZE));
}
/* allocates the memory where the IOMMU will log its events to */
static u8 * __init alloc_ppr_log(struct amd_iommu *iommu)
{
iommu->ppr_log = (u8 *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
get_order(PPR_LOG_SIZE));
if (iommu->ppr_log == NULL)
return NULL;
return iommu->ppr_log;
}
static void iommu_enable_ppr_log(struct amd_iommu *iommu)
{
u64 entry;
if (iommu->ppr_log == NULL)
return;
entry = (u64)virt_to_phys(iommu->ppr_log) | PPR_LOG_SIZE_512;
memcpy_toio(iommu->mmio_base + MMIO_PPR_LOG_OFFSET,
&entry, sizeof(entry));
/* set head and tail to zero manually */
writel(0x00, iommu->mmio_base + MMIO_PPR_HEAD_OFFSET);
writel(0x00, iommu->mmio_base + MMIO_PPR_TAIL_OFFSET);
iommu_feature_enable(iommu, CONTROL_PPFLOG_EN);
iommu_feature_enable(iommu, CONTROL_PPR_EN);
}
static void __init free_ppr_log(struct amd_iommu *iommu)
{
if (iommu->ppr_log == NULL)
return;
free_pages((unsigned long)iommu->ppr_log, get_order(PPR_LOG_SIZE));
}
static void iommu_enable_gt(struct amd_iommu *iommu)
{
if (!iommu_feature(iommu, FEATURE_GT))
return;
iommu_feature_enable(iommu, CONTROL_GT_EN);
}
/* sets a specific bit in the device table entry. */
static void set_dev_entry_bit(u16 devid, u8 bit)
{
int i = (bit >> 5) & 0x07;
int _bit = bit & 0x1f;
int i = (bit >> 6) & 0x03;
int _bit = bit & 0x3f;
amd_iommu_dev_table[devid].data[i] |= (1 << _bit);
amd_iommu_dev_table[devid].data[i] |= (1UL << _bit);
}
static int get_dev_entry_bit(u16 devid, u8 bit)
{
int i = (bit >> 5) & 0x07;
int _bit = bit & 0x1f;
int i = (bit >> 6) & 0x03;
int _bit = bit & 0x3f;
return (amd_iommu_dev_table[devid].data[i] & (1 << _bit)) >> _bit;
return (amd_iommu_dev_table[devid].data[i] & (1UL << _bit)) >> _bit;
}
......@@ -699,6 +764,32 @@ static void __init init_iommu_from_pci(struct amd_iommu *iommu)
iommu->features = ((u64)high << 32) | low;
if (iommu_feature(iommu, FEATURE_GT)) {
int glxval;
u32 pasids;
u64 shift;
shift = iommu->features & FEATURE_PASID_MASK;
shift >>= FEATURE_PASID_SHIFT;
pasids = (1 << shift);
amd_iommu_max_pasids = min(amd_iommu_max_pasids, pasids);
glxval = iommu->features & FEATURE_GLXVAL_MASK;
glxval >>= FEATURE_GLXVAL_SHIFT;
if (amd_iommu_max_glx_val == -1)
amd_iommu_max_glx_val = glxval;
else
amd_iommu_max_glx_val = min(amd_iommu_max_glx_val, glxval);
}
if (iommu_feature(iommu, FEATURE_GT) &&
iommu_feature(iommu, FEATURE_PPR)) {
iommu->is_iommu_v2 = true;
amd_iommu_v2_present = true;
}
if (!is_rd890_iommu(iommu->dev))
return;
......@@ -901,6 +992,7 @@ static void __init free_iommu_one(struct amd_iommu *iommu)
{
free_command_buffer(iommu);
free_event_buffer(iommu);
free_ppr_log(iommu);
iommu_unmap_mmio_space(iommu);
}
......@@ -964,6 +1056,12 @@ static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h)
init_iommu_from_acpi(iommu, h);
init_iommu_devices(iommu);
if (iommu_feature(iommu, FEATURE_PPR)) {
iommu->ppr_log = alloc_ppr_log(iommu);
if (!iommu->ppr_log)
return -ENOMEM;
}
if (iommu->cap & (1UL << IOMMU_CAP_NPCACHE))
amd_iommu_np_cache = true;
......@@ -1050,6 +1148,9 @@ static int iommu_setup_msi(struct amd_iommu *iommu)
iommu->int_enabled = true;
iommu_feature_enable(iommu, CONTROL_EVT_INT_EN);
if (iommu->ppr_log != NULL)
iommu_feature_enable(iommu, CONTROL_PPFINT_EN);
return 0;
}
......@@ -1209,6 +1310,9 @@ static void iommu_init_flags(struct amd_iommu *iommu)
* make IOMMU memory accesses cache coherent
*/
iommu_feature_enable(iommu, CONTROL_COHERENT_EN);
/* Set IOTLB invalidation timeout to 1s */
iommu_set_inv_tlb_timeout(iommu, CTRL_INV_TO_1S);
}
static void iommu_apply_resume_quirks(struct amd_iommu *iommu)
......@@ -1274,6 +1378,8 @@ static void enable_iommus(void)
iommu_set_device_table(iommu);
iommu_enable_command_buffer(iommu);
iommu_enable_event_buffer(iommu);
iommu_enable_ppr_log(iommu);
iommu_enable_gt(iommu);
iommu_set_exclusion_range(iommu);
iommu_init_msi(iommu);
iommu_enable(iommu);
......@@ -1303,13 +1409,6 @@ static void amd_iommu_resume(void)
/* re-load the hardware */
enable_iommus();
/*
* we have to flush after the IOMMUs are enabled because a
* disabled IOMMU will never execute the commands we send
*/
for_each_iommu(iommu)
iommu_flush_all_caches(iommu);
}
static int amd_iommu_suspend(void)
......@@ -1560,6 +1659,8 @@ static int __init parse_amd_iommu_options(char *str)
amd_iommu_unmap_flush = true;
if (strncmp(str, "off", 3) == 0)
amd_iommu_disabled = true;
if (strncmp(str, "force_isolation", 15) == 0)
amd_iommu_force_isolation = true;
}
return 1;
......@@ -1572,3 +1673,9 @@ IOMMU_INIT_FINISH(amd_iommu_detect,
gart_iommu_hole_init,
0,
0);
bool amd_iommu_v2_supported(void)
{
return amd_iommu_v2_present;
}
EXPORT_SYMBOL(amd_iommu_v2_supported);
......@@ -31,6 +31,30 @@ extern int amd_iommu_init_devices(void);
extern void amd_iommu_uninit_devices(void);
extern void amd_iommu_init_notifier(void);
extern void amd_iommu_init_api(void);
/* IOMMUv2 specific functions */
struct iommu_domain;
extern bool amd_iommu_v2_supported(void);
extern int amd_iommu_register_ppr_notifier(struct notifier_block *nb);
extern int amd_iommu_unregister_ppr_notifier(struct notifier_block *nb);
extern void amd_iommu_domain_direct_map(struct iommu_domain *dom);
extern int amd_iommu_domain_enable_v2(struct iommu_domain *dom, int pasids);
extern int amd_iommu_flush_page(struct iommu_domain *dom, int pasid,
u64 address);
extern int amd_iommu_flush_tlb(struct iommu_domain *dom, int pasid);
extern int amd_iommu_domain_set_gcr3(struct iommu_domain *dom, int pasid,
unsigned long cr3);
extern int amd_iommu_domain_clear_gcr3(struct iommu_domain *dom, int pasid);
extern struct iommu_domain *amd_iommu_get_v2_domain(struct pci_dev *pdev);
#define PPR_SUCCESS 0x0
#define PPR_INVALID 0x1
#define PPR_FAILURE 0xf
extern int amd_iommu_complete_ppr(struct pci_dev *pdev, int pasid,
int status, int tag);
#ifndef CONFIG_AMD_IOMMU_STATS
static inline void amd_iommu_stats_init(void) { }
......
......@@ -69,11 +69,14 @@
#define MMIO_EXCL_BASE_OFFSET 0x0020
#define MMIO_EXCL_LIMIT_OFFSET 0x0028
#define MMIO_EXT_FEATURES 0x0030
#define MMIO_PPR_LOG_OFFSET 0x0038
#define MMIO_CMD_HEAD_OFFSET 0x2000
#define MMIO_CMD_TAIL_OFFSET 0x2008
#define MMIO_EVT_HEAD_OFFSET 0x2010
#define MMIO_EVT_TAIL_OFFSET 0x2018
#define MMIO_STATUS_OFFSET 0x2020
#define MMIO_PPR_HEAD_OFFSET 0x2030
#define MMIO_PPR_TAIL_OFFSET 0x2038
/* Extended Feature Bits */
......@@ -87,8 +90,17 @@
#define FEATURE_HE (1ULL<<8)
#define FEATURE_PC (1ULL<<9)
#define FEATURE_PASID_SHIFT 32
#define FEATURE_PASID_MASK (0x1fULL << FEATURE_PASID_SHIFT)
#define FEATURE_GLXVAL_SHIFT 14
#define FEATURE_GLXVAL_MASK (0x03ULL << FEATURE_GLXVAL_SHIFT)
#define PASID_MASK 0x000fffff
/* MMIO status bits */
#define MMIO_STATUS_COM_WAIT_INT_MASK 0x04
#define MMIO_STATUS_COM_WAIT_INT_MASK (1 << 2)
#define MMIO_STATUS_PPR_INT_MASK (1 << 6)
/* event logging constants */
#define EVENT_ENTRY_SIZE 0x10
......@@ -115,6 +127,7 @@
#define CONTROL_EVT_LOG_EN 0x02ULL
#define CONTROL_EVT_INT_EN 0x03ULL
#define CONTROL_COMWAIT_EN 0x04ULL
#define CONTROL_INV_TIMEOUT 0x05ULL
#define CONTROL_PASSPW_EN 0x08ULL
#define CONTROL_RESPASSPW_EN 0x09ULL
#define CONTROL_COHERENT_EN 0x0aULL
......@@ -122,18 +135,34 @@
#define CONTROL_CMDBUF_EN 0x0cULL
#define CONTROL_PPFLOG_EN 0x0dULL
#define CONTROL_PPFINT_EN 0x0eULL
#define CONTROL_PPR_EN 0x0fULL
#define CONTROL_GT_EN 0x10ULL
#define CTRL_INV_TO_MASK (7 << CONTROL_INV_TIMEOUT)
#define CTRL_INV_TO_NONE 0
#define CTRL_INV_TO_1MS 1
#define CTRL_INV_TO_10MS 2
#define CTRL_INV_TO_100MS 3
#define CTRL_INV_TO_1S 4
#define CTRL_INV_TO_10S 5
#define CTRL_INV_TO_100S 6
/* command specific defines */
#define CMD_COMPL_WAIT 0x01
#define CMD_INV_DEV_ENTRY 0x02
#define CMD_INV_IOMMU_PAGES 0x03
#define CMD_INV_IOTLB_PAGES 0x04
#define CMD_COMPLETE_PPR 0x07
#define CMD_INV_ALL 0x08
#define CMD_COMPL_WAIT_STORE_MASK 0x01
#define CMD_COMPL_WAIT_INT_MASK 0x02
#define CMD_INV_IOMMU_PAGES_SIZE_MASK 0x01
#define CMD_INV_IOMMU_PAGES_PDE_MASK 0x02
#define CMD_INV_IOMMU_PAGES_GN_MASK 0x04
#define PPR_STATUS_MASK 0xf
#define PPR_STATUS_SHIFT 12
#define CMD_INV_IOMMU_ALL_PAGES_ADDRESS 0x7fffffffffffffffULL
......@@ -165,6 +194,23 @@
#define EVT_BUFFER_SIZE 8192 /* 512 entries */
#define EVT_LEN_MASK (0x9ULL << 56)
/* Constants for PPR Log handling */
#define PPR_LOG_ENTRIES 512
#define PPR_LOG_SIZE_SHIFT 56
#define PPR_LOG_SIZE_512 (0x9ULL << PPR_LOG_SIZE_SHIFT)
#define PPR_ENTRY_SIZE 16
#define PPR_LOG_SIZE (PPR_ENTRY_SIZE * PPR_LOG_ENTRIES)
#define PPR_REQ_TYPE(x) (((x) >> 60) & 0xfULL)
#define PPR_FLAGS(x) (((x) >> 48) & 0xfffULL)
#define PPR_DEVID(x) ((x) & 0xffffULL)
#define PPR_TAG(x) (((x) >> 32) & 0x3ffULL)
#define PPR_PASID1(x) (((x) >> 16) & 0xffffULL)
#define PPR_PASID2(x) (((x) >> 42) & 0xfULL)
#define PPR_PASID(x) ((PPR_PASID2(x) << 16) | PPR_PASID1(x))
#define PPR_REQ_FAULT 0x01
#define PAGE_MODE_NONE 0x00
#define PAGE_MODE_1_LEVEL 0x01
#define PAGE_MODE_2_LEVEL 0x02
......@@ -230,7 +276,24 @@
#define IOMMU_PTE_IR (1ULL << 61)
#define IOMMU_PTE_IW (1ULL << 62)
#define DTE_FLAG_IOTLB 0x01
#define DTE_FLAG_IOTLB (0x01UL << 32)
#define DTE_FLAG_GV (0x01ULL << 55)
#define DTE_GLX_SHIFT (56)
#define DTE_GLX_MASK (3)
#define DTE_GCR3_VAL_A(x) (((x) >> 12) & 0x00007ULL)
#define DTE_GCR3_VAL_B(x) (((x) >> 15) & 0x0ffffULL)
#define DTE_GCR3_VAL_C(x) (((x) >> 31) & 0xfffffULL)
#define DTE_GCR3_INDEX_A 0
#define DTE_GCR3_INDEX_B 1
#define DTE_GCR3_INDEX_C 1
#define DTE_GCR3_SHIFT_A 58
#define DTE_GCR3_SHIFT_B 16
#define DTE_GCR3_SHIFT_C 43
#define GCR3_VALID 0x01ULL
#define IOMMU_PAGE_MASK (((1ULL << 52) - 1) & ~0xfffULL)
#define IOMMU_PTE_PRESENT(pte) ((pte) & IOMMU_PTE_P)
......@@ -257,6 +320,7 @@
domain for an IOMMU */
#define PD_PASSTHROUGH_MASK (1UL << 2) /* domain has no page
translation */
#define PD_IOMMUV2_MASK (1UL << 3) /* domain has gcr3 table */
extern bool amd_iommu_dump;
#define DUMP_printk(format, arg...) \
......@@ -285,6 +349,29 @@ extern bool amd_iommu_iotlb_sup;
#define APERTURE_RANGE_INDEX(a) ((a) >> APERTURE_RANGE_SHIFT)
#define APERTURE_PAGE_INDEX(a) (((a) >> 21) & 0x3fULL)
/*
* This struct is used to pass information about
* incoming PPR faults around.
*/
struct amd_iommu_fault {
u64 address; /* IO virtual address of the fault*/
u32 pasid; /* Address space identifier */
u16 device_id; /* Originating PCI device id */
u16 tag; /* PPR tag */
u16 flags; /* Fault flags */
};
#define PPR_FAULT_EXEC (1 << 1)
#define PPR_FAULT_READ (1 << 2)
#define PPR_FAULT_WRITE (1 << 5)
#define PPR_FAULT_USER (1 << 6)
#define PPR_FAULT_RSVD (1 << 7)
#define PPR_FAULT_GN (1 << 8)
struct iommu_domain;
/*
* This structure contains generic data for IOMMU protection domains
* independent of their use.
......@@ -297,11 +384,15 @@ struct protection_domain {
u16 id; /* the domain id written to the device table */
int mode; /* paging mode (0-6 levels) */
u64 *pt_root; /* page table root pointer */
int glx; /* Number of levels for GCR3 table */
u64 *gcr3_tbl; /* Guest CR3 table */
unsigned long flags; /* flags to find out type of domain */
bool updated; /* complete domain flush required */
unsigned dev_cnt; /* devices assigned to this domain */
unsigned dev_iommu[MAX_IOMMUS]; /* per-IOMMU reference count */
void *priv; /* private data */
struct iommu_domain *iommu_domain; /* Pointer to generic
domain structure */
};
......@@ -315,10 +406,15 @@ struct iommu_dev_data {
struct protection_domain *domain; /* Domain the device is bound to */
atomic_t bind; /* Domain attach reverent count */
u16 devid; /* PCI Device ID */
bool iommu_v2; /* Device can make use of IOMMUv2 */
bool passthrough; /* Default for device is pt_domain */
struct {
bool enabled;
int qdep;
} ats; /* ATS state */
bool pri_tlp; /* PASID TLB required for
PPR completions */
u32 errata; /* Bitmap for errata to apply */
};
/*
......@@ -399,6 +495,9 @@ struct amd_iommu {
/* Extended features */
u64 features;
/* IOMMUv2 */
bool is_iommu_v2;
/*
* Capability pointer. There could be more than one IOMMU per PCI
* device function if there are more than one AMD IOMMU capability
......@@ -431,6 +530,9 @@ struct amd_iommu {
/* MSI number for event interrupt */
u16 evt_msi_num;
/* Base of the PPR log, if present */
u8 *ppr_log;
/* true if interrupts for this IOMMU are already enabled */
bool int_enabled;
......@@ -484,7 +586,7 @@ extern struct list_head amd_iommu_pd_list;
* Structure defining one entry in the device table
*/
struct dev_table_entry {
u32 data[8];
u64 data[4];
};
/*
......@@ -549,6 +651,16 @@ extern unsigned long *amd_iommu_pd_alloc_bitmap;
*/
extern bool amd_iommu_unmap_flush;
/* Smallest number of PASIDs supported by any IOMMU in the system */
extern u32 amd_iommu_max_pasids;
extern bool amd_iommu_v2_present;
extern bool amd_iommu_force_isolation;
/* Max levels of glxval supported */
extern int amd_iommu_max_glx_val;
/* takes bus and device/function and returns the device id
* FIXME: should that be in generic PCI code? */
static inline u16 calc_devid(u8 bus, u8 devfn)
......
/*
* Copyright (C) 2010-2012 Advanced Micro Devices, Inc.
* Author: Joerg Roedel <joerg.roedel@amd.com>
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 as published
* by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#include <linux/mmu_notifier.h>
#include <linux/amd-iommu.h>
#include <linux/mm_types.h>
#include <linux/profile.h>
#include <linux/module.h>
#include <linux/sched.h>
#include <linux/iommu.h>
#include <linux/wait.h>
#include <linux/pci.h>
#include <linux/gfp.h>
#include "amd_iommu_types.h"
#include "amd_iommu_proto.h"
MODULE_LICENSE("GPL v2");
MODULE_AUTHOR("Joerg Roedel <joerg.roedel@amd.com>");
#define MAX_DEVICES 0x10000
#define PRI_QUEUE_SIZE 512
struct pri_queue {
atomic_t inflight;
bool finish;
int status;
};
struct pasid_state {
struct list_head list; /* For global state-list */
atomic_t count; /* Reference count */
struct task_struct *task; /* Task bound to this PASID */
struct mm_struct *mm; /* mm_struct for the faults */
struct mmu_notifier mn; /* mmu_otifier handle */
struct pri_queue pri[PRI_QUEUE_SIZE]; /* PRI tag states */
struct device_state *device_state; /* Link to our device_state */
int pasid; /* PASID index */
spinlock_t lock; /* Protect pri_queues */
wait_queue_head_t wq; /* To wait for count == 0 */
};
struct device_state {
atomic_t count;
struct pci_dev *pdev;
struct pasid_state **states;
struct iommu_domain *domain;
int pasid_levels;
int max_pasids;
amd_iommu_invalid_ppr_cb inv_ppr_cb;
amd_iommu_invalidate_ctx inv_ctx_cb;
spinlock_t lock;
wait_queue_head_t wq;
};
struct fault {
struct work_struct work;
struct device_state *dev_state;
struct pasid_state *state;
struct mm_struct *mm;
u64 address;
u16 devid;
u16 pasid;
u16 tag;
u16 finish;
u16 flags;
};
struct device_state **state_table;
static spinlock_t state_lock;
/* List and lock for all pasid_states */
static LIST_HEAD(pasid_state_list);
static DEFINE_SPINLOCK(ps_lock);
static struct workqueue_struct *iommu_wq;
/*
* Empty page table - Used between
* mmu_notifier_invalidate_range_start and
* mmu_notifier_invalidate_range_end
*/
static u64 *empty_page_table;
static void free_pasid_states(struct device_state *dev_state);
static void unbind_pasid(struct device_state *dev_state, int pasid);
static int task_exit(struct notifier_block *nb, unsigned long e, void *data);
static u16 device_id(struct pci_dev *pdev)
{
u16 devid;
devid = pdev->bus->number;
devid = (devid << 8) | pdev->devfn;
return devid;
}
static struct device_state *get_device_state(u16 devid)
{
struct device_state *dev_state;
unsigned long flags;
spin_lock_irqsave(&state_lock, flags);
dev_state = state_table[devid];
if (dev_state != NULL)
atomic_inc(&dev_state->count);
spin_unlock_irqrestore(&state_lock, flags);
return dev_state;
}
static void free_device_state(struct device_state *dev_state)
{
/*
* First detach device from domain - No more PRI requests will arrive
* from that device after it is unbound from the IOMMUv2 domain.
*/
iommu_detach_device(dev_state->domain, &dev_state->pdev->dev);
/* Everything is down now, free the IOMMUv2 domain */
iommu_domain_free(dev_state->domain);
/* Finally get rid of the device-state */
kfree(dev_state);
}
static void put_device_state(struct device_state *dev_state)
{
if (atomic_dec_and_test(&dev_state->count))
wake_up(&dev_state->wq);
}
static void put_device_state_wait(struct device_state *dev_state)
{
DEFINE_WAIT(wait);
prepare_to_wait(&dev_state->wq, &wait, TASK_UNINTERRUPTIBLE);
if (!atomic_dec_and_test(&dev_state->count))
schedule();
finish_wait(&dev_state->wq, &wait);
free_device_state(dev_state);
}
static struct notifier_block profile_nb = {
.notifier_call = task_exit,
};
static void link_pasid_state(struct pasid_state *pasid_state)
{
spin_lock(&ps_lock);
list_add_tail(&pasid_state->list, &pasid_state_list);
spin_unlock(&ps_lock);
}
static void __unlink_pasid_state(struct pasid_state *pasid_state)
{
list_del(&pasid_state->list);
}
static void unlink_pasid_state(struct pasid_state *pasid_state)
{
spin_lock(&ps_lock);
__unlink_pasid_state(pasid_state);
spin_unlock(&ps_lock);
}
/* Must be called under dev_state->lock */
static struct pasid_state **__get_pasid_state_ptr(struct device_state *dev_state,
int pasid, bool alloc)
{
struct pasid_state **root, **ptr;
int level, index;
level = dev_state->pasid_levels;
root = dev_state->states;
while (true) {
index = (pasid >> (9 * level)) & 0x1ff;
ptr = &root[index];
if (level == 0)
break;
if (*ptr == NULL) {
if (!alloc)
return NULL;
*ptr = (void *)get_zeroed_page(GFP_ATOMIC);
if (*ptr == NULL)
return NULL;
}
root = (struct pasid_state **)*ptr;
level -= 1;
}
return ptr;
}
static int set_pasid_state(struct device_state *dev_state,
struct pasid_state *pasid_state,
int pasid)
{
struct pasid_state **ptr;
unsigned long flags;
int ret;
spin_lock_irqsave(&dev_state->lock, flags);
ptr = __get_pasid_state_ptr(dev_state, pasid, true);
ret = -ENOMEM;
if (ptr == NULL)
goto out_unlock;
ret = -ENOMEM;
if (*ptr != NULL)
goto out_unlock;
*ptr = pasid_state;
ret = 0;
out_unlock:
spin_unlock_irqrestore(&dev_state->lock, flags);
return ret;
}
static void clear_pasid_state(struct device_state *dev_state, int pasid)
{
struct pasid_state **ptr;
unsigned long flags;
spin_lock_irqsave(&dev_state->lock, flags);
ptr = __get_pasid_state_ptr(dev_state, pasid, true);
if (ptr == NULL)
goto out_unlock;
*ptr = NULL;
out_unlock:
spin_unlock_irqrestore(&dev_state->lock, flags);
}
static struct pasid_state *get_pasid_state(struct device_state *dev_state,
int pasid)
{
struct pasid_state **ptr, *ret = NULL;
unsigned long flags;
spin_lock_irqsave(&dev_state->lock, flags);
ptr = __get_pasid_state_ptr(dev_state, pasid, false);
if (ptr == NULL)
goto out_unlock;
ret = *ptr;
if (ret)
atomic_inc(&ret->count);
out_unlock:
spin_unlock_irqrestore(&dev_state->lock, flags);
return ret;
}
static void free_pasid_state(struct pasid_state *pasid_state)
{
kfree(pasid_state);
}
static void put_pasid_state(struct pasid_state *pasid_state)
{
if (atomic_dec_and_test(&pasid_state->count)) {
put_device_state(pasid_state->device_state);
wake_up(&pasid_state->wq);
}
}
static void put_pasid_state_wait(struct pasid_state *pasid_state)
{
DEFINE_WAIT(wait);
prepare_to_wait(&pasid_state->wq, &wait, TASK_UNINTERRUPTIBLE);
if (atomic_dec_and_test(&pasid_state->count))
put_device_state(pasid_state->device_state);
else
schedule();
finish_wait(&pasid_state->wq, &wait);
mmput(pasid_state->mm);
free_pasid_state(pasid_state);
}
static void __unbind_pasid(struct pasid_state *pasid_state)
{
struct iommu_domain *domain;
domain = pasid_state->device_state->domain;
amd_iommu_domain_clear_gcr3(domain, pasid_state->pasid);
clear_pasid_state(pasid_state->device_state, pasid_state->pasid);
/* Make sure no more pending faults are in the queue */
flush_workqueue(iommu_wq);
mmu_notifier_unregister(&pasid_state->mn, pasid_state->mm);
put_pasid_state(pasid_state); /* Reference taken in bind() function */
}
static void unbind_pasid(struct device_state *dev_state, int pasid)
{
struct pasid_state *pasid_state;
pasid_state = get_pasid_state(dev_state, pasid);
if (pasid_state == NULL)
return;
unlink_pasid_state(pasid_state);
__unbind_pasid(pasid_state);
put_pasid_state_wait(pasid_state); /* Reference taken in this function */
}
static void free_pasid_states_level1(struct pasid_state **tbl)
{
int i;
for (i = 0; i < 512; ++i) {
if (tbl[i] == NULL)
continue;
free_page((unsigned long)tbl[i]);
}
}
static void free_pasid_states_level2(struct pasid_state **tbl)
{
struct pasid_state **ptr;
int i;
for (i = 0; i < 512; ++i) {
if (tbl[i] == NULL)
continue;
ptr = (struct pasid_state **)tbl[i];
free_pasid_states_level1(ptr);
}
}
static void free_pasid_states(struct device_state *dev_state)
{
struct pasid_state *pasid_state;
int i;
for (i = 0; i < dev_state->max_pasids; ++i) {
pasid_state = get_pasid_state(dev_state, i);
if (pasid_state == NULL)
continue;
put_pasid_state(pasid_state);
unbind_pasid(dev_state, i);
}
if (dev_state->pasid_levels == 2)
free_pasid_states_level2(dev_state->states);
else if (dev_state->pasid_levels == 1)
free_pasid_states_level1(dev_state->states);
else if (dev_state->pasid_levels != 0)
BUG();
free_page((unsigned long)dev_state->states);
}
static struct pasid_state *mn_to_state(struct mmu_notifier *mn)
{
return container_of(mn, struct pasid_state, mn);
}
static void __mn_flush_page(struct mmu_notifier *mn,
unsigned long address)
{
struct pasid_state *pasid_state;
struct device_state *dev_state;
pasid_state = mn_to_state(mn);
dev_state = pasid_state->device_state;
amd_iommu_flush_page(dev_state->domain, pasid_state->pasid, address);
}
static int mn_clear_flush_young(struct mmu_notifier *mn,
struct mm_struct *mm,
unsigned long address)
{
__mn_flush_page(mn, address);
return 0;
}
static void mn_change_pte(struct mmu_notifier *mn,
struct mm_struct *mm,
unsigned long address,
pte_t pte)
{
__mn_flush_page(mn, address);
}
static void mn_invalidate_page(struct mmu_notifier *mn,
struct mm_struct *mm,
unsigned long address)
{
__mn_flush_page(mn, address);
}
static void mn_invalidate_range_start(struct mmu_notifier *mn,
struct mm_struct *mm,
unsigned long start, unsigned long end)
{
struct pasid_state *pasid_state;
struct device_state *dev_state;
pasid_state = mn_to_state(mn);
dev_state = pasid_state->device_state;
amd_iommu_domain_set_gcr3(dev_state->domain, pasid_state->pasid,
__pa(empty_page_table));
}
static void mn_invalidate_range_end(struct mmu_notifier *mn,
struct mm_struct *mm,
unsigned long start, unsigned long end)
{
struct pasid_state *pasid_state;
struct device_state *dev_state;
pasid_state = mn_to_state(mn);
dev_state = pasid_state->device_state;
amd_iommu_domain_set_gcr3(dev_state->domain, pasid_state->pasid,
__pa(pasid_state->mm->pgd));
}
static struct mmu_notifier_ops iommu_mn = {
.clear_flush_young = mn_clear_flush_young,
.change_pte = mn_change_pte,
.invalidate_page = mn_invalidate_page,
.invalidate_range_start = mn_invalidate_range_start,
.invalidate_range_end = mn_invalidate_range_end,
};
static void set_pri_tag_status(struct pasid_state *pasid_state,
u16 tag, int status)
{
unsigned long flags;
spin_lock_irqsave(&pasid_state->lock, flags);
pasid_state->pri[tag].status = status;
spin_unlock_irqrestore(&pasid_state->lock, flags);
}
static void finish_pri_tag(struct device_state *dev_state,
struct pasid_state *pasid_state,
u16 tag)
{
unsigned long flags;
spin_lock_irqsave(&pasid_state->lock, flags);
if (atomic_dec_and_test(&pasid_state->pri[tag].inflight) &&
pasid_state->pri[tag].finish) {
amd_iommu_complete_ppr(dev_state->pdev, pasid_state->pasid,
pasid_state->pri[tag].status, tag);
pasid_state->pri[tag].finish = false;
pasid_state->pri[tag].status = PPR_SUCCESS;
}
spin_unlock_irqrestore(&pasid_state->lock, flags);
}
static void do_fault(struct work_struct *work)
{
struct fault *fault = container_of(work, struct fault, work);
int npages, write;
struct page *page;
write = !!(fault->flags & PPR_FAULT_WRITE);
npages = get_user_pages(fault->state->task, fault->state->mm,
fault->address, 1, write, 0, &page, NULL);
if (npages == 1) {
put_page(page);
} else if (fault->dev_state->inv_ppr_cb) {
int status;
status = fault->dev_state->inv_ppr_cb(fault->dev_state->pdev,
fault->pasid,
fault->address,
fault->flags);
switch (status) {
case AMD_IOMMU_INV_PRI_RSP_SUCCESS:
set_pri_tag_status(fault->state, fault->tag, PPR_SUCCESS);
break;
case AMD_IOMMU_INV_PRI_RSP_INVALID:
set_pri_tag_status(fault->state, fault->tag, PPR_INVALID);
break;
case AMD_IOMMU_INV_PRI_RSP_FAIL:
set_pri_tag_status(fault->state, fault->tag, PPR_FAILURE);
break;
default:
BUG();
}
} else {
set_pri_tag_status(fault->state, fault->tag, PPR_INVALID);
}
finish_pri_tag(fault->dev_state, fault->state, fault->tag);
put_pasid_state(fault->state);
kfree(fault);
}
static int ppr_notifier(struct notifier_block *nb, unsigned long e, void *data)
{
struct amd_iommu_fault *iommu_fault;
struct pasid_state *pasid_state;
struct device_state *dev_state;
unsigned long flags;
struct fault *fault;
bool finish;
u16 tag;
int ret;
iommu_fault = data;
tag = iommu_fault->tag & 0x1ff;
finish = (iommu_fault->tag >> 9) & 1;
ret = NOTIFY_DONE;
dev_state = get_device_state(iommu_fault->device_id);
if (dev_state == NULL)
goto out;
pasid_state = get_pasid_state(dev_state, iommu_fault->pasid);
if (pasid_state == NULL) {
/* We know the device but not the PASID -> send INVALID */
amd_iommu_complete_ppr(dev_state->pdev, iommu_fault->pasid,
PPR_INVALID, tag);
goto out_drop_state;
}
spin_lock_irqsave(&pasid_state->lock, flags);
atomic_inc(&pasid_state->pri[tag].inflight);
if (finish)
pasid_state->pri[tag].finish = true;
spin_unlock_irqrestore(&pasid_state->lock, flags);
fault = kzalloc(sizeof(*fault), GFP_ATOMIC);
if (fault == NULL) {
/* We are OOM - send success and let the device re-fault */
finish_pri_tag(dev_state, pasid_state, tag);
goto out_drop_state;
}
fault->dev_state = dev_state;
fault->address = iommu_fault->address;
fault->state = pasid_state;
fault->tag = tag;
fault->finish = finish;
fault->flags = iommu_fault->flags;
INIT_WORK(&fault->work, do_fault);
queue_work(iommu_wq, &fault->work);
ret = NOTIFY_OK;
out_drop_state:
put_device_state(dev_state);
out:
return ret;
}
static struct notifier_block ppr_nb = {
.notifier_call = ppr_notifier,
};
static int task_exit(struct notifier_block *nb, unsigned long e, void *data)
{
struct pasid_state *pasid_state;
struct task_struct *task;
task = data;
/*
* Using this notifier is a hack - but there is no other choice
* at the moment. What I really want is a sleeping notifier that
* is called when an MM goes down. But such a notifier doesn't
* exist yet. The notifier needs to sleep because it has to make
* sure that the device does not use the PASID and the address
* space anymore before it is destroyed. This includes waiting
* for pending PRI requests to pass the workqueue. The
* MMU-Notifiers would be a good fit, but they use RCU and so
* they are not allowed to sleep. Lets see how we can solve this
* in a more intelligent way in the future.
*/
again:
spin_lock(&ps_lock);
list_for_each_entry(pasid_state, &pasid_state_list, list) {
struct device_state *dev_state;
int pasid;
if (pasid_state->task != task)
continue;
/* Drop Lock and unbind */
spin_unlock(&ps_lock);
dev_state = pasid_state->device_state;
pasid = pasid_state->pasid;
if (pasid_state->device_state->inv_ctx_cb)
dev_state->inv_ctx_cb(dev_state->pdev, pasid);
unbind_pasid(dev_state, pasid);
/* Task may be in the list multiple times */
goto again;
}
spin_unlock(&ps_lock);
return NOTIFY_OK;
}
int amd_iommu_bind_pasid(struct pci_dev *pdev, int pasid,
struct task_struct *task)
{
struct pasid_state *pasid_state;
struct device_state *dev_state;
u16 devid;
int ret;
might_sleep();
if (!amd_iommu_v2_supported())
return -ENODEV;
devid = device_id(pdev);
dev_state = get_device_state(devid);
if (dev_state == NULL)
return -EINVAL;
ret = -EINVAL;
if (pasid < 0 || pasid >= dev_state->max_pasids)
goto out;
ret = -ENOMEM;
pasid_state = kzalloc(sizeof(*pasid_state), GFP_KERNEL);
if (pasid_state == NULL)
goto out;
atomic_set(&pasid_state->count, 1);
init_waitqueue_head(&pasid_state->wq);
pasid_state->task = task;
pasid_state->mm = get_task_mm(task);
pasid_state->device_state = dev_state;
pasid_state->pasid = pasid;
pasid_state->mn.ops = &iommu_mn;
if (pasid_state->mm == NULL)
goto out_free;
mmu_notifier_register(&pasid_state->mn, pasid_state->mm);
ret = set_pasid_state(dev_state, pasid_state, pasid);
if (ret)
goto out_unregister;
ret = amd_iommu_domain_set_gcr3(dev_state->domain, pasid,
__pa(pasid_state->mm->pgd));
if (ret)
goto out_clear_state;
link_pasid_state(pasid_state);
return 0;
out_clear_state:
clear_pasid_state(dev_state, pasid);
out_unregister:
mmu_notifier_unregister(&pasid_state->mn, pasid_state->mm);
out_free:
free_pasid_state(pasid_state);
out:
put_device_state(dev_state);
return ret;
}
EXPORT_SYMBOL(amd_iommu_bind_pasid);
void amd_iommu_unbind_pasid(struct pci_dev *pdev, int pasid)
{
struct device_state *dev_state;
u16 devid;
might_sleep();
if (!amd_iommu_v2_supported())
return;
devid = device_id(pdev);
dev_state = get_device_state(devid);
if (dev_state == NULL)
return;
if (pasid < 0 || pasid >= dev_state->max_pasids)
goto out;
unbind_pasid(dev_state, pasid);
out:
put_device_state(dev_state);
}
EXPORT_SYMBOL(amd_iommu_unbind_pasid);
int amd_iommu_init_device(struct pci_dev *pdev, int pasids)
{
struct device_state *dev_state;
unsigned long flags;
int ret, tmp;
u16 devid;
might_sleep();
if (!amd_iommu_v2_supported())
return -ENODEV;
if (pasids <= 0 || pasids > (PASID_MASK + 1))
return -EINVAL;
devid = device_id(pdev);
dev_state = kzalloc(sizeof(*dev_state), GFP_KERNEL);
if (dev_state == NULL)
return -ENOMEM;
spin_lock_init(&dev_state->lock);
init_waitqueue_head(&dev_state->wq);
dev_state->pdev = pdev;
tmp = pasids;
for (dev_state->pasid_levels = 0; (tmp - 1) & ~0x1ff; tmp >>= 9)
dev_state->pasid_levels += 1;
atomic_set(&dev_state->count, 1);
dev_state->max_pasids = pasids;
ret = -ENOMEM;
dev_state->states = (void *)get_zeroed_page(GFP_KERNEL);
if (dev_state->states == NULL)
goto out_free_dev_state;
dev_state->domain = iommu_domain_alloc(&pci_bus_type);
if (dev_state->domain == NULL)
goto out_free_states;
amd_iommu_domain_direct_map(dev_state->domain);
ret = amd_iommu_domain_enable_v2(dev_state->domain, pasids);
if (ret)
goto out_free_domain;
ret = iommu_attach_device(dev_state->domain, &pdev->dev);
if (ret != 0)
goto out_free_domain;
spin_lock_irqsave(&state_lock, flags);
if (state_table[devid] != NULL) {
spin_unlock_irqrestore(&state_lock, flags);
ret = -EBUSY;
goto out_free_domain;
}
state_table[devid] = dev_state;
spin_unlock_irqrestore(&state_lock, flags);
return 0;
out_free_domain:
iommu_domain_free(dev_state->domain);
out_free_states:
free_page((unsigned long)dev_state->states);
out_free_dev_state:
kfree(dev_state);
return ret;
}
EXPORT_SYMBOL(amd_iommu_init_device);
void amd_iommu_free_device(struct pci_dev *pdev)
{
struct device_state *dev_state;
unsigned long flags;
u16 devid;
if (!amd_iommu_v2_supported())
return;
devid = device_id(pdev);
spin_lock_irqsave(&state_lock, flags);
dev_state = state_table[devid];
if (dev_state == NULL) {
spin_unlock_irqrestore(&state_lock, flags);
return;
}
state_table[devid] = NULL;
spin_unlock_irqrestore(&state_lock, flags);
/* Get rid of any remaining pasid states */
free_pasid_states(dev_state);
put_device_state_wait(dev_state);
}
EXPORT_SYMBOL(amd_iommu_free_device);
int amd_iommu_set_invalid_ppr_cb(struct pci_dev *pdev,
amd_iommu_invalid_ppr_cb cb)
{
struct device_state *dev_state;
unsigned long flags;
u16 devid;
int ret;
if (!amd_iommu_v2_supported())
return -ENODEV;
devid = device_id(pdev);
spin_lock_irqsave(&state_lock, flags);
ret = -EINVAL;
dev_state = state_table[devid];
if (dev_state == NULL)
goto out_unlock;
dev_state->inv_ppr_cb = cb;
ret = 0;
out_unlock:
spin_unlock_irqrestore(&state_lock, flags);
return ret;
}
EXPORT_SYMBOL(amd_iommu_set_invalid_ppr_cb);
int amd_iommu_set_invalidate_ctx_cb(struct pci_dev *pdev,
amd_iommu_invalidate_ctx cb)
{
struct device_state *dev_state;
unsigned long flags;
u16 devid;
int ret;
if (!amd_iommu_v2_supported())
return -ENODEV;
devid = device_id(pdev);
spin_lock_irqsave(&state_lock, flags);
ret = -EINVAL;
dev_state = state_table[devid];
if (dev_state == NULL)
goto out_unlock;
dev_state->inv_ctx_cb = cb;
ret = 0;
out_unlock:
spin_unlock_irqrestore(&state_lock, flags);
return ret;
}
EXPORT_SYMBOL(amd_iommu_set_invalidate_ctx_cb);
static int __init amd_iommu_v2_init(void)
{
size_t state_table_size;
int ret;
pr_info("AMD IOMMUv2 driver by Joerg Roedel <joerg.roedel@amd.com>");
spin_lock_init(&state_lock);
state_table_size = MAX_DEVICES * sizeof(struct device_state *);
state_table = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
get_order(state_table_size));
if (state_table == NULL)
return -ENOMEM;
ret = -ENOMEM;
iommu_wq = create_workqueue("amd_iommu_v2");
if (iommu_wq == NULL)
goto out_free;
ret = -ENOMEM;
empty_page_table = (u64 *)get_zeroed_page(GFP_KERNEL);
if (empty_page_table == NULL)
goto out_destroy_wq;
amd_iommu_register_ppr_notifier(&ppr_nb);
profile_event_register(PROFILE_TASK_EXIT, &profile_nb);
return 0;
out_destroy_wq:
destroy_workqueue(iommu_wq);
out_free:
free_pages((unsigned long)state_table, get_order(state_table_size));
return ret;
}
static void __exit amd_iommu_v2_exit(void)
{
struct device_state *dev_state;
size_t state_table_size;
int i;
profile_event_unregister(PROFILE_TASK_EXIT, &profile_nb);
amd_iommu_unregister_ppr_notifier(&ppr_nb);
flush_workqueue(iommu_wq);
/*
* The loop below might call flush_workqueue(), so call
* destroy_workqueue() after it
*/
for (i = 0; i < MAX_DEVICES; ++i) {
dev_state = get_device_state(i);
if (dev_state == NULL)
continue;
WARN_ON_ONCE(1);
put_device_state(dev_state);
amd_iommu_free_device(dev_state->pdev);
}
destroy_workqueue(iommu_wq);
state_table_size = MAX_DEVICES * sizeof(struct device_state *);
free_pages((unsigned long)state_table, get_order(state_table_size));
free_page((unsigned long)empty_page_table);
}
module_init(amd_iommu_v2_init);
module_exit(amd_iommu_v2_exit);
......@@ -79,6 +79,24 @@
#define LEVEL_STRIDE (9)
#define LEVEL_MASK (((u64)1 << LEVEL_STRIDE) - 1)
/*
* This bitmap is used to advertise the page sizes our hardware support
* to the IOMMU core, which will then use this information to split
* physically contiguous memory regions it is mapping into page sizes
* that we support.
*
* Traditionally the IOMMU core just handed us the mappings directly,
* after making sure the size is an order of a 4KiB page and that the
* mapping has natural alignment.
*
* To retain this behavior, we currently advertise that we support
* all page sizes that are an order of 4KiB.
*
* If at some point we'd like to utilize the IOMMU core's new behavior,
* we could change this to advertise the real page sizes we support.
*/
#define INTEL_IOMMU_PGSIZES (~0xFFFUL)
static inline int agaw_to_level(int agaw)
{
return agaw + 2;
......@@ -3979,12 +3997,11 @@ static void intel_iommu_detach_device(struct iommu_domain *domain,
static int intel_iommu_map(struct iommu_domain *domain,
unsigned long iova, phys_addr_t hpa,
int gfp_order, int iommu_prot)
size_t size, int iommu_prot)
{
struct dmar_domain *dmar_domain = domain->priv;
u64 max_addr;
int prot = 0;
size_t size;
int ret;
if (iommu_prot & IOMMU_READ)
......@@ -3994,7 +4011,6 @@ static int intel_iommu_map(struct iommu_domain *domain,
if ((iommu_prot & IOMMU_CACHE) && dmar_domain->iommu_snooping)
prot |= DMA_PTE_SNP;
size = PAGE_SIZE << gfp_order;
max_addr = iova + size;
if (dmar_domain->max_addr < max_addr) {
u64 end;
......@@ -4017,11 +4033,10 @@ static int intel_iommu_map(struct iommu_domain *domain,
return ret;
}
static int intel_iommu_unmap(struct iommu_domain *domain,
unsigned long iova, int gfp_order)
static size_t intel_iommu_unmap(struct iommu_domain *domain,
unsigned long iova, size_t size)
{
struct dmar_domain *dmar_domain = domain->priv;
size_t size = PAGE_SIZE << gfp_order;
int order;
order = dma_pte_clear_range(dmar_domain, iova >> VTD_PAGE_SHIFT,
......@@ -4030,7 +4045,7 @@ static int intel_iommu_unmap(struct iommu_domain *domain,
if (dmar_domain->max_addr == iova + size)
dmar_domain->max_addr = iova;
return order;
return PAGE_SIZE << order;
}
static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
......@@ -4060,6 +4075,54 @@ static int intel_iommu_domain_has_cap(struct iommu_domain *domain,
return 0;
}
/*
* Group numbers are arbitrary. Device with the same group number
* indicate the iommu cannot differentiate between them. To avoid
* tracking used groups we just use the seg|bus|devfn of the lowest
* level we're able to differentiate devices
*/
static int intel_iommu_device_group(struct device *dev, unsigned int *groupid)
{
struct pci_dev *pdev = to_pci_dev(dev);
struct pci_dev *bridge;
union {
struct {
u8 devfn;
u8 bus;
u16 segment;
} pci;
u32 group;
} id;
if (iommu_no_mapping(dev))
return -ENODEV;
id.pci.segment = pci_domain_nr(pdev->bus);
id.pci.bus = pdev->bus->number;
id.pci.devfn = pdev->devfn;
if (!device_to_iommu(id.pci.segment, id.pci.bus, id.pci.devfn))
return -ENODEV;
bridge = pci_find_upstream_pcie_bridge(pdev);
if (bridge) {
if (pci_is_pcie(bridge)) {
id.pci.bus = bridge->subordinate->number;
id.pci.devfn = 0;
} else {
id.pci.bus = bridge->bus->number;
id.pci.devfn = bridge->devfn;
}
}
if (!pdev->is_virtfn && iommu_group_mf)
id.pci.devfn = PCI_DEVFN(PCI_SLOT(id.pci.devfn), 0);
*groupid = id.group;
return 0;
}
static struct iommu_ops intel_iommu_ops = {
.domain_init = intel_iommu_domain_init,
.domain_destroy = intel_iommu_domain_destroy,
......@@ -4069,6 +4132,8 @@ static struct iommu_ops intel_iommu_ops = {
.unmap = intel_iommu_unmap,
.iova_to_phys = intel_iommu_iova_to_phys,
.domain_has_cap = intel_iommu_domain_has_cap,
.device_group = intel_iommu_device_group,
.pgsize_bitmap = INTEL_IOMMU_PGSIZES,
};
static void __devinit quirk_iommu_rwbf(struct pci_dev *dev)
......
......@@ -16,6 +16,8 @@
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#define pr_fmt(fmt) "%s: " fmt, __func__
#include <linux/device.h>
#include <linux/kernel.h>
#include <linux/bug.h>
......@@ -25,8 +27,59 @@
#include <linux/errno.h>
#include <linux/iommu.h>
static ssize_t show_iommu_group(struct device *dev,
struct device_attribute *attr, char *buf)
{
unsigned int groupid;
if (iommu_device_group(dev, &groupid))
return 0;
return sprintf(buf, "%u", groupid);
}
static DEVICE_ATTR(iommu_group, S_IRUGO, show_iommu_group, NULL);
static int add_iommu_group(struct device *dev, void *data)
{
unsigned int groupid;
if (iommu_device_group(dev, &groupid) == 0)
return device_create_file(dev, &dev_attr_iommu_group);
return 0;
}
static int remove_iommu_group(struct device *dev)
{
unsigned int groupid;
if (iommu_device_group(dev, &groupid) == 0)
device_remove_file(dev, &dev_attr_iommu_group);
return 0;
}
static int iommu_device_notifier(struct notifier_block *nb,
unsigned long action, void *data)
{
struct device *dev = data;
if (action == BUS_NOTIFY_ADD_DEVICE)
return add_iommu_group(dev, NULL);
else if (action == BUS_NOTIFY_DEL_DEVICE)
return remove_iommu_group(dev);
return 0;
}
static struct notifier_block iommu_device_nb = {
.notifier_call = iommu_device_notifier,
};
static void iommu_bus_init(struct bus_type *bus, struct iommu_ops *ops)
{
bus_register_notifier(bus, &iommu_device_nb);
bus_for_each_dev(bus, NULL, NULL, add_iommu_group);
}
/**
......@@ -157,32 +210,134 @@ int iommu_domain_has_cap(struct iommu_domain *domain,
EXPORT_SYMBOL_GPL(iommu_domain_has_cap);
int iommu_map(struct iommu_domain *domain, unsigned long iova,
phys_addr_t paddr, int gfp_order, int prot)
phys_addr_t paddr, size_t size, int prot)
{
size_t size;
unsigned long orig_iova = iova;
unsigned int min_pagesz;
size_t orig_size = size;
int ret = 0;
if (unlikely(domain->ops->map == NULL))
return -ENODEV;
size = PAGE_SIZE << gfp_order;
/* find out the minimum page size supported */
min_pagesz = 1 << __ffs(domain->ops->pgsize_bitmap);
/*
* both the virtual address and the physical one, as well as
* the size of the mapping, must be aligned (at least) to the
* size of the smallest page supported by the hardware
*/
if (!IS_ALIGNED(iova | paddr | size, min_pagesz)) {
pr_err("unaligned: iova 0x%lx pa 0x%lx size 0x%lx min_pagesz "
"0x%x\n", iova, (unsigned long)paddr,
(unsigned long)size, min_pagesz);
return -EINVAL;
}
pr_debug("map: iova 0x%lx pa 0x%lx size 0x%lx\n", iova,
(unsigned long)paddr, (unsigned long)size);
while (size) {
unsigned long pgsize, addr_merge = iova | paddr;
unsigned int pgsize_idx;
/* Max page size that still fits into 'size' */
pgsize_idx = __fls(size);
/* need to consider alignment requirements ? */
if (likely(addr_merge)) {
/* Max page size allowed by both iova and paddr */
unsigned int align_pgsize_idx = __ffs(addr_merge);
pgsize_idx = min(pgsize_idx, align_pgsize_idx);
}
/* build a mask of acceptable page sizes */
pgsize = (1UL << (pgsize_idx + 1)) - 1;
BUG_ON(!IS_ALIGNED(iova | paddr, size));
/* throw away page sizes not supported by the hardware */
pgsize &= domain->ops->pgsize_bitmap;
return domain->ops->map(domain, iova, paddr, gfp_order, prot);
/* make sure we're still sane */
BUG_ON(!pgsize);
/* pick the biggest page */
pgsize_idx = __fls(pgsize);
pgsize = 1UL << pgsize_idx;
pr_debug("mapping: iova 0x%lx pa 0x%lx pgsize %lu\n", iova,
(unsigned long)paddr, pgsize);
ret = domain->ops->map(domain, iova, paddr, pgsize, prot);
if (ret)
break;
iova += pgsize;
paddr += pgsize;
size -= pgsize;
}
/* unroll mapping in case something went wrong */
if (ret)
iommu_unmap(domain, orig_iova, orig_size - size);
return ret;
}
EXPORT_SYMBOL_GPL(iommu_map);
int iommu_unmap(struct iommu_domain *domain, unsigned long iova, int gfp_order)
size_t iommu_unmap(struct iommu_domain *domain, unsigned long iova, size_t size)
{
size_t size;
size_t unmapped_page, unmapped = 0;
unsigned int min_pagesz;
if (unlikely(domain->ops->unmap == NULL))
return -ENODEV;
size = PAGE_SIZE << gfp_order;
/* find out the minimum page size supported */
min_pagesz = 1 << __ffs(domain->ops->pgsize_bitmap);
/*
* The virtual address, as well as the size of the mapping, must be
* aligned (at least) to the size of the smallest page supported
* by the hardware
*/
if (!IS_ALIGNED(iova | size, min_pagesz)) {
pr_err("unaligned: iova 0x%lx size 0x%lx min_pagesz 0x%x\n",
iova, (unsigned long)size, min_pagesz);
return -EINVAL;
}
pr_debug("unmap this: iova 0x%lx size 0x%lx\n", iova,
(unsigned long)size);
/*
* Keep iterating until we either unmap 'size' bytes (or more)
* or we hit an area that isn't mapped.
*/
while (unmapped < size) {
size_t left = size - unmapped;
unmapped_page = domain->ops->unmap(domain, iova, left);
if (!unmapped_page)
break;
pr_debug("unmapped: iova 0x%lx size %lx\n", iova,
(unsigned long)unmapped_page);
iova += unmapped_page;
unmapped += unmapped_page;
}
return unmapped;
}
EXPORT_SYMBOL_GPL(iommu_unmap);
BUG_ON(!IS_ALIGNED(iova, size));
int iommu_device_group(struct device *dev, unsigned int *groupid)
{
if (iommu_present(dev->bus) && dev->bus->iommu_ops->device_group)
return dev->bus->iommu_ops->device_group(dev, groupid);
return domain->ops->unmap(domain, iova, gfp_order);
return -ENODEV;
}
EXPORT_SYMBOL_GPL(iommu_unmap);
EXPORT_SYMBOL_GPL(iommu_device_group);
......@@ -42,6 +42,9 @@ __asm__ __volatile__ ( \
#define RCP15_PRRR(reg) MRC(reg, p15, 0, c10, c2, 0)
#define RCP15_NMRR(reg) MRC(reg, p15, 0, c10, c2, 1)
/* bitmap of the page sizes currently supported */
#define MSM_IOMMU_PGSIZES (SZ_4K | SZ_64K | SZ_1M | SZ_16M)
static int msm_iommu_tex_class[4];
DEFINE_SPINLOCK(msm_iommu_lock);
......@@ -352,7 +355,7 @@ static void msm_iommu_detach_dev(struct iommu_domain *domain,
}
static int msm_iommu_map(struct iommu_domain *domain, unsigned long va,
phys_addr_t pa, int order, int prot)
phys_addr_t pa, size_t len, int prot)
{
struct msm_priv *priv;
unsigned long flags;
......@@ -363,7 +366,6 @@ static int msm_iommu_map(struct iommu_domain *domain, unsigned long va,
unsigned long *sl_pte;
unsigned long sl_offset;
unsigned int pgprot;
size_t len = 0x1000UL << order;
int ret = 0, tex, sh;
spin_lock_irqsave(&msm_iommu_lock, flags);
......@@ -463,8 +465,8 @@ static int msm_iommu_map(struct iommu_domain *domain, unsigned long va,
return ret;
}
static int msm_iommu_unmap(struct iommu_domain *domain, unsigned long va,
int order)
static size_t msm_iommu_unmap(struct iommu_domain *domain, unsigned long va,
size_t len)
{
struct msm_priv *priv;
unsigned long flags;
......@@ -474,7 +476,6 @@ static int msm_iommu_unmap(struct iommu_domain *domain, unsigned long va,
unsigned long *sl_table;
unsigned long *sl_pte;
unsigned long sl_offset;
size_t len = 0x1000UL << order;
int i, ret = 0;
spin_lock_irqsave(&msm_iommu_lock, flags);
......@@ -544,15 +545,12 @@ static int msm_iommu_unmap(struct iommu_domain *domain, unsigned long va,
ret = __flush_iotlb(domain);
/*
* the IOMMU API requires us to return the order of the unmapped
* page (on success).
*/
if (!ret)
ret = order;
fail:
spin_unlock_irqrestore(&msm_iommu_lock, flags);
return ret;
/* the IOMMU API requires us to return how many bytes were unmapped */
len = ret ? 0 : len;
return len;
}
static phys_addr_t msm_iommu_iova_to_phys(struct iommu_domain *domain,
......@@ -684,7 +682,8 @@ static struct iommu_ops msm_iommu_ops = {
.map = msm_iommu_map,
.unmap = msm_iommu_unmap,
.iova_to_phys = msm_iommu_iova_to_phys,
.domain_has_cap = msm_iommu_domain_has_cap
.domain_has_cap = msm_iommu_domain_has_cap,
.pgsize_bitmap = MSM_IOMMU_PGSIZES,
};
static int __init get_tex_class(int icp, int ocp, int mt, int nos)
......
......@@ -33,6 +33,9 @@
(__i < (n)) && (cr = __iotlb_read_cr((obj), __i), true); \
__i++)
/* bitmap of the page sizes currently supported */
#define OMAP_IOMMU_PGSIZES (SZ_4K | SZ_64K | SZ_1M | SZ_16M)
/**
* struct omap_iommu_domain - omap iommu domain
* @pgtable: the page table
......@@ -86,20 +89,24 @@ EXPORT_SYMBOL_GPL(omap_uninstall_iommu_arch);
/**
* omap_iommu_save_ctx - Save registers for pm off-mode support
* @obj: target iommu
* @dev: client device
**/
void omap_iommu_save_ctx(struct omap_iommu *obj)
void omap_iommu_save_ctx(struct device *dev)
{
struct omap_iommu *obj = dev_to_omap_iommu(dev);
arch_iommu->save_ctx(obj);
}
EXPORT_SYMBOL_GPL(omap_iommu_save_ctx);
/**
* omap_iommu_restore_ctx - Restore registers for pm off-mode support
* @obj: target iommu
* @dev: client device
**/
void omap_iommu_restore_ctx(struct omap_iommu *obj)
void omap_iommu_restore_ctx(struct device *dev)
{
struct omap_iommu *obj = dev_to_omap_iommu(dev);
arch_iommu->restore_ctx(obj);
}
EXPORT_SYMBOL_GPL(omap_iommu_restore_ctx);
......@@ -819,36 +826,24 @@ static int device_match_by_alias(struct device *dev, void *data)
return strcmp(obj->name, name) == 0;
}
/**
* omap_find_iommu_device() - find an omap iommu device by name
* @name: name of the iommu device
*
* The generic iommu API requires the caller to provide the device
* he wishes to attach to a certain iommu domain.
*
* Drivers generally should not bother with this as it should just
* be taken care of by the DMA-API using dev_archdata.
*
* This function is provided as an interim solution until the latter
* materializes, and omap3isp is fully migrated to the DMA-API.
*/
struct device *omap_find_iommu_device(const char *name)
{
return driver_find_device(&omap_iommu_driver.driver, NULL,
(void *)name,
device_match_by_alias);
}
EXPORT_SYMBOL_GPL(omap_find_iommu_device);
/**
* omap_iommu_attach() - attach iommu device to an iommu domain
* @dev: target omap iommu device
* @name: name of target omap iommu device
* @iopgd: page table
**/
static struct omap_iommu *omap_iommu_attach(struct device *dev, u32 *iopgd)
static struct omap_iommu *omap_iommu_attach(const char *name, u32 *iopgd)
{
int err = -ENOMEM;
struct omap_iommu *obj = to_iommu(dev);
struct device *dev;
struct omap_iommu *obj;
dev = driver_find_device(&omap_iommu_driver.driver, NULL,
(void *)name,
device_match_by_alias);
if (!dev)
return NULL;
obj = to_iommu(dev);
spin_lock(&obj->iommu_lock);
......@@ -1019,12 +1014,11 @@ static void iopte_cachep_ctor(void *iopte)
}
static int omap_iommu_map(struct iommu_domain *domain, unsigned long da,
phys_addr_t pa, int order, int prot)
phys_addr_t pa, size_t bytes, int prot)
{
struct omap_iommu_domain *omap_domain = domain->priv;
struct omap_iommu *oiommu = omap_domain->iommu_dev;
struct device *dev = oiommu->dev;
size_t bytes = PAGE_SIZE << order;
struct iotlb_entry e;
int omap_pgsz;
u32 ret, flags;
......@@ -1049,19 +1043,16 @@ static int omap_iommu_map(struct iommu_domain *domain, unsigned long da,
return ret;
}
static int omap_iommu_unmap(struct iommu_domain *domain, unsigned long da,
int order)
static size_t omap_iommu_unmap(struct iommu_domain *domain, unsigned long da,
size_t size)
{
struct omap_iommu_domain *omap_domain = domain->priv;
struct omap_iommu *oiommu = omap_domain->iommu_dev;
struct device *dev = oiommu->dev;
size_t unmap_size;
dev_dbg(dev, "unmapping da 0x%lx order %d\n", da, order);
unmap_size = iopgtable_clear_entry(oiommu, da);
dev_dbg(dev, "unmapping da 0x%lx size %u\n", da, size);
return unmap_size ? get_order(unmap_size) : -EINVAL;
return iopgtable_clear_entry(oiommu, da);
}
static int
......@@ -1069,6 +1060,7 @@ omap_iommu_attach_dev(struct iommu_domain *domain, struct device *dev)
{
struct omap_iommu_domain *omap_domain = domain->priv;
struct omap_iommu *oiommu;
struct omap_iommu_arch_data *arch_data = dev->archdata.iommu;
int ret = 0;
spin_lock(&omap_domain->lock);
......@@ -1081,14 +1073,14 @@ omap_iommu_attach_dev(struct iommu_domain *domain, struct device *dev)
}
/* get a handle to and enable the omap iommu */
oiommu = omap_iommu_attach(dev, omap_domain->pgtable);
oiommu = omap_iommu_attach(arch_data->name, omap_domain->pgtable);
if (IS_ERR(oiommu)) {
ret = PTR_ERR(oiommu);
dev_err(dev, "can't get omap iommu: %d\n", ret);
goto out;
}
omap_domain->iommu_dev = oiommu;
omap_domain->iommu_dev = arch_data->iommu_dev = oiommu;
oiommu->domain = domain;
out:
......@@ -1100,7 +1092,8 @@ static void omap_iommu_detach_dev(struct iommu_domain *domain,
struct device *dev)
{
struct omap_iommu_domain *omap_domain = domain->priv;
struct omap_iommu *oiommu = to_iommu(dev);
struct omap_iommu_arch_data *arch_data = dev->archdata.iommu;
struct omap_iommu *oiommu = dev_to_omap_iommu(dev);
spin_lock(&omap_domain->lock);
......@@ -1114,7 +1107,7 @@ static void omap_iommu_detach_dev(struct iommu_domain *domain,
omap_iommu_detach(oiommu);
omap_domain->iommu_dev = NULL;
omap_domain->iommu_dev = arch_data->iommu_dev = NULL;
out:
spin_unlock(&omap_domain->lock);
......@@ -1183,14 +1176,14 @@ static phys_addr_t omap_iommu_iova_to_phys(struct iommu_domain *domain,
else if (iopte_is_large(*pte))
ret = omap_iommu_translate(*pte, da, IOLARGE_MASK);
else
dev_err(dev, "bogus pte 0x%x", *pte);
dev_err(dev, "bogus pte 0x%x, da 0x%lx", *pte, da);
} else {
if (iopgd_is_section(*pgd))
ret = omap_iommu_translate(*pgd, da, IOSECTION_MASK);
else if (iopgd_is_super(*pgd))
ret = omap_iommu_translate(*pgd, da, IOSUPER_MASK);
else
dev_err(dev, "bogus pgd 0x%x", *pgd);
dev_err(dev, "bogus pgd 0x%x, da 0x%lx", *pgd, da);
}
return ret;
......@@ -1211,6 +1204,7 @@ static struct iommu_ops omap_iommu_ops = {
.unmap = omap_iommu_unmap,
.iova_to_phys = omap_iommu_iova_to_phys,
.domain_has_cap = omap_iommu_domain_has_cap,
.pgsize_bitmap = OMAP_IOMMU_PGSIZES,
};
static int __init omap_iommu_init(void)
......
......@@ -231,12 +231,14 @@ static struct iovm_struct *__find_iovm_area(struct omap_iommu *obj,
/**
* omap_find_iovm_area - find iovma which includes @da
* @dev: client device
* @da: iommu device virtual address
*
* Find the existing iovma starting at @da
*/
struct iovm_struct *omap_find_iovm_area(struct omap_iommu *obj, u32 da)
struct iovm_struct *omap_find_iovm_area(struct device *dev, u32 da)
{
struct omap_iommu *obj = dev_to_omap_iommu(dev);
struct iovm_struct *area;
mutex_lock(&obj->mmap_lock);
......@@ -343,14 +345,15 @@ static void free_iovm_area(struct omap_iommu *obj, struct iovm_struct *area)
/**
* omap_da_to_va - convert (d) to (v)
* @obj: objective iommu
* @dev: client device
* @da: iommu device virtual address
* @va: mpu virtual address
*
* Returns mpu virtual addr which corresponds to a given device virtual addr
*/
void *omap_da_to_va(struct omap_iommu *obj, u32 da)
void *omap_da_to_va(struct device *dev, u32 da)
{
struct omap_iommu *obj = dev_to_omap_iommu(dev);
void *va = NULL;
struct iovm_struct *area;
......@@ -410,7 +413,6 @@ static int map_iovm_area(struct iommu_domain *domain, struct iovm_struct *new,
unsigned int i, j;
struct scatterlist *sg;
u32 da = new->da_start;
int order;
if (!domain || !sgt)
return -EINVAL;
......@@ -429,12 +431,10 @@ static int map_iovm_area(struct iommu_domain *domain, struct iovm_struct *new,
if (bytes_to_iopgsz(bytes) < 0)
goto err_out;
order = get_order(bytes);
pr_debug("%s: [%d] %08x %08x(%x)\n", __func__,
i, da, pa, bytes);
err = iommu_map(domain, da, pa, order, flags);
err = iommu_map(domain, da, pa, bytes, flags);
if (err)
goto err_out;
......@@ -449,10 +449,9 @@ static int map_iovm_area(struct iommu_domain *domain, struct iovm_struct *new,
size_t bytes;
bytes = sg->length + sg->offset;
order = get_order(bytes);
/* ignore failures.. we're already handling one */
iommu_unmap(domain, da, order);
iommu_unmap(domain, da, bytes);
da += bytes;
}
......@@ -467,7 +466,8 @@ static void unmap_iovm_area(struct iommu_domain *domain, struct omap_iommu *obj,
size_t total = area->da_end - area->da_start;
const struct sg_table *sgt = area->sgt;
struct scatterlist *sg;
int i, err;
int i;
size_t unmapped;
BUG_ON(!sgtable_ok(sgt));
BUG_ON((!total) || !IS_ALIGNED(total, PAGE_SIZE));
......@@ -475,13 +475,11 @@ static void unmap_iovm_area(struct iommu_domain *domain, struct omap_iommu *obj,
start = area->da_start;
for_each_sg(sgt->sgl, sg, sgt->nents, i) {
size_t bytes;
int order;
bytes = sg->length + sg->offset;
order = get_order(bytes);
err = iommu_unmap(domain, start, order);
if (err < 0)
unmapped = iommu_unmap(domain, start, bytes);
if (unmapped < bytes)
break;
dev_dbg(obj->dev, "%s: unmap %08x(%x) %08x\n",
......@@ -582,16 +580,18 @@ __iommu_vmap(struct iommu_domain *domain, struct omap_iommu *obj,
/**
* omap_iommu_vmap - (d)-(p)-(v) address mapper
* @obj: objective iommu
* @domain: iommu domain
* @dev: client device
* @sgt: address of scatter gather table
* @flags: iovma and page property
*
* Creates 1-n-1 mapping with given @sgt and returns @da.
* All @sgt element must be io page size aligned.
*/
u32 omap_iommu_vmap(struct iommu_domain *domain, struct omap_iommu *obj, u32 da,
u32 omap_iommu_vmap(struct iommu_domain *domain, struct device *dev, u32 da,
const struct sg_table *sgt, u32 flags)
{
struct omap_iommu *obj = dev_to_omap_iommu(dev);
size_t bytes;
void *va = NULL;
......@@ -622,15 +622,17 @@ EXPORT_SYMBOL_GPL(omap_iommu_vmap);
/**
* omap_iommu_vunmap - release virtual mapping obtained by 'omap_iommu_vmap()'
* @obj: objective iommu
* @domain: iommu domain
* @dev: client device
* @da: iommu device virtual address
*
* Free the iommu virtually contiguous memory area starting at
* @da, which was returned by 'omap_iommu_vmap()'.
*/
struct sg_table *
omap_iommu_vunmap(struct iommu_domain *domain, struct omap_iommu *obj, u32 da)
omap_iommu_vunmap(struct iommu_domain *domain, struct device *dev, u32 da)
{
struct omap_iommu *obj = dev_to_omap_iommu(dev);
struct sg_table *sgt;
/*
* 'sgt' is allocated before 'omap_iommu_vmalloc()' is called.
......@@ -647,7 +649,7 @@ EXPORT_SYMBOL_GPL(omap_iommu_vunmap);
/**
* omap_iommu_vmalloc - (d)-(p)-(v) address allocator and mapper
* @obj: objective iommu
* @dev: client device
* @da: contiguous iommu virtual memory
* @bytes: allocation size
* @flags: iovma and page property
......@@ -656,9 +658,10 @@ EXPORT_SYMBOL_GPL(omap_iommu_vunmap);
* @da again, which might be adjusted if 'IOVMF_DA_FIXED' is not set.
*/
u32
omap_iommu_vmalloc(struct iommu_domain *domain, struct omap_iommu *obj, u32 da,
omap_iommu_vmalloc(struct iommu_domain *domain, struct device *dev, u32 da,
size_t bytes, u32 flags)
{
struct omap_iommu *obj = dev_to_omap_iommu(dev);
void *va;
struct sg_table *sgt;
......@@ -698,15 +701,16 @@ EXPORT_SYMBOL_GPL(omap_iommu_vmalloc);
/**
* omap_iommu_vfree - release memory allocated by 'omap_iommu_vmalloc()'
* @obj: objective iommu
* @dev: client device
* @da: iommu device virtual address
*
* Frees the iommu virtually continuous memory area starting at
* @da, as obtained from 'omap_iommu_vmalloc()'.
*/
void omap_iommu_vfree(struct iommu_domain *domain, struct omap_iommu *obj,
void omap_iommu_vfree(struct iommu_domain *domain, struct device *dev,
const u32 da)
{
struct omap_iommu *obj = dev_to_omap_iommu(dev);
struct sg_table *sgt;
sgt = unmap_vm_area(domain, obj, da, vfree,
......
......@@ -80,13 +80,6 @@
#include "isph3a.h"
#include "isphist.h"
/*
* this is provided as an interim solution until omap3isp doesn't need
* any omap-specific iommu API
*/
#define to_iommu(dev) \
(struct omap_iommu *)platform_get_drvdata(to_platform_device(dev))
static unsigned int autoidle;
module_param(autoidle, int, 0444);
MODULE_PARM_DESC(autoidle, "Enable OMAP3ISP AUTOIDLE support");
......@@ -1114,8 +1107,7 @@ isp_restore_context(struct isp_device *isp, struct isp_reg *reg_list)
static void isp_save_ctx(struct isp_device *isp)
{
isp_save_context(isp, isp_reg_list);
if (isp->iommu)
omap_iommu_save_ctx(isp->iommu);
omap_iommu_save_ctx(isp->dev);
}
/*
......@@ -1128,8 +1120,7 @@ static void isp_save_ctx(struct isp_device *isp)
static void isp_restore_ctx(struct isp_device *isp)
{
isp_restore_context(isp, isp_reg_list);
if (isp->iommu)
omap_iommu_restore_ctx(isp->iommu);
omap_iommu_restore_ctx(isp->dev);
omap3isp_ccdc_restore_context(isp);
omap3isp_preview_restore_context(isp);
}
......@@ -1983,7 +1974,7 @@ static int isp_remove(struct platform_device *pdev)
isp_cleanup_modules(isp);
omap3isp_get(isp);
iommu_detach_device(isp->domain, isp->iommu_dev);
iommu_detach_device(isp->domain, &pdev->dev);
iommu_domain_free(isp->domain);
omap3isp_put(isp);
......@@ -2131,17 +2122,6 @@ static int isp_probe(struct platform_device *pdev)
}
}
/* IOMMU */
isp->iommu_dev = omap_find_iommu_device("isp");
if (!isp->iommu_dev) {
dev_err(isp->dev, "omap_find_iommu_device failed\n");
ret = -ENODEV;
goto error_isp;
}
/* to be removed once iommu migration is complete */
isp->iommu = to_iommu(isp->iommu_dev);
isp->domain = iommu_domain_alloc(pdev->dev.bus);
if (!isp->domain) {
dev_err(isp->dev, "can't alloc iommu domain\n");
......@@ -2149,7 +2129,7 @@ static int isp_probe(struct platform_device *pdev)
goto error_isp;
}
ret = iommu_attach_device(isp->domain, isp->iommu_dev);
ret = iommu_attach_device(isp->domain, &pdev->dev);
if (ret) {
dev_err(&pdev->dev, "can't attach iommu device: %d\n", ret);
goto free_domain;
......@@ -2188,7 +2168,7 @@ static int isp_probe(struct platform_device *pdev)
error_irq:
free_irq(isp->irq_num, isp);
detach_dev:
iommu_detach_device(isp->domain, isp->iommu_dev);
iommu_detach_device(isp->domain, &pdev->dev);
free_domain:
iommu_domain_free(isp->domain);
error_isp:
......
......@@ -212,9 +212,7 @@ struct isp_device {
unsigned int sbl_resources;
unsigned int subclk_resources;
struct omap_iommu *iommu;
struct iommu_domain *domain;
struct device *iommu_dev;
struct isp_platform_callback platform_cb;
};
......
......@@ -366,7 +366,7 @@ static void ccdc_lsc_free_request(struct isp_ccdc_device *ccdc,
dma_unmap_sg(isp->dev, req->iovm->sgt->sgl,
req->iovm->sgt->nents, DMA_TO_DEVICE);
if (req->table)
omap_iommu_vfree(isp->domain, isp->iommu, req->table);
omap_iommu_vfree(isp->domain, isp->dev, req->table);
kfree(req);
}
......@@ -438,7 +438,7 @@ static int ccdc_lsc_config(struct isp_ccdc_device *ccdc,
req->enable = 1;
req->table = omap_iommu_vmalloc(isp->domain, isp->iommu, 0,
req->table = omap_iommu_vmalloc(isp->domain, isp->dev, 0,
req->config.size, IOMMU_FLAG);
if (IS_ERR_VALUE(req->table)) {
req->table = 0;
......@@ -446,7 +446,7 @@ static int ccdc_lsc_config(struct isp_ccdc_device *ccdc,
goto done;
}
req->iovm = omap_find_iovm_area(isp->iommu, req->table);
req->iovm = omap_find_iovm_area(isp->dev, req->table);
if (req->iovm == NULL) {
ret = -ENOMEM;
goto done;
......@@ -462,7 +462,7 @@ static int ccdc_lsc_config(struct isp_ccdc_device *ccdc,
dma_sync_sg_for_cpu(isp->dev, req->iovm->sgt->sgl,
req->iovm->sgt->nents, DMA_TO_DEVICE);
table = omap_da_to_va(isp->iommu, req->table);
table = omap_da_to_va(isp->dev, req->table);
if (copy_from_user(table, config->lsc, req->config.size)) {
ret = -EFAULT;
goto done;
......@@ -734,15 +734,15 @@ static int ccdc_config(struct isp_ccdc_device *ccdc,
* already done by omap_iommu_vmalloc().
*/
size = ccdc->fpc.fpnum * 4;
table_new = omap_iommu_vmalloc(isp->domain, isp->iommu,
table_new = omap_iommu_vmalloc(isp->domain, isp->dev,
0, size, IOMMU_FLAG);
if (IS_ERR_VALUE(table_new))
return -ENOMEM;
if (copy_from_user(omap_da_to_va(isp->iommu, table_new),
if (copy_from_user(omap_da_to_va(isp->dev, table_new),
(__force void __user *)
ccdc->fpc.fpcaddr, size)) {
omap_iommu_vfree(isp->domain, isp->iommu,
omap_iommu_vfree(isp->domain, isp->dev,
table_new);
return -EFAULT;
}
......@@ -753,7 +753,7 @@ static int ccdc_config(struct isp_ccdc_device *ccdc,
ccdc_configure_fpc(ccdc);
if (table_old != 0)
omap_iommu_vfree(isp->domain, isp->iommu, table_old);
omap_iommu_vfree(isp->domain, isp->dev, table_old);
}
return ccdc_lsc_config(ccdc, ccdc_struct);
......@@ -2309,7 +2309,7 @@ void omap3isp_ccdc_cleanup(struct isp_device *isp)
ccdc_lsc_free_queue(ccdc, &ccdc->lsc.free_queue);
if (ccdc->fpc.fpcaddr != 0)
omap_iommu_vfree(isp->domain, isp->iommu, ccdc->fpc.fpcaddr);
omap_iommu_vfree(isp->domain, isp->dev, ccdc->fpc.fpcaddr);
mutex_destroy(&ccdc->ioctl_lock);
}
......@@ -366,7 +366,7 @@ static void isp_stat_bufs_free(struct ispstat *stat)
dma_unmap_sg(isp->dev, buf->iovm->sgt->sgl,
buf->iovm->sgt->nents,
DMA_FROM_DEVICE);
omap_iommu_vfree(isp->domain, isp->iommu,
omap_iommu_vfree(isp->domain, isp->dev,
buf->iommu_addr);
} else {
if (!buf->virt_addr)
......@@ -400,7 +400,7 @@ static int isp_stat_bufs_alloc_iommu(struct ispstat *stat, unsigned int size)
struct iovm_struct *iovm;
WARN_ON(buf->dma_addr);
buf->iommu_addr = omap_iommu_vmalloc(isp->domain, isp->iommu, 0,
buf->iommu_addr = omap_iommu_vmalloc(isp->domain, isp->dev, 0,
size, IOMMU_FLAG);
if (IS_ERR((void *)buf->iommu_addr)) {
dev_err(stat->isp->dev,
......@@ -410,7 +410,7 @@ static int isp_stat_bufs_alloc_iommu(struct ispstat *stat, unsigned int size)
return -ENOMEM;
}
iovm = omap_find_iovm_area(isp->iommu, buf->iommu_addr);
iovm = omap_find_iovm_area(isp->dev, buf->iommu_addr);
if (!iovm ||
!dma_map_sg(isp->dev, iovm->sgt->sgl, iovm->sgt->nents,
DMA_FROM_DEVICE)) {
......@@ -419,7 +419,7 @@ static int isp_stat_bufs_alloc_iommu(struct ispstat *stat, unsigned int size)
}
buf->iovm = iovm;
buf->virt_addr = omap_da_to_va(stat->isp->iommu,
buf->virt_addr = omap_da_to_va(stat->isp->dev,
(u32)buf->iommu_addr);
buf->empty = 1;
dev_dbg(stat->isp->dev, "%s: buffer[%d] allocated."
......
......@@ -453,7 +453,7 @@ ispmmu_vmap(struct isp_device *isp, const struct scatterlist *sglist, int sglen)
sgt->nents = sglen;
sgt->orig_nents = sglen;
da = omap_iommu_vmap(isp->domain, isp->iommu, 0, sgt, IOMMU_FLAG);
da = omap_iommu_vmap(isp->domain, isp->dev, 0, sgt, IOMMU_FLAG);
if (IS_ERR_VALUE(da))
kfree(sgt);
......@@ -469,7 +469,7 @@ static void ispmmu_vunmap(struct isp_device *isp, dma_addr_t da)
{
struct sg_table *sgt;
sgt = omap_iommu_vunmap(isp->domain, isp->iommu, (u32)da);
sgt = omap_iommu_vunmap(isp->domain, isp->dev, (u32)da);
kfree(sgt);
}
......
......@@ -175,21 +175,22 @@ int pci_enable_pri(struct pci_dev *pdev, u32 reqs)
u32 max_requests;
int pos;
pos = pci_find_ext_capability(pdev, PCI_PRI_CAP);
pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PRI);
if (!pos)
return -EINVAL;
pci_read_config_word(pdev, pos + PCI_PRI_CONTROL_OFF, &control);
pci_read_config_word(pdev, pos + PCI_PRI_STATUS_OFF, &status);
if ((control & PCI_PRI_ENABLE) || !(status & PCI_PRI_STATUS_STOPPED))
pci_read_config_word(pdev, pos + PCI_PRI_CTRL, &control);
pci_read_config_word(pdev, pos + PCI_PRI_STATUS, &status);
if ((control & PCI_PRI_CTRL_ENABLE) ||
!(status & PCI_PRI_STATUS_STOPPED))
return -EBUSY;
pci_read_config_dword(pdev, pos + PCI_PRI_MAX_REQ_OFF, &max_requests);
pci_read_config_dword(pdev, pos + PCI_PRI_MAX_REQ, &max_requests);
reqs = min(max_requests, reqs);
pci_write_config_dword(pdev, pos + PCI_PRI_ALLOC_REQ_OFF, reqs);
pci_write_config_dword(pdev, pos + PCI_PRI_ALLOC_REQ, reqs);
control |= PCI_PRI_ENABLE;
pci_write_config_word(pdev, pos + PCI_PRI_CONTROL_OFF, control);
control |= PCI_PRI_CTRL_ENABLE;
pci_write_config_word(pdev, pos + PCI_PRI_CTRL, control);
return 0;
}
......@@ -206,13 +207,13 @@ void pci_disable_pri(struct pci_dev *pdev)
u16 control;
int pos;
pos = pci_find_ext_capability(pdev, PCI_PRI_CAP);
pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PRI);
if (!pos)
return;
pci_read_config_word(pdev, pos + PCI_PRI_CONTROL_OFF, &control);
control &= ~PCI_PRI_ENABLE;
pci_write_config_word(pdev, pos + PCI_PRI_CONTROL_OFF, control);
pci_read_config_word(pdev, pos + PCI_PRI_CTRL, &control);
control &= ~PCI_PRI_CTRL_ENABLE;
pci_write_config_word(pdev, pos + PCI_PRI_CTRL, control);
}
EXPORT_SYMBOL_GPL(pci_disable_pri);
......@@ -227,13 +228,13 @@ bool pci_pri_enabled(struct pci_dev *pdev)
u16 control;
int pos;
pos = pci_find_ext_capability(pdev, PCI_PRI_CAP);
pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PRI);
if (!pos)
return false;
pci_read_config_word(pdev, pos + PCI_PRI_CONTROL_OFF, &control);
pci_read_config_word(pdev, pos + PCI_PRI_CTRL, &control);
return (control & PCI_PRI_ENABLE) ? true : false;
return (control & PCI_PRI_CTRL_ENABLE) ? true : false;
}
EXPORT_SYMBOL_GPL(pci_pri_enabled);
......@@ -249,17 +250,17 @@ int pci_reset_pri(struct pci_dev *pdev)
u16 control;
int pos;
pos = pci_find_ext_capability(pdev, PCI_PRI_CAP);
pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PRI);
if (!pos)
return -EINVAL;
pci_read_config_word(pdev, pos + PCI_PRI_CONTROL_OFF, &control);
if (control & PCI_PRI_ENABLE)
pci_read_config_word(pdev, pos + PCI_PRI_CTRL, &control);
if (control & PCI_PRI_CTRL_ENABLE)
return -EBUSY;
control |= PCI_PRI_RESET;
control |= PCI_PRI_CTRL_RESET;
pci_write_config_word(pdev, pos + PCI_PRI_CONTROL_OFF, control);
pci_write_config_word(pdev, pos + PCI_PRI_CTRL, control);
return 0;
}
......@@ -282,14 +283,14 @@ bool pci_pri_stopped(struct pci_dev *pdev)
u16 control, status;
int pos;
pos = pci_find_ext_capability(pdev, PCI_PRI_CAP);
pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PRI);
if (!pos)
return true;
pci_read_config_word(pdev, pos + PCI_PRI_CONTROL_OFF, &control);
pci_read_config_word(pdev, pos + PCI_PRI_STATUS_OFF, &status);
pci_read_config_word(pdev, pos + PCI_PRI_CTRL, &control);
pci_read_config_word(pdev, pos + PCI_PRI_STATUS, &status);
if (control & PCI_PRI_ENABLE)
if (control & PCI_PRI_CTRL_ENABLE)
return false;
return (status & PCI_PRI_STATUS_STOPPED) ? true : false;
......@@ -311,15 +312,15 @@ int pci_pri_status(struct pci_dev *pdev)
u16 status, control;
int pos;
pos = pci_find_ext_capability(pdev, PCI_PRI_CAP);
pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PRI);
if (!pos)
return -EINVAL;
pci_read_config_word(pdev, pos + PCI_PRI_CONTROL_OFF, &control);
pci_read_config_word(pdev, pos + PCI_PRI_STATUS_OFF, &status);
pci_read_config_word(pdev, pos + PCI_PRI_CTRL, &control);
pci_read_config_word(pdev, pos + PCI_PRI_STATUS, &status);
/* Stopped bit is undefined when enable == 1, so clear it */
if (control & PCI_PRI_ENABLE)
if (control & PCI_PRI_CTRL_ENABLE)
status &= ~PCI_PRI_STATUS_STOPPED;
return status;
......@@ -342,25 +343,25 @@ int pci_enable_pasid(struct pci_dev *pdev, int features)
u16 control, supported;
int pos;
pos = pci_find_ext_capability(pdev, PCI_PASID_CAP);
pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PASID);
if (!pos)
return -EINVAL;
pci_read_config_word(pdev, pos + PCI_PASID_CONTROL_OFF, &control);
pci_read_config_word(pdev, pos + PCI_PASID_CAP_OFF, &supported);
pci_read_config_word(pdev, pos + PCI_PASID_CTRL, &control);
pci_read_config_word(pdev, pos + PCI_PASID_CAP, &supported);
if (!(supported & PCI_PASID_ENABLE))
if (control & PCI_PASID_CTRL_ENABLE)
return -EINVAL;
supported &= PCI_PASID_EXEC | PCI_PASID_PRIV;
supported &= PCI_PASID_CAP_EXEC | PCI_PASID_CAP_PRIV;
/* User wants to enable anything unsupported? */
if ((supported & features) != features)
return -EINVAL;
control = PCI_PASID_ENABLE | features;
control = PCI_PASID_CTRL_ENABLE | features;
pci_write_config_word(pdev, pos + PCI_PASID_CONTROL_OFF, control);
pci_write_config_word(pdev, pos + PCI_PASID_CTRL, control);
return 0;
}
......@@ -376,11 +377,11 @@ void pci_disable_pasid(struct pci_dev *pdev)
u16 control = 0;
int pos;
pos = pci_find_ext_capability(pdev, PCI_PASID_CAP);
pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PASID);
if (!pos)
return;
pci_write_config_word(pdev, pos + PCI_PASID_CONTROL_OFF, control);
pci_write_config_word(pdev, pos + PCI_PASID_CTRL, control);
}
EXPORT_SYMBOL_GPL(pci_disable_pasid);
......@@ -391,22 +392,21 @@ EXPORT_SYMBOL_GPL(pci_disable_pasid);
* Returns a negative value when no PASI capability is present.
* Otherwise is returns a bitmask with supported features. Current
* features reported are:
* PCI_PASID_ENABLE - PASID capability can be enabled
* PCI_PASID_EXEC - Execute permission supported
* PCI_PASID_PRIV - Priviledged mode supported
* PCI_PASID_CAP_EXEC - Execute permission supported
* PCI_PASID_CAP_PRIV - Priviledged mode supported
*/
int pci_pasid_features(struct pci_dev *pdev)
{
u16 supported;
int pos;
pos = pci_find_ext_capability(pdev, PCI_PASID_CAP);
pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PASID);
if (!pos)
return -EINVAL;
pci_read_config_word(pdev, pos + PCI_PASID_CAP_OFF, &supported);
pci_read_config_word(pdev, pos + PCI_PASID_CAP, &supported);
supported &= PCI_PASID_ENABLE | PCI_PASID_EXEC | PCI_PASID_PRIV;
supported &= PCI_PASID_CAP_EXEC | PCI_PASID_CAP_PRIV;
return supported;
}
......@@ -426,11 +426,11 @@ int pci_max_pasids(struct pci_dev *pdev)
u16 supported;
int pos;
pos = pci_find_ext_capability(pdev, PCI_PASID_CAP);
pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PASID);
if (!pos)
return -EINVAL;
pci_read_config_word(pdev, pos + PCI_PASID_CAP_OFF, &supported);
pci_read_config_word(pdev, pos + PCI_PASID_CAP, &supported);
supported = (supported & PASID_NUMBER_MASK) >> PASID_NUMBER_SHIFT;
......
......@@ -45,7 +45,6 @@ extern int pciehp_poll_time;
extern int pciehp_debug;
extern int pciehp_force;
extern struct workqueue_struct *pciehp_wq;
extern struct workqueue_struct *pciehp_ordered_wq;
#define dbg(format, arg...) \
do { \
......
......@@ -43,7 +43,6 @@ int pciehp_poll_mode;
int pciehp_poll_time;
int pciehp_force;
struct workqueue_struct *pciehp_wq;
struct workqueue_struct *pciehp_ordered_wq;
#define DRIVER_VERSION "0.4"
#define DRIVER_AUTHOR "Dan Zink <dan.zink@compaq.com>, Greg Kroah-Hartman <greg@kroah.com>, Dely Sy <dely.l.sy@intel.com>"
......@@ -345,18 +344,11 @@ static int __init pcied_init(void)
if (!pciehp_wq)
return -ENOMEM;
pciehp_ordered_wq = alloc_ordered_workqueue("pciehp_ordered", 0);
if (!pciehp_ordered_wq) {
destroy_workqueue(pciehp_wq);
return -ENOMEM;
}
pciehp_firmware_init();
retval = pcie_port_service_register(&hpdriver_portdrv);
dbg("pcie_port_service_register = %d\n", retval);
info(DRIVER_DESC " version: " DRIVER_VERSION "\n");
if (retval) {
destroy_workqueue(pciehp_ordered_wq);
destroy_workqueue(pciehp_wq);
dbg("Failure to register service\n");
}
......@@ -366,9 +358,8 @@ static int __init pcied_init(void)
static void __exit pcied_cleanup(void)
{
dbg("unload_pciehpd()\n");
destroy_workqueue(pciehp_ordered_wq);
destroy_workqueue(pciehp_wq);
pcie_port_service_unregister(&hpdriver_portdrv);
destroy_workqueue(pciehp_wq);
info(DRIVER_DESC " version: " DRIVER_VERSION " unloaded\n");
}
......
......@@ -344,7 +344,7 @@ void pciehp_queue_pushbutton_work(struct work_struct *work)
kfree(info);
goto out;
}
queue_work(pciehp_ordered_wq, &info->work);
queue_work(pciehp_wq, &info->work);
out:
mutex_unlock(&p_slot->lock);
}
......@@ -439,7 +439,7 @@ static void handle_surprise_event(struct slot *p_slot)
else
p_slot->state = POWERON_STATE;
queue_work(pciehp_ordered_wq, &info->work);
queue_work(pciehp_wq, &info->work);
}
static void interrupt_event_handler(struct work_struct *work)
......
......@@ -806,7 +806,6 @@ static void pcie_cleanup_slot(struct controller *ctrl)
struct slot *slot = ctrl->slot;
cancel_delayed_work(&slot->work);
flush_workqueue(pciehp_wq);
flush_workqueue(pciehp_ordered_wq);
kfree(slot);
}
......
......@@ -323,6 +323,8 @@ static void free_msi_irqs(struct pci_dev *dev)
if (list_is_last(&entry->list, &dev->msi_list))
iounmap(entry->mask_base);
}
kobject_del(&entry->kobj);
kobject_put(&entry->kobj);
list_del(&entry->list);
kfree(entry);
}
......@@ -403,6 +405,98 @@ void pci_restore_msi_state(struct pci_dev *dev)
}
EXPORT_SYMBOL_GPL(pci_restore_msi_state);
#define to_msi_attr(obj) container_of(obj, struct msi_attribute, attr)
#define to_msi_desc(obj) container_of(obj, struct msi_desc, kobj)
struct msi_attribute {
struct attribute attr;
ssize_t (*show)(struct msi_desc *entry, struct msi_attribute *attr,
char *buf);
ssize_t (*store)(struct msi_desc *entry, struct msi_attribute *attr,
const char *buf, size_t count);
};
static ssize_t show_msi_mode(struct msi_desc *entry, struct msi_attribute *atr,
char *buf)
{
return sprintf(buf, "%s\n", entry->msi_attrib.is_msix ? "msix" : "msi");
}
static ssize_t msi_irq_attr_show(struct kobject *kobj,
struct attribute *attr, char *buf)
{
struct msi_attribute *attribute = to_msi_attr(attr);
struct msi_desc *entry = to_msi_desc(kobj);
if (!attribute->show)
return -EIO;
return attribute->show(entry, attribute, buf);
}
static const struct sysfs_ops msi_irq_sysfs_ops = {
.show = msi_irq_attr_show,
};
static struct msi_attribute mode_attribute =
__ATTR(mode, S_IRUGO, show_msi_mode, NULL);
struct attribute *msi_irq_default_attrs[] = {
&mode_attribute.attr,
NULL
};
void msi_kobj_release(struct kobject *kobj)
{
struct msi_desc *entry = to_msi_desc(kobj);
pci_dev_put(entry->dev);
}
static struct kobj_type msi_irq_ktype = {
.release = msi_kobj_release,
.sysfs_ops = &msi_irq_sysfs_ops,
.default_attrs = msi_irq_default_attrs,
};
static int populate_msi_sysfs(struct pci_dev *pdev)
{
struct msi_desc *entry;
struct kobject *kobj;
int ret;
int count = 0;
pdev->msi_kset = kset_create_and_add("msi_irqs", NULL, &pdev->dev.kobj);
if (!pdev->msi_kset)
return -ENOMEM;
list_for_each_entry(entry, &pdev->msi_list, list) {
kobj = &entry->kobj;
kobj->kset = pdev->msi_kset;
pci_dev_get(pdev);
ret = kobject_init_and_add(kobj, &msi_irq_ktype, NULL,
"%u", entry->irq);
if (ret)
goto out_unroll;
count++;
}
return 0;
out_unroll:
list_for_each_entry(entry, &pdev->msi_list, list) {
if (!count)
break;
kobject_del(&entry->kobj);
kobject_put(&entry->kobj);
count--;
}
return ret;
}
/**
* msi_capability_init - configure device's MSI capability structure
* @dev: pointer to the pci_dev data structure of MSI device function
......@@ -454,6 +548,13 @@ static int msi_capability_init(struct pci_dev *dev, int nvec)
return ret;
}
ret = populate_msi_sysfs(dev);
if (ret) {
msi_mask_irq(entry, mask, ~mask);
free_msi_irqs(dev);
return ret;
}
/* Set MSI enabled bits */
pci_intx_for_msi(dev, 0);
msi_set_enable(dev, pos, 1);
......@@ -574,6 +675,12 @@ static int msix_capability_init(struct pci_dev *dev,
msix_program_entries(dev, entries);
ret = populate_msi_sysfs(dev);
if (ret) {
ret = 0;
goto error;
}
/* Set MSI-X enabled bits and unmask the function */
pci_intx_for_msi(dev, 0);
dev->msix_enabled = 1;
......@@ -732,6 +839,8 @@ void pci_disable_msi(struct pci_dev *dev)
pci_msi_shutdown(dev);
free_msi_irqs(dev);
kset_unregister(dev->msi_kset);
dev->msi_kset = NULL;
}
EXPORT_SYMBOL(pci_disable_msi);
......@@ -830,6 +939,8 @@ void pci_disable_msix(struct pci_dev *dev)
pci_msix_shutdown(dev);
free_msi_irqs(dev);
kset_unregister(dev->msi_kset);
dev->msi_kset = NULL;
}
EXPORT_SYMBOL(pci_disable_msix);
......@@ -870,5 +981,15 @@ EXPORT_SYMBOL(pci_msi_enabled);
void pci_msi_init_pci_dev(struct pci_dev *dev)
{
int pos;
INIT_LIST_HEAD(&dev->msi_list);
/* Disable the msi hardware to avoid screaming interrupts
* during boot. This is the power on reset default so
* usually this should be a noop.
*/
pos = pci_find_capability(dev, PCI_CAP_ID_MSI);
if (pos)
msi_set_enable(dev, pos, 0);
msix_set_enable(dev, 0);
}
......@@ -45,16 +45,20 @@ static void pci_acpi_wake_dev(acpi_handle handle, u32 event, void *context)
{
struct pci_dev *pci_dev = context;
if (event == ACPI_NOTIFY_DEVICE_WAKE && pci_dev) {
if (event != ACPI_NOTIFY_DEVICE_WAKE || !pci_dev)
return;
if (!pci_dev->pm_cap || !pci_dev->pme_support
|| pci_check_pme_status(pci_dev)) {
if (pci_dev->pme_poll)
pci_dev->pme_poll = false;
pci_wakeup_event(pci_dev);
pci_check_pme_status(pci_dev);
pm_runtime_resume(&pci_dev->dev);
if (pci_dev->subordinate)
pci_pme_wakeup_bus(pci_dev->subordinate);
}
if (pci_dev->subordinate)
pci_pme_wakeup_bus(pci_dev->subordinate);
}
/**
......@@ -395,7 +399,6 @@ static int __init acpi_pci_init(void)
if (acpi_gbl_FADT.boot_flags & ACPI_FADT_NO_ASPM) {
printk(KERN_INFO"ACPI FADT declares the system doesn't support PCIe ASPM, so disable it\n");
pcie_clear_aspm();
pcie_no_aspm();
}
......
......@@ -68,7 +68,7 @@ struct pcie_link_state {
struct aspm_latency acceptable[8];
};
static int aspm_disabled, aspm_force, aspm_clear_state;
static int aspm_disabled, aspm_force;
static bool aspm_support_enabled = true;
static DEFINE_MUTEX(aspm_lock);
static LIST_HEAD(link_list);
......@@ -500,9 +500,6 @@ static int pcie_aspm_sanity_check(struct pci_dev *pdev)
int pos;
u32 reg32;
if (aspm_clear_state)
return -EINVAL;
/*
* Some functions in a slot might not all be PCIe functions,
* very strange. Disable ASPM for the whole slot
......@@ -574,9 +571,6 @@ void pcie_aspm_init_link_state(struct pci_dev *pdev)
pdev->pcie_type != PCI_EXP_TYPE_DOWNSTREAM)
return;
if (aspm_disabled && !aspm_clear_state)
return;
/* VIA has a strange chipset, root port is under a bridge */
if (pdev->pcie_type == PCI_EXP_TYPE_ROOT_PORT &&
pdev->bus->self)
......@@ -608,7 +602,7 @@ void pcie_aspm_init_link_state(struct pci_dev *pdev)
* the BIOS's expectation, we'll do so once pci_enable_device() is
* called.
*/
if (aspm_policy != POLICY_POWERSAVE || aspm_clear_state) {
if (aspm_policy != POLICY_POWERSAVE) {
pcie_config_aspm_path(link);
pcie_set_clkpm(link, policy_to_clkpm_state(link));
}
......@@ -649,8 +643,7 @@ void pcie_aspm_exit_link_state(struct pci_dev *pdev)
struct pci_dev *parent = pdev->bus->self;
struct pcie_link_state *link, *root, *parent_link;
if ((aspm_disabled && !aspm_clear_state) || !pci_is_pcie(pdev) ||
!parent || !parent->link_state)
if (!pci_is_pcie(pdev) || !parent || !parent->link_state)
return;
if ((parent->pcie_type != PCI_EXP_TYPE_ROOT_PORT) &&
(parent->pcie_type != PCI_EXP_TYPE_DOWNSTREAM))
......@@ -734,13 +727,18 @@ void pcie_aspm_powersave_config_link(struct pci_dev *pdev)
* pci_disable_link_state - disable pci device's link state, so the link will
* never enter specific states
*/
static void __pci_disable_link_state(struct pci_dev *pdev, int state, bool sem)
static void __pci_disable_link_state(struct pci_dev *pdev, int state, bool sem,
bool force)
{
struct pci_dev *parent = pdev->bus->self;
struct pcie_link_state *link;
if (aspm_disabled || !pci_is_pcie(pdev))
if (aspm_disabled && !force)
return;
if (!pci_is_pcie(pdev))
return;
if (pdev->pcie_type == PCI_EXP_TYPE_ROOT_PORT ||
pdev->pcie_type == PCI_EXP_TYPE_DOWNSTREAM)
parent = pdev;
......@@ -768,16 +766,31 @@ static void __pci_disable_link_state(struct pci_dev *pdev, int state, bool sem)
void pci_disable_link_state_locked(struct pci_dev *pdev, int state)
{
__pci_disable_link_state(pdev, state, false);
__pci_disable_link_state(pdev, state, false, false);
}
EXPORT_SYMBOL(pci_disable_link_state_locked);
void pci_disable_link_state(struct pci_dev *pdev, int state)
{
__pci_disable_link_state(pdev, state, true);
__pci_disable_link_state(pdev, state, true, false);
}
EXPORT_SYMBOL(pci_disable_link_state);
void pcie_clear_aspm(struct pci_bus *bus)
{
struct pci_dev *child;
/*
* Clear any ASPM setup that the firmware has carried out on this bus
*/
list_for_each_entry(child, &bus->devices, bus_list) {
__pci_disable_link_state(child, PCIE_LINK_STATE_L0S |
PCIE_LINK_STATE_L1 |
PCIE_LINK_STATE_CLKPM,
false, true);
}
}
static int pcie_aspm_set_policy(const char *val, struct kernel_param *kp)
{
int i;
......@@ -935,6 +948,7 @@ void pcie_aspm_remove_sysfs_dev_files(struct pci_dev *pdev)
static int __init pcie_aspm_disable(char *str)
{
if (!strcmp(str, "off")) {
aspm_policy = POLICY_DEFAULT;
aspm_disabled = 1;
aspm_support_enabled = false;
printk(KERN_INFO "PCIe ASPM is disabled\n");
......@@ -947,16 +961,18 @@ static int __init pcie_aspm_disable(char *str)
__setup("pcie_aspm=", pcie_aspm_disable);
void pcie_clear_aspm(void)
{
if (!aspm_force)
aspm_clear_state = 1;
}
void pcie_no_aspm(void)
{
if (!aspm_force)
/*
* Disabling ASPM is intended to prevent the kernel from modifying
* existing hardware state, not to clear existing state. To that end:
* (a) set policy to POLICY_DEFAULT in order to avoid changing state
* (b) prevent userspace from changing policy
*/
if (!aspm_force) {
aspm_policy = POLICY_DEFAULT;
aspm_disabled = 1;
}
}
/**
......
......@@ -302,6 +302,10 @@ extern bool osc_sb_apei_support_acked;
OSC_PCI_EXPRESS_PME_CONTROL | \
OSC_PCI_EXPRESS_AER_CONTROL | \
OSC_PCI_EXPRESS_CAP_STRUCTURE_CONTROL)
#define OSC_PCI_NATIVE_HOTPLUG (OSC_PCI_EXPRESS_NATIVE_HP_CONTROL | \
OSC_SHPC_NATIVE_HP_CONTROL)
extern acpi_status acpi_pci_osc_control_set(acpi_handle handle,
u32 *mask, u32 req);
extern void acpi_early_init(void);
......
......@@ -20,12 +20,148 @@
#ifndef _ASM_X86_AMD_IOMMU_H
#define _ASM_X86_AMD_IOMMU_H
#include <linux/irqreturn.h>
#include <linux/types.h>
#ifdef CONFIG_AMD_IOMMU
struct task_struct;
struct pci_dev;
extern int amd_iommu_detect(void);
/**
* amd_iommu_enable_device_erratum() - Enable erratum workaround for device
* in the IOMMUv2 driver
* @pdev: The PCI device the workaround is necessary for
* @erratum: The erratum workaround to enable
*
* The function needs to be called before amd_iommu_init_device().
* Possible values for the erratum number are for now:
* - AMD_PRI_DEV_ERRATUM_ENABLE_RESET - Reset PRI capability when PRI
* is enabled
* - AMD_PRI_DEV_ERRATUM_LIMIT_REQ_ONE - Limit number of outstanding PRI
* requests to one
*/
#define AMD_PRI_DEV_ERRATUM_ENABLE_RESET 0
#define AMD_PRI_DEV_ERRATUM_LIMIT_REQ_ONE 1
extern void amd_iommu_enable_device_erratum(struct pci_dev *pdev, u32 erratum);
/**
* amd_iommu_init_device() - Init device for use with IOMMUv2 driver
* @pdev: The PCI device to initialize
* @pasids: Number of PASIDs to support for this device
*
* This function does all setup for the device pdev so that it can be
* used with IOMMUv2.
* Returns 0 on success or negative value on error.
*/
extern int amd_iommu_init_device(struct pci_dev *pdev, int pasids);
/**
* amd_iommu_free_device() - Free all IOMMUv2 related device resources
* and disable IOMMUv2 usage for this device
* @pdev: The PCI device to disable IOMMUv2 usage for'
*/
extern void amd_iommu_free_device(struct pci_dev *pdev);
/**
* amd_iommu_bind_pasid() - Bind a given task to a PASID on a device
* @pdev: The PCI device to bind the task to
* @pasid: The PASID on the device the task should be bound to
* @task: the task to bind
*
* The function returns 0 on success or a negative value on error.
*/
extern int amd_iommu_bind_pasid(struct pci_dev *pdev, int pasid,
struct task_struct *task);
/**
* amd_iommu_unbind_pasid() - Unbind a PASID from its task on
* a device
* @pdev: The device of the PASID
* @pasid: The PASID to unbind
*
* When this function returns the device is no longer using the PASID
* and the PASID is no longer bound to its task.
*/
extern void amd_iommu_unbind_pasid(struct pci_dev *pdev, int pasid);
/**
* amd_iommu_set_invalid_ppr_cb() - Register a call-back for failed
* PRI requests
* @pdev: The PCI device the call-back should be registered for
* @cb: The call-back function
*
* The IOMMUv2 driver invokes this call-back when it is unable to
* successfully handle a PRI request. The device driver can then decide
* which PRI response the device should see. Possible return values for
* the call-back are:
*
* - AMD_IOMMU_INV_PRI_RSP_SUCCESS - Send SUCCESS back to the device
* - AMD_IOMMU_INV_PRI_RSP_INVALID - Send INVALID back to the device
* - AMD_IOMMU_INV_PRI_RSP_FAIL - Send Failure back to the device,
* the device is required to disable
* PRI when it receives this response
*
* The function returns 0 on success or negative value on error.
*/
#define AMD_IOMMU_INV_PRI_RSP_SUCCESS 0
#define AMD_IOMMU_INV_PRI_RSP_INVALID 1
#define AMD_IOMMU_INV_PRI_RSP_FAIL 2
typedef int (*amd_iommu_invalid_ppr_cb)(struct pci_dev *pdev,
int pasid,
unsigned long address,
u16);
extern int amd_iommu_set_invalid_ppr_cb(struct pci_dev *pdev,
amd_iommu_invalid_ppr_cb cb);
/**
* amd_iommu_device_info() - Get information about IOMMUv2 support of a
* PCI device
* @pdev: PCI device to query information from
* @info: A pointer to an amd_iommu_device_info structure which will contain
* the information about the PCI device
*
* Returns 0 on success, negative value on error
*/
#define AMD_IOMMU_DEVICE_FLAG_ATS_SUP 0x1 /* ATS feature supported */
#define AMD_IOMMU_DEVICE_FLAG_PRI_SUP 0x2 /* PRI feature supported */
#define AMD_IOMMU_DEVICE_FLAG_PASID_SUP 0x4 /* PASID context supported */
#define AMD_IOMMU_DEVICE_FLAG_EXEC_SUP 0x8 /* Device may request execution
on memory pages */
#define AMD_IOMMU_DEVICE_FLAG_PRIV_SUP 0x10 /* Device may request
super-user privileges */
struct amd_iommu_device_info {
int max_pasids;
u32 flags;
};
extern int amd_iommu_device_info(struct pci_dev *pdev,
struct amd_iommu_device_info *info);
/**
* amd_iommu_set_invalidate_ctx_cb() - Register a call-back for invalidating
* a pasid context. This call-back is
* invoked when the IOMMUv2 driver needs to
* invalidate a PASID context, for example
* because the task that is bound to that
* context is about to exit.
*
* @pdev: The PCI device the call-back should be registered for
* @cb: The call-back function
*/
typedef void (*amd_iommu_invalidate_ctx)(struct pci_dev *pdev, int pasid);
extern int amd_iommu_set_invalidate_ctx_cb(struct pci_dev *pdev,
amd_iommu_invalidate_ctx cb);
#else
static inline int amd_iommu_detect(void) { return -ENODEV; }
......
......@@ -48,19 +48,34 @@ struct iommu_domain {
#ifdef CONFIG_IOMMU_API
/**
* struct iommu_ops - iommu ops and capabilities
* @domain_init: init iommu domain
* @domain_destroy: destroy iommu domain
* @attach_dev: attach device to an iommu domain
* @detach_dev: detach device from an iommu domain
* @map: map a physically contiguous memory region to an iommu domain
* @unmap: unmap a physically contiguous memory region from an iommu domain
* @iova_to_phys: translate iova to physical address
* @domain_has_cap: domain capabilities query
* @commit: commit iommu domain
* @pgsize_bitmap: bitmap of supported page sizes
*/
struct iommu_ops {
int (*domain_init)(struct iommu_domain *domain);
void (*domain_destroy)(struct iommu_domain *domain);
int (*attach_dev)(struct iommu_domain *domain, struct device *dev);
void (*detach_dev)(struct iommu_domain *domain, struct device *dev);
int (*map)(struct iommu_domain *domain, unsigned long iova,
phys_addr_t paddr, int gfp_order, int prot);
int (*unmap)(struct iommu_domain *domain, unsigned long iova,
int gfp_order);
phys_addr_t paddr, size_t size, int prot);
size_t (*unmap)(struct iommu_domain *domain, unsigned long iova,
size_t size);
phys_addr_t (*iova_to_phys)(struct iommu_domain *domain,
unsigned long iova);
int (*domain_has_cap)(struct iommu_domain *domain,
unsigned long cap);
int (*device_group)(struct device *dev, unsigned int *groupid);
unsigned long pgsize_bitmap;
};
extern int bus_set_iommu(struct bus_type *bus, struct iommu_ops *ops);
......@@ -72,15 +87,16 @@ extern int iommu_attach_device(struct iommu_domain *domain,
extern void iommu_detach_device(struct iommu_domain *domain,
struct device *dev);
extern int iommu_map(struct iommu_domain *domain, unsigned long iova,
phys_addr_t paddr, int gfp_order, int prot);
extern int iommu_unmap(struct iommu_domain *domain, unsigned long iova,
int gfp_order);
phys_addr_t paddr, size_t size, int prot);
extern size_t iommu_unmap(struct iommu_domain *domain, unsigned long iova,
size_t size);
extern phys_addr_t iommu_iova_to_phys(struct iommu_domain *domain,
unsigned long iova);
extern int iommu_domain_has_cap(struct iommu_domain *domain,
unsigned long cap);
extern void iommu_set_fault_handler(struct iommu_domain *domain,
iommu_fault_handler_t handler);
extern int iommu_device_group(struct device *dev, unsigned int *groupid);
/**
* report_iommu_fault() - report about an IOMMU fault to the IOMMU framework
......@@ -179,6 +195,11 @@ static inline void iommu_set_fault_handler(struct iommu_domain *domain,
{
}
static inline int iommu_device_group(struct device *dev, unsigned int *groupid)
{
return -ENODEV;
}
#endif /* CONFIG_IOMMU_API */
#endif /* __LINUX_IOMMU_H */
#ifndef LINUX_MSI_H
#define LINUX_MSI_H
#include <linux/kobject.h>
#include <linux/list.h>
struct msi_msg {
......@@ -44,6 +45,8 @@ struct msi_desc {
/* Last set MSI message */
struct msi_msg msg;
struct kobject kobj;
};
/*
......
......@@ -29,7 +29,7 @@ extern void pcie_aspm_pm_state_change(struct pci_dev *pdev);
extern void pcie_aspm_powersave_config_link(struct pci_dev *pdev);
extern void pci_disable_link_state(struct pci_dev *pdev, int state);
extern void pci_disable_link_state_locked(struct pci_dev *pdev, int state);
extern void pcie_clear_aspm(void);
extern void pcie_clear_aspm(struct pci_bus *bus);
extern void pcie_no_aspm(void);
#else
static inline void pcie_aspm_init_link_state(struct pci_dev *pdev)
......@@ -47,7 +47,7 @@ static inline void pcie_aspm_powersave_config_link(struct pci_dev *pdev)
static inline void pci_disable_link_state(struct pci_dev *pdev, int state)
{
}
static inline void pcie_clear_aspm(void)
static inline void pcie_clear_aspm(struct pci_bus *bus)
{
}
static inline void pcie_no_aspm(void)
......
......@@ -336,6 +336,7 @@ struct pci_dev {
struct bin_attribute *res_attr_wc[DEVICE_COUNT_RESOURCE]; /* sysfs file for WC mapping of resources */
#ifdef CONFIG_PCI_MSI
struct list_head msi_list;
struct kset *msi_kset;
#endif
struct pci_vpd *vpd;
#ifdef CONFIG_PCI_ATS
......
......@@ -537,7 +537,9 @@
#define PCI_EXT_CAP_ID_ARI 14
#define PCI_EXT_CAP_ID_ATS 15
#define PCI_EXT_CAP_ID_SRIOV 16
#define PCI_EXT_CAP_ID_PRI 19
#define PCI_EXT_CAP_ID_LTR 24
#define PCI_EXT_CAP_ID_PASID 27
/* Advanced Error Reporting */
#define PCI_ERR_UNCOR_STATUS 4 /* Uncorrectable Error Status */
......@@ -664,24 +666,24 @@
#define PCI_ATS_MIN_STU 12 /* shift of minimum STU block */
/* Page Request Interface */
#define PCI_PRI_CAP 0x13 /* PRI capability ID */
#define PCI_PRI_CONTROL_OFF 0x04 /* Offset of control register */
#define PCI_PRI_STATUS_OFF 0x06 /* Offset of status register */
#define PCI_PRI_ENABLE 0x0001 /* Enable mask */
#define PCI_PRI_RESET 0x0002 /* Reset bit mask */
#define PCI_PRI_STATUS_RF 0x0001 /* Request Failure */
#define PCI_PRI_STATUS_UPRGI 0x0002 /* Unexpected PRG index */
#define PCI_PRI_STATUS_STOPPED 0x0100 /* PRI Stopped */
#define PCI_PRI_MAX_REQ_OFF 0x08 /* Cap offset for max reqs supported */
#define PCI_PRI_ALLOC_REQ_OFF 0x0c /* Cap offset for max reqs allowed */
#define PCI_PRI_CTRL 0x04 /* PRI control register */
#define PCI_PRI_CTRL_ENABLE 0x01 /* Enable */
#define PCI_PRI_CTRL_RESET 0x02 /* Reset */
#define PCI_PRI_STATUS 0x06 /* PRI status register */
#define PCI_PRI_STATUS_RF 0x001 /* Response Failure */
#define PCI_PRI_STATUS_UPRGI 0x002 /* Unexpected PRG index */
#define PCI_PRI_STATUS_STOPPED 0x100 /* PRI Stopped */
#define PCI_PRI_MAX_REQ 0x08 /* PRI max reqs supported */
#define PCI_PRI_ALLOC_REQ 0x0c /* PRI max reqs allowed */
/* PASID capability */
#define PCI_PASID_CAP 0x1b /* PASID capability ID */
#define PCI_PASID_CAP_OFF 0x04 /* PASID feature register */
#define PCI_PASID_CONTROL_OFF 0x06 /* PASID control register */
#define PCI_PASID_ENABLE 0x01 /* Enable/Supported bit */
#define PCI_PASID_EXEC 0x02 /* Exec permissions Enable/Supported */
#define PCI_PASID_PRIV 0x04 /* Priviledge Mode Enable/Support */
#define PCI_PASID_CAP 0x04 /* PASID feature register */
#define PCI_PASID_CAP_EXEC 0x02 /* Exec permissions Supported */
#define PCI_PASID_CAP_PRIV 0x04 /* Priviledge Mode Supported */
#define PCI_PASID_CTRL 0x06 /* PASID control register */
#define PCI_PASID_CTRL_ENABLE 0x01 /* Enable bit */
#define PCI_PASID_CTRL_EXEC 0x02 /* Exec permissions Enable */
#define PCI_PASID_CTRL_PRIV 0x04 /* Priviledge Mode Enable */
/* Single Root I/O Virtualization */
#define PCI_SRIOV_CAP 0x04 /* SR-IOV Capabilities */
......
......@@ -113,7 +113,7 @@ int kvm_iommu_map_pages(struct kvm *kvm, struct kvm_memory_slot *slot)
/* Map into IO address space */
r = iommu_map(domain, gfn_to_gpa(gfn), pfn_to_hpa(pfn),
get_order(page_size), flags);
page_size, flags);
if (r) {
printk(KERN_ERR "kvm_iommu_map_address:"
"iommu failed to map pfn=%llx\n", pfn);
......@@ -293,15 +293,15 @@ static void kvm_iommu_put_pages(struct kvm *kvm,
while (gfn < end_gfn) {
unsigned long unmap_pages;
int order;
size_t size;
/* Get physical address */
phys = iommu_iova_to_phys(domain, gfn_to_gpa(gfn));
pfn = phys >> PAGE_SHIFT;
/* Unmap address from IO address space */
order = iommu_unmap(domain, gfn_to_gpa(gfn), 0);
unmap_pages = 1ULL << order;
size = iommu_unmap(domain, gfn_to_gpa(gfn), PAGE_SIZE);
unmap_pages = 1ULL << get_order(size);
/* Unpin all pages we just unmapped to not leak any memory */
kvm_unpin_pages(kvm, pfn, unmap_pages);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment