Commit ab1228e4 authored by Linus Torvalds

Merge git://git.infradead.org/intel-iommu

Pull intel iommu updates from David Woodhouse:
 "This adds "Shared Virtual Memory" (aka PASID support) for the Intel
  IOMMU.  This allows devices to do DMA using process address space,
  translated through the normal CPU page tables for the relevant mm.

  With corresponding support added to the i915 driver, this has been
  tested with the graphics device on Skylake.  We don't have the
  required TLP support in our PCIe root ports for supporting discrete
  devices yet, so it's only integrated devices that can do it so far"

* git://git.infradead.org/intel-iommu: (23 commits)
  iommu/vt-d: Fix rwxp flags in SVM device fault callback
  iommu/vt-d: Expose struct svm_dev_ops without CONFIG_INTEL_IOMMU_SVM
  iommu/vt-d: Clean up pasid_enabled() and ecs_enabled() dependencies
  iommu/vt-d: Handle Caching Mode implementations of SVM
  iommu/vt-d: Fix SVM IOTLB flush handling
  iommu/vt-d: Use dev_err(..) in intel_svm_device_to_iommu(..)
  iommu/vt-d: fix a loop in prq_event_thread()
  iommu/vt-d: Fix IOTLB flushing for global pages
  iommu/vt-d: Fix address shifting in page request handler
  iommu/vt-d: shift wrapping bug in prq_event_thread()
  iommu/vt-d: Fix NULL pointer dereference in page request error case
  iommu/vt-d: Implement SVM_FLAG_SUPERVISOR_MODE for kernel access
  iommu/vt-d: Implement SVM_FLAG_PRIVATE_PASID to allocate unique PASIDs
  iommu/vt-d: Add callback to device driver on page faults
  iommu/vt-d: Implement page request handling
  iommu/vt-d: Generalise DMAR MSI setup to allow for page request events
  iommu/vt-d: Implement deferred invalidate for SVM
  iommu/vt-d: Add basic SVM PASID support
  iommu/vt-d: Always enable PASID/PRI PCI capabilities before ATS
  iommu/vt-d: Add initial support for PASID tables
  ...
parents 5ebe0ee8 0bdec95c
drivers/iommu/Kconfig
@@ -134,6 +134,16 @@ config INTEL_IOMMU
 	  and include PCI device scope covered by these DMA
 	  remapping devices.
 
+config INTEL_IOMMU_SVM
+	bool "Support for Shared Virtual Memory with Intel IOMMU"
+	depends on INTEL_IOMMU && X86
+	select PCI_PASID
+	select MMU_NOTIFIER
+	help
+	  Shared Virtual Memory (SVM) provides a facility for devices
+	  to access DMA resources through process address space by
+	  means of a Process Address Space ID (PASID).
+
 config INTEL_IOMMU_DEFAULT_ON
 	def_bool y
 	prompt "Enable Intel DMA Remapping Devices by default"
drivers/iommu/Makefile
@@ -12,6 +12,7 @@ obj-$(CONFIG_ARM_SMMU) += arm-smmu.o
 obj-$(CONFIG_ARM_SMMU_V3) += arm-smmu-v3.o
 obj-$(CONFIG_DMAR_TABLE) += dmar.o
 obj-$(CONFIG_INTEL_IOMMU) += intel-iommu.o
+obj-$(CONFIG_INTEL_IOMMU_SVM) += intel-svm.o
 obj-$(CONFIG_IPMMU_VMSA) += ipmmu-vmsa.o
 obj-$(CONFIG_IRQ_REMAP) += intel_irq_remapping.o irq_remapping.o
 obj-$(CONFIG_OMAP_IOMMU) += omap-iommu.o
drivers/iommu/dmar.c
@@ -1086,6 +1086,11 @@ static void free_iommu(struct intel_iommu *iommu)
 	iommu_device_destroy(iommu->iommu_dev);
 
 	if (iommu->irq) {
+		if (iommu->pr_irq) {
+			free_irq(iommu->pr_irq, iommu);
+			dmar_free_hwirq(iommu->pr_irq);
+			iommu->pr_irq = 0;
+		}
 		free_irq(iommu->irq, iommu);
 		dmar_free_hwirq(iommu->irq);
 		iommu->irq = 0;
@@ -1493,53 +1498,68 @@ static const char *dmar_get_fault_reason(u8 fault_reason, int *fault_type)
 	}
 }
 
+static inline int dmar_msi_reg(struct intel_iommu *iommu, int irq)
+{
+	if (iommu->irq == irq)
+		return DMAR_FECTL_REG;
+	else if (iommu->pr_irq == irq)
+		return DMAR_PECTL_REG;
+	else
+		BUG();
+}
+
 void dmar_msi_unmask(struct irq_data *data)
 {
 	struct intel_iommu *iommu = irq_data_get_irq_handler_data(data);
+	int reg = dmar_msi_reg(iommu, data->irq);
 	unsigned long flag;
 
 	/* unmask it */
 	raw_spin_lock_irqsave(&iommu->register_lock, flag);
-	writel(0, iommu->reg + DMAR_FECTL_REG);
+	writel(0, iommu->reg + reg);
 	/* Read a reg to force flush the post write */
-	readl(iommu->reg + DMAR_FECTL_REG);
+	readl(iommu->reg + reg);
 	raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
 }
 
 void dmar_msi_mask(struct irq_data *data)
 {
-	unsigned long flag;
 	struct intel_iommu *iommu = irq_data_get_irq_handler_data(data);
+	int reg = dmar_msi_reg(iommu, data->irq);
+	unsigned long flag;
 
 	/* mask it */
 	raw_spin_lock_irqsave(&iommu->register_lock, flag);
-	writel(DMA_FECTL_IM, iommu->reg + DMAR_FECTL_REG);
+	writel(DMA_FECTL_IM, iommu->reg + reg);
 	/* Read a reg to force flush the post write */
-	readl(iommu->reg + DMAR_FECTL_REG);
+	readl(iommu->reg + reg);
 	raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
 }
 
 void dmar_msi_write(int irq, struct msi_msg *msg)
 {
 	struct intel_iommu *iommu = irq_get_handler_data(irq);
+	int reg = dmar_msi_reg(iommu, irq);
 	unsigned long flag;
 
 	raw_spin_lock_irqsave(&iommu->register_lock, flag);
-	writel(msg->data, iommu->reg + DMAR_FEDATA_REG);
-	writel(msg->address_lo, iommu->reg + DMAR_FEADDR_REG);
-	writel(msg->address_hi, iommu->reg + DMAR_FEUADDR_REG);
+	writel(msg->data, iommu->reg + reg + 4);
+	writel(msg->address_lo, iommu->reg + reg + 8);
+	writel(msg->address_hi, iommu->reg + reg + 12);
 	raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
 }
 
 void dmar_msi_read(int irq, struct msi_msg *msg)
 {
 	struct intel_iommu *iommu = irq_get_handler_data(irq);
+	int reg = dmar_msi_reg(iommu, irq);
 	unsigned long flag;
 
 	raw_spin_lock_irqsave(&iommu->register_lock, flag);
-	msg->data = readl(iommu->reg + DMAR_FEDATA_REG);
-	msg->address_lo = readl(iommu->reg + DMAR_FEADDR_REG);
-	msg->address_hi = readl(iommu->reg + DMAR_FEUADDR_REG);
+	msg->data = readl(iommu->reg + reg + 4);
+	msg->address_lo = readl(iommu->reg + reg + 8);
+	msg->address_hi = readl(iommu->reg + reg + 12);
 	raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
}
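The rewritten dmar_msi_*() helpers above serve both interrupts because the fault-event and page-request-event register blocks share one layout: the data, address and upper-address registers sit at offsets +4, +8 and +12 from their control register, which is what the "reg + 4/8/12" arithmetic relies on. A compile-time sketch of that assumption (editorial, not part of the patch; the DMAR_FE* offsets are 0x38/0x3c/0x40/0x44 in the unchanged part of intel-iommu.h):

static inline void dmar_msi_layout_checks(void)
{
	/* Fault event block: DMAR_FECTL_REG is 0x38 */
	BUILD_BUG_ON(DMAR_FEDATA_REG != DMAR_FECTL_REG + 4);
	BUILD_BUG_ON(DMAR_FEADDR_REG != DMAR_FECTL_REG + 8);
	BUILD_BUG_ON(DMAR_FEUADDR_REG != DMAR_FECTL_REG + 12);
	/* Page request event block: DMAR_PECTL_REG is 0xe0, added above */
	BUILD_BUG_ON(DMAR_PEDATA_REG != DMAR_PECTL_REG + 4);
	BUILD_BUG_ON(DMAR_PEADDR_REG != DMAR_PECTL_REG + 8);
	BUILD_BUG_ON(DMAR_PEUADDR_REG != DMAR_PECTL_REG + 12);
}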
drivers/iommu/intel-iommu.c: diff collapsed in the original page view
drivers/iommu/intel-svm.c (new file): diff collapsed in the original page view
include/linux/dma_remapping.h
@@ -20,6 +20,14 @@
 #define CONTEXT_TT_MULTI_LEVEL	0
 #define CONTEXT_TT_DEV_IOTLB	1
 #define CONTEXT_TT_PASS_THROUGH 2
+/* Extended context entry types */
+#define CONTEXT_TT_PT_PASID	4
+#define CONTEXT_TT_PT_PASID_DEV_IOTLB 5
+#define CONTEXT_TT_MASK		(7ULL << 2)
+
+#define CONTEXT_DINVE		(1ULL << 8)
+#define CONTEXT_PRS		(1ULL << 9)
+#define CONTEXT_PASIDE		(1ULL << 11)
 
 struct intel_iommu;
 struct dmar_domain;
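The new CONTEXT_TT_* values extend the translation-type field of the extended context entry, which occupies bits 4:2 of the low quadword (hence the 7ULL << 2 mask), and DINVE/PRS/PASIDE add enable bits for deferred invalidation, page requests and PASIDs. An editorial sketch of how the bits combine (the real context-entry setup lives in the collapsed intel-iommu.c diff):

/* Editorial sketch: low word of an extended context entry set up for
 * pass-through translation with PASID and page-request support.
 * Present bit, address bits and the rest are omitted for brevity. */
static u64 example_ext_context_lo(void)
{
	u64 lo = 0;

	lo &= ~CONTEXT_TT_MASK;
	lo |= (u64)CONTEXT_TT_PT_PASID << 2;	/* translation type, bits 4:2 */
	lo |= CONTEXT_PASIDE;			/* PASID enable */
	lo |= CONTEXT_PRS;			/* page request support */
	lo |= CONTEXT_DINVE;			/* deferred invalidate enable */

	return lo;
}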
include/linux/intel-iommu.h
 /*
- * Copyright (c) 2006, Intel Corporation.
+ * Copyright © 2006-2015, Intel Corporation.
+ *
+ * Authors: Ashok Raj <ashok.raj@intel.com>
+ *          Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
+ *          David Woodhouse <David.Woodhouse@intel.com>
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -13,10 +17,6 @@
  * You should have received a copy of the GNU General Public License along with
  * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
  * Place - Suite 330, Boston, MA 02111-1307 USA.
- *
- * Copyright (C) 2006-2008 Intel Corporation
- * Author: Ashok Raj <ashok.raj@intel.com>
- * Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
  */
 
 #ifndef _INTEL_IOMMU_H_
@@ -25,7 +25,10 @@
 #include <linux/types.h>
 #include <linux/iova.h>
 #include <linux/io.h>
+#include <linux/idr.h>
 #include <linux/dma_remapping.h>
+#include <linux/mmu_notifier.h>
+#include <linux/list.h>
 #include <asm/cacheflush.h>
 #include <asm/iommu.h>
@@ -57,16 +60,21 @@
 #define DMAR_IQA_REG	0x90	/* Invalidation queue addr register */
 #define DMAR_ICS_REG	0x9c	/* Invalidation complete status register */
 #define DMAR_IRTA_REG	0xb8	/* Interrupt remapping table addr register */
+#define DMAR_PQH_REG	0xc0	/* Page request queue head register */
+#define DMAR_PQT_REG	0xc8	/* Page request queue tail register */
+#define DMAR_PQA_REG	0xd0	/* Page request queue address register */
+#define DMAR_PRS_REG	0xdc	/* Page request status register */
+#define DMAR_PECTL_REG	0xe0	/* Page request event control register */
+#define DMAR_PEDATA_REG	0xe4	/* Page request event interrupt data register */
+#define DMAR_PEADDR_REG	0xe8	/* Page request event interrupt addr register */
+#define DMAR_PEUADDR_REG 0xec	/* Page request event Upper address register */
 
 #define OFFSET_STRIDE		(9)
-/*
-#define dmar_readl(dmar, reg) readl(dmar + reg)
-#define dmar_readq(dmar, reg) ({ \
-		u32 lo, hi; \
-		lo = readl(dmar + reg); \
-		hi = readl(dmar + reg + 4); \
-		(((u64) hi) << 32) + lo; })
-*/
+
+#ifdef CONFIG_64BIT
+#define dmar_readq(a) readq(a)
+#define dmar_writeq(a,v) writeq(v,a)
+#else
 static inline u64 dmar_readq(void __iomem *addr)
 {
 	u32 lo, hi;
@@ -80,6 +88,7 @@ static inline void dmar_writeq(void __iomem *addr, u64 val)
 	writel((u32)val, addr);
 	writel((u32)(val >> 32), addr + 4);
 }
+#endif
 
 #define DMAR_VER_MAJOR(v)	(((v) & 0xf0) >> 4)
 #define DMAR_VER_MINOR(v)	((v) & 0x0f)
@@ -123,7 +132,7 @@ static inline void dmar_writeq(void __iomem *addr, u64 val)
 #define ecap_srs(e)		((e >> 31) & 0x1)
 #define ecap_ers(e)		((e >> 30) & 0x1)
 #define ecap_prs(e)		((e >> 29) & 0x1)
-#define ecap_pasid(e)		((e >> 28) & 0x1)
+/* PASID support used to be on bit 28 */
+#define ecap_broken_pasid(e)	((e >> 28) & 0x1)
 #define ecap_dis(e)		((e >> 27) & 0x1)
 #define ecap_nest(e)		((e >> 26) & 0x1)
 #define ecap_mts(e)		((e >> 25) & 0x1)
@@ -253,6 +262,11 @@ enum {
 #define QI_DIOTLB_TYPE		0x3
 #define QI_IEC_TYPE		0x4
 #define QI_IWD_TYPE		0x5
+#define QI_EIOTLB_TYPE		0x6
+#define QI_PC_TYPE		0x7
+#define QI_DEIOTLB_TYPE		0x8
+#define QI_PGRP_RESP_TYPE	0x9
+#define QI_PSTRM_RESP_TYPE	0xa
 
 #define QI_IEC_SELECTIVE	(((u64)1) << 4)
 #define QI_IEC_IIDEX(idx)	(((u64)(idx & 0xffff) << 32))
@@ -280,6 +294,53 @@ enum {
 #define QI_DEV_IOTLB_SIZE	1
 #define QI_DEV_IOTLB_MAX_INVS	32
 
+#define QI_PC_PASID(pasid)	(((u64)pasid) << 32)
+#define QI_PC_DID(did)		(((u64)did) << 16)
+#define QI_PC_GRAN(gran)	(((u64)gran) << 4)
+
+#define QI_PC_ALL_PASIDS	(QI_PC_TYPE | QI_PC_GRAN(0))
+#define QI_PC_PASID_SEL		(QI_PC_TYPE | QI_PC_GRAN(1))
+
+#define QI_EIOTLB_ADDR(addr)	((u64)(addr) & VTD_PAGE_MASK)
+#define QI_EIOTLB_GL(gl)	(((u64)gl) << 7)
+#define QI_EIOTLB_IH(ih)	(((u64)ih) << 6)
+#define QI_EIOTLB_AM(am)	(((u64)am))
+#define QI_EIOTLB_PASID(pasid)	(((u64)pasid) << 32)
+#define QI_EIOTLB_DID(did)	(((u64)did) << 16)
+#define QI_EIOTLB_GRAN(gran)	(((u64)gran) << 4)
+
+#define QI_DEV_EIOTLB_ADDR(a)	((u64)(a) & VTD_PAGE_MASK)
+#define QI_DEV_EIOTLB_SIZE	(((u64)1) << 11)
+#define QI_DEV_EIOTLB_GLOB(g)	((u64)g)
+#define QI_DEV_EIOTLB_PASID(p)	(((u64)p) << 32)
+#define QI_DEV_EIOTLB_SID(sid)	((u64)((sid) & 0xffff) << 32)
+#define QI_DEV_EIOTLB_QDEP(qd)	(((qd) & 0x1f) << 16)
+#define QI_DEV_EIOTLB_MAX_INVS	32
+
+#define QI_PGRP_IDX(idx)	(((u64)(idx)) << 55)
+#define QI_PGRP_PRIV(priv)	(((u64)(priv)) << 32)
+#define QI_PGRP_RESP_CODE(res)	((u64)(res))
+#define QI_PGRP_PASID(pasid)	(((u64)(pasid)) << 32)
+#define QI_PGRP_DID(did)	(((u64)(did)) << 16)
+#define QI_PGRP_PASID_P(p)	(((u64)(p)) << 4)
+
+#define QI_PSTRM_ADDR(addr)	(((u64)(addr)) & VTD_PAGE_MASK)
+#define QI_PSTRM_DEVFN(devfn)	(((u64)(devfn)) << 4)
+#define QI_PSTRM_RESP_CODE(res)	((u64)(res))
+#define QI_PSTRM_IDX(idx)	(((u64)(idx)) << 55)
+#define QI_PSTRM_PRIV(priv)	(((u64)(priv)) << 32)
+#define QI_PSTRM_BUS(bus)	(((u64)(bus)) << 24)
+#define QI_PSTRM_PASID(pasid)	(((u64)(pasid)) << 4)
+
+#define QI_RESP_SUCCESS		0x0
+#define QI_RESP_INVALID		0x1
+#define QI_RESP_FAILURE		0xf
+
+#define QI_GRAN_ALL_ALL		0
+#define QI_GRAN_NONG_ALL	1
+#define QI_GRAN_NONG_PASID	2
+#define QI_GRAN_PSI_PASID	3
+
 struct qi_desc {
 	u64 low, high;
 };
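The new descriptor fields above pack into the two u64 words of a qi_desc. For example, invalidating the PASID-cache entry for a single (domain, PASID) pair combines the type, granularity, domain-ID and PASID macros in the low word. An editorial sketch using the existing qi_submit_sync() helper (the real users live in the collapsed intel-svm.c diff):

/* Editorial sketch: flush the PASID-cache entry for one
 * (domain, PASID) pair. QI_PC_PASID_SEL already carries QI_PC_TYPE. */
static void example_flush_pasid(struct intel_iommu *iommu, u16 did, int pasid)
{
	struct qi_desc desc;

	desc.low = QI_PC_PASID(pasid) | QI_PC_DID(did) | QI_PC_PASID_SEL;
	desc.high = 0;
	qi_submit_sync(&desc, iommu);
}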
@@ -327,6 +388,10 @@ enum {
 #define VTD_FLAG_TRANS_PRE_ENABLED	(1 << 0)
 #define VTD_FLAG_IRQ_REMAP_PRE_ENABLED	(1 << 1)
 
+struct pasid_entry;
+struct pasid_state_entry;
+struct page_req_dsc;
+
 struct intel_iommu {
 	void __iomem	*reg; /* Pointer to hardware regs, virtual addr */
 	u64		reg_phys; /* physical address of hw register set */
@@ -338,7 +403,7 @@ struct intel_iommu {
 	int		seq_id;	/* sequence id of the iommu */
 	int		agaw; /* agaw of this iommu */
 	int		msagaw; /* max sagaw of this iommu */
-	unsigned int	irq;
+	unsigned int	irq, pr_irq;
 	u16		segment;     /* PCI segment# */
 	unsigned char	name[13];    /* Device Name */
@@ -349,6 +414,18 @@ struct intel_iommu {
 	struct root_entry *root_entry; /* virtual address */
 
 	struct iommu_flush flush;
 #endif
+#ifdef CONFIG_INTEL_IOMMU_SVM
+	/* These are large and need to be contiguous, so we allocate just
+	 * one for now. We'll maybe want to rethink that if we truly give
+	 * devices away to userspace processes (e.g. for DPDK) and don't
+	 * want to trust that userspace will use *only* the PASID it was
+	 * told to. But while it's all driver-arbitrated, we're fine. */
+	struct pasid_entry *pasid_table;
+	struct pasid_state_entry *pasid_state_table;
+	struct page_req_dsc *prq;
+	unsigned char prq_name[16];	/* Name for PRQ interrupt */
+	struct idr pasid_idr;
+#endif
 	struct q_inval  *qi;            /* Queued invalidation info */
 	u32 *iommu_state; /* Store iommu states between suspend and resume.*/
@@ -389,6 +466,38 @@ extern int qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu);
 
 extern int dmar_ir_support(void);
 
+#ifdef CONFIG_INTEL_IOMMU_SVM
+extern int intel_svm_alloc_pasid_tables(struct intel_iommu *iommu);
+extern int intel_svm_free_pasid_tables(struct intel_iommu *iommu);
+extern int intel_svm_enable_prq(struct intel_iommu *iommu);
+extern int intel_svm_finish_prq(struct intel_iommu *iommu);
+
+struct svm_dev_ops;
+
+struct intel_svm_dev {
+	struct list_head list;
+	struct rcu_head rcu;
+	struct device *dev;
+	struct svm_dev_ops *ops;
+	int users;
+	u16 did;
+	u16 dev_iotlb:1;
+	u16 sid, qdep;
+};
+
+struct intel_svm {
+	struct mmu_notifier notifier;
+	struct mm_struct *mm;
+	struct intel_iommu *iommu;
+	int flags;
+	int pasid;
+	struct list_head devs;
+};
+
+extern int intel_iommu_enable_pasid(struct intel_iommu *iommu, struct intel_svm_dev *sdev);
+extern struct intel_iommu *intel_svm_device_to_iommu(struct device *dev);
+#endif
+
 extern const struct attribute_group *intel_iommu_groups[];
 
 #endif
include/linux/intel-svm.h (new file)

/*
 * Copyright © 2015 Intel Corporation.
 *
 * Authors: David Woodhouse <David.Woodhouse@intel.com>
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 */

#ifndef __INTEL_SVM_H__
#define __INTEL_SVM_H__

struct device;

struct svm_dev_ops {
	void (*fault_cb)(struct device *dev, int pasid, u64 address,
			 u32 private, int rwxp, int response);
};

/* Values for rwxp in fault_cb callback */
#define SVM_REQ_READ	(1<<3)
#define SVM_REQ_WRITE	(1<<2)
#define SVM_REQ_EXEC	(1<<1)
#define SVM_REQ_PRIV	(1<<0)
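When a driver passes an svm_dev_ops table to intel_svm_bind_mm() (declared below), fault_cb is invoked for page requests the IOMMU core could not service, with rwxp carrying a mask of the SVM_REQ_* bits above. An editorial sketch of such a callback (all example_* names are hypothetical, not from the patch):

/* Hypothetical driver callback: log an unhandled page request.
 * rwxp is a mask of the SVM_REQ_* bits defined above. */
static void example_fault_cb(struct device *dev, int pasid, u64 address,
			     u32 private, int rwxp, int response)
{
	dev_warn(dev, "SVM fault: PASID %d %s%s%s%s at 0x%llx (resp %d)\n",
		 pasid,
		 (rwxp & SVM_REQ_READ)  ? "r" : "-",
		 (rwxp & SVM_REQ_WRITE) ? "w" : "-",
		 (rwxp & SVM_REQ_EXEC)  ? "x" : "-",
		 (rwxp & SVM_REQ_PRIV)  ? "p" : "-",
		 address, response);
}

static struct svm_dev_ops example_svm_ops = {
	.fault_cb = example_fault_cb,
};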
/*
 * The SVM_FLAG_PRIVATE_PASID flag requests a PASID which is *not* the "main"
 * PASID for the current process. Even if a PASID already exists, a new one
 * will be allocated. And the PASID allocated with SVM_FLAG_PRIVATE_PASID
 * will not be given to subsequent callers. This facility allows a driver to
 * disambiguate between multiple device contexts which access the same MM,
 * if there is no other way to do so. It should be used sparingly, if at all.
 */
#define SVM_FLAG_PRIVATE_PASID	(1<<0)

/*
 * The SVM_FLAG_SUPERVISOR_MODE flag requests a PASID which can be used only
 * for access to kernel addresses. No IOTLB flushes are automatically done
 * for kernel mappings; it is valid only for access to the kernel's static
 * 1:1 mapping of physical memory — not to vmalloc or even module mappings.
 * A future API addition may permit the use of such ranges, by means of an
 * explicit IOTLB flush call (akin to the DMA API's unmap method).
 *
 * It is unlikely that we will ever hook into flush_tlb_kernel_range() to
 * do such IOTLB flushes automatically.
 */
#define SVM_FLAG_SUPERVISOR_MODE	(1<<1)
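A supervisor-mode bind might look like the following editorial sketch (how the PASID is then programmed into the device is device-specific and not shown):

/* Editorial sketch: allocate a supervisor PASID for access to the
 * kernel's static 1:1 physical mapping. */
static int example_bind_supervisor(struct device *dev)
{
	int pasid, ret;

	ret = intel_svm_bind_mm(dev, &pasid, SVM_FLAG_SUPERVISOR_MODE, NULL);
	if (ret)
		return ret;	/* SVM absent, or supervisor PASIDs unsupported */

	/* program 'pasid' into the device by device-specific means */
	return 0;
}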
#ifdef CONFIG_INTEL_IOMMU_SVM

/**
 * intel_svm_bind_mm() - Bind the current process to a PASID
 * @dev:	Device to be granted access
 * @pasid:	Address for allocated PASID
 * @flags:	Flags. Later for requesting supervisor mode, etc.
 * @ops:	Callbacks to device driver
 *
 * This function attempts to enable PASID support for the given device.
 * If the @pasid argument is non-%NULL, a PASID is allocated for access
 * to the MM of the current process.
 *
 * By using a %NULL value for the @pasid argument, this function can
 * be used to simply validate that PASID support is available for the
 * given device — i.e. that it is behind an IOMMU which has the
 * requisite support, and is enabled.
 *
 * Page faults are handled transparently by the IOMMU code, and there
 * should be no need for the device driver to be involved. If a page
 * fault cannot be handled (i.e. is an invalid address rather than
 * just needs paging in), then the page request will be completed by
 * the core IOMMU code with appropriate status, and the device itself
 * can then report the resulting fault to its driver via whatever
 * mechanism is appropriate.
 *
 * Multiple calls from the same process may result in the same PASID
 * being re-used. A reference count is kept.
 */
extern int intel_svm_bind_mm(struct device *dev, int *pasid, int flags,
			     struct svm_dev_ops *ops);

/**
 * intel_svm_unbind_mm() - Unbind a specified PASID
 * @dev:	Device for which PASID was allocated
 * @pasid:	PASID value to be unbound
 *
 * This function allows a PASID to be retired when the device no
 * longer requires access to the address space of a given process.
 *
 * If the use count for the PASID in question reaches zero, the
 * PASID is revoked and may no longer be used by hardware.
 *
 * Device drivers are required to ensure that no access (including
 * page requests) is currently outstanding for the PASID in question,
 * before calling this function.
 */
extern int intel_svm_unbind_mm(struct device *dev, int pasid);

#else /* CONFIG_INTEL_IOMMU_SVM */

static inline int intel_svm_bind_mm(struct device *dev, int *pasid,
				    int flags, struct svm_dev_ops *ops)
{
	return -ENOSYS;
}

static inline int intel_svm_unbind_mm(struct device *dev, int pasid)
{
	BUG();
}

#endif /* CONFIG_INTEL_IOMMU_SVM */

#define intel_svm_available(dev) (!intel_svm_bind_mm((dev), NULL, 0, NULL))

#endif /* __INTEL_SVM_H__ */
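Taken together, a driver's use of this API might look like the following editorial sketch (example_* names are illustrative, not from the patch; example_svm_ops is the callback table sketched earlier, and the i915 changes mentioned in the pull message are the real in-tree user):

/* Editorial sketch: typical bind/unbind lifecycle for one process. */
static int example_attach_current_process(struct device *dev, int *out_pasid)
{
	int pasid, ret;

	if (!intel_svm_available(dev))	/* probes via a NULL-pasid bind */
		return -ENODEV;

	ret = intel_svm_bind_mm(dev, &pasid, 0, &example_svm_ops);
	if (ret)
		return ret;

	*out_pasid = pasid;	/* the device tags its DMA with this PASID */
	return 0;
}

static void example_detach_process(struct device *dev, int pasid)
{
	/* all DMA and page requests for this PASID must be quiesced first */
	intel_svm_unbind_mm(dev, pasid);
}

Repeated binds from the same process return the same PASID with a raised reference count, so attach/detach pairs nest naturally.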