Commit 405d7ca5 authored by Linus Torvalds

Merge git://git.infradead.org/iommu-2.6

* git://git.infradead.org/iommu-2.6: (38 commits)
  intel-iommu: Don't keep freeing page zero in dma_pte_free_pagetable()
  intel-iommu: Introduce first_pte_in_page() to simplify PTE-setting loops
  intel-iommu: Use cmpxchg64_local() for setting PTEs
  intel-iommu: Warn about unmatched unmap requests
  intel-iommu: Kill superfluous mapping_lock
  intel-iommu: Ensure that PTE writes are 64-bit atomic, even on i386
  intel-iommu: Make iommu=pt work on i386 too
  intel-iommu: Performance improvement for dma_pte_free_pagetable()
  intel-iommu: Don't free too much in dma_pte_free_pagetable()
  intel-iommu: dump mappings but don't die on pte already set
  intel-iommu: Combine domain_pfn_mapping() and domain_sg_mapping()
  intel-iommu: Introduce domain_sg_mapping() to speed up intel_map_sg()
  intel-iommu: Simplify __intel_alloc_iova()
  intel-iommu: Performance improvement for domain_pfn_mapping()
  intel-iommu: Performance improvement for dma_pte_clear_range()
  intel-iommu: Clean up iommu_domain_identity_map()
  intel-iommu: Remove last use of PHYSICAL_PAGE_MASK, for reserving PCI BARs
  intel-iommu: Make iommu_flush_iotlb_psi() take pfn as argument
  intel-iommu: Change aligned_size() to aligned_nrpages()
  intel-iommu: Clean up intel_map_sg(), remove domain_page_mapping()
  ...
parents 7c5371c4 6a43e574
......@@ -1913,25 +1913,14 @@ config DMAR_DEFAULT_ON
recommended you say N here while the DMAR code remains
experimental.
config DMAR_GFX_WA
def_bool y
prompt "Support for Graphics workaround"
depends on DMAR
---help---
Current Graphics drivers tend to use physical address
for DMA and avoid using DMA APIs. Setting this config
option permits the IOMMU driver to set a unity map for
all the OS-visible memory. Hence the driver can continue
to use physical addresses for DMA.
config DMAR_FLOPPY_WA
def_bool y
depends on DMAR
---help---
Floppy disk drivers are know to bypass DMA API calls
Floppy disk drivers are known to bypass DMA API calls
thereby failing to work when IOMMU is enabled. This
workaround will setup a 1:1 mapping for the first
16M to make floppy (an ISA device) work.
16MiB to make floppy (an ISA device) work.
config INTR_REMAP
bool "Support for Interrupt Remapping (EXPERIMENTAL)"
......
......@@ -211,11 +211,11 @@ static __init int iommu_setup(char *p)
#ifdef CONFIG_SWIOTLB
if (!strncmp(p, "soft", 4))
swiotlb = 1;
#endif
if (!strncmp(p, "pt", 2)) {
iommu_pass_through = 1;
return 1;
}
#endif
gart_parse_options(p);
......
......@@ -56,14 +56,32 @@
#define MAX_AGAW_WIDTH 64
#define DOMAIN_MAX_ADDR(gaw) ((((u64)1) << gaw) - 1)
#define DOMAIN_MAX_PFN(gaw) ((((u64)1) << (gaw-VTD_PAGE_SHIFT)) - 1)
#define IOVA_PFN(addr) ((addr) >> PAGE_SHIFT)
#define DMA_32BIT_PFN IOVA_PFN(DMA_BIT_MASK(32))
#define DMA_64BIT_PFN IOVA_PFN(DMA_BIT_MASK(64))
#ifndef PHYSICAL_PAGE_MASK
#define PHYSICAL_PAGE_MASK PAGE_MASK
#endif
/* VT-d pages must always be _smaller_ than MM pages. Otherwise things
are never going to work. */
static inline unsigned long dma_to_mm_pfn(unsigned long dma_pfn)
{
return dma_pfn >> (PAGE_SHIFT - VTD_PAGE_SHIFT);
}
static inline unsigned long mm_to_dma_pfn(unsigned long mm_pfn)
{
return mm_pfn << (PAGE_SHIFT - VTD_PAGE_SHIFT);
}
static inline unsigned long page_to_dma_pfn(struct page *pg)
{
return mm_to_dma_pfn(page_to_pfn(pg));
}
static inline unsigned long virt_to_dma_pfn(void *p)
{
return page_to_dma_pfn(virt_to_page(p));
}
/* global iommu list, set NULL for ignored DMAR units */
static struct intel_iommu **g_iommus;
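
(Aside: the new dma_to_mm_pfn()/mm_to_dma_pfn() helpers above are pure shifts between the kernel's PAGE_SHIFT granularity and VT-d's fixed 4KiB granularity. A minimal user-space sketch of the same arithmetic, assuming 4KiB VT-d pages and a hypothetical 64KiB kernel page size:

#include <stdio.h>

#define VTD_PAGE_SHIFT 12   /* VT-d always works in 4KiB pages */
#define MM_PAGE_SHIFT  16   /* assumed 64KiB kernel pages, for illustration only */

static unsigned long mm_to_dma_pfn(unsigned long mm_pfn)
{
	/* one 64KiB kernel page spans 16 VT-d pages */
	return mm_pfn << (MM_PAGE_SHIFT - VTD_PAGE_SHIFT);
}

static unsigned long dma_to_mm_pfn(unsigned long dma_pfn)
{
	return dma_pfn >> (MM_PAGE_SHIFT - VTD_PAGE_SHIFT);
}

int main(void)
{
	printf("mm pfn 3   -> dma pfn %lu\n", mm_to_dma_pfn(3));  /* 48 */
	printf("dma pfn 50 -> mm pfn  %lu\n", dma_to_mm_pfn(50)); /* 3  */
	return 0;
}

When PAGE_SHIFT equals VTD_PAGE_SHIFT, the common x86 case, the shift count is zero and both helpers are identity functions.)
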
......@@ -204,12 +222,17 @@ static inline void dma_set_pte_prot(struct dma_pte *pte, unsigned long prot)
static inline u64 dma_pte_addr(struct dma_pte *pte)
{
return (pte->val & VTD_PAGE_MASK);
#ifdef CONFIG_64BIT
return pte->val & VTD_PAGE_MASK;
#else
/* Must have a full atomic 64-bit read */
return __cmpxchg64(pte, 0ULL, 0ULL) & VTD_PAGE_MASK;
#endif
}
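
(The i386 branch above cannot read a 64-bit PTE with a plain load without risking a torn value, so it leans on cmpxchg64: comparing against 0 and "swapping in" 0 either fails, returning the current contents atomically, or stores back the 0 that was already there. A minimal user-space sketch of that trick, using the GCC __sync builtin rather than the kernel's cmpxchg64; on 32-bit x86 this needs cmpxchg8b, i.e. -march=i586 or later:

#include <stdint.h>
#include <stdio.h>

/* Atomic 64-bit read built from compare-and-swap: if *p != 0 the CAS
 * fails but still returns the old value in one atomic operation; if
 * *p == 0, writing 0 back changes nothing.  Either way the read is
 * never torn, even on a 32-bit CPU. */
static uint64_t atomic_read64(volatile uint64_t *p)
{
	return __sync_val_compare_and_swap(p, 0ULL, 0ULL);
}

int main(void)
{
	volatile uint64_t pte = 0x123456789000ULL | 3;	/* address bits + R/W */
	printf("pte = %#llx\n", (unsigned long long)atomic_read64(&pte));
	return 0;
}

)
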
static inline void dma_set_pte_addr(struct dma_pte *pte, u64 addr)
static inline void dma_set_pte_pfn(struct dma_pte *pte, unsigned long pfn)
{
pte->val |= (addr & VTD_PAGE_MASK);
pte->val |= (uint64_t)pfn << VTD_PAGE_SHIFT;
}
static inline bool dma_pte_present(struct dma_pte *pte)
......@@ -217,6 +240,11 @@ static inline bool dma_pte_present(struct dma_pte *pte)
return (pte->val & 3) != 0;
}
static inline int first_pte_in_page(struct dma_pte *pte)
{
return !((unsigned long)pte & ~VTD_PAGE_MASK);
}
/*
* This domain is a statically identity mapping domain.
* 1. This domain creats a static 1:1 mapping to all usable memory.
......@@ -244,7 +272,6 @@ struct dmar_domain {
struct iova_domain iovad; /* iova's that belong to this domain */
struct dma_pte *pgd; /* virtual address */
spinlock_t mapping_lock; /* page table lock */
int gaw; /* max guest address width */
/* adjusted guest address width, 0 is level 2 30-bit */
......@@ -648,79 +675,77 @@ static inline int width_to_agaw(int width)
static inline unsigned int level_to_offset_bits(int level)
{
return (12 + (level - 1) * LEVEL_STRIDE);
return (level - 1) * LEVEL_STRIDE;
}
static inline int address_level_offset(u64 addr, int level)
static inline int pfn_level_offset(unsigned long pfn, int level)
{
return ((addr >> level_to_offset_bits(level)) & LEVEL_MASK);
return (pfn >> level_to_offset_bits(level)) & LEVEL_MASK;
}
static inline u64 level_mask(int level)
static inline unsigned long level_mask(int level)
{
return ((u64)-1 << level_to_offset_bits(level));
return -1UL << level_to_offset_bits(level);
}
static inline u64 level_size(int level)
static inline unsigned long level_size(int level)
{
return ((u64)1 << level_to_offset_bits(level));
return 1UL << level_to_offset_bits(level);
}
static inline u64 align_to_level(u64 addr, int level)
static inline unsigned long align_to_level(unsigned long pfn, int level)
{
return ((addr + level_size(level) - 1) & level_mask(level));
return (pfn + level_size(level) - 1) & level_mask(level);
}
static struct dma_pte * addr_to_dma_pte(struct dmar_domain *domain, u64 addr)
static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain,
unsigned long pfn)
{
int addr_width = agaw_to_width(domain->agaw);
int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
struct dma_pte *parent, *pte = NULL;
int level = agaw_to_level(domain->agaw);
int offset;
unsigned long flags;
BUG_ON(!domain->pgd);
addr &= (((u64)1) << addr_width) - 1;
BUG_ON(addr_width < BITS_PER_LONG && pfn >> addr_width);
parent = domain->pgd;
spin_lock_irqsave(&domain->mapping_lock, flags);
while (level > 0) {
void *tmp_page;
offset = address_level_offset(addr, level);
offset = pfn_level_offset(pfn, level);
pte = &parent[offset];
if (level == 1)
break;
if (!dma_pte_present(pte)) {
uint64_t pteval;
tmp_page = alloc_pgtable_page();
if (!tmp_page) {
spin_unlock_irqrestore(&domain->mapping_lock,
flags);
if (!tmp_page)
return NULL;
}
domain_flush_cache(domain, tmp_page, PAGE_SIZE);
dma_set_pte_addr(pte, virt_to_phys(tmp_page));
/*
* high level table always sets r/w, last level page
* table control read/write
*/
dma_set_pte_readable(pte);
dma_set_pte_writable(pte);
domain_flush_cache(domain, tmp_page, VTD_PAGE_SIZE);
pteval = (virt_to_dma_pfn(tmp_page) << VTD_PAGE_SHIFT) | DMA_PTE_READ | DMA_PTE_WRITE;
if (cmpxchg64(&pte->val, 0ULL, pteval)) {
/* Someone else set it while we were thinking; use theirs. */
free_pgtable_page(tmp_page);
} else {
dma_pte_addr(pte);
domain_flush_cache(domain, pte, sizeof(*pte));
}
}
parent = phys_to_virt(dma_pte_addr(pte));
level--;
}
spin_unlock_irqrestore(&domain->mapping_lock, flags);
return pte;
}
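
(This is also where the old mapping_lock disappears: instead of taking a spinlock around the table walk, pfn_to_dma_pte() now publishes a freshly allocated table page with cmpxchg64() and simply frees its own copy if another CPU got there first. A user-space sketch of that install-once pattern, with illustrative names (install_table, slot) standing in for the kernel code:

#include <stdint.h>
#include <stdlib.h>
#include <stdio.h>

/* Lock-free "install once": build the new object, then try to publish
 * it with a compare-and-swap against an empty slot.  If another thread
 * won the race, throw ours away and use the winner's. */
static uint64_t *install_table(uint64_t **slot)
{
	uint64_t *new_tbl = calloc(512, sizeof(uint64_t));

	if (!new_tbl)
		return NULL;

	if (!__sync_bool_compare_and_swap(slot, NULL, new_tbl)) {
		/* Someone else set it while we were thinking; use theirs. */
		free(new_tbl);
	}
	return *slot;
}

int main(void)
{
	uint64_t *slot = NULL;
	uint64_t *tbl = install_table(&slot);

	printf("table %p installed in slot holding %p\n", (void *)tbl, (void *)slot);
	return 0;
}

)
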
/* return address's pte at specific level */
static struct dma_pte *dma_addr_level_pte(struct dmar_domain *domain, u64 addr,
static struct dma_pte *dma_pfn_level_pte(struct dmar_domain *domain,
unsigned long pfn,
int level)
{
struct dma_pte *parent, *pte = NULL;
......@@ -729,7 +754,7 @@ static struct dma_pte *dma_addr_level_pte(struct dmar_domain *domain, u64 addr,
parent = domain->pgd;
while (level <= total) {
offset = address_level_offset(addr, total);
offset = pfn_level_offset(pfn, total);
pte = &parent[offset];
if (level == total)
return pte;
......@@ -742,74 +767,82 @@ static struct dma_pte *dma_addr_level_pte(struct dmar_domain *domain, u64 addr,
return NULL;
}
/* clear one page's page table */
static void dma_pte_clear_one(struct dmar_domain *domain, u64 addr)
/* clear last level pte, a tlb flush should be followed */
static void dma_pte_clear_range(struct dmar_domain *domain,
unsigned long start_pfn,
unsigned long last_pfn)
{
struct dma_pte *pte = NULL;
int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
struct dma_pte *first_pte, *pte;
/* get last level pte */
pte = dma_addr_level_pte(domain, addr, 1);
BUG_ON(addr_width < BITS_PER_LONG && start_pfn >> addr_width);
BUG_ON(addr_width < BITS_PER_LONG && last_pfn >> addr_width);
if (pte) {
dma_clear_pte(pte);
domain_flush_cache(domain, pte, sizeof(*pte));
/* we don't need lock here; nobody else touches the iova range */
while (start_pfn <= last_pfn) {
first_pte = pte = dma_pfn_level_pte(domain, start_pfn, 1);
if (!pte) {
start_pfn = align_to_level(start_pfn + 1, 2);
continue;
}
}
/* clear last level pte, a tlb flush should be followed */
static void dma_pte_clear_range(struct dmar_domain *domain, u64 start, u64 end)
{
int addr_width = agaw_to_width(domain->agaw);
int npages;
start &= (((u64)1) << addr_width) - 1;
end &= (((u64)1) << addr_width) - 1;
/* in case it's partial page */
start &= PAGE_MASK;
end = PAGE_ALIGN(end);
npages = (end - start) / VTD_PAGE_SIZE;
do {
dma_clear_pte(pte);
start_pfn++;
pte++;
} while (start_pfn <= last_pfn && !first_pte_in_page(pte));
/* we don't need lock here, nobody else touches the iova range */
while (npages--) {
dma_pte_clear_one(domain, start);
start += VTD_PAGE_SIZE;
domain_flush_cache(domain, first_pte,
(void *)pte - (void *)first_pte);
}
}
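
(The rewritten dma_pte_clear_range() above clears runs of adjacent PTEs and issues one cache flush per page-table page instead of one per PTE; first_pte_in_page() spots the page boundary purely from the low bits of the PTE pointer. A small sketch of that boundary test, assuming 4KiB table pages:

#include <stdio.h>

#define VTD_PAGE_SHIFT 12
#define VTD_PAGE_MASK  (~((1UL << VTD_PAGE_SHIFT) - 1))

/* A PTE pointer with all of its low 12 bits clear sits at offset 0 of a
 * page-table page, i.e. the previous increment walked off the end of
 * the old page and it is time to flush what was written there. */
static int first_pte_in_page(const void *pte)
{
	return !((unsigned long)pte & ~VTD_PAGE_MASK);
}

int main(void)
{
	printf("%d\n", first_pte_in_page((void *)0x7f0000002000UL)); /* 1 */
	printf("%d\n", first_pte_in_page((void *)0x7f0000002038UL)); /* 0 */
	return 0;
}

)
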
/* free page table pages. last level pte should already be cleared */
static void dma_pte_free_pagetable(struct dmar_domain *domain,
u64 start, u64 end)
unsigned long start_pfn,
unsigned long last_pfn)
{
int addr_width = agaw_to_width(domain->agaw);
struct dma_pte *pte;
int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
struct dma_pte *first_pte, *pte;
int total = agaw_to_level(domain->agaw);
int level;
u64 tmp;
unsigned long tmp;
start &= (((u64)1) << addr_width) - 1;
end &= (((u64)1) << addr_width) - 1;
BUG_ON(addr_width < BITS_PER_LONG && start_pfn >> addr_width);
BUG_ON(addr_width < BITS_PER_LONG && last_pfn >> addr_width);
/* we don't need lock here, nobody else touches the iova range */
/* We don't need lock here; nobody else touches the iova range */
level = 2;
while (level <= total) {
tmp = align_to_level(start, level);
if (tmp >= end || (tmp + level_size(level) > end))
tmp = align_to_level(start_pfn, level);
/* If we can't even clear one PTE at this level, we're done */
if (tmp + level_size(level) - 1 > last_pfn)
return;
while (tmp < end) {
pte = dma_addr_level_pte(domain, tmp, level);
if (pte) {
free_pgtable_page(
phys_to_virt(dma_pte_addr(pte)));
while (tmp + level_size(level) - 1 <= last_pfn) {
first_pte = pte = dma_pfn_level_pte(domain, tmp, level);
if (!pte) {
tmp = align_to_level(tmp + 1, level + 1);
continue;
}
do {
if (dma_pte_present(pte)) {
free_pgtable_page(phys_to_virt(dma_pte_addr(pte)));
dma_clear_pte(pte);
domain_flush_cache(domain, pte, sizeof(*pte));
}
pte++;
tmp += level_size(level);
} while (!first_pte_in_page(pte) &&
tmp + level_size(level) - 1 <= last_pfn);
domain_flush_cache(domain, first_pte,
(void *)pte - (void *)first_pte);
}
level++;
}
/* free pgd */
if (start == 0 && end >= ((((u64)1) << addr_width) - 1)) {
if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
free_pgtable_page(domain->pgd);
domain->pgd = NULL;
}
......@@ -1035,11 +1068,11 @@ static void iommu_flush_dev_iotlb(struct dmar_domain *domain,
}
static void iommu_flush_iotlb_psi(struct intel_iommu *iommu, u16 did,
u64 addr, unsigned int pages)
unsigned long pfn, unsigned int pages)
{
unsigned int mask = ilog2(__roundup_pow_of_two(pages));
uint64_t addr = (uint64_t)pfn << VTD_PAGE_SHIFT;
BUG_ON(addr & (~VTD_PAGE_MASK));
BUG_ON(pages == 0);
/*
......@@ -1054,7 +1087,12 @@ static void iommu_flush_iotlb_psi(struct intel_iommu *iommu, u16 did,
else
iommu->flush.flush_iotlb(iommu, did, addr, mask,
DMA_TLB_PSI_FLUSH);
if (did)
/*
* In caching mode, domain ID 0 is reserved for non-present to present
* mapping flush. Device IOTLB doesn't need to be flushed in this case.
*/
if (!cap_caching_mode(iommu->cap) || did)
iommu_flush_dev_iotlb(iommu->domains[did], addr, mask);
}
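
(For reference, the mask computed at the top of iommu_flush_iotlb_psi() is the VT-d page-selective invalidation encoding: the requested page count is rounded up to a power of two and expressed as its log2. A sketch of just that encoding step — the matching alignment requirement on the address is not shown:

#include <stdio.h>

/* Page-selective invalidation covers 2^mask pages, so round the request
 * up to the next power of two and return the exponent. */
static unsigned int psi_mask(unsigned int pages)
{
	unsigned int mask = 0;

	while ((1U << mask) < pages)
		mask++;
	return mask;
}

int main(void)
{
	printf("pages=1  -> mask=%u\n", psi_mask(1));   /* 0: flush 1 page   */
	printf("pages=5  -> mask=%u\n", psi_mask(5));   /* 3: flush 8 pages  */
	printf("pages=16 -> mask=%u\n", psi_mask(16));  /* 4: flush 16 pages */
	return 0;
}

)
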
......@@ -1279,7 +1317,6 @@ static void dmar_init_reserved_ranges(void)
struct pci_dev *pdev = NULL;
struct iova *iova;
int i;
u64 addr, size;
init_iova_domain(&reserved_iova_list, DMA_32BIT_PFN);
......@@ -1302,12 +1339,9 @@ static void dmar_init_reserved_ranges(void)
r = &pdev->resource[i];
if (!r->flags || !(r->flags & IORESOURCE_MEM))
continue;
addr = r->start;
addr &= PHYSICAL_PAGE_MASK;
size = r->end - addr;
size = PAGE_ALIGN(size);
iova = reserve_iova(&reserved_iova_list, IOVA_PFN(addr),
IOVA_PFN(size + addr) - 1);
iova = reserve_iova(&reserved_iova_list,
IOVA_PFN(r->start),
IOVA_PFN(r->end));
if (!iova)
printk(KERN_ERR "Reserve iova failed\n");
}
......@@ -1341,7 +1375,6 @@ static int domain_init(struct dmar_domain *domain, int guest_width)
unsigned long sagaw;
init_iova_domain(&domain->iovad, DMA_32BIT_PFN);
spin_lock_init(&domain->mapping_lock);
spin_lock_init(&domain->iommu_lock);
domain_reserve_special_ranges(domain);
......@@ -1388,7 +1421,6 @@ static void domain_exit(struct dmar_domain *domain)
{
struct dmar_drhd_unit *drhd;
struct intel_iommu *iommu;
u64 end;
/* Domain 0 is reserved, so dont process it */
if (!domain)
......@@ -1397,14 +1429,12 @@ static void domain_exit(struct dmar_domain *domain)
domain_remove_dev_info(domain);
/* destroy iovas */
put_iova_domain(&domain->iovad);
end = DOMAIN_MAX_ADDR(domain->gaw);
end = end & (~PAGE_MASK);
/* clear ptes */
dma_pte_clear_range(domain, 0, end);
dma_pte_clear_range(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
/* free page tables */
dma_pte_free_pagetable(domain, 0, end);
dma_pte_free_pagetable(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
for_each_active_iommu(iommu, drhd)
if (test_bit(iommu->seq_id, &domain->iommu_bmp))
......@@ -1618,42 +1648,86 @@ static int domain_context_mapped(struct pci_dev *pdev)
tmp->devfn);
}
static int
domain_page_mapping(struct dmar_domain *domain, dma_addr_t iova,
u64 hpa, size_t size, int prot)
static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
struct scatterlist *sg, unsigned long phys_pfn,
unsigned long nr_pages, int prot)
{
u64 start_pfn, end_pfn;
struct dma_pte *pte;
int index;
int addr_width = agaw_to_width(domain->agaw);
struct dma_pte *first_pte = NULL, *pte = NULL;
phys_addr_t uninitialized_var(pteval);
int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
unsigned long sg_res;
hpa &= (((u64)1) << addr_width) - 1;
BUG_ON(addr_width < BITS_PER_LONG && (iov_pfn + nr_pages - 1) >> addr_width);
if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0)
return -EINVAL;
iova &= PAGE_MASK;
start_pfn = ((u64)hpa) >> VTD_PAGE_SHIFT;
end_pfn = (VTD_PAGE_ALIGN(((u64)hpa) + size)) >> VTD_PAGE_SHIFT;
index = 0;
while (start_pfn < end_pfn) {
pte = addr_to_dma_pte(domain, iova + VTD_PAGE_SIZE * index);
prot &= DMA_PTE_READ | DMA_PTE_WRITE | DMA_PTE_SNP;
if (sg)
sg_res = 0;
else {
sg_res = nr_pages + 1;
pteval = ((phys_addr_t)phys_pfn << VTD_PAGE_SHIFT) | prot;
}
while (nr_pages--) {
uint64_t tmp;
if (!sg_res) {
sg_res = (sg->offset + sg->length + VTD_PAGE_SIZE - 1) >> VTD_PAGE_SHIFT;
sg->dma_address = ((dma_addr_t)iov_pfn << VTD_PAGE_SHIFT) + sg->offset;
sg->dma_length = sg->length;
pteval = page_to_phys(sg_page(sg)) | prot;
}
if (!pte) {
first_pte = pte = pfn_to_dma_pte(domain, iov_pfn);
if (!pte)
return -ENOMEM;
}
/* We don't need lock here, nobody else
* touches the iova range
*/
BUG_ON(dma_pte_addr(pte));
dma_set_pte_addr(pte, start_pfn << VTD_PAGE_SHIFT);
dma_set_pte_prot(pte, prot);
if (prot & DMA_PTE_SNP)
dma_set_pte_snp(pte);
domain_flush_cache(domain, pte, sizeof(*pte));
start_pfn++;
index++;
tmp = cmpxchg64_local(&pte->val, 0ULL, pteval);
if (tmp) {
static int dumps = 5;
printk(KERN_CRIT "ERROR: DMA PTE for vPFN 0x%lx already set (to %llx not %llx)\n",
iov_pfn, tmp, (unsigned long long)pteval);
if (dumps) {
dumps--;
debug_dma_dump_mappings(NULL);
}
WARN_ON(1);
}
pte++;
if (!nr_pages || first_pte_in_page(pte)) {
domain_flush_cache(domain, first_pte,
(void *)pte - (void *)first_pte);
pte = NULL;
}
iov_pfn++;
pteval += VTD_PAGE_SIZE;
sg_res--;
if (!sg_res)
sg = sg_next(sg);
}
return 0;
}
static inline int domain_sg_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
struct scatterlist *sg, unsigned long nr_pages,
int prot)
{
return __domain_mapping(domain, iov_pfn, sg, 0, nr_pages, prot);
}
static inline int domain_pfn_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
unsigned long phys_pfn, unsigned long nr_pages,
int prot)
{
return __domain_mapping(domain, iov_pfn, NULL, phys_pfn, nr_pages, prot);
}
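
(domain_sg_mapping() and domain_pfn_mapping() are now thin wrappers around one worker, __domain_mapping(), which walks a page count and refills its source either from the scatterlist or from a flat physical range. A compact user-space sketch of that "one worker, two call shapes" pattern, with hypothetical names (map_pages, struct chunk) standing in for the kernel types:

#include <stddef.h>
#include <stdio.h>

struct chunk { unsigned long pfn; unsigned long npages; };

/* Walk nr_pages IOVA pages; pull the next source chunk from the list
 * when one is given, otherwise consume the flat phys_pfn range. */
static void map_pages(unsigned long iov_pfn, const struct chunk *sg,
		      unsigned long phys_pfn, unsigned long nr_pages)
{
	unsigned long chunk_left = sg ? 0 : nr_pages;

	while (nr_pages--) {
		if (!chunk_left) {		/* start of the next sg chunk */
			phys_pfn = sg->pfn;
			chunk_left = sg->npages;
			sg++;
		}
		printf("map iov %#lx -> phys %#lx\n", iov_pfn++, phys_pfn++);
		chunk_left--;
	}
}

int main(void)
{
	struct chunk sg[] = { { 0x100, 2 }, { 0x300, 1 } };

	map_pages(0x10, sg, 0, 3);	/* scatterlist case */
	map_pages(0x20, NULL, 0x500, 2);	/* contiguous case  */
	return 0;
}

)
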
static void iommu_detach_dev(struct intel_iommu *iommu, u8 bus, u8 devfn)
{
if (!iommu)
......@@ -1844,58 +1918,61 @@ static struct dmar_domain *get_domain_for_dev(struct pci_dev *pdev, int gaw)
static int iommu_identity_mapping;
static int iommu_domain_identity_map(struct dmar_domain *domain,
unsigned long long start,
unsigned long long end)
{
unsigned long first_vpfn = start >> VTD_PAGE_SHIFT;
unsigned long last_vpfn = end >> VTD_PAGE_SHIFT;
if (!reserve_iova(&domain->iovad, dma_to_mm_pfn(first_vpfn),
dma_to_mm_pfn(last_vpfn))) {
printk(KERN_ERR "IOMMU: reserve iova failed\n");
return -ENOMEM;
}
pr_debug("Mapping reserved region %llx-%llx for domain %d\n",
start, end, domain->id);
/*
* RMRR range might have overlap with physical memory range,
* clear it first
*/
dma_pte_clear_range(domain, first_vpfn, last_vpfn);
return domain_pfn_mapping(domain, first_vpfn, first_vpfn,
last_vpfn - first_vpfn + 1,
DMA_PTE_READ|DMA_PTE_WRITE);
}
static int iommu_prepare_identity_map(struct pci_dev *pdev,
unsigned long long start,
unsigned long long end)
{
struct dmar_domain *domain;
unsigned long size;
unsigned long long base;
int ret;
printk(KERN_INFO
"IOMMU: Setting identity map for device %s [0x%Lx - 0x%Lx]\n",
pci_name(pdev), start, end);
if (iommu_identity_mapping)
domain = si_domain;
else
/* page table init */
domain = get_domain_for_dev(pdev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
if (!domain)
return -ENOMEM;
/* The address might not be aligned */
base = start & PAGE_MASK;
size = end - base;
size = PAGE_ALIGN(size);
if (!reserve_iova(&domain->iovad, IOVA_PFN(base),
IOVA_PFN(base + size) - 1)) {
printk(KERN_ERR "IOMMU: reserve iova failed\n");
ret = -ENOMEM;
goto error;
}
pr_debug("Mapping reserved region %lx@%llx for %s\n",
size, base, pci_name(pdev));
/*
* RMRR range might have overlap with physical memory range,
* clear it first
*/
dma_pte_clear_range(domain, base, base + size);
ret = domain_page_mapping(domain, base, base, size,
DMA_PTE_READ|DMA_PTE_WRITE);
ret = iommu_domain_identity_map(domain, start, end);
if (ret)
goto error;
/* context entry init */
ret = domain_context_mapping(domain, pdev, CONTEXT_TT_MULTI_LEVEL);
if (!ret)
if (ret)
goto error;
return 0;
error:
error:
domain_exit(domain);
return ret;
}
static inline int iommu_prepare_rmrr_dev(struct dmar_rmrr_unit *rmrr,
......@@ -1907,64 +1984,6 @@ static inline int iommu_prepare_rmrr_dev(struct dmar_rmrr_unit *rmrr,
rmrr->end_address + 1);
}
struct iommu_prepare_data {
struct pci_dev *pdev;
int ret;
};
static int __init iommu_prepare_work_fn(unsigned long start_pfn,
unsigned long end_pfn, void *datax)
{
struct iommu_prepare_data *data;
data = (struct iommu_prepare_data *)datax;
data->ret = iommu_prepare_identity_map(data->pdev,
start_pfn<<PAGE_SHIFT, end_pfn<<PAGE_SHIFT);
return data->ret;
}
static int __init iommu_prepare_with_active_regions(struct pci_dev *pdev)
{
int nid;
struct iommu_prepare_data data;
data.pdev = pdev;
data.ret = 0;
for_each_online_node(nid) {
work_with_active_regions(nid, iommu_prepare_work_fn, &data);
if (data.ret)
return data.ret;
}
return data.ret;
}
#ifdef CONFIG_DMAR_GFX_WA
static void __init iommu_prepare_gfx_mapping(void)
{
struct pci_dev *pdev = NULL;
int ret;
for_each_pci_dev(pdev) {
if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO ||
!IS_GFX_DEVICE(pdev))
continue;
printk(KERN_INFO "IOMMU: gfx device %s 1-1 mapping\n",
pci_name(pdev));
ret = iommu_prepare_with_active_regions(pdev);
if (ret)
printk(KERN_ERR "IOMMU: mapping reserved region failed\n");
}
}
#else /* !CONFIG_DMAR_GFX_WA */
static inline void iommu_prepare_gfx_mapping(void)
{
return;
}
#endif
#ifdef CONFIG_DMAR_FLOPPY_WA
static inline void iommu_prepare_isa(void)
{
......@@ -1975,11 +1994,11 @@ static inline void iommu_prepare_isa(void)
if (!pdev)
return;
printk(KERN_INFO "IOMMU: Prepare 0-16M unity mapping for LPC\n");
printk(KERN_INFO "IOMMU: Prepare 0-16MiB unity mapping for LPC\n");
ret = iommu_prepare_identity_map(pdev, 0, 16*1024*1024);
if (ret)
printk(KERN_ERR "IOMMU: Failed to create 0-64M identity map, "
printk(KERN_ERR "IOMMU: Failed to create 0-16MiB identity map; "
"floppy might not work\n");
}
......@@ -2008,16 +2027,30 @@ static int __init init_context_pass_through(void)
}
static int md_domain_init(struct dmar_domain *domain, int guest_width);
static int __init si_domain_work_fn(unsigned long start_pfn,
unsigned long end_pfn, void *datax)
{
int *ret = datax;
*ret = iommu_domain_identity_map(si_domain,
(uint64_t)start_pfn << PAGE_SHIFT,
(uint64_t)end_pfn << PAGE_SHIFT);
return *ret;
}
static int si_domain_init(void)
{
struct dmar_drhd_unit *drhd;
struct intel_iommu *iommu;
int ret = 0;
int nid, ret = 0;
si_domain = alloc_domain();
if (!si_domain)
return -EFAULT;
pr_debug("Identity mapping domain is domain %d\n", si_domain->id);
for_each_active_iommu(iommu, drhd) {
ret = iommu_attach_domain(si_domain, iommu);
......@@ -2034,6 +2067,12 @@ static int si_domain_init(void)
si_domain->flags = DOMAIN_FLAG_STATIC_IDENTITY;
for_each_online_node(nid) {
work_with_active_regions(nid, si_domain_work_fn, &ret);
if (ret)
return ret;
}
return 0;
}
......@@ -2087,13 +2126,14 @@ static int iommu_prepare_static_identity_mapping(void)
if (ret)
return -EFAULT;
printk(KERN_INFO "IOMMU: Setting identity map:\n");
for_each_pci_dev(pdev) {
ret = iommu_prepare_with_active_regions(pdev);
if (ret) {
printk(KERN_INFO "1:1 mapping to one domain failed.\n");
return -EFAULT;
}
printk(KERN_INFO "IOMMU: identity mapping for device %s\n",
pci_name(pdev));
ret = domain_context_mapping(si_domain, pdev,
CONTEXT_TT_MULTI_LEVEL);
if (ret)
return ret;
ret = domain_add_dev_info(si_domain, pdev);
if (ret)
return ret;
......@@ -2284,8 +2324,6 @@ int __init init_dmars(void)
}
}
iommu_prepare_gfx_mapping();
iommu_prepare_isa();
}
......@@ -2330,50 +2368,40 @@ int __init init_dmars(void)
return ret;
}
static inline u64 aligned_size(u64 host_addr, size_t size)
{
u64 addr;
addr = (host_addr & (~PAGE_MASK)) + size;
return PAGE_ALIGN(addr);
}
struct iova *
iommu_alloc_iova(struct dmar_domain *domain, size_t size, u64 end)
static inline unsigned long aligned_nrpages(unsigned long host_addr,
size_t size)
{
struct iova *piova;
/* Make sure it's in range */
end = min_t(u64, DOMAIN_MAX_ADDR(domain->gaw), end);
if (!size || (IOVA_START_ADDR + size > end))
return NULL;
host_addr &= ~PAGE_MASK;
host_addr += size + PAGE_SIZE - 1;
piova = alloc_iova(&domain->iovad,
size >> PAGE_SHIFT, IOVA_PFN(end), 1);
return piova;
return host_addr >> VTD_PAGE_SHIFT;
}
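
(aligned_nrpages() above turns an (address, size) pair into a page count by adding the start address's offset within its page before rounding up. A minimal sketch of the same arithmetic, done here purely in 4KiB units for simplicity; the kernel version masks the offset with the MM page size:

#include <stdio.h>

#define VTD_PAGE_SHIFT 12
#define VTD_PAGE_SIZE  (1UL << VTD_PAGE_SHIFT)

/* How many 4KiB IOMMU pages does a buffer need, given its starting
 * offset within a page plus its length? */
static unsigned long nrpages(unsigned long addr, size_t size)
{
	unsigned long offset = addr & (VTD_PAGE_SIZE - 1);

	return (offset + size + VTD_PAGE_SIZE - 1) >> VTD_PAGE_SHIFT;
}

int main(void)
{
	printf("%lu\n", nrpages(0x1008, 0x10));    /* 1: fits in one page       */
	printf("%lu\n", nrpages(0x1008, 0x1000));  /* 2: offset pushes it over  */
	return 0;
}

)
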
static struct iova *
__intel_alloc_iova(struct device *dev, struct dmar_domain *domain,
size_t size, u64 dma_mask)
static struct iova *intel_alloc_iova(struct device *dev,
struct dmar_domain *domain,
unsigned long nrpages, uint64_t dma_mask)
{
struct pci_dev *pdev = to_pci_dev(dev);
struct iova *iova = NULL;
if (dma_mask <= DMA_BIT_MASK(32) || dmar_forcedac)
iova = iommu_alloc_iova(domain, size, dma_mask);
else {
/* Restrict dma_mask to the width that the iommu can handle */
dma_mask = min_t(uint64_t, DOMAIN_MAX_ADDR(domain->gaw), dma_mask);
if (!dmar_forcedac && dma_mask > DMA_BIT_MASK(32)) {
/*
* First try to allocate an io virtual address in
* DMA_BIT_MASK(32) and if that fails then try allocating
* from higher range
*/
iova = iommu_alloc_iova(domain, size, DMA_BIT_MASK(32));
if (!iova)
iova = iommu_alloc_iova(domain, size, dma_mask);
iova = alloc_iova(&domain->iovad, nrpages,
IOVA_PFN(DMA_BIT_MASK(32)), 1);
if (iova)
return iova;
}
if (!iova) {
printk(KERN_ERR"Allocating iova for %s failed", pci_name(pdev));
iova = alloc_iova(&domain->iovad, nrpages, IOVA_PFN(dma_mask), 1);
if (unlikely(!iova)) {
printk(KERN_ERR "Allocating %ld-page iova for %s failed",
nrpages, pci_name(pdev));
return NULL;
}
......@@ -2476,14 +2504,12 @@ static dma_addr_t __intel_map_single(struct device *hwdev, phys_addr_t paddr,
return 0;
iommu = domain_get_iommu(domain);
size = aligned_size((u64)paddr, size);
size = aligned_nrpages(paddr, size);
iova = __intel_alloc_iova(hwdev, domain, size, pdev->dma_mask);
iova = intel_alloc_iova(hwdev, domain, size, pdev->dma_mask);
if (!iova)
goto error;
start_paddr = (phys_addr_t)iova->pfn_lo << PAGE_SHIFT;
/*
* Check if DMAR supports zero-length reads on write only
* mappings..
......@@ -2499,20 +2525,20 @@ static dma_addr_t __intel_map_single(struct device *hwdev, phys_addr_t paddr,
* might have two guest_addr mapping to the same host paddr, but this
* is not a big problem
*/
ret = domain_page_mapping(domain, start_paddr,
((u64)paddr) & PHYSICAL_PAGE_MASK,
size, prot);
ret = domain_pfn_mapping(domain, mm_to_dma_pfn(iova->pfn_lo),
paddr >> VTD_PAGE_SHIFT, size, prot);
if (ret)
goto error;
/* it's a non-present to present mapping. Only flush if caching mode */
if (cap_caching_mode(iommu->cap))
iommu_flush_iotlb_psi(iommu, 0, start_paddr,
size >> VTD_PAGE_SHIFT);
iommu_flush_iotlb_psi(iommu, 0, mm_to_dma_pfn(iova->pfn_lo), size);
else
iommu_flush_write_buffer(iommu);
return start_paddr + ((u64)paddr & (~PAGE_MASK));
start_paddr = (phys_addr_t)iova->pfn_lo << PAGE_SHIFT;
start_paddr += paddr & ~PAGE_MASK;
return start_paddr;
error:
if (iova)
......@@ -2605,7 +2631,7 @@ static void intel_unmap_page(struct device *dev, dma_addr_t dev_addr,
{
struct pci_dev *pdev = to_pci_dev(dev);
struct dmar_domain *domain;
unsigned long start_addr;
unsigned long start_pfn, last_pfn;
struct iova *iova;
struct intel_iommu *iommu;
......@@ -2618,22 +2644,25 @@ static void intel_unmap_page(struct device *dev, dma_addr_t dev_addr,
iommu = domain_get_iommu(domain);
iova = find_iova(&domain->iovad, IOVA_PFN(dev_addr));
if (!iova)
if (WARN_ONCE(!iova, "Driver unmaps unmatched page at PFN %llx\n",
(unsigned long long)dev_addr))
return;
start_addr = iova->pfn_lo << PAGE_SHIFT;
size = aligned_size((u64)dev_addr, size);
start_pfn = mm_to_dma_pfn(iova->pfn_lo);
last_pfn = mm_to_dma_pfn(iova->pfn_hi + 1) - 1;
pr_debug("Device %s unmapping: %zx@%llx\n",
pci_name(pdev), size, (unsigned long long)start_addr);
pr_debug("Device %s unmapping: pfn %lx-%lx\n",
pci_name(pdev), start_pfn, last_pfn);
/* clear the whole page */
dma_pte_clear_range(domain, start_addr, start_addr + size);
dma_pte_clear_range(domain, start_pfn, last_pfn);
/* free page tables */
dma_pte_free_pagetable(domain, start_addr, start_addr + size);
dma_pte_free_pagetable(domain, start_pfn, last_pfn);
if (intel_iommu_strict) {
iommu_flush_iotlb_psi(iommu, domain->id, start_addr,
size >> VTD_PAGE_SHIFT);
iommu_flush_iotlb_psi(iommu, domain->id, start_pfn,
last_pfn - start_pfn + 1);
/* free iova */
__free_iova(&domain->iovad, iova);
} else {
......@@ -2691,14 +2720,10 @@ static void intel_unmap_sg(struct device *hwdev, struct scatterlist *sglist,
int nelems, enum dma_data_direction dir,
struct dma_attrs *attrs)
{
int i;
struct pci_dev *pdev = to_pci_dev(hwdev);
struct dmar_domain *domain;
unsigned long start_addr;
unsigned long start_pfn, last_pfn;
struct iova *iova;
size_t size = 0;
phys_addr_t addr;
struct scatterlist *sg;
struct intel_iommu *iommu;
if (iommu_no_mapping(pdev))
......@@ -2710,22 +2735,21 @@ static void intel_unmap_sg(struct device *hwdev, struct scatterlist *sglist,
iommu = domain_get_iommu(domain);
iova = find_iova(&domain->iovad, IOVA_PFN(sglist[0].dma_address));
if (!iova)
if (WARN_ONCE(!iova, "Driver unmaps unmatched sglist at PFN %llx\n",
(unsigned long long)sglist[0].dma_address))
return;
for_each_sg(sglist, sg, nelems, i) {
addr = page_to_phys(sg_page(sg)) + sg->offset;
size += aligned_size((u64)addr, sg->length);
}
start_addr = iova->pfn_lo << PAGE_SHIFT;
start_pfn = mm_to_dma_pfn(iova->pfn_lo);
last_pfn = mm_to_dma_pfn(iova->pfn_hi + 1) - 1;
/* clear the whole page */
dma_pte_clear_range(domain, start_addr, start_addr + size);
dma_pte_clear_range(domain, start_pfn, last_pfn);
/* free page tables */
dma_pte_free_pagetable(domain, start_addr, start_addr + size);
dma_pte_free_pagetable(domain, start_pfn, last_pfn);
iommu_flush_iotlb_psi(iommu, domain->id, start_addr,
size >> VTD_PAGE_SHIFT);
iommu_flush_iotlb_psi(iommu, domain->id, start_pfn,
(last_pfn - start_pfn + 1));
/* free iova */
__free_iova(&domain->iovad, iova);
......@@ -2748,17 +2772,16 @@ static int intel_nontranslate_map_sg(struct device *hddev,
static int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int nelems,
enum dma_data_direction dir, struct dma_attrs *attrs)
{
phys_addr_t addr;
int i;
struct pci_dev *pdev = to_pci_dev(hwdev);
struct dmar_domain *domain;
size_t size = 0;
int prot = 0;
size_t offset = 0;
size_t offset_pfn = 0;
struct iova *iova = NULL;
int ret;
struct scatterlist *sg;
unsigned long start_addr;
unsigned long start_vpfn;
struct intel_iommu *iommu;
BUG_ON(dir == DMA_NONE);
......@@ -2771,12 +2794,10 @@ static int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int ne
iommu = domain_get_iommu(domain);
for_each_sg(sglist, sg, nelems, i) {
addr = page_to_phys(sg_page(sg)) + sg->offset;
size += aligned_size((u64)addr, sg->length);
}
for_each_sg(sglist, sg, nelems, i)
size += aligned_nrpages(sg->offset, sg->length);
iova = __intel_alloc_iova(hwdev, domain, size, pdev->dma_mask);
iova = intel_alloc_iova(hwdev, domain, size, pdev->dma_mask);
if (!iova) {
sglist->dma_length = 0;
return 0;
......@@ -2792,35 +2813,24 @@ static int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int ne
if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
prot |= DMA_PTE_WRITE;
start_addr = iova->pfn_lo << PAGE_SHIFT;
offset = 0;
for_each_sg(sglist, sg, nelems, i) {
addr = page_to_phys(sg_page(sg)) + sg->offset;
size = aligned_size((u64)addr, sg->length);
ret = domain_page_mapping(domain, start_addr + offset,
((u64)addr) & PHYSICAL_PAGE_MASK,
size, prot);
if (ret) {
start_vpfn = mm_to_dma_pfn(iova->pfn_lo);
ret = domain_sg_mapping(domain, start_vpfn, sglist, mm_to_dma_pfn(size), prot);
if (unlikely(ret)) {
/* clear the page */
dma_pte_clear_range(domain, start_addr,
start_addr + offset);
dma_pte_clear_range(domain, start_vpfn,
start_vpfn + size - 1);
/* free page tables */
dma_pte_free_pagetable(domain, start_addr,
start_addr + offset);
dma_pte_free_pagetable(domain, start_vpfn,
start_vpfn + size - 1);
/* free iova */
__free_iova(&domain->iovad, iova);
return 0;
}
sg->dma_address = start_addr + offset +
((u64)addr & (~PAGE_MASK));
sg->dma_length = sg->length;
offset += size;
}
/* it's a non-present to present mapping. Only flush if caching mode */
if (cap_caching_mode(iommu->cap))
iommu_flush_iotlb_psi(iommu, 0, start_addr,
offset >> VTD_PAGE_SHIFT);
iommu_flush_iotlb_psi(iommu, 0, start_vpfn, offset_pfn);
else
iommu_flush_write_buffer(iommu);
......@@ -3325,7 +3335,6 @@ static int md_domain_init(struct dmar_domain *domain, int guest_width)
int adjust_width;
init_iova_domain(&domain->iovad, DMA_32BIT_PFN);
spin_lock_init(&domain->mapping_lock);
spin_lock_init(&domain->iommu_lock);
domain_reserve_special_ranges(domain);
......@@ -3379,8 +3388,6 @@ static void iommu_free_vm_domain(struct dmar_domain *domain)
static void vm_domain_exit(struct dmar_domain *domain)
{
u64 end;
/* Domain 0 is reserved, so dont process it */
if (!domain)
return;
......@@ -3388,14 +3395,12 @@ static void vm_domain_exit(struct dmar_domain *domain)
vm_domain_remove_all_dev_info(domain);
/* destroy iovas */
put_iova_domain(&domain->iovad);
end = DOMAIN_MAX_ADDR(domain->gaw);
end = end & (~VTD_PAGE_MASK);
/* clear ptes */
dma_pte_clear_range(domain, 0, end);
dma_pte_clear_range(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
/* free page tables */
dma_pte_free_pagetable(domain, 0, end);
dma_pte_free_pagetable(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
iommu_free_vm_domain(domain);
free_domain_mem(domain);
......@@ -3504,7 +3509,7 @@ static int intel_iommu_map_range(struct iommu_domain *domain,
if ((iommu_prot & IOMMU_CACHE) && dmar_domain->iommu_snooping)
prot |= DMA_PTE_SNP;
max_addr = (iova & VTD_PAGE_MASK) + VTD_PAGE_ALIGN(size);
max_addr = iova + size;
if (dmar_domain->max_addr < max_addr) {
int min_agaw;
u64 end;
......@@ -3522,8 +3527,11 @@ static int intel_iommu_map_range(struct iommu_domain *domain,
}
dmar_domain->max_addr = max_addr;
}
ret = domain_page_mapping(dmar_domain, iova, hpa, size, prot);
/* Round up size to next multiple of PAGE_SIZE, if it and
the low bits of hpa would take us onto the next page */
size = aligned_nrpages(hpa, size);
ret = domain_pfn_mapping(dmar_domain, iova >> VTD_PAGE_SHIFT,
hpa >> VTD_PAGE_SHIFT, size, prot);
return ret;
}
......@@ -3531,15 +3539,12 @@ static void intel_iommu_unmap_range(struct iommu_domain *domain,
unsigned long iova, size_t size)
{
struct dmar_domain *dmar_domain = domain->priv;
dma_addr_t base;
/* The address might not be aligned */
base = iova & VTD_PAGE_MASK;
size = VTD_PAGE_ALIGN(size);
dma_pte_clear_range(dmar_domain, base, base + size);
dma_pte_clear_range(dmar_domain, iova >> VTD_PAGE_SHIFT,
(iova + size - 1) >> VTD_PAGE_SHIFT);
if (dmar_domain->max_addr == base + size)
dmar_domain->max_addr = base;
if (dmar_domain->max_addr == iova + size)
dmar_domain->max_addr = iova;
}
static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
......@@ -3549,7 +3554,7 @@ static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
struct dma_pte *pte;
u64 phys = 0;
pte = addr_to_dma_pte(dmar_domain, iova);
pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT);
if (pte)
phys = dma_pte_addr(pte);
......