Commit 99e38df8 authored by Linus Torvalds

Merge tag 'iommu-updates-v4.5' of git://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu

Pull IOMMU updates from Joerg Roedel:
 "The updates include:

   - Small code cleanups in the AMD IOMMUv2 driver

   - Scalability improvements for the DMA-API implementation of the AMD
     IOMMU driver.  This is just a starting point, but already showed
     some good improvements in my tests.

   - Removal of the unused Renesas IPMMU/IPMMUI driver

   - Updates for ARM-SMMU include:
      * Some fixes to get the driver working nicely on Broadcom hardware
      * A change to the io-pgtable API to indicate the unit in which to
        flush (all callers converted, with Ack from Laurent)
      * Use of devm_* for allocating/freeing the SMMUv3 buffers

   - Some other small fixes and improvements for other drivers"

* tag 'iommu-updates-v4.5' of git://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu: (46 commits)
  iommu/vt-d: Fix up error handling in alloc_iommu
  iommu/vt-d: Check the return value of iommu_device_create()
  iommu/amd: Remove an unneeded condition
  iommu/amd: Preallocate dma_ops apertures based on dma_mask
  iommu/amd: Use trylock to aquire bitmap_lock
  iommu/amd: Make dma_ops_domain->next_index percpu
  iommu/amd: Relax locking in dma_ops path
  iommu/amd: Initialize new aperture range before making it visible
  iommu/amd: Build io page-tables with cmpxchg64
  iommu/amd: Allocate new aperture ranges in dma_ops_alloc_addresses
  iommu/amd: Optimize dma_ops_free_addresses
  iommu/amd: Remove need_flush from struct dma_ops_domain
  iommu/amd: Iterate over all aperture ranges in dma_ops_area_alloc
  iommu/amd: Flush iommu tlb in dma_ops_free_addresses
  iommu/amd: Rename dma_ops_domain->next_address to next_index
  iommu/amd: Remove 'start' parameter from dma_ops_area_alloc
  iommu/amd: Flush iommu tlb in dma_ops_aperture_alloc()
  iommu/amd: Retry address allocation within one aperture
  iommu/amd: Move aperture_range.offset to another cache-line
  iommu/amd: Add dma_ops_aperture_alloc() function
  ...
parents a200dcb3 32704253
...@@ -7,7 +7,15 @@ connected to the IPMMU through a port called micro-TLB. ...@@ -7,7 +7,15 @@ connected to the IPMMU through a port called micro-TLB.
Required Properties: Required Properties:
- compatible: Must contain "renesas,ipmmu-vmsa". - compatible: Must contain SoC-specific and generic entries from below.
- "renesas,ipmmu-r8a73a4" for the R8A73A4 (R-Mobile APE6) IPMMU.
- "renesas,ipmmu-r8a7790" for the R8A7790 (R-Car H2) IPMMU.
- "renesas,ipmmu-r8a7791" for the R8A7791 (R-Car M2-W) IPMMU.
- "renesas,ipmmu-r8a7793" for the R8A7793 (R-Car M2-N) IPMMU.
- "renesas,ipmmu-r8a7794" for the R8A7794 (R-Car E2) IPMMU.
- "renesas,ipmmu-vmsa" for generic R-Car Gen2 VMSA-compatible IPMMU.
- reg: Base address and size of the IPMMU registers. - reg: Base address and size of the IPMMU registers.
- interrupts: Specifiers for the MMU fault interrupts. For instances that - interrupts: Specifiers for the MMU fault interrupts. For instances that
support secure mode two interrupts must be specified, for non-secure and support secure mode two interrupts must be specified, for non-secure and
...@@ -27,7 +35,7 @@ node with the following property: ...@@ -27,7 +35,7 @@ node with the following property:
Example: R8A7791 IPMMU-MX and VSP1-D0 bus master Example: R8A7791 IPMMU-MX and VSP1-D0 bus master
ipmmu_mx: mmu@fe951000 { ipmmu_mx: mmu@fe951000 {
compatible = "renesas,ipmmu-vmsa"; compatible = "renesas,ipmmu-r8a7791", "renesas,ipmmu-vmsa";
reg = <0 0xfe951000 0 0x1000>; reg = <0 0xfe951000 0 0x1000>;
interrupts = <0 222 IRQ_TYPE_LEVEL_HIGH>, interrupts = <0 222 IRQ_TYPE_LEVEL_HIGH>,
<0 221 IRQ_TYPE_LEVEL_HIGH>; <0 221 IRQ_TYPE_LEVEL_HIGH>;
......
...@@ -263,81 +263,6 @@ config EXYNOS_IOMMU_DEBUG ...@@ -263,81 +263,6 @@ config EXYNOS_IOMMU_DEBUG
Say N unless you need kernel log message for IOMMU debugging. Say N unless you need kernel log message for IOMMU debugging.
config SHMOBILE_IPMMU
bool
config SHMOBILE_IPMMU_TLB
bool
config SHMOBILE_IOMMU
bool "IOMMU for Renesas IPMMU/IPMMUI"
default n
depends on ARM && MMU
depends on ARCH_SHMOBILE || COMPILE_TEST
select IOMMU_API
select ARM_DMA_USE_IOMMU
select SHMOBILE_IPMMU
select SHMOBILE_IPMMU_TLB
help
Support for Renesas IPMMU/IPMMUI. This option enables
remapping of DMA memory accesses from all of the IP blocks
on the ICB.
Warning: Drivers (including userspace drivers of UIO
devices) of the IP blocks on the ICB *must* use addresses
allocated from the IPMMU (iova) for DMA with this option
enabled.
If unsure, say N.
choice
prompt "IPMMU/IPMMUI address space size"
default SHMOBILE_IOMMU_ADDRSIZE_2048MB
depends on SHMOBILE_IOMMU
help
This option sets IPMMU/IPMMUI address space size by
adjusting the 1st level page table size. The page table size
is calculated as follows:
page table size = number of page table entries * 4 bytes
number of page table entries = address space size / 1 MiB
For example, when the address space size is 2048 MiB, the
1st level page table size is 8192 bytes.
config SHMOBILE_IOMMU_ADDRSIZE_2048MB
bool "2 GiB"
config SHMOBILE_IOMMU_ADDRSIZE_1024MB
bool "1 GiB"
config SHMOBILE_IOMMU_ADDRSIZE_512MB
bool "512 MiB"
config SHMOBILE_IOMMU_ADDRSIZE_256MB
bool "256 MiB"
config SHMOBILE_IOMMU_ADDRSIZE_128MB
bool "128 MiB"
config SHMOBILE_IOMMU_ADDRSIZE_64MB
bool "64 MiB"
config SHMOBILE_IOMMU_ADDRSIZE_32MB
bool "32 MiB"
endchoice
config SHMOBILE_IOMMU_L1SIZE
int
default 8192 if SHMOBILE_IOMMU_ADDRSIZE_2048MB
default 4096 if SHMOBILE_IOMMU_ADDRSIZE_1024MB
default 2048 if SHMOBILE_IOMMU_ADDRSIZE_512MB
default 1024 if SHMOBILE_IOMMU_ADDRSIZE_256MB
default 512 if SHMOBILE_IOMMU_ADDRSIZE_128MB
default 256 if SHMOBILE_IOMMU_ADDRSIZE_64MB
default 128 if SHMOBILE_IOMMU_ADDRSIZE_32MB
config IPMMU_VMSA config IPMMU_VMSA
bool "Renesas VMSA-compatible IPMMU" bool "Renesas VMSA-compatible IPMMU"
depends on ARM_LPAE depends on ARM_LPAE
......
...@@ -22,7 +22,5 @@ obj-$(CONFIG_ROCKCHIP_IOMMU) += rockchip-iommu.o ...@@ -22,7 +22,5 @@ obj-$(CONFIG_ROCKCHIP_IOMMU) += rockchip-iommu.o
obj-$(CONFIG_TEGRA_IOMMU_GART) += tegra-gart.o obj-$(CONFIG_TEGRA_IOMMU_GART) += tegra-gart.o
obj-$(CONFIG_TEGRA_IOMMU_SMMU) += tegra-smmu.o obj-$(CONFIG_TEGRA_IOMMU_SMMU) += tegra-smmu.o
obj-$(CONFIG_EXYNOS_IOMMU) += exynos-iommu.o obj-$(CONFIG_EXYNOS_IOMMU) += exynos-iommu.o
obj-$(CONFIG_SHMOBILE_IOMMU) += shmobile-iommu.o
obj-$(CONFIG_SHMOBILE_IPMMU) += shmobile-ipmmu.o
obj-$(CONFIG_FSL_PAMU) += fsl_pamu.o fsl_pamu_domain.o obj-$(CONFIG_FSL_PAMU) += fsl_pamu.o fsl_pamu_domain.o
obj-$(CONFIG_S390_IOMMU) += s390-iommu.o obj-$(CONFIG_S390_IOMMU) += s390-iommu.o
...@@ -35,6 +35,7 @@ ...@@ -35,6 +35,7 @@
#include <linux/msi.h> #include <linux/msi.h>
#include <linux/dma-contiguous.h> #include <linux/dma-contiguous.h>
#include <linux/irqdomain.h> #include <linux/irqdomain.h>
#include <linux/percpu.h>
#include <asm/irq_remapping.h> #include <asm/irq_remapping.h>
#include <asm/io_apic.h> #include <asm/io_apic.h>
#include <asm/apic.h> #include <asm/apic.h>
...@@ -114,6 +115,45 @@ struct kmem_cache *amd_iommu_irq_cache; ...@@ -114,6 +115,45 @@ struct kmem_cache *amd_iommu_irq_cache;
static void update_domain(struct protection_domain *domain); static void update_domain(struct protection_domain *domain);
static int protection_domain_init(struct protection_domain *domain); static int protection_domain_init(struct protection_domain *domain);
/*
 * For dynamic growth the aperture size is split into ranges of 128MB of
 * DMA address space each. This struct represents one such range.
 */
struct aperture_range {
/*
 * Held by dma_ops_aperture_alloc() and dma_ops_free_addresses() while they
 * read or modify bitmap and next_bit.
 */
spinlock_t bitmap_lock;
/* address allocation bitmap */
unsigned long *bitmap;
/*
 * DMA address at which this range starts. alloc_new_range() sets it to
 * the domain's aperture_size at the time the range is created.
 */
unsigned long offset;
/*
 * Bit index handed to iommu_area_alloc() as the position for the first
 * allocation attempt; updated to (address + pages) after a successful
 * allocation and raised by the free path.
 */
unsigned long next_bit;
/*
 * Array of PTE pages for the aperture. In this array we save all the
 * leaf pages of the domain page table used for the aperture. This way
 * we don't need to walk the page table to find a specific PTE. We can
 * just calculate its address in constant time.
 */
u64 *pte_pages[64];
};
/*
 * Data container for a dma_ops specific protection domain
 */
struct dma_ops_domain {
/* generic protection domain information */
struct protection_domain domain;
/*
 * size of the aperture for the mappings; grows by APERTURE_RANGE_SIZE
 * each time alloc_new_range() publishes a new range
 */
unsigned long aperture_size;
/*
 * aperture index we start searching for free addresses; one value per
 * CPU (allocated with alloc_percpu(), accessed with this_cpu_read() and
 * this_cpu_write())
 */
u32 __percpu *next_index;
/*
 * address space relevant data; slot i describes the range starting at
 * i << APERTURE_RANGE_SHIFT, and is NULL until that range is allocated
 */
struct aperture_range *aperture[APERTURE_MAX_RANGES];
};
/**************************************************************************** /****************************************************************************
* *
* Helper functions * Helper functions
...@@ -1167,11 +1207,21 @@ static u64 *alloc_pte(struct protection_domain *domain, ...@@ -1167,11 +1207,21 @@ static u64 *alloc_pte(struct protection_domain *domain,
end_lvl = PAGE_SIZE_LEVEL(page_size); end_lvl = PAGE_SIZE_LEVEL(page_size);
while (level > end_lvl) { while (level > end_lvl) {
if (!IOMMU_PTE_PRESENT(*pte)) { u64 __pte, __npte;
__pte = *pte;
if (!IOMMU_PTE_PRESENT(__pte)) {
page = (u64 *)get_zeroed_page(gfp); page = (u64 *)get_zeroed_page(gfp);
if (!page) if (!page)
return NULL; return NULL;
*pte = PM_LEVEL_PDE(level, virt_to_phys(page));
__npte = PM_LEVEL_PDE(level, virt_to_phys(page));
if (cmpxchg64(pte, __pte, __npte)) {
free_page((unsigned long)page);
continue;
}
} }
/* No level skipping support yet */ /* No level skipping support yet */
...@@ -1376,8 +1426,10 @@ static int alloc_new_range(struct dma_ops_domain *dma_dom, ...@@ -1376,8 +1426,10 @@ static int alloc_new_range(struct dma_ops_domain *dma_dom,
bool populate, gfp_t gfp) bool populate, gfp_t gfp)
{ {
int index = dma_dom->aperture_size >> APERTURE_RANGE_SHIFT; int index = dma_dom->aperture_size >> APERTURE_RANGE_SHIFT;
struct amd_iommu *iommu;
unsigned long i, old_size, pte_pgsize; unsigned long i, old_size, pte_pgsize;
struct aperture_range *range;
struct amd_iommu *iommu;
unsigned long flags;
#ifdef CONFIG_IOMMU_STRESS #ifdef CONFIG_IOMMU_STRESS
populate = false; populate = false;
...@@ -1386,15 +1438,17 @@ static int alloc_new_range(struct dma_ops_domain *dma_dom, ...@@ -1386,15 +1438,17 @@ static int alloc_new_range(struct dma_ops_domain *dma_dom,
if (index >= APERTURE_MAX_RANGES) if (index >= APERTURE_MAX_RANGES)
return -ENOMEM; return -ENOMEM;
dma_dom->aperture[index] = kzalloc(sizeof(struct aperture_range), gfp); range = kzalloc(sizeof(struct aperture_range), gfp);
if (!dma_dom->aperture[index]) if (!range)
return -ENOMEM; return -ENOMEM;
dma_dom->aperture[index]->bitmap = (void *)get_zeroed_page(gfp); range->bitmap = (void *)get_zeroed_page(gfp);
if (!dma_dom->aperture[index]->bitmap) if (!range->bitmap)
goto out_free; goto out_free;
dma_dom->aperture[index]->offset = dma_dom->aperture_size; range->offset = dma_dom->aperture_size;
spin_lock_init(&range->bitmap_lock);
if (populate) { if (populate) {
unsigned long address = dma_dom->aperture_size; unsigned long address = dma_dom->aperture_size;
...@@ -1407,14 +1461,20 @@ static int alloc_new_range(struct dma_ops_domain *dma_dom, ...@@ -1407,14 +1461,20 @@ static int alloc_new_range(struct dma_ops_domain *dma_dom,
if (!pte) if (!pte)
goto out_free; goto out_free;
dma_dom->aperture[index]->pte_pages[i] = pte_page; range->pte_pages[i] = pte_page;
address += APERTURE_RANGE_SIZE / 64; address += APERTURE_RANGE_SIZE / 64;
} }
} }
old_size = dma_dom->aperture_size; spin_lock_irqsave(&dma_dom->domain.lock, flags);
dma_dom->aperture_size += APERTURE_RANGE_SIZE;
/* First take the bitmap_lock and then publish the range */
spin_lock(&range->bitmap_lock);
old_size = dma_dom->aperture_size;
dma_dom->aperture[index] = range;
dma_dom->aperture_size += APERTURE_RANGE_SIZE;
/* Reserve address range used for MSI messages */ /* Reserve address range used for MSI messages */
if (old_size < MSI_ADDR_BASE_LO && if (old_size < MSI_ADDR_BASE_LO &&
...@@ -1461,62 +1521,123 @@ static int alloc_new_range(struct dma_ops_domain *dma_dom, ...@@ -1461,62 +1521,123 @@ static int alloc_new_range(struct dma_ops_domain *dma_dom,
update_domain(&dma_dom->domain); update_domain(&dma_dom->domain);
spin_unlock(&range->bitmap_lock);
spin_unlock_irqrestore(&dma_dom->domain.lock, flags);
return 0; return 0;
out_free: out_free:
update_domain(&dma_dom->domain); update_domain(&dma_dom->domain);
free_page((unsigned long)dma_dom->aperture[index]->bitmap); free_page((unsigned long)range->bitmap);
kfree(dma_dom->aperture[index]); kfree(range);
dma_dom->aperture[index] = NULL;
return -ENOMEM; return -ENOMEM;
} }
/*
 * Try to allocate @pages contiguous pages from one aperture range.
 *
 * @dom:           dma_ops domain the range belongs to (used for TLB flushing)
 * @range:         aperture range to allocate from
 * @pages:         number of pages requested
 * @dma_mask:      DMA mask of the device; bounds the highest usable index
 * @boundary_size: passed through to iommu_area_alloc()
 * @align_mask:    passed through to iommu_area_alloc()
 * @trylock:       if true, give up immediately (return -1) when the range's
 *                 bitmap_lock is contended instead of spinning on it
 *
 * The search first starts at range->next_bit. If that fails it is repeated
 * once from bit 0, and in that case the domain TLB is flushed after the lock
 * has been dropped.
 * NOTE(review): presumably the flush is needed because a search restarted
 * from 0 can hand out addresses that were freed earlier - confirm.
 *
 * Returns the value produced by iommu_area_alloc(), or -1 if nothing was
 * found or the trylock failed.
 */
static dma_addr_t dma_ops_aperture_alloc(struct dma_ops_domain *dom,
					 struct aperture_range *range,
					 unsigned long pages,
					 unsigned long dma_mask,
					 unsigned long boundary_size,
					 unsigned long align_mask,
					 bool trylock)
{
	unsigned long first_page = range->offset >> PAGE_SHIFT;
	unsigned long max_index, irq_flags;
	bool restarted = false;
	dma_addr_t result;

	max_index = iommu_device_max_index(APERTURE_RANGE_PAGES, first_page,
					   dma_mask >> PAGE_SHIFT);

	if (!trylock) {
		spin_lock_irqsave(&range->bitmap_lock, irq_flags);
	} else {
		if (!spin_trylock_irqsave(&range->bitmap_lock, irq_flags))
			return -1;
	}

	/* First attempt: continue where the previous allocation ended */
	result = iommu_area_alloc(range->bitmap, max_index, range->next_bit,
				  pages, first_page, boundary_size,
				  align_mask);

	if (result == -1) {
		/* Second and last attempt: scan the range from its start */
		restarted = true;
		result = iommu_area_alloc(range->bitmap, max_index, 0,
					  pages, first_page, boundary_size,
					  align_mask);
	}

	if (result != -1)
		range->next_bit = result + pages;

	spin_unlock_irqrestore(&range->bitmap_lock, irq_flags);

	if (!restarted)
		return result;

	/* Flushing happens outside of the bitmap_lock */
	domain_flush_tlb(&dom->domain);
	domain_flush_complete(&dom->domain);

	return result;
}
static unsigned long dma_ops_area_alloc(struct device *dev, static unsigned long dma_ops_area_alloc(struct device *dev,
struct dma_ops_domain *dom, struct dma_ops_domain *dom,
unsigned int pages, unsigned int pages,
unsigned long align_mask, unsigned long align_mask,
u64 dma_mask, u64 dma_mask)
unsigned long start)
{ {
unsigned long next_bit = dom->next_address % APERTURE_RANGE_SIZE;
int max_index = dom->aperture_size >> APERTURE_RANGE_SHIFT;
int i = start >> APERTURE_RANGE_SHIFT;
unsigned long boundary_size, mask; unsigned long boundary_size, mask;
unsigned long address = -1; unsigned long address = -1;
unsigned long limit; bool first = true;
u32 start, i;
next_bit >>= PAGE_SHIFT; preempt_disable();
mask = dma_get_seg_boundary(dev); mask = dma_get_seg_boundary(dev);
again:
start = this_cpu_read(*dom->next_index);
/* Sanity check - is it really necessary? */
if (unlikely(start > APERTURE_MAX_RANGES)) {
start = 0;
this_cpu_write(*dom->next_index, 0);
}
boundary_size = mask + 1 ? ALIGN(mask + 1, PAGE_SIZE) >> PAGE_SHIFT : boundary_size = mask + 1 ? ALIGN(mask + 1, PAGE_SIZE) >> PAGE_SHIFT :
1UL << (BITS_PER_LONG - PAGE_SHIFT); 1UL << (BITS_PER_LONG - PAGE_SHIFT);
for (;i < max_index; ++i) { for (i = 0; i < APERTURE_MAX_RANGES; ++i) {
unsigned long offset = dom->aperture[i]->offset >> PAGE_SHIFT; struct aperture_range *range;
int index;
if (dom->aperture[i]->offset >= dma_mask) index = (start + i) % APERTURE_MAX_RANGES;
break;
limit = iommu_device_max_index(APERTURE_RANGE_PAGES, offset, range = dom->aperture[index];
dma_mask >> PAGE_SHIFT);
address = iommu_area_alloc(dom->aperture[i]->bitmap, if (!range || range->offset >= dma_mask)
limit, next_bit, pages, 0, continue;
boundary_size, align_mask);
address = dma_ops_aperture_alloc(dom, range, pages,
dma_mask, boundary_size,
align_mask, first);
if (address != -1) { if (address != -1) {
address = dom->aperture[i]->offset + address = range->offset + (address << PAGE_SHIFT);
(address << PAGE_SHIFT); this_cpu_write(*dom->next_index, index);
dom->next_address = address + (pages << PAGE_SHIFT);
break; break;
} }
}
next_bit = 0; if (address == -1 && first) {
first = false;
goto again;
} }
preempt_enable();
return address; return address;
} }
...@@ -1526,21 +1647,14 @@ static unsigned long dma_ops_alloc_addresses(struct device *dev, ...@@ -1526,21 +1647,14 @@ static unsigned long dma_ops_alloc_addresses(struct device *dev,
unsigned long align_mask, unsigned long align_mask,
u64 dma_mask) u64 dma_mask)
{ {
unsigned long address; unsigned long address = -1;
#ifdef CONFIG_IOMMU_STRESS
dom->next_address = 0;
dom->need_flush = true;
#endif
address = dma_ops_area_alloc(dev, dom, pages, align_mask, while (address == -1) {
dma_mask, dom->next_address); address = dma_ops_area_alloc(dev, dom, pages,
align_mask, dma_mask);
if (address == -1) { if (address == -1 && alloc_new_range(dom, false, GFP_ATOMIC))
dom->next_address = 0; break;
address = dma_ops_area_alloc(dev, dom, pages, align_mask,
dma_mask, 0);
dom->need_flush = true;
} }
if (unlikely(address == -1)) if (unlikely(address == -1))
...@@ -1562,6 +1676,7 @@ static void dma_ops_free_addresses(struct dma_ops_domain *dom, ...@@ -1562,6 +1676,7 @@ static void dma_ops_free_addresses(struct dma_ops_domain *dom,
{ {
unsigned i = address >> APERTURE_RANGE_SHIFT; unsigned i = address >> APERTURE_RANGE_SHIFT;
struct aperture_range *range = dom->aperture[i]; struct aperture_range *range = dom->aperture[i];
unsigned long flags;
BUG_ON(i >= APERTURE_MAX_RANGES || range == NULL); BUG_ON(i >= APERTURE_MAX_RANGES || range == NULL);
...@@ -1570,12 +1685,18 @@ static void dma_ops_free_addresses(struct dma_ops_domain *dom, ...@@ -1570,12 +1685,18 @@ static void dma_ops_free_addresses(struct dma_ops_domain *dom,
return; return;
#endif #endif
if (address >= dom->next_address) if (amd_iommu_unmap_flush) {
dom->need_flush = true; domain_flush_tlb(&dom->domain);
domain_flush_complete(&dom->domain);
}
address = (address % APERTURE_RANGE_SIZE) >> PAGE_SHIFT; address = (address % APERTURE_RANGE_SIZE) >> PAGE_SHIFT;
spin_lock_irqsave(&range->bitmap_lock, flags);
if (address + pages > range->next_bit)
range->next_bit = address + pages;
bitmap_clear(range->bitmap, address, pages); bitmap_clear(range->bitmap, address, pages);
spin_unlock_irqrestore(&range->bitmap_lock, flags);
} }
...@@ -1755,6 +1876,8 @@ static void dma_ops_domain_free(struct dma_ops_domain *dom) ...@@ -1755,6 +1876,8 @@ static void dma_ops_domain_free(struct dma_ops_domain *dom)
if (!dom) if (!dom)
return; return;
free_percpu(dom->next_index);
del_domain_from_list(&dom->domain); del_domain_from_list(&dom->domain);
free_pagetable(&dom->domain); free_pagetable(&dom->domain);
...@@ -1769,6 +1892,23 @@ static void dma_ops_domain_free(struct dma_ops_domain *dom) ...@@ -1769,6 +1892,23 @@ static void dma_ops_domain_free(struct dma_ops_domain *dom)
kfree(dom); kfree(dom);
} }
/*
 * Grow @dma_dom until it has @max_apertures aperture ranges.
 *
 * The number of ranges already present is derived from the current
 * aperture_size; one alloc_new_range() call (unpopulated, GFP_KERNEL) is
 * made for each missing range.
 *
 * Returns 0 if no range was missing or all allocations succeeded, otherwise
 * the error returned by the first failing alloc_new_range() call.
 */
static int dma_ops_domain_alloc_apertures(struct dma_ops_domain *dma_dom,
					  int max_apertures)
{
	int present = dma_dom->aperture_size >> APERTURE_RANGE_SHIFT;
	int missing;

	for (missing = max_apertures - present; missing > 0; --missing) {
		int err = alloc_new_range(dma_dom, false, GFP_KERNEL);

		if (err)
			return err;
	}

	return 0;
}
/* /*
* Allocates a new protection domain usable for the dma_ops functions. * Allocates a new protection domain usable for the dma_ops functions.
* It also initializes the page table and the address allocator data * It also initializes the page table and the address allocator data
...@@ -1777,6 +1917,7 @@ static void dma_ops_domain_free(struct dma_ops_domain *dom) ...@@ -1777,6 +1917,7 @@ static void dma_ops_domain_free(struct dma_ops_domain *dom)
static struct dma_ops_domain *dma_ops_domain_alloc(void) static struct dma_ops_domain *dma_ops_domain_alloc(void)
{ {
struct dma_ops_domain *dma_dom; struct dma_ops_domain *dma_dom;
int cpu;
dma_dom = kzalloc(sizeof(struct dma_ops_domain), GFP_KERNEL); dma_dom = kzalloc(sizeof(struct dma_ops_domain), GFP_KERNEL);
if (!dma_dom) if (!dma_dom)
...@@ -1785,6 +1926,10 @@ static struct dma_ops_domain *dma_ops_domain_alloc(void) ...@@ -1785,6 +1926,10 @@ static struct dma_ops_domain *dma_ops_domain_alloc(void)
if (protection_domain_init(&dma_dom->domain)) if (protection_domain_init(&dma_dom->domain))
goto free_dma_dom; goto free_dma_dom;
dma_dom->next_index = alloc_percpu(u32);
if (!dma_dom->next_index)
goto free_dma_dom;
dma_dom->domain.mode = PAGE_MODE_2_LEVEL; dma_dom->domain.mode = PAGE_MODE_2_LEVEL;
dma_dom->domain.pt_root = (void *)get_zeroed_page(GFP_KERNEL); dma_dom->domain.pt_root = (void *)get_zeroed_page(GFP_KERNEL);
dma_dom->domain.flags = PD_DMA_OPS_MASK; dma_dom->domain.flags = PD_DMA_OPS_MASK;
...@@ -1792,8 +1937,6 @@ static struct dma_ops_domain *dma_ops_domain_alloc(void) ...@@ -1792,8 +1937,6 @@ static struct dma_ops_domain *dma_ops_domain_alloc(void)
if (!dma_dom->domain.pt_root) if (!dma_dom->domain.pt_root)
goto free_dma_dom; goto free_dma_dom;
dma_dom->need_flush = false;
add_domain_to_list(&dma_dom->domain); add_domain_to_list(&dma_dom->domain);
if (alloc_new_range(dma_dom, true, GFP_KERNEL)) if (alloc_new_range(dma_dom, true, GFP_KERNEL))
...@@ -1804,8 +1947,9 @@ static struct dma_ops_domain *dma_ops_domain_alloc(void) ...@@ -1804,8 +1947,9 @@ static struct dma_ops_domain *dma_ops_domain_alloc(void)
* a valid dma-address. So we can use 0 as error value * a valid dma-address. So we can use 0 as error value
*/ */
dma_dom->aperture[0]->bitmap[0] = 1; dma_dom->aperture[0]->bitmap[0] = 1;
dma_dom->next_address = 0;
for_each_possible_cpu(cpu)
*per_cpu_ptr(dma_dom->next_index, cpu) = 0;
return dma_dom; return dma_dom;
...@@ -2328,7 +2472,7 @@ static dma_addr_t dma_ops_domain_map(struct dma_ops_domain *dom, ...@@ -2328,7 +2472,7 @@ static dma_addr_t dma_ops_domain_map(struct dma_ops_domain *dom,
else if (direction == DMA_BIDIRECTIONAL) else if (direction == DMA_BIDIRECTIONAL)
__pte |= IOMMU_PTE_IR | IOMMU_PTE_IW; __pte |= IOMMU_PTE_IR | IOMMU_PTE_IW;
WARN_ON(*pte); WARN_ON_ONCE(*pte);
*pte = __pte; *pte = __pte;
...@@ -2357,7 +2501,7 @@ static void dma_ops_domain_unmap(struct dma_ops_domain *dom, ...@@ -2357,7 +2501,7 @@ static void dma_ops_domain_unmap(struct dma_ops_domain *dom,
pte += PM_LEVEL_INDEX(0, address); pte += PM_LEVEL_INDEX(0, address);
WARN_ON(!*pte); WARN_ON_ONCE(!*pte);
*pte = 0ULL; *pte = 0ULL;
} }
...@@ -2393,26 +2537,11 @@ static dma_addr_t __map_single(struct device *dev, ...@@ -2393,26 +2537,11 @@ static dma_addr_t __map_single(struct device *dev,
if (align) if (align)
align_mask = (1UL << get_order(size)) - 1; align_mask = (1UL << get_order(size)) - 1;
retry:
address = dma_ops_alloc_addresses(dev, dma_dom, pages, align_mask, address = dma_ops_alloc_addresses(dev, dma_dom, pages, align_mask,
dma_mask); dma_mask);
if (unlikely(address == DMA_ERROR_CODE)) {
/*
* setting next_address here will let the address
* allocator only scan the new allocated range in the
* first run. This is a small optimization.
*/
dma_dom->next_address = dma_dom->aperture_size;
if (alloc_new_range(dma_dom, false, GFP_ATOMIC)) if (address == DMA_ERROR_CODE)
goto out; goto out;
/*
* aperture was successfully enlarged by 128 MB, try
* allocation again
*/
goto retry;
}
start = address; start = address;
for (i = 0; i < pages; ++i) { for (i = 0; i < pages; ++i) {
...@@ -2427,11 +2556,10 @@ static dma_addr_t __map_single(struct device *dev, ...@@ -2427,11 +2556,10 @@ static dma_addr_t __map_single(struct device *dev,
ADD_STATS_COUNTER(alloced_io_mem, size); ADD_STATS_COUNTER(alloced_io_mem, size);
if (unlikely(dma_dom->need_flush && !amd_iommu_unmap_flush)) { if (unlikely(amd_iommu_np_cache)) {
domain_flush_tlb(&dma_dom->domain);
dma_dom->need_flush = false;
} else if (unlikely(amd_iommu_np_cache))
domain_flush_pages(&dma_dom->domain, address, size); domain_flush_pages(&dma_dom->domain, address, size);
domain_flush_complete(&dma_dom->domain);
}
out: out:
return address; return address;
...@@ -2478,11 +2606,6 @@ static void __unmap_single(struct dma_ops_domain *dma_dom, ...@@ -2478,11 +2606,6 @@ static void __unmap_single(struct dma_ops_domain *dma_dom,
SUB_STATS_COUNTER(alloced_io_mem, size); SUB_STATS_COUNTER(alloced_io_mem, size);
dma_ops_free_addresses(dma_dom, dma_addr, pages); dma_ops_free_addresses(dma_dom, dma_addr, pages);
if (amd_iommu_unmap_flush || dma_dom->need_flush) {
domain_flush_pages(&dma_dom->domain, flush_addr, size);
dma_dom->need_flush = false;
}
} }
/* /*
...@@ -2493,11 +2616,9 @@ static dma_addr_t map_page(struct device *dev, struct page *page, ...@@ -2493,11 +2616,9 @@ static dma_addr_t map_page(struct device *dev, struct page *page,
enum dma_data_direction dir, enum dma_data_direction dir,
struct dma_attrs *attrs) struct dma_attrs *attrs)
{ {
unsigned long flags; phys_addr_t paddr = page_to_phys(page) + offset;
struct protection_domain *domain; struct protection_domain *domain;
dma_addr_t addr;
u64 dma_mask; u64 dma_mask;
phys_addr_t paddr = page_to_phys(page) + offset;
INC_STATS_COUNTER(cnt_map_single); INC_STATS_COUNTER(cnt_map_single);
...@@ -2509,19 +2630,8 @@ static dma_addr_t map_page(struct device *dev, struct page *page, ...@@ -2509,19 +2630,8 @@ static dma_addr_t map_page(struct device *dev, struct page *page,
dma_mask = *dev->dma_mask; dma_mask = *dev->dma_mask;
spin_lock_irqsave(&domain->lock, flags); return __map_single(dev, domain->priv, paddr, size, dir, false,
addr = __map_single(dev, domain->priv, paddr, size, dir, false,
dma_mask); dma_mask);
if (addr == DMA_ERROR_CODE)
goto out;
domain_flush_complete(domain);
out:
spin_unlock_irqrestore(&domain->lock, flags);
return addr;
} }
/* /*
...@@ -2530,7 +2640,6 @@ static dma_addr_t map_page(struct device *dev, struct page *page, ...@@ -2530,7 +2640,6 @@ static dma_addr_t map_page(struct device *dev, struct page *page,
static void unmap_page(struct device *dev, dma_addr_t dma_addr, size_t size, static void unmap_page(struct device *dev, dma_addr_t dma_addr, size_t size,
enum dma_data_direction dir, struct dma_attrs *attrs) enum dma_data_direction dir, struct dma_attrs *attrs)
{ {
unsigned long flags;
struct protection_domain *domain; struct protection_domain *domain;
INC_STATS_COUNTER(cnt_unmap_single); INC_STATS_COUNTER(cnt_unmap_single);
...@@ -2539,13 +2648,7 @@ static void unmap_page(struct device *dev, dma_addr_t dma_addr, size_t size, ...@@ -2539,13 +2648,7 @@ static void unmap_page(struct device *dev, dma_addr_t dma_addr, size_t size,
if (IS_ERR(domain)) if (IS_ERR(domain))
return; return;
spin_lock_irqsave(&domain->lock, flags);
__unmap_single(domain->priv, dma_addr, size, dir); __unmap_single(domain->priv, dma_addr, size, dir);
domain_flush_complete(domain);
spin_unlock_irqrestore(&domain->lock, flags);
} }
/* /*
...@@ -2556,7 +2659,6 @@ static int map_sg(struct device *dev, struct scatterlist *sglist, ...@@ -2556,7 +2659,6 @@ static int map_sg(struct device *dev, struct scatterlist *sglist,
int nelems, enum dma_data_direction dir, int nelems, enum dma_data_direction dir,
struct dma_attrs *attrs) struct dma_attrs *attrs)
{ {
unsigned long flags;
struct protection_domain *domain; struct protection_domain *domain;
int i; int i;
struct scatterlist *s; struct scatterlist *s;
...@@ -2572,8 +2674,6 @@ static int map_sg(struct device *dev, struct scatterlist *sglist, ...@@ -2572,8 +2674,6 @@ static int map_sg(struct device *dev, struct scatterlist *sglist,
dma_mask = *dev->dma_mask; dma_mask = *dev->dma_mask;
spin_lock_irqsave(&domain->lock, flags);
for_each_sg(sglist, s, nelems, i) { for_each_sg(sglist, s, nelems, i) {
paddr = sg_phys(s); paddr = sg_phys(s);
...@@ -2588,12 +2688,8 @@ static int map_sg(struct device *dev, struct scatterlist *sglist, ...@@ -2588,12 +2688,8 @@ static int map_sg(struct device *dev, struct scatterlist *sglist,
goto unmap; goto unmap;
} }
domain_flush_complete(domain);
out:
spin_unlock_irqrestore(&domain->lock, flags);
return mapped_elems; return mapped_elems;
unmap: unmap:
for_each_sg(sglist, s, mapped_elems, i) { for_each_sg(sglist, s, mapped_elems, i) {
if (s->dma_address) if (s->dma_address)
...@@ -2602,9 +2698,7 @@ static int map_sg(struct device *dev, struct scatterlist *sglist, ...@@ -2602,9 +2698,7 @@ static int map_sg(struct device *dev, struct scatterlist *sglist,
s->dma_address = s->dma_length = 0; s->dma_address = s->dma_length = 0;
} }
mapped_elems = 0; return 0;
goto out;
} }
/* /*
...@@ -2615,7 +2709,6 @@ static void unmap_sg(struct device *dev, struct scatterlist *sglist, ...@@ -2615,7 +2709,6 @@ static void unmap_sg(struct device *dev, struct scatterlist *sglist,
int nelems, enum dma_data_direction dir, int nelems, enum dma_data_direction dir,
struct dma_attrs *attrs) struct dma_attrs *attrs)
{ {
unsigned long flags;
struct protection_domain *domain; struct protection_domain *domain;
struct scatterlist *s; struct scatterlist *s;
int i; int i;
...@@ -2626,17 +2719,11 @@ static void unmap_sg(struct device *dev, struct scatterlist *sglist, ...@@ -2626,17 +2719,11 @@ static void unmap_sg(struct device *dev, struct scatterlist *sglist,
if (IS_ERR(domain)) if (IS_ERR(domain))
return; return;
spin_lock_irqsave(&domain->lock, flags);
for_each_sg(sglist, s, nelems, i) { for_each_sg(sglist, s, nelems, i) {
__unmap_single(domain->priv, s->dma_address, __unmap_single(domain->priv, s->dma_address,
s->dma_length, dir); s->dma_length, dir);
s->dma_address = s->dma_length = 0; s->dma_address = s->dma_length = 0;
} }
domain_flush_complete(domain);
spin_unlock_irqrestore(&domain->lock, flags);
} }
/* /*
...@@ -2648,7 +2735,6 @@ static void *alloc_coherent(struct device *dev, size_t size, ...@@ -2648,7 +2735,6 @@ static void *alloc_coherent(struct device *dev, size_t size,
{ {
u64 dma_mask = dev->coherent_dma_mask; u64 dma_mask = dev->coherent_dma_mask;
struct protection_domain *domain; struct protection_domain *domain;
unsigned long flags;
struct page *page; struct page *page;
INC_STATS_COUNTER(cnt_alloc_coherent); INC_STATS_COUNTER(cnt_alloc_coherent);
...@@ -2680,19 +2766,11 @@ static void *alloc_coherent(struct device *dev, size_t size, ...@@ -2680,19 +2766,11 @@ static void *alloc_coherent(struct device *dev, size_t size,
if (!dma_mask) if (!dma_mask)
dma_mask = *dev->dma_mask; dma_mask = *dev->dma_mask;
spin_lock_irqsave(&domain->lock, flags);
*dma_addr = __map_single(dev, domain->priv, page_to_phys(page), *dma_addr = __map_single(dev, domain->priv, page_to_phys(page),
size, DMA_BIDIRECTIONAL, true, dma_mask); size, DMA_BIDIRECTIONAL, true, dma_mask);
if (*dma_addr == DMA_ERROR_CODE) { if (*dma_addr == DMA_ERROR_CODE)
spin_unlock_irqrestore(&domain->lock, flags);
goto out_free; goto out_free;
}
domain_flush_complete(domain);
spin_unlock_irqrestore(&domain->lock, flags);
return page_address(page); return page_address(page);
...@@ -2712,7 +2790,6 @@ static void free_coherent(struct device *dev, size_t size, ...@@ -2712,7 +2790,6 @@ static void free_coherent(struct device *dev, size_t size,
struct dma_attrs *attrs) struct dma_attrs *attrs)
{ {
struct protection_domain *domain; struct protection_domain *domain;
unsigned long flags;
struct page *page; struct page *page;
INC_STATS_COUNTER(cnt_free_coherent); INC_STATS_COUNTER(cnt_free_coherent);
...@@ -2724,14 +2801,8 @@ static void free_coherent(struct device *dev, size_t size, ...@@ -2724,14 +2801,8 @@ static void free_coherent(struct device *dev, size_t size,
if (IS_ERR(domain)) if (IS_ERR(domain))
goto free_mem; goto free_mem;
spin_lock_irqsave(&domain->lock, flags);
__unmap_single(domain->priv, dma_addr, size, DMA_BIDIRECTIONAL); __unmap_single(domain->priv, dma_addr, size, DMA_BIDIRECTIONAL);
domain_flush_complete(domain);
spin_unlock_irqrestore(&domain->lock, flags);
free_mem: free_mem:
if (!dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT)) if (!dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT))
__free_pages(page, get_order(size)); __free_pages(page, get_order(size));
...@@ -2746,14 +2817,43 @@ static int amd_iommu_dma_supported(struct device *dev, u64 mask) ...@@ -2746,14 +2817,43 @@ static int amd_iommu_dma_supported(struct device *dev, u64 mask)
return check_device(dev); return check_device(dev);
} }
/*
 * dma_map_ops.set_dma_mask callback: record the new DMA mask for @dev and
 * preallocate aperture ranges according to how wide the mask is.
 *
 * @dev:  device whose DMA mask is being set
 * @mask: the requested DMA mask
 *
 * The mask is stored in *dev->dma_mask because the mapping paths in this
 * file (map_page, map_sg, alloc_coherent) read it from there; without the
 * store a wider mask requested by the driver would never take effect.
 * NOTE(review): this assumes the generic dma_set_mask() delegates entirely
 * to this callback when it is set and does not store the mask itself -
 * confirm against include/linux/dma-mapping.h.
 *
 * Returns 0 on success, the error encoded in the domain pointer if
 * get_domain() fails, or -EIO if the device has no dma_mask to store into.
 * A failure to preallocate apertures is only logged: ranges can still be
 * allocated on demand later.
 */
static int set_dma_mask(struct device *dev, u64 mask)
{
	struct protection_domain *domain;
	int max_apertures = 1;

	domain = get_domain(dev);
	if (IS_ERR(domain))
		return PTR_ERR(domain);

	if (!dev->dma_mask)
		return -EIO;

	*dev->dma_mask = mask;

	if (mask == DMA_BIT_MASK(64))
		max_apertures = 8;
	else if (mask > DMA_BIT_MASK(32))
		max_apertures = 4;

	/*
	 * To prevent lock contention it doesn't make sense to allocate more
	 * apertures than online cpus
	 */
	if (max_apertures > num_online_cpus())
		max_apertures = num_online_cpus();

	if (dma_ops_domain_alloc_apertures(domain->priv, max_apertures))
		dev_err(dev, "Can't allocate %d iommu apertures\n",
			max_apertures);

	return 0;
}
static struct dma_map_ops amd_iommu_dma_ops = { static struct dma_map_ops amd_iommu_dma_ops = {
.alloc = alloc_coherent, .alloc = alloc_coherent,
.free = free_coherent, .free = free_coherent,
.map_page = map_page, .map_page = map_page,
.unmap_page = unmap_page, .unmap_page = unmap_page,
.map_sg = map_sg, .map_sg = map_sg,
.unmap_sg = unmap_sg, .unmap_sg = unmap_sg,
.dma_supported = amd_iommu_dma_supported, .dma_supported = amd_iommu_dma_supported,
.set_dma_mask = set_dma_mask,
}; };
int __init amd_iommu_init_api(void) int __init amd_iommu_init_api(void)
...@@ -3757,11 +3857,9 @@ static struct irq_domain *get_irq_domain(struct irq_alloc_info *info) ...@@ -3757,11 +3857,9 @@ static struct irq_domain *get_irq_domain(struct irq_alloc_info *info)
case X86_IRQ_ALLOC_TYPE_MSI: case X86_IRQ_ALLOC_TYPE_MSI:
case X86_IRQ_ALLOC_TYPE_MSIX: case X86_IRQ_ALLOC_TYPE_MSIX:
devid = get_device_id(&info->msi_dev->dev); devid = get_device_id(&info->msi_dev->dev);
if (devid >= 0) { iommu = amd_iommu_rlookup_table[devid];
iommu = amd_iommu_rlookup_table[devid]; if (iommu)
if (iommu) return iommu->msi_domain;
return iommu->msi_domain;
}
break; break;
default: default:
break; break;
......
...@@ -424,46 +424,6 @@ struct protection_domain { ...@@ -424,46 +424,6 @@ struct protection_domain {
void *priv; /* private data */ void *priv; /* private data */
}; };
/*
* For dynamic growth the aperture size is split into ranges of 128MB of
* DMA address space each. This struct represents one such range.
*/
struct aperture_range {
/* address allocation bitmap */
unsigned long *bitmap;
/*
* Array of PTE pages for the aperture. In this array we save all the
* leaf pages of the domain page table used for the aperture. This way
* we don't need to walk the page table to find a specific PTE. We can
* just calculate its address in constant time.
*/
u64 *pte_pages[64];
unsigned long offset;
};
/*
* Data container for a dma_ops specific protection domain
*/
struct dma_ops_domain {
/* generic protection domain information */
struct protection_domain domain;
/* size of the aperture for the mappings */
unsigned long aperture_size;
/* address we start to search for free addresses */
unsigned long next_address;
/* address space relevant data */
struct aperture_range *aperture[APERTURE_MAX_RANGES];
/* This will be set to true when TLB needs to be flushed */
bool need_flush;
};
/* /*
* Structure where we save information about one hardware AMD IOMMU in the * Structure where we save information about one hardware AMD IOMMU in the
* system. * system.
......
...@@ -432,7 +432,7 @@ static void mn_release(struct mmu_notifier *mn, struct mm_struct *mm) ...@@ -432,7 +432,7 @@ static void mn_release(struct mmu_notifier *mn, struct mm_struct *mm)
unbind_pasid(pasid_state); unbind_pasid(pasid_state);
} }
static struct mmu_notifier_ops iommu_mn = { static const struct mmu_notifier_ops iommu_mn = {
.release = mn_release, .release = mn_release,
.clear_flush_young = mn_clear_flush_young, .clear_flush_young = mn_clear_flush_young,
.invalidate_page = mn_invalidate_page, .invalidate_page = mn_invalidate_page,
...@@ -513,43 +513,39 @@ static bool access_error(struct vm_area_struct *vma, struct fault *fault) ...@@ -513,43 +513,39 @@ static bool access_error(struct vm_area_struct *vma, struct fault *fault)
static void do_fault(struct work_struct *work) static void do_fault(struct work_struct *work)
{ {
struct fault *fault = container_of(work, struct fault, work); struct fault *fault = container_of(work, struct fault, work);
struct mm_struct *mm;
struct vm_area_struct *vma; struct vm_area_struct *vma;
int ret = VM_FAULT_ERROR;
unsigned int flags = 0;
struct mm_struct *mm;
u64 address; u64 address;
int ret, write;
write = !!(fault->flags & PPR_FAULT_WRITE);
mm = fault->state->mm; mm = fault->state->mm;
address = fault->address; address = fault->address;
if (fault->flags & PPR_FAULT_USER)
flags |= FAULT_FLAG_USER;
if (fault->flags & PPR_FAULT_WRITE)
flags |= FAULT_FLAG_WRITE;
down_read(&mm->mmap_sem); down_read(&mm->mmap_sem);
vma = find_extend_vma(mm, address); vma = find_extend_vma(mm, address);
if (!vma || address < vma->vm_start) { if (!vma || address < vma->vm_start)
/* failed to get a vma in the right range */ /* failed to get a vma in the right range */
up_read(&mm->mmap_sem);
handle_fault_error(fault);
goto out; goto out;
}
/* Check if we have the right permissions on the vma */ /* Check if we have the right permissions on the vma */
if (access_error(vma, fault)) { if (access_error(vma, fault))
up_read(&mm->mmap_sem);
handle_fault_error(fault);
goto out; goto out;
}
ret = handle_mm_fault(mm, vma, address, write); ret = handle_mm_fault(mm, vma, address, flags);
if (ret & VM_FAULT_ERROR) {
/* failed to service fault */
up_read(&mm->mmap_sem);
handle_fault_error(fault);
goto out;
}
out:
up_read(&mm->mmap_sem); up_read(&mm->mmap_sem);
out: if (ret & VM_FAULT_ERROR)
/* failed to service fault */
handle_fault_error(fault);
finish_pri_tag(fault->dev_state, fault->state, fault->tag); finish_pri_tag(fault->dev_state, fault->state, fault->tag);
put_pasid_state(fault->state); put_pasid_state(fault->state);
......
...@@ -40,7 +40,10 @@ ...@@ -40,7 +40,10 @@
#define IDR0_ST_LVL_SHIFT 27 #define IDR0_ST_LVL_SHIFT 27
#define IDR0_ST_LVL_MASK 0x3 #define IDR0_ST_LVL_MASK 0x3
#define IDR0_ST_LVL_2LVL (1 << IDR0_ST_LVL_SHIFT) #define IDR0_ST_LVL_2LVL (1 << IDR0_ST_LVL_SHIFT)
#define IDR0_STALL_MODEL (3 << 24) #define IDR0_STALL_MODEL_SHIFT 24
#define IDR0_STALL_MODEL_MASK 0x3
#define IDR0_STALL_MODEL_STALL (0 << IDR0_STALL_MODEL_SHIFT)
#define IDR0_STALL_MODEL_FORCE (2 << IDR0_STALL_MODEL_SHIFT)
#define IDR0_TTENDIAN_SHIFT 21 #define IDR0_TTENDIAN_SHIFT 21
#define IDR0_TTENDIAN_MASK 0x3 #define IDR0_TTENDIAN_MASK 0x3
#define IDR0_TTENDIAN_LE (2 << IDR0_TTENDIAN_SHIFT) #define IDR0_TTENDIAN_LE (2 << IDR0_TTENDIAN_SHIFT)
...@@ -253,6 +256,9 @@ ...@@ -253,6 +256,9 @@
#define STRTAB_STE_1_STRW_EL2 2UL #define STRTAB_STE_1_STRW_EL2 2UL
#define STRTAB_STE_1_STRW_SHIFT 30 #define STRTAB_STE_1_STRW_SHIFT 30
#define STRTAB_STE_1_SHCFG_INCOMING 1UL
#define STRTAB_STE_1_SHCFG_SHIFT 44
#define STRTAB_STE_2_S2VMID_SHIFT 0 #define STRTAB_STE_2_S2VMID_SHIFT 0
#define STRTAB_STE_2_S2VMID_MASK 0xffffUL #define STRTAB_STE_2_S2VMID_MASK 0xffffUL
#define STRTAB_STE_2_VTCR_SHIFT 32 #define STRTAB_STE_2_VTCR_SHIFT 32
...@@ -378,7 +384,6 @@ ...@@ -378,7 +384,6 @@
#define PRIQ_0_SID_MASK 0xffffffffUL #define PRIQ_0_SID_MASK 0xffffffffUL
#define PRIQ_0_SSID_SHIFT 32 #define PRIQ_0_SSID_SHIFT 32
#define PRIQ_0_SSID_MASK 0xfffffUL #define PRIQ_0_SSID_MASK 0xfffffUL
#define PRIQ_0_OF (1UL << 57)
#define PRIQ_0_PERM_PRIV (1UL << 58) #define PRIQ_0_PERM_PRIV (1UL << 58)
#define PRIQ_0_PERM_EXEC (1UL << 59) #define PRIQ_0_PERM_EXEC (1UL << 59)
#define PRIQ_0_PERM_READ (1UL << 60) #define PRIQ_0_PERM_READ (1UL << 60)
...@@ -855,15 +860,17 @@ static void arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu) ...@@ -855,15 +860,17 @@ static void arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu)
}; };
dev_err(smmu->dev, "CMDQ error (cons 0x%08x): %s\n", cons, dev_err(smmu->dev, "CMDQ error (cons 0x%08x): %s\n", cons,
cerror_str[idx]); idx < ARRAY_SIZE(cerror_str) ? cerror_str[idx] : "Unknown");
switch (idx) { switch (idx) {
case CMDQ_ERR_CERROR_ILL_IDX:
break;
case CMDQ_ERR_CERROR_ABT_IDX: case CMDQ_ERR_CERROR_ABT_IDX:
dev_err(smmu->dev, "retrying command fetch\n"); dev_err(smmu->dev, "retrying command fetch\n");
case CMDQ_ERR_CERROR_NONE_IDX: case CMDQ_ERR_CERROR_NONE_IDX:
return; return;
case CMDQ_ERR_CERROR_ILL_IDX:
/* Fallthrough */
default:
break;
} }
/* /*
...@@ -1042,6 +1049,8 @@ static void arm_smmu_write_strtab_ent(struct arm_smmu_device *smmu, u32 sid, ...@@ -1042,6 +1049,8 @@ static void arm_smmu_write_strtab_ent(struct arm_smmu_device *smmu, u32 sid,
val |= disable_bypass ? STRTAB_STE_0_CFG_ABORT val |= disable_bypass ? STRTAB_STE_0_CFG_ABORT
: STRTAB_STE_0_CFG_BYPASS; : STRTAB_STE_0_CFG_BYPASS;
dst[0] = cpu_to_le64(val); dst[0] = cpu_to_le64(val);
dst[1] = cpu_to_le64(STRTAB_STE_1_SHCFG_INCOMING
<< STRTAB_STE_1_SHCFG_SHIFT);
dst[2] = 0; /* Nuke the VMID */ dst[2] = 0; /* Nuke the VMID */
if (ste_live) if (ste_live)
arm_smmu_sync_ste_for_sid(smmu, sid); arm_smmu_sync_ste_for_sid(smmu, sid);
...@@ -1056,12 +1065,14 @@ static void arm_smmu_write_strtab_ent(struct arm_smmu_device *smmu, u32 sid, ...@@ -1056,12 +1065,14 @@ static void arm_smmu_write_strtab_ent(struct arm_smmu_device *smmu, u32 sid,
STRTAB_STE_1_S1C_CACHE_WBRA STRTAB_STE_1_S1C_CACHE_WBRA
<< STRTAB_STE_1_S1COR_SHIFT | << STRTAB_STE_1_S1COR_SHIFT |
STRTAB_STE_1_S1C_SH_ISH << STRTAB_STE_1_S1CSH_SHIFT | STRTAB_STE_1_S1C_SH_ISH << STRTAB_STE_1_S1CSH_SHIFT |
STRTAB_STE_1_S1STALLD |
#ifdef CONFIG_PCI_ATS #ifdef CONFIG_PCI_ATS
STRTAB_STE_1_EATS_TRANS << STRTAB_STE_1_EATS_SHIFT | STRTAB_STE_1_EATS_TRANS << STRTAB_STE_1_EATS_SHIFT |
#endif #endif
STRTAB_STE_1_STRW_NSEL1 << STRTAB_STE_1_STRW_SHIFT); STRTAB_STE_1_STRW_NSEL1 << STRTAB_STE_1_STRW_SHIFT);
if (smmu->features & ARM_SMMU_FEAT_STALLS)
dst[1] |= cpu_to_le64(STRTAB_STE_1_S1STALLD);
val |= (ste->s1_cfg->cdptr_dma & STRTAB_STE_0_S1CTXPTR_MASK val |= (ste->s1_cfg->cdptr_dma & STRTAB_STE_0_S1CTXPTR_MASK
<< STRTAB_STE_0_S1CTXPTR_SHIFT) | << STRTAB_STE_0_S1CTXPTR_SHIFT) |
STRTAB_STE_0_CFG_S1_TRANS; STRTAB_STE_0_CFG_S1_TRANS;
...@@ -1123,8 +1134,8 @@ static int arm_smmu_init_l2_strtab(struct arm_smmu_device *smmu, u32 sid) ...@@ -1123,8 +1134,8 @@ static int arm_smmu_init_l2_strtab(struct arm_smmu_device *smmu, u32 sid)
strtab = &cfg->strtab[(sid >> STRTAB_SPLIT) * STRTAB_L1_DESC_DWORDS]; strtab = &cfg->strtab[(sid >> STRTAB_SPLIT) * STRTAB_L1_DESC_DWORDS];
desc->span = STRTAB_SPLIT + 1; desc->span = STRTAB_SPLIT + 1;
desc->l2ptr = dma_zalloc_coherent(smmu->dev, size, &desc->l2ptr_dma, desc->l2ptr = dmam_alloc_coherent(smmu->dev, size, &desc->l2ptr_dma,
GFP_KERNEL); GFP_KERNEL | __GFP_ZERO);
if (!desc->l2ptr) { if (!desc->l2ptr) {
dev_err(smmu->dev, dev_err(smmu->dev,
"failed to allocate l2 stream table for SID %u\n", "failed to allocate l2 stream table for SID %u\n",
...@@ -1250,50 +1261,50 @@ static int arm_smmu_device_disable(struct arm_smmu_device *smmu); ...@@ -1250,50 +1261,50 @@ static int arm_smmu_device_disable(struct arm_smmu_device *smmu);
static irqreturn_t arm_smmu_gerror_handler(int irq, void *dev) static irqreturn_t arm_smmu_gerror_handler(int irq, void *dev)
{ {
u32 gerror, gerrorn; u32 gerror, gerrorn, active;
struct arm_smmu_device *smmu = dev; struct arm_smmu_device *smmu = dev;
gerror = readl_relaxed(smmu->base + ARM_SMMU_GERROR); gerror = readl_relaxed(smmu->base + ARM_SMMU_GERROR);
gerrorn = readl_relaxed(smmu->base + ARM_SMMU_GERRORN); gerrorn = readl_relaxed(smmu->base + ARM_SMMU_GERRORN);
gerror ^= gerrorn; active = gerror ^ gerrorn;
if (!(gerror & GERROR_ERR_MASK)) if (!(active & GERROR_ERR_MASK))
return IRQ_NONE; /* No errors pending */ return IRQ_NONE; /* No errors pending */
dev_warn(smmu->dev, dev_warn(smmu->dev,
"unexpected global error reported (0x%08x), this could be serious\n", "unexpected global error reported (0x%08x), this could be serious\n",
gerror); active);
if (gerror & GERROR_SFM_ERR) { if (active & GERROR_SFM_ERR) {
dev_err(smmu->dev, "device has entered Service Failure Mode!\n"); dev_err(smmu->dev, "device has entered Service Failure Mode!\n");
arm_smmu_device_disable(smmu); arm_smmu_device_disable(smmu);
} }
if (gerror & GERROR_MSI_GERROR_ABT_ERR) if (active & GERROR_MSI_GERROR_ABT_ERR)
dev_warn(smmu->dev, "GERROR MSI write aborted\n"); dev_warn(smmu->dev, "GERROR MSI write aborted\n");
if (gerror & GERROR_MSI_PRIQ_ABT_ERR) { if (active & GERROR_MSI_PRIQ_ABT_ERR) {
dev_warn(smmu->dev, "PRIQ MSI write aborted\n"); dev_warn(smmu->dev, "PRIQ MSI write aborted\n");
arm_smmu_priq_handler(irq, smmu->dev); arm_smmu_priq_handler(irq, smmu->dev);
} }
if (gerror & GERROR_MSI_EVTQ_ABT_ERR) { if (active & GERROR_MSI_EVTQ_ABT_ERR) {
dev_warn(smmu->dev, "EVTQ MSI write aborted\n"); dev_warn(smmu->dev, "EVTQ MSI write aborted\n");
arm_smmu_evtq_handler(irq, smmu->dev); arm_smmu_evtq_handler(irq, smmu->dev);
} }
if (gerror & GERROR_MSI_CMDQ_ABT_ERR) { if (active & GERROR_MSI_CMDQ_ABT_ERR) {
dev_warn(smmu->dev, "CMDQ MSI write aborted\n"); dev_warn(smmu->dev, "CMDQ MSI write aborted\n");
arm_smmu_cmdq_sync_handler(irq, smmu->dev); arm_smmu_cmdq_sync_handler(irq, smmu->dev);
} }
if (gerror & GERROR_PRIQ_ABT_ERR) if (active & GERROR_PRIQ_ABT_ERR)
dev_err(smmu->dev, "PRIQ write aborted -- events may have been lost\n"); dev_err(smmu->dev, "PRIQ write aborted -- events may have been lost\n");
if (gerror & GERROR_EVTQ_ABT_ERR) if (active & GERROR_EVTQ_ABT_ERR)
dev_err(smmu->dev, "EVTQ write aborted -- events may have been lost\n"); dev_err(smmu->dev, "EVTQ write aborted -- events may have been lost\n");
if (gerror & GERROR_CMDQ_ERR) if (active & GERROR_CMDQ_ERR)
arm_smmu_cmdq_skip_err(smmu); arm_smmu_cmdq_skip_err(smmu);
writel(gerror, smmu->base + ARM_SMMU_GERRORN); writel(gerror, smmu->base + ARM_SMMU_GERRORN);
...@@ -1335,7 +1346,7 @@ static void arm_smmu_tlb_inv_context(void *cookie) ...@@ -1335,7 +1346,7 @@ static void arm_smmu_tlb_inv_context(void *cookie)
} }
static void arm_smmu_tlb_inv_range_nosync(unsigned long iova, size_t size, static void arm_smmu_tlb_inv_range_nosync(unsigned long iova, size_t size,
bool leaf, void *cookie) size_t granule, bool leaf, void *cookie)
{ {
struct arm_smmu_domain *smmu_domain = cookie; struct arm_smmu_domain *smmu_domain = cookie;
struct arm_smmu_device *smmu = smmu_domain->smmu; struct arm_smmu_device *smmu = smmu_domain->smmu;
...@@ -1354,7 +1365,10 @@ static void arm_smmu_tlb_inv_range_nosync(unsigned long iova, size_t size, ...@@ -1354,7 +1365,10 @@ static void arm_smmu_tlb_inv_range_nosync(unsigned long iova, size_t size,
cmd.tlbi.vmid = smmu_domain->s2_cfg.vmid; cmd.tlbi.vmid = smmu_domain->s2_cfg.vmid;
} }
arm_smmu_cmdq_issue_cmd(smmu, &cmd); do {
arm_smmu_cmdq_issue_cmd(smmu, &cmd);
cmd.tlbi.addr += granule;
} while (size -= granule);
} }
static struct iommu_gather_ops arm_smmu_gather_ops = { static struct iommu_gather_ops arm_smmu_gather_ops = {
...@@ -1429,10 +1443,10 @@ static void arm_smmu_domain_free(struct iommu_domain *domain) ...@@ -1429,10 +1443,10 @@ static void arm_smmu_domain_free(struct iommu_domain *domain)
struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg; struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
if (cfg->cdptr) { if (cfg->cdptr) {
dma_free_coherent(smmu_domain->smmu->dev, dmam_free_coherent(smmu_domain->smmu->dev,
CTXDESC_CD_DWORDS << 3, CTXDESC_CD_DWORDS << 3,
cfg->cdptr, cfg->cdptr,
cfg->cdptr_dma); cfg->cdptr_dma);
arm_smmu_bitmap_free(smmu->asid_map, cfg->cd.asid); arm_smmu_bitmap_free(smmu->asid_map, cfg->cd.asid);
} }
...@@ -1457,8 +1471,9 @@ static int arm_smmu_domain_finalise_s1(struct arm_smmu_domain *smmu_domain, ...@@ -1457,8 +1471,9 @@ static int arm_smmu_domain_finalise_s1(struct arm_smmu_domain *smmu_domain,
if (IS_ERR_VALUE(asid)) if (IS_ERR_VALUE(asid))
return asid; return asid;
cfg->cdptr = dma_zalloc_coherent(smmu->dev, CTXDESC_CD_DWORDS << 3, cfg->cdptr = dmam_alloc_coherent(smmu->dev, CTXDESC_CD_DWORDS << 3,
&cfg->cdptr_dma, GFP_KERNEL); &cfg->cdptr_dma,
GFP_KERNEL | __GFP_ZERO);
if (!cfg->cdptr) { if (!cfg->cdptr) {
dev_warn(smmu->dev, "failed to allocate context descriptor\n"); dev_warn(smmu->dev, "failed to allocate context descriptor\n");
ret = -ENOMEM; ret = -ENOMEM;
...@@ -1804,13 +1819,13 @@ static int arm_smmu_add_device(struct device *dev) ...@@ -1804,13 +1819,13 @@ static int arm_smmu_add_device(struct device *dev)
smmu = arm_smmu_get_for_pci_dev(pdev); smmu = arm_smmu_get_for_pci_dev(pdev);
if (!smmu) { if (!smmu) {
ret = -ENOENT; ret = -ENOENT;
goto out_put_group; goto out_remove_dev;
} }
smmu_group = kzalloc(sizeof(*smmu_group), GFP_KERNEL); smmu_group = kzalloc(sizeof(*smmu_group), GFP_KERNEL);
if (!smmu_group) { if (!smmu_group) {
ret = -ENOMEM; ret = -ENOMEM;
goto out_put_group; goto out_remove_dev;
} }
smmu_group->ste.valid = true; smmu_group->ste.valid = true;
...@@ -1826,20 +1841,20 @@ static int arm_smmu_add_device(struct device *dev) ...@@ -1826,20 +1841,20 @@ static int arm_smmu_add_device(struct device *dev)
for (i = 0; i < smmu_group->num_sids; ++i) { for (i = 0; i < smmu_group->num_sids; ++i) {
/* If we already know about this SID, then we're done */ /* If we already know about this SID, then we're done */
if (smmu_group->sids[i] == sid) if (smmu_group->sids[i] == sid)
return 0; goto out_put_group;
} }
/* Check the SID is in range of the SMMU and our stream table */ /* Check the SID is in range of the SMMU and our stream table */
if (!arm_smmu_sid_in_range(smmu, sid)) { if (!arm_smmu_sid_in_range(smmu, sid)) {
ret = -ERANGE; ret = -ERANGE;
goto out_put_group; goto out_remove_dev;
} }
/* Ensure l2 strtab is initialised */ /* Ensure l2 strtab is initialised */
if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) { if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) {
ret = arm_smmu_init_l2_strtab(smmu, sid); ret = arm_smmu_init_l2_strtab(smmu, sid);
if (ret) if (ret)
goto out_put_group; goto out_remove_dev;
} }
/* Resize the SID array for the group */ /* Resize the SID array for the group */
...@@ -1849,15 +1864,19 @@ static int arm_smmu_add_device(struct device *dev) ...@@ -1849,15 +1864,19 @@ static int arm_smmu_add_device(struct device *dev)
if (!sids) { if (!sids) {
smmu_group->num_sids--; smmu_group->num_sids--;
ret = -ENOMEM; ret = -ENOMEM;
goto out_put_group; goto out_remove_dev;
} }
/* Add the new SID */ /* Add the new SID */
sids[smmu_group->num_sids - 1] = sid; sids[smmu_group->num_sids - 1] = sid;
smmu_group->sids = sids; smmu_group->sids = sids;
return 0;
out_put_group: out_put_group:
iommu_group_put(group);
return 0;
out_remove_dev:
iommu_group_remove_device(dev);
iommu_group_put(group); iommu_group_put(group);
return ret; return ret;
} }
...@@ -1937,7 +1956,7 @@ static int arm_smmu_init_one_queue(struct arm_smmu_device *smmu, ...@@ -1937,7 +1956,7 @@ static int arm_smmu_init_one_queue(struct arm_smmu_device *smmu,
{ {
size_t qsz = ((1 << q->max_n_shift) * dwords) << 3; size_t qsz = ((1 << q->max_n_shift) * dwords) << 3;
q->base = dma_alloc_coherent(smmu->dev, qsz, &q->base_dma, GFP_KERNEL); q->base = dmam_alloc_coherent(smmu->dev, qsz, &q->base_dma, GFP_KERNEL);
if (!q->base) { if (!q->base) {
dev_err(smmu->dev, "failed to allocate queue (0x%zx bytes)\n", dev_err(smmu->dev, "failed to allocate queue (0x%zx bytes)\n",
qsz); qsz);
...@@ -1957,23 +1976,6 @@ static int arm_smmu_init_one_queue(struct arm_smmu_device *smmu, ...@@ -1957,23 +1976,6 @@ static int arm_smmu_init_one_queue(struct arm_smmu_device *smmu,
return 0; return 0;
} }
static void arm_smmu_free_one_queue(struct arm_smmu_device *smmu,
struct arm_smmu_queue *q)
{
size_t qsz = ((1 << q->max_n_shift) * q->ent_dwords) << 3;
dma_free_coherent(smmu->dev, qsz, q->base, q->base_dma);
}
static void arm_smmu_free_queues(struct arm_smmu_device *smmu)
{
arm_smmu_free_one_queue(smmu, &smmu->cmdq.q);
arm_smmu_free_one_queue(smmu, &smmu->evtq.q);
if (smmu->features & ARM_SMMU_FEAT_PRI)
arm_smmu_free_one_queue(smmu, &smmu->priq.q);
}
static int arm_smmu_init_queues(struct arm_smmu_device *smmu) static int arm_smmu_init_queues(struct arm_smmu_device *smmu)
{ {
int ret; int ret;
...@@ -1983,49 +1985,20 @@ static int arm_smmu_init_queues(struct arm_smmu_device *smmu) ...@@ -1983,49 +1985,20 @@ static int arm_smmu_init_queues(struct arm_smmu_device *smmu)
ret = arm_smmu_init_one_queue(smmu, &smmu->cmdq.q, ARM_SMMU_CMDQ_PROD, ret = arm_smmu_init_one_queue(smmu, &smmu->cmdq.q, ARM_SMMU_CMDQ_PROD,
ARM_SMMU_CMDQ_CONS, CMDQ_ENT_DWORDS); ARM_SMMU_CMDQ_CONS, CMDQ_ENT_DWORDS);
if (ret) if (ret)
goto out; return ret;
/* evtq */ /* evtq */
ret = arm_smmu_init_one_queue(smmu, &smmu->evtq.q, ARM_SMMU_EVTQ_PROD, ret = arm_smmu_init_one_queue(smmu, &smmu->evtq.q, ARM_SMMU_EVTQ_PROD,
ARM_SMMU_EVTQ_CONS, EVTQ_ENT_DWORDS); ARM_SMMU_EVTQ_CONS, EVTQ_ENT_DWORDS);
if (ret) if (ret)
goto out_free_cmdq; return ret;
/* priq */ /* priq */
if (!(smmu->features & ARM_SMMU_FEAT_PRI)) if (!(smmu->features & ARM_SMMU_FEAT_PRI))
return 0; return 0;
ret = arm_smmu_init_one_queue(smmu, &smmu->priq.q, ARM_SMMU_PRIQ_PROD, return arm_smmu_init_one_queue(smmu, &smmu->priq.q, ARM_SMMU_PRIQ_PROD,
ARM_SMMU_PRIQ_CONS, PRIQ_ENT_DWORDS); ARM_SMMU_PRIQ_CONS, PRIQ_ENT_DWORDS);
if (ret)
goto out_free_evtq;
return 0;
out_free_evtq:
arm_smmu_free_one_queue(smmu, &smmu->evtq.q);
out_free_cmdq:
arm_smmu_free_one_queue(smmu, &smmu->cmdq.q);
out:
return ret;
}
static void arm_smmu_free_l2_strtab(struct arm_smmu_device *smmu)
{
int i;
size_t size;
struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
size = 1 << (STRTAB_SPLIT + ilog2(STRTAB_STE_DWORDS) + 3);
for (i = 0; i < cfg->num_l1_ents; ++i) {
struct arm_smmu_strtab_l1_desc *desc = &cfg->l1_desc[i];
if (!desc->l2ptr)
continue;
dma_free_coherent(smmu->dev, size, desc->l2ptr,
desc->l2ptr_dma);
}
} }
static int arm_smmu_init_l1_strtab(struct arm_smmu_device *smmu) static int arm_smmu_init_l1_strtab(struct arm_smmu_device *smmu)
...@@ -2054,7 +2027,6 @@ static int arm_smmu_init_strtab_2lvl(struct arm_smmu_device *smmu) ...@@ -2054,7 +2027,6 @@ static int arm_smmu_init_strtab_2lvl(struct arm_smmu_device *smmu)
void *strtab; void *strtab;
u64 reg; u64 reg;
u32 size, l1size; u32 size, l1size;
int ret;
struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg; struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
/* /*
...@@ -2077,8 +2049,8 @@ static int arm_smmu_init_strtab_2lvl(struct arm_smmu_device *smmu) ...@@ -2077,8 +2049,8 @@ static int arm_smmu_init_strtab_2lvl(struct arm_smmu_device *smmu)
size, smmu->sid_bits); size, smmu->sid_bits);
l1size = cfg->num_l1_ents * (STRTAB_L1_DESC_DWORDS << 3); l1size = cfg->num_l1_ents * (STRTAB_L1_DESC_DWORDS << 3);
strtab = dma_zalloc_coherent(smmu->dev, l1size, &cfg->strtab_dma, strtab = dmam_alloc_coherent(smmu->dev, l1size, &cfg->strtab_dma,
GFP_KERNEL); GFP_KERNEL | __GFP_ZERO);
if (!strtab) { if (!strtab) {
dev_err(smmu->dev, dev_err(smmu->dev,
"failed to allocate l1 stream table (%u bytes)\n", "failed to allocate l1 stream table (%u bytes)\n",
...@@ -2095,13 +2067,7 @@ static int arm_smmu_init_strtab_2lvl(struct arm_smmu_device *smmu) ...@@ -2095,13 +2067,7 @@ static int arm_smmu_init_strtab_2lvl(struct arm_smmu_device *smmu)
<< STRTAB_BASE_CFG_SPLIT_SHIFT; << STRTAB_BASE_CFG_SPLIT_SHIFT;
cfg->strtab_base_cfg = reg; cfg->strtab_base_cfg = reg;
ret = arm_smmu_init_l1_strtab(smmu); return arm_smmu_init_l1_strtab(smmu);
if (ret)
dma_free_coherent(smmu->dev,
l1size,
strtab,
cfg->strtab_dma);
return ret;
} }
static int arm_smmu_init_strtab_linear(struct arm_smmu_device *smmu) static int arm_smmu_init_strtab_linear(struct arm_smmu_device *smmu)
...@@ -2112,8 +2078,8 @@ static int arm_smmu_init_strtab_linear(struct arm_smmu_device *smmu) ...@@ -2112,8 +2078,8 @@ static int arm_smmu_init_strtab_linear(struct arm_smmu_device *smmu)
struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg; struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
size = (1 << smmu->sid_bits) * (STRTAB_STE_DWORDS << 3); size = (1 << smmu->sid_bits) * (STRTAB_STE_DWORDS << 3);
strtab = dma_zalloc_coherent(smmu->dev, size, &cfg->strtab_dma, strtab = dmam_alloc_coherent(smmu->dev, size, &cfg->strtab_dma,
GFP_KERNEL); GFP_KERNEL | __GFP_ZERO);
if (!strtab) { if (!strtab) {
dev_err(smmu->dev, dev_err(smmu->dev,
"failed to allocate linear stream table (%u bytes)\n", "failed to allocate linear stream table (%u bytes)\n",
...@@ -2157,21 +2123,6 @@ static int arm_smmu_init_strtab(struct arm_smmu_device *smmu) ...@@ -2157,21 +2123,6 @@ static int arm_smmu_init_strtab(struct arm_smmu_device *smmu)
return 0; return 0;
} }
static void arm_smmu_free_strtab(struct arm_smmu_device *smmu)
{
struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
u32 size = cfg->num_l1_ents;
if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) {
arm_smmu_free_l2_strtab(smmu);
size *= STRTAB_L1_DESC_DWORDS << 3;
} else {
size *= STRTAB_STE_DWORDS * 3;
}
dma_free_coherent(smmu->dev, size, cfg->strtab, cfg->strtab_dma);
}
static int arm_smmu_init_structures(struct arm_smmu_device *smmu) static int arm_smmu_init_structures(struct arm_smmu_device *smmu)
{ {
int ret; int ret;
...@@ -2180,21 +2131,7 @@ static int arm_smmu_init_structures(struct arm_smmu_device *smmu) ...@@ -2180,21 +2131,7 @@ static int arm_smmu_init_structures(struct arm_smmu_device *smmu)
if (ret) if (ret)
return ret; return ret;
ret = arm_smmu_init_strtab(smmu); return arm_smmu_init_strtab(smmu);
if (ret)
goto out_free_queues;
return 0;
out_free_queues:
arm_smmu_free_queues(smmu);
return ret;
}
static void arm_smmu_free_structures(struct arm_smmu_device *smmu)
{
arm_smmu_free_strtab(smmu);
arm_smmu_free_queues(smmu);
} }
static int arm_smmu_write_reg_sync(struct arm_smmu_device *smmu, u32 val, static int arm_smmu_write_reg_sync(struct arm_smmu_device *smmu, u32 val,
...@@ -2532,8 +2469,12 @@ static int arm_smmu_device_probe(struct arm_smmu_device *smmu) ...@@ -2532,8 +2469,12 @@ static int arm_smmu_device_probe(struct arm_smmu_device *smmu)
dev_warn(smmu->dev, "IDR0.COHACC overridden by dma-coherent property (%s)\n", dev_warn(smmu->dev, "IDR0.COHACC overridden by dma-coherent property (%s)\n",
coherent ? "true" : "false"); coherent ? "true" : "false");
if (reg & IDR0_STALL_MODEL) switch (reg & IDR0_STALL_MODEL_MASK << IDR0_STALL_MODEL_SHIFT) {
case IDR0_STALL_MODEL_STALL:
/* Fallthrough */
case IDR0_STALL_MODEL_FORCE:
smmu->features |= ARM_SMMU_FEAT_STALLS; smmu->features |= ARM_SMMU_FEAT_STALLS;
}
if (reg & IDR0_S1P) if (reg & IDR0_S1P)
smmu->features |= ARM_SMMU_FEAT_TRANS_S1; smmu->features |= ARM_SMMU_FEAT_TRANS_S1;
...@@ -2699,15 +2640,7 @@ static int arm_smmu_device_dt_probe(struct platform_device *pdev) ...@@ -2699,15 +2640,7 @@ static int arm_smmu_device_dt_probe(struct platform_device *pdev)
platform_set_drvdata(pdev, smmu); platform_set_drvdata(pdev, smmu);
/* Reset the device */ /* Reset the device */
ret = arm_smmu_device_reset(smmu); return arm_smmu_device_reset(smmu);
if (ret)
goto out_free_structures;
return 0;
out_free_structures:
arm_smmu_free_structures(smmu);
return ret;
} }
static int arm_smmu_device_remove(struct platform_device *pdev) static int arm_smmu_device_remove(struct platform_device *pdev)
...@@ -2715,7 +2648,6 @@ static int arm_smmu_device_remove(struct platform_device *pdev) ...@@ -2715,7 +2648,6 @@ static int arm_smmu_device_remove(struct platform_device *pdev)
struct arm_smmu_device *smmu = platform_get_drvdata(pdev); struct arm_smmu_device *smmu = platform_get_drvdata(pdev);
arm_smmu_device_disable(smmu); arm_smmu_device_disable(smmu);
arm_smmu_free_structures(smmu);
return 0; return 0;
} }
......
...@@ -582,7 +582,7 @@ static void arm_smmu_tlb_inv_context(void *cookie) ...@@ -582,7 +582,7 @@ static void arm_smmu_tlb_inv_context(void *cookie)
} }
static void arm_smmu_tlb_inv_range_nosync(unsigned long iova, size_t size, static void arm_smmu_tlb_inv_range_nosync(unsigned long iova, size_t size,
bool leaf, void *cookie) size_t granule, bool leaf, void *cookie)
{ {
struct arm_smmu_domain *smmu_domain = cookie; struct arm_smmu_domain *smmu_domain = cookie;
struct arm_smmu_cfg *cfg = &smmu_domain->cfg; struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
...@@ -597,12 +597,18 @@ static void arm_smmu_tlb_inv_range_nosync(unsigned long iova, size_t size, ...@@ -597,12 +597,18 @@ static void arm_smmu_tlb_inv_range_nosync(unsigned long iova, size_t size,
if (!IS_ENABLED(CONFIG_64BIT) || smmu->version == ARM_SMMU_V1) { if (!IS_ENABLED(CONFIG_64BIT) || smmu->version == ARM_SMMU_V1) {
iova &= ~12UL; iova &= ~12UL;
iova |= ARM_SMMU_CB_ASID(cfg); iova |= ARM_SMMU_CB_ASID(cfg);
writel_relaxed(iova, reg); do {
writel_relaxed(iova, reg);
iova += granule;
} while (size -= granule);
#ifdef CONFIG_64BIT #ifdef CONFIG_64BIT
} else { } else {
iova >>= 12; iova >>= 12;
iova |= (u64)ARM_SMMU_CB_ASID(cfg) << 48; iova |= (u64)ARM_SMMU_CB_ASID(cfg) << 48;
writeq_relaxed(iova, reg); do {
writeq_relaxed(iova, reg);
iova += granule >> 12;
} while (size -= granule);
#endif #endif
} }
#ifdef CONFIG_64BIT #ifdef CONFIG_64BIT
...@@ -610,7 +616,11 @@ static void arm_smmu_tlb_inv_range_nosync(unsigned long iova, size_t size, ...@@ -610,7 +616,11 @@ static void arm_smmu_tlb_inv_range_nosync(unsigned long iova, size_t size,
reg = ARM_SMMU_CB_BASE(smmu) + ARM_SMMU_CB(smmu, cfg->cbndx); reg = ARM_SMMU_CB_BASE(smmu) + ARM_SMMU_CB(smmu, cfg->cbndx);
reg += leaf ? ARM_SMMU_CB_S2_TLBIIPAS2L : reg += leaf ? ARM_SMMU_CB_S2_TLBIIPAS2L :
ARM_SMMU_CB_S2_TLBIIPAS2; ARM_SMMU_CB_S2_TLBIIPAS2;
writeq_relaxed(iova >> 12, reg); iova >>= 12;
do {
writeq_relaxed(iova, reg);
iova += granule >> 12;
} while (size -= granule);
#endif #endif
} else { } else {
reg = ARM_SMMU_GR0(smmu) + ARM_SMMU_GR0_TLBIVMID; reg = ARM_SMMU_GR0(smmu) + ARM_SMMU_GR0_TLBIVMID;
...@@ -945,9 +955,7 @@ static void arm_smmu_destroy_domain_context(struct iommu_domain *domain) ...@@ -945,9 +955,7 @@ static void arm_smmu_destroy_domain_context(struct iommu_domain *domain)
free_irq(irq, domain); free_irq(irq, domain);
} }
if (smmu_domain->pgtbl_ops) free_io_pgtable_ops(smmu_domain->pgtbl_ops);
free_io_pgtable_ops(smmu_domain->pgtbl_ops);
__arm_smmu_free_bitmap(smmu->context_map, cfg->cbndx); __arm_smmu_free_bitmap(smmu->context_map, cfg->cbndx);
} }
...@@ -1357,6 +1365,7 @@ static int arm_smmu_add_device(struct device *dev) ...@@ -1357,6 +1365,7 @@ static int arm_smmu_add_device(struct device *dev)
if (IS_ERR(group)) if (IS_ERR(group))
return PTR_ERR(group); return PTR_ERR(group);
iommu_group_put(group);
return 0; return 0;
} }
......
...@@ -1063,13 +1063,19 @@ static int alloc_iommu(struct dmar_drhd_unit *drhd) ...@@ -1063,13 +1063,19 @@ static int alloc_iommu(struct dmar_drhd_unit *drhd)
raw_spin_lock_init(&iommu->register_lock); raw_spin_lock_init(&iommu->register_lock);
drhd->iommu = iommu; if (intel_iommu_enabled) {
if (intel_iommu_enabled)
iommu->iommu_dev = iommu_device_create(NULL, iommu, iommu->iommu_dev = iommu_device_create(NULL, iommu,
intel_iommu_groups, intel_iommu_groups,
"%s", iommu->name); "%s", iommu->name);
if (IS_ERR(iommu->iommu_dev)) {
err = PTR_ERR(iommu->iommu_dev);
goto err_unmap;
}
}
drhd->iommu = iommu;
return 0; return 0;
err_unmap: err_unmap:
......
...@@ -38,9 +38,6 @@ ...@@ -38,9 +38,6 @@
#define io_pgtable_to_data(x) \ #define io_pgtable_to_data(x) \
container_of((x), struct arm_lpae_io_pgtable, iop) container_of((x), struct arm_lpae_io_pgtable, iop)
#define io_pgtable_ops_to_pgtable(x) \
container_of((x), struct io_pgtable, ops)
#define io_pgtable_ops_to_data(x) \ #define io_pgtable_ops_to_data(x) \
io_pgtable_to_data(io_pgtable_ops_to_pgtable(x)) io_pgtable_to_data(io_pgtable_ops_to_pgtable(x))
...@@ -58,8 +55,10 @@ ...@@ -58,8 +55,10 @@
((((d)->levels - ((l) - ARM_LPAE_START_LVL(d) + 1)) \ ((((d)->levels - ((l) - ARM_LPAE_START_LVL(d) + 1)) \
* (d)->bits_per_level) + (d)->pg_shift) * (d)->bits_per_level) + (d)->pg_shift)
#define ARM_LPAE_GRANULE(d) (1UL << (d)->pg_shift)
#define ARM_LPAE_PAGES_PER_PGD(d) \ #define ARM_LPAE_PAGES_PER_PGD(d) \
DIV_ROUND_UP((d)->pgd_size, 1UL << (d)->pg_shift) DIV_ROUND_UP((d)->pgd_size, ARM_LPAE_GRANULE(d))
/* /*
* Calculate the index at level l used to map virtual address a using the * Calculate the index at level l used to map virtual address a using the
...@@ -169,7 +168,7 @@ ...@@ -169,7 +168,7 @@
/* IOPTE accessors */ /* IOPTE accessors */
#define iopte_deref(pte,d) \ #define iopte_deref(pte,d) \
(__va((pte) & ((1ULL << ARM_LPAE_MAX_ADDR_BITS) - 1) \ (__va((pte) & ((1ULL << ARM_LPAE_MAX_ADDR_BITS) - 1) \
& ~((1ULL << (d)->pg_shift) - 1))) & ~(ARM_LPAE_GRANULE(d) - 1ULL)))
#define iopte_type(pte,l) \ #define iopte_type(pte,l) \
(((pte) >> ARM_LPAE_PTE_TYPE_SHIFT) & ARM_LPAE_PTE_TYPE_MASK) (((pte) >> ARM_LPAE_PTE_TYPE_SHIFT) & ARM_LPAE_PTE_TYPE_MASK)
...@@ -326,7 +325,7 @@ static int __arm_lpae_map(struct arm_lpae_io_pgtable *data, unsigned long iova, ...@@ -326,7 +325,7 @@ static int __arm_lpae_map(struct arm_lpae_io_pgtable *data, unsigned long iova,
/* Grab a pointer to the next level */ /* Grab a pointer to the next level */
pte = *ptep; pte = *ptep;
if (!pte) { if (!pte) {
cptep = __arm_lpae_alloc_pages(1UL << data->pg_shift, cptep = __arm_lpae_alloc_pages(ARM_LPAE_GRANULE(data),
GFP_ATOMIC, cfg); GFP_ATOMIC, cfg);
if (!cptep) if (!cptep)
return -ENOMEM; return -ENOMEM;
...@@ -405,17 +404,18 @@ static void __arm_lpae_free_pgtable(struct arm_lpae_io_pgtable *data, int lvl, ...@@ -405,17 +404,18 @@ static void __arm_lpae_free_pgtable(struct arm_lpae_io_pgtable *data, int lvl,
arm_lpae_iopte *start, *end; arm_lpae_iopte *start, *end;
unsigned long table_size; unsigned long table_size;
/* Only leaf entries at the last level */
if (lvl == ARM_LPAE_MAX_LEVELS - 1)
return;
if (lvl == ARM_LPAE_START_LVL(data)) if (lvl == ARM_LPAE_START_LVL(data))
table_size = data->pgd_size; table_size = data->pgd_size;
else else
table_size = 1UL << data->pg_shift; table_size = ARM_LPAE_GRANULE(data);
start = ptep; start = ptep;
end = (void *)ptep + table_size;
/* Only leaf entries at the last level */
if (lvl == ARM_LPAE_MAX_LEVELS - 1)
end = ptep;
else
end = (void *)ptep + table_size;
while (ptep != end) { while (ptep != end) {
arm_lpae_iopte pte = *ptep++; arm_lpae_iopte pte = *ptep++;
...@@ -473,7 +473,7 @@ static int arm_lpae_split_blk_unmap(struct arm_lpae_io_pgtable *data, ...@@ -473,7 +473,7 @@ static int arm_lpae_split_blk_unmap(struct arm_lpae_io_pgtable *data,
__arm_lpae_set_pte(ptep, table, cfg); __arm_lpae_set_pte(ptep, table, cfg);
iova &= ~(blk_size - 1); iova &= ~(blk_size - 1);
cfg->tlb->tlb_add_flush(iova, blk_size, true, data->iop.cookie); cfg->tlb->tlb_add_flush(iova, blk_size, blk_size, true, data->iop.cookie);
return size; return size;
} }
...@@ -486,11 +486,13 @@ static int __arm_lpae_unmap(struct arm_lpae_io_pgtable *data, ...@@ -486,11 +486,13 @@ static int __arm_lpae_unmap(struct arm_lpae_io_pgtable *data,
void *cookie = data->iop.cookie; void *cookie = data->iop.cookie;
size_t blk_size = ARM_LPAE_BLOCK_SIZE(lvl, data); size_t blk_size = ARM_LPAE_BLOCK_SIZE(lvl, data);
/* Something went horribly wrong and we ran out of page table */
if (WARN_ON(lvl == ARM_LPAE_MAX_LEVELS))
return 0;
ptep += ARM_LPAE_LVL_IDX(iova, lvl, data); ptep += ARM_LPAE_LVL_IDX(iova, lvl, data);
pte = *ptep; pte = *ptep;
if (WARN_ON(!pte))
/* Something went horribly wrong and we ran out of page table */
if (WARN_ON(!pte || (lvl == ARM_LPAE_MAX_LEVELS)))
return 0; return 0;
/* If the size matches this level, we're in the right place */ /* If the size matches this level, we're in the right place */
...@@ -499,12 +501,13 @@ static int __arm_lpae_unmap(struct arm_lpae_io_pgtable *data, ...@@ -499,12 +501,13 @@ static int __arm_lpae_unmap(struct arm_lpae_io_pgtable *data,
if (!iopte_leaf(pte, lvl)) { if (!iopte_leaf(pte, lvl)) {
/* Also flush any partial walks */ /* Also flush any partial walks */
tlb->tlb_add_flush(iova, size, false, cookie); tlb->tlb_add_flush(iova, size, ARM_LPAE_GRANULE(data),
false, cookie);
tlb->tlb_sync(cookie); tlb->tlb_sync(cookie);
ptep = iopte_deref(pte, data); ptep = iopte_deref(pte, data);
__arm_lpae_free_pgtable(data, lvl + 1, ptep); __arm_lpae_free_pgtable(data, lvl + 1, ptep);
} else { } else {
tlb->tlb_add_flush(iova, size, true, cookie); tlb->tlb_add_flush(iova, size, size, true, cookie);
} }
return size; return size;
...@@ -570,7 +573,7 @@ static phys_addr_t arm_lpae_iova_to_phys(struct io_pgtable_ops *ops, ...@@ -570,7 +573,7 @@ static phys_addr_t arm_lpae_iova_to_phys(struct io_pgtable_ops *ops,
return 0; return 0;
found_translation: found_translation:
iova &= ((1 << data->pg_shift) - 1); iova &= (ARM_LPAE_GRANULE(data) - 1);
return ((phys_addr_t)iopte_to_pfn(pte,data) << data->pg_shift) | iova; return ((phys_addr_t)iopte_to_pfn(pte,data) << data->pg_shift) | iova;
} }
...@@ -668,7 +671,7 @@ arm_64_lpae_alloc_pgtable_s1(struct io_pgtable_cfg *cfg, void *cookie) ...@@ -668,7 +671,7 @@ arm_64_lpae_alloc_pgtable_s1(struct io_pgtable_cfg *cfg, void *cookie)
(ARM_LPAE_TCR_RGN_WBWA << ARM_LPAE_TCR_IRGN0_SHIFT) | (ARM_LPAE_TCR_RGN_WBWA << ARM_LPAE_TCR_IRGN0_SHIFT) |
(ARM_LPAE_TCR_RGN_WBWA << ARM_LPAE_TCR_ORGN0_SHIFT); (ARM_LPAE_TCR_RGN_WBWA << ARM_LPAE_TCR_ORGN0_SHIFT);
switch (1 << data->pg_shift) { switch (ARM_LPAE_GRANULE(data)) {
case SZ_4K: case SZ_4K:
reg |= ARM_LPAE_TCR_TG0_4K; reg |= ARM_LPAE_TCR_TG0_4K;
break; break;
...@@ -769,7 +772,7 @@ arm_64_lpae_alloc_pgtable_s2(struct io_pgtable_cfg *cfg, void *cookie) ...@@ -769,7 +772,7 @@ arm_64_lpae_alloc_pgtable_s2(struct io_pgtable_cfg *cfg, void *cookie)
sl = ARM_LPAE_START_LVL(data); sl = ARM_LPAE_START_LVL(data);
switch (1 << data->pg_shift) { switch (ARM_LPAE_GRANULE(data)) {
case SZ_4K: case SZ_4K:
reg |= ARM_LPAE_TCR_TG0_4K; reg |= ARM_LPAE_TCR_TG0_4K;
sl++; /* SL0 format is different for 4K granule size */ sl++; /* SL0 format is different for 4K granule size */
...@@ -889,8 +892,8 @@ static void dummy_tlb_flush_all(void *cookie) ...@@ -889,8 +892,8 @@ static void dummy_tlb_flush_all(void *cookie)
WARN_ON(cookie != cfg_cookie); WARN_ON(cookie != cfg_cookie);
} }
static void dummy_tlb_add_flush(unsigned long iova, size_t size, bool leaf, static void dummy_tlb_add_flush(unsigned long iova, size_t size,
void *cookie) size_t granule, bool leaf, void *cookie)
{ {
WARN_ON(cookie != cfg_cookie); WARN_ON(cookie != cfg_cookie);
WARN_ON(!(size & cfg_cookie->pgsize_bitmap)); WARN_ON(!(size & cfg_cookie->pgsize_bitmap));
......
...@@ -26,8 +26,8 @@ enum io_pgtable_fmt { ...@@ -26,8 +26,8 @@ enum io_pgtable_fmt {
*/ */
struct iommu_gather_ops { struct iommu_gather_ops {
void (*tlb_flush_all)(void *cookie); void (*tlb_flush_all)(void *cookie);
void (*tlb_add_flush)(unsigned long iova, size_t size, bool leaf, void (*tlb_add_flush)(unsigned long iova, size_t size, size_t granule,
void *cookie); bool leaf, void *cookie);
void (*tlb_sync)(void *cookie); void (*tlb_sync)(void *cookie);
}; };
...@@ -131,6 +131,8 @@ struct io_pgtable { ...@@ -131,6 +131,8 @@ struct io_pgtable {
struct io_pgtable_ops ops; struct io_pgtable_ops ops;
}; };
#define io_pgtable_ops_to_pgtable(x) container_of((x), struct io_pgtable, ops)
/** /**
* struct io_pgtable_init_fns - Alloc/free a set of page tables for a * struct io_pgtable_init_fns - Alloc/free a set of page tables for a
* particular format. * particular format.
......
...@@ -277,8 +277,8 @@ static void ipmmu_tlb_flush_all(void *cookie) ...@@ -277,8 +277,8 @@ static void ipmmu_tlb_flush_all(void *cookie)
ipmmu_tlb_invalidate(domain); ipmmu_tlb_invalidate(domain);
} }
static void ipmmu_tlb_add_flush(unsigned long iova, size_t size, bool leaf, static void ipmmu_tlb_add_flush(unsigned long iova, size_t size,
void *cookie) size_t granule, bool leaf, void *cookie)
{ {
/* The hardware doesn't support selective TLB flush. */ /* The hardware doesn't support selective TLB flush. */
} }
......
...@@ -359,30 +359,19 @@ static struct platform_driver msm_iommu_ctx_driver = { ...@@ -359,30 +359,19 @@ static struct platform_driver msm_iommu_ctx_driver = {
.remove = msm_iommu_ctx_remove, .remove = msm_iommu_ctx_remove,
}; };
static struct platform_driver * const drivers[] = {
&msm_iommu_driver,
&msm_iommu_ctx_driver,
};
static int __init msm_iommu_driver_init(void) static int __init msm_iommu_driver_init(void)
{ {
int ret; return platform_register_drivers(drivers, ARRAY_SIZE(drivers));
ret = platform_driver_register(&msm_iommu_driver);
if (ret != 0) {
pr_err("Failed to register IOMMU driver\n");
goto error;
}
ret = platform_driver_register(&msm_iommu_ctx_driver);
if (ret != 0) {
platform_driver_unregister(&msm_iommu_driver);
pr_err("Failed to register IOMMU context driver\n");
goto error;
}
error:
return ret;
} }
static void __exit msm_iommu_driver_exit(void) static void __exit msm_iommu_driver_exit(void)
{ {
platform_driver_unregister(&msm_iommu_ctx_driver); platform_unregister_drivers(drivers, ARRAY_SIZE(drivers));
platform_driver_unregister(&msm_iommu_driver);
} }
subsys_initcall(msm_iommu_driver_init); subsys_initcall(msm_iommu_driver_init);
......
...@@ -49,7 +49,7 @@ static bool s390_iommu_capable(enum iommu_cap cap) ...@@ -49,7 +49,7 @@ static bool s390_iommu_capable(enum iommu_cap cap)
} }
} }
struct iommu_domain *s390_domain_alloc(unsigned domain_type) static struct iommu_domain *s390_domain_alloc(unsigned domain_type)
{ {
struct s390_domain *s390_domain; struct s390_domain *s390_domain;
...@@ -73,7 +73,7 @@ struct iommu_domain *s390_domain_alloc(unsigned domain_type) ...@@ -73,7 +73,7 @@ struct iommu_domain *s390_domain_alloc(unsigned domain_type)
return &s390_domain->domain; return &s390_domain->domain;
} }
void s390_domain_free(struct iommu_domain *domain) static void s390_domain_free(struct iommu_domain *domain)
{ {
struct s390_domain *s390_domain = to_s390_domain(domain); struct s390_domain *s390_domain = to_s390_domain(domain);
......
/*
* IOMMU for IPMMU/IPMMUI
* Copyright (C) 2012 Hideki EIRAKU
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; version 2 of the License.
*/
#include <linux/dma-mapping.h>
#include <linux/io.h>
#include <linux/iommu.h>
#include <linux/platform_device.h>
#include <linux/sizes.h>
#include <linux/slab.h>
#include <asm/dma-iommu.h>
#include "shmobile-ipmmu.h"
/* Size in bytes of the first-level page table, taken from Kconfig. */
#define L1_SIZE CONFIG_SHMOBILE_IOMMU_L1SIZE
/* Number of 32-bit entries in the first-level table. */
#define L1_LEN (L1_SIZE / 4)
/* Alignment requested for first-level tables: their own size. */
#define L1_ALIGN L1_SIZE
/* Size in bytes of one second-level page table. */
#define L2_SIZE SZ_1K
/* Number of 32-bit entries in a second-level table. */
#define L2_LEN (L2_SIZE / 4)
/* Alignment requested for second-level tables: their own size. */
#define L2_ALIGN L2_SIZE
/*
 * One page table: the kernel-side buffer holding its entries together with
 * the DMA handle obtained when that buffer was mapped for the device.
 */
struct shmobile_iommu_domain_pgtable {
uint32_t *pgtable; /* table entries; NULL means no table is allocated */
dma_addr_t handle; /* value returned by dma_map_single() for the table */
};
/*
 * Per-IPMMU attachment state. A single instance is allocated by
 * ipmmu_iommu_init() and stored in dev->archdata.iommu of every device that
 * shmobile_iommu_add_device() accepts.
 */
struct shmobile_iommu_archdata {
struct list_head attached_list; /* node on the attached domain's attached_list */
struct dma_iommu_mapping *iommu_mapping; /* created on first add_device */
spinlock_t attach_lock; /* guards attached and num_attached_devices */
struct shmobile_iommu_domain *attached; /* current domain, NULL if none */
int num_attached_devices; /* devices attached to the current domain */
struct shmobile_ipmmu *ipmmu; /* hardware instance behind this archdata */
};
/*
 * Driver-private domain: one first-level table, an optional second-level
 * table per first-level entry, and the list of archdata attached to it.
 */
struct shmobile_iommu_domain {
struct shmobile_iommu_domain_pgtable l1, l2[L1_LEN]; /* l2[i] backs L1 entry i */
spinlock_t map_lock; /* held around page-table reads and updates */
spinlock_t attached_list_lock; /* guards attached_list */
struct list_head attached_list; /* archdata currently attached */
struct iommu_domain domain; /* generic domain embedded for container_of() */
};
/* Archdata shared by all matching devices; assigned in ipmmu_iommu_init(). */
static struct shmobile_iommu_archdata *ipmmu_archdata;
/* Slab caches backing first-level (l1cache) and second-level (l2cache) tables. */
static struct kmem_cache *l1cache, *l2cache;
/* Recover the driver-private domain that embeds the generic @dom. */
static struct shmobile_iommu_domain *to_sh_domain(struct iommu_domain *dom)
{
	struct shmobile_iommu_domain *sh_domain;

	sh_domain = container_of(dom, struct shmobile_iommu_domain, domain);
	return sh_domain;
}
/*
 * Allocate a zeroed page table from @cache and map it for device reads.
 *
 * @pgtable: descriptor to fill in (buffer pointer and DMA handle)
 * @cache:   slab cache matching the table level
 * @size:    table size in bytes, used for the DMA mapping
 *
 * Returns 0 on success or -ENOMEM if the allocation or the DMA mapping
 * fails. On failure @pgtable->pgtable is left NULL so callers that test
 * the pointer never see a table without a valid handle.
 */
static int pgtable_alloc(struct shmobile_iommu_domain_pgtable *pgtable,
			 struct kmem_cache *cache, size_t size)
{
	/* GFP_ATOMIC: l2alloc() calls this with map_lock held. */
	pgtable->pgtable = kmem_cache_zalloc(cache, GFP_ATOMIC);
	if (!pgtable->pgtable)
		return -ENOMEM;

	pgtable->handle = dma_map_single(NULL, pgtable->pgtable, size,
					 DMA_TO_DEVICE);
	/* A failed mapping must not be written into a page-table entry. */
	if (dma_mapping_error(NULL, pgtable->handle)) {
		kmem_cache_free(cache, pgtable->pgtable);
		pgtable->pgtable = NULL;
		return -ENOMEM;
	}

	return 0;
}
/*
 * Undo pgtable_alloc(): drop the DMA mapping of @size bytes, then return
 * the table buffer to @cache.
 */
static void pgtable_free(struct shmobile_iommu_domain_pgtable *pgtable,
			 struct kmem_cache *cache, size_t size)
{
	dma_addr_t dma = pgtable->handle;
	uint32_t *table = pgtable->pgtable;

	dma_unmap_single(NULL, dma, size, DMA_TO_DEVICE);
	kmem_cache_free(cache, table);
}
/* Return entry @index of @pgtable as seen from the CPU side. */
static uint32_t pgtable_read(struct shmobile_iommu_domain_pgtable *pgtable,
			     unsigned int index)
{
	const uint32_t *entries = pgtable->pgtable;

	return entries[index];
}
/*
 * Store @val into @count consecutive entries starting at @index, then sync
 * exactly that byte range of the table towards the device.
 */
static void pgtable_write(struct shmobile_iommu_domain_pgtable *pgtable,
			  unsigned int index, unsigned int count, uint32_t val)
{
	uint32_t *slot = &pgtable->pgtable[index];
	unsigned int remaining = count;

	while (remaining--)
		*slot++ = val;

	dma_sync_single_for_device(NULL,
				   pgtable->handle + index * sizeof(val),
				   sizeof(val) * count, DMA_TO_DEVICE);
}
/*
 * iommu_ops.domain_alloc: create an unmanaged domain with an empty
 * first-level table. Returns the embedded generic domain, or NULL when
 * @type is not IOMMU_DOMAIN_UNMANAGED or an allocation fails.
 */
static struct iommu_domain *shmobile_iommu_domain_alloc(unsigned type)
{
	struct shmobile_iommu_domain *sh_domain;
	int idx;

	if (type != IOMMU_DOMAIN_UNMANAGED)
		return NULL;

	sh_domain = kzalloc(sizeof(*sh_domain), GFP_KERNEL);
	if (sh_domain == NULL)
		return NULL;

	if (pgtable_alloc(&sh_domain->l1, l1cache, L1_SIZE) < 0) {
		kfree(sh_domain);
		return NULL;
	}

	/* No second-level tables exist until a 4K/64K mapping needs one. */
	for (idx = 0; idx < L1_LEN; idx++)
		sh_domain->l2[idx].pgtable = NULL;

	INIT_LIST_HEAD(&sh_domain->attached_list);
	spin_lock_init(&sh_domain->attached_list_lock);
	spin_lock_init(&sh_domain->map_lock);

	return &sh_domain->domain;
}
static void shmobile_iommu_domain_free(struct iommu_domain *domain)
{
struct shmobile_iommu_domain *sh_domain = to_sh_domain(domain);
int i;
for (i = 0; i < L1_LEN; i++) {
if (sh_domain->l2[i].pgtable)
pgtable_free(&sh_domain->l2[i], l2cache, L2_SIZE);
}
pgtable_free(&sh_domain->l1, l1cache, L1_SIZE);
kfree(sh_domain);
}
static int shmobile_iommu_attach_device(struct iommu_domain *domain,
struct device *dev)
{
struct shmobile_iommu_archdata *archdata = dev->archdata.iommu;
struct shmobile_iommu_domain *sh_domain = to_sh_domain(domain);
int ret = -EBUSY;
if (!archdata)
return -ENODEV;
spin_lock(&sh_domain->attached_list_lock);
spin_lock(&archdata->attach_lock);
if (archdata->attached != sh_domain) {
if (archdata->attached)
goto err;
ipmmu_tlb_set(archdata->ipmmu, sh_domain->l1.handle, L1_SIZE,
0);
ipmmu_tlb_flush(archdata->ipmmu);
archdata->attached = sh_domain;
archdata->num_attached_devices = 0;
list_add(&archdata->attached_list, &sh_domain->attached_list);
}
archdata->num_attached_devices++;
ret = 0;
err:
spin_unlock(&archdata->attach_lock);
spin_unlock(&sh_domain->attached_list_lock);
return ret;
}
/*
 * iommu_ops.detach_dev: drop one device reference on the attachment. When
 * the last device goes away the hardware translation is cleared and the
 * archdata is unlinked from @domain. Does nothing if @dev has no archdata.
 */
static void shmobile_iommu_detach_device(struct iommu_domain *domain,
					 struct device *dev)
{
	struct shmobile_iommu_domain *sh_domain = to_sh_domain(domain);
	struct shmobile_iommu_archdata *archdata = dev->archdata.iommu;

	if (archdata == NULL)
		return;

	/* Same lock order as the attach path. */
	spin_lock(&sh_domain->attached_list_lock);
	spin_lock(&archdata->attach_lock);

	if (--archdata->num_attached_devices == 0) {
		ipmmu_tlb_set(archdata->ipmmu, 0, 0, 0);
		ipmmu_tlb_flush(archdata->ipmmu);
		archdata->attached = NULL;
		list_del(&archdata->attached_list);
	}

	spin_unlock(&archdata->attach_lock);
	spin_unlock(&sh_domain->attached_list_lock);
}
static void domain_tlb_flush(struct shmobile_iommu_domain *sh_domain)
{
struct shmobile_iommu_archdata *archdata;
spin_lock(&sh_domain->attached_list_lock);
list_for_each_entry(archdata, &sh_domain->attached_list, attached_list)
ipmmu_tlb_flush(archdata->ipmmu);
spin_unlock(&sh_domain->attached_list_lock);
}
static int l2alloc(struct shmobile_iommu_domain *sh_domain,
unsigned int l1index)
{
int ret;
if (!sh_domain->l2[l1index].pgtable) {
ret = pgtable_alloc(&sh_domain->l2[l1index], l2cache, L2_SIZE);
if (ret < 0)
return ret;
}
pgtable_write(&sh_domain->l1, l1index, 1,
sh_domain->l2[l1index].handle | 0x1);
return 0;
}
static void l2realfree(struct shmobile_iommu_domain_pgtable *l2)
{
if (l2->pgtable)
pgtable_free(l2, l2cache, L2_SIZE);
}
/*
 * Clear first-level entry @l1index and detach its second-level table, if
 * any, into @l2. The caller frees it later with l2realfree(); @l2 is left
 * untouched when there was no table.
 */
static void l2free(struct shmobile_iommu_domain *sh_domain,
		   unsigned int l1index,
		   struct shmobile_iommu_domain_pgtable *l2)
{
	struct shmobile_iommu_domain_pgtable *slot = &sh_domain->l2[l1index];

	pgtable_write(&sh_domain->l1, l1index, 1, 0);

	if (slot->pgtable == NULL)
		return;

	*l2 = *slot;
	slot->pgtable = NULL;
}
/*
 * iommu_ops.map: install one translation of @size bytes at @iova.
 *
 * @domain: generic domain embedded in a shmobile_iommu_domain
 * @iova:   I/O virtual address; its 1 MiB index must be below L1_LEN
 * @paddr:  physical address to map to
 * @size:   SZ_4K, SZ_64K or SZ_1M
 * @prot:   unused by this driver
 *
 * Returns 0 on success, -EINVAL for an unsupported @size or an @iova
 * outside the range covered by the first-level table, or the error from
 * l2alloc() when a second-level table cannot be allocated.
 */
static int shmobile_iommu_map(struct iommu_domain *domain, unsigned long iova,
			      phys_addr_t paddr, size_t size, int prot)
{
	struct shmobile_iommu_domain_pgtable l2 = { .pgtable = NULL };
	struct shmobile_iommu_domain *sh_domain = to_sh_domain(domain);
	unsigned int l1index, l2index;
	int ret;

	/* l1index indexes both the L1 table and l2[]; reject overruns. */
	if ((iova >> 20) >= L1_LEN)
		return -EINVAL;

	l1index = iova >> 20;
	switch (size) {
	case SZ_4K:
		l2index = (iova >> 12) & 0xff;
		spin_lock(&sh_domain->map_lock);
		ret = l2alloc(sh_domain, l1index);
		if (!ret)
			/* type bits 2: decoded as a 4K entry by unmap */
			pgtable_write(&sh_domain->l2[l1index], l2index, 1,
				      paddr | 0xff2);
		spin_unlock(&sh_domain->map_lock);
		break;
	case SZ_64K:
		/* a 64K mapping occupies 16 consecutive, aligned L2 slots */
		l2index = (iova >> 12) & 0xf0;
		spin_lock(&sh_domain->map_lock);
		ret = l2alloc(sh_domain, l1index);
		if (!ret)
			/* type bits 1: decoded as a 64K entry by unmap */
			pgtable_write(&sh_domain->l2[l1index], l2index, 0x10,
				      paddr | 0xff1);
		spin_unlock(&sh_domain->map_lock);
		break;
	case SZ_1M:
		spin_lock(&sh_domain->map_lock);
		/* detach any L2 table; it is freed after dropping the lock */
		l2free(sh_domain, l1index, &l2);
		pgtable_write(&sh_domain->l1, l1index, 1, paddr | 0xc02);
		spin_unlock(&sh_domain->map_lock);
		ret = 0;
		break;
	default:
		ret = -EINVAL;
	}

	if (!ret)
		domain_tlb_flush(sh_domain);
	l2realfree(&l2);
	return ret;
}
/*
 * iommu_ops.unmap: remove the translation starting at @iova.
 *
 * A 1 MiB-aligned @iova with @size >= SZ_1M clears the whole first-level
 * entry (and detaches its second-level table). Otherwise the second-level
 * entry at @iova is inspected and a 64K or 4K mapping is cleared according
 * to its type bits.
 *
 * Returns the number of bytes unmapped (SZ_1M, SZ_64K or SZ_4K), or 0 if
 * nothing was unmapped, including when @iova lies outside the range covered
 * by the first-level table.
 */
static size_t shmobile_iommu_unmap(struct iommu_domain *domain,
				   unsigned long iova, size_t size)
{
	struct shmobile_iommu_domain_pgtable l2 = { .pgtable = NULL };
	struct shmobile_iommu_domain *sh_domain = to_sh_domain(domain);
	unsigned int l1index, l2index;
	uint32_t l2entry = 0;
	size_t ret = 0;

	/* l1index indexes both the L1 table and l2[]; reject overruns. */
	if ((iova >> 20) >= L1_LEN)
		return 0;

	l1index = iova >> 20;
	if (!(iova & 0xfffff) && size >= SZ_1M) {
		spin_lock(&sh_domain->map_lock);
		l2free(sh_domain, l1index, &l2);
		spin_unlock(&sh_domain->map_lock);
		ret = SZ_1M;
		goto done;
	}

	l2index = (iova >> 12) & 0xff;
	spin_lock(&sh_domain->map_lock);
	if (sh_domain->l2[l1index].pgtable)
		l2entry = pgtable_read(&sh_domain->l2[l1index], l2index);
	switch (l2entry & 3) {
	case 1:
		/* 64K entry: only unmap from its first (aligned) slot */
		if (l2index & 0xf)
			break;
		pgtable_write(&sh_domain->l2[l1index], l2index, 0x10, 0);
		ret = SZ_64K;
		break;
	case 2:
		/* 4K entry */
		pgtable_write(&sh_domain->l2[l1index], l2index, 1, 0);
		ret = SZ_4K;
		break;
	}
	spin_unlock(&sh_domain->map_lock);

done:
	if (ret)
		domain_tlb_flush(sh_domain);
	/* free a detached L2 table outside map_lock */
	l2realfree(&l2);
	return ret;
}
/*
 * iommu_ops.iova_to_phys: translate @iova by walking the software copy of
 * the page tables.
 *
 * Returns the physical address, or 0 when no mapping is found or @iova
 * lies outside the range covered by the first-level table.
 */
static phys_addr_t shmobile_iommu_iova_to_phys(struct iommu_domain *domain,
					       dma_addr_t iova)
{
	struct shmobile_iommu_domain *sh_domain = to_sh_domain(domain);
	uint32_t l1entry = 0, l2entry = 0;
	unsigned int l1index, l2index;

	/* l1index indexes both the L1 table and l2[]; reject overruns. */
	if ((iova >> 20) >= L1_LEN)
		return 0;

	l1index = iova >> 20;
	l2index = (iova >> 12) & 0xff;
	spin_lock(&sh_domain->map_lock);
	/* The L1 entry is only consulted when no L2 table is present. */
	if (sh_domain->l2[l1index].pgtable)
		l2entry = pgtable_read(&sh_domain->l2[l1index], l2index);
	else
		l1entry = pgtable_read(&sh_domain->l1, l1index);
	spin_unlock(&sh_domain->map_lock);

	switch (l2entry & 3) {
	case 1:
		/* 64K entry */
		return (l2entry & ~0xffff) | (iova & 0xffff);
	case 2:
		/* 4K entry */
		return (l2entry & ~0xfff) | (iova & 0xfff);
	default:
		/* 1M entry stored directly in the first-level table */
		if ((l1entry & 3) == 2)
			return (l1entry & ~0xfffff) | (iova & 0xfffff);
		return 0;
	}
}
/*
 * Return 1 if @dev_name exactly matches one of the names listed in
 * @ipmmu->dev_names, 0 otherwise.
 */
static int find_dev_name(struct shmobile_ipmmu *ipmmu, const char *dev_name)
{
	unsigned int idx = 0;

	while (idx < ipmmu->num_dev_names) {
		if (!strcmp(ipmmu->dev_names[idx], dev_name))
			return 1;
		idx++;
	}

	return 0;
}
static int shmobile_iommu_add_device(struct device *dev)
{
struct shmobile_iommu_archdata *archdata = ipmmu_archdata;
struct dma_iommu_mapping *mapping;
if (!find_dev_name(archdata->ipmmu, dev_name(dev)))
return 0;
mapping = archdata->iommu_mapping;
if (!mapping) {
mapping = arm_iommu_create_mapping(&platform_bus_type, 0,
L1_LEN << 20);
if (IS_ERR(mapping))
return PTR_ERR(mapping);
archdata->iommu_mapping = mapping;
}
dev->archdata.iommu = archdata;
if (arm_iommu_attach_device(dev, mapping))
pr_err("arm_iommu_attach_device failed\n");
return 0;
}
/*
 * IOMMU callbacks registered on the platform bus by ipmmu_iommu_init().
 * pgsize_bitmap lists the three mapping sizes shmobile_iommu_map() accepts.
 */
static const struct iommu_ops shmobile_iommu_ops = {
.domain_alloc = shmobile_iommu_domain_alloc,
.domain_free = shmobile_iommu_domain_free,
.attach_dev = shmobile_iommu_attach_device,
.detach_dev = shmobile_iommu_detach_device,
.map = shmobile_iommu_map,
.unmap = shmobile_iommu_unmap,
.map_sg = default_iommu_map_sg,
.iova_to_phys = shmobile_iommu_iova_to_phys,
.add_device = shmobile_iommu_add_device,
.pgsize_bitmap = SZ_1M | SZ_64K | SZ_4K,
};
/*
 * Set up the IOMMU layer for @ipmmu: create the two page-table slab caches,
 * allocate the shared archdata and register the driver's iommu_ops on the
 * platform bus.
 *
 * Returns 0 on success or -ENOMEM if any allocation fails; caches created
 * before the failure are destroyed again.
 */
int ipmmu_iommu_init(struct shmobile_ipmmu *ipmmu)
{
	static struct shmobile_iommu_archdata *archdata;

	l1cache = kmem_cache_create("shmobile-iommu-pgtable1", L1_SIZE,
				    L1_ALIGN, SLAB_HWCACHE_ALIGN, NULL);
	if (l1cache == NULL)
		return -ENOMEM;

	l2cache = kmem_cache_create("shmobile-iommu-pgtable2", L2_SIZE,
				    L2_ALIGN, SLAB_HWCACHE_ALIGN, NULL);
	if (l2cache == NULL) {
		kmem_cache_destroy(l1cache);
		return -ENOMEM;
	}

	archdata = kzalloc(sizeof(*archdata), GFP_KERNEL);
	if (archdata == NULL) {
		kmem_cache_destroy(l1cache);
		kmem_cache_destroy(l2cache);
		return -ENOMEM;
	}

	archdata->ipmmu = ipmmu;
	spin_lock_init(&archdata->attach_lock);

	/* Publish the archdata before the bus can call add_device. */
	ipmmu_archdata = archdata;
	bus_set_iommu(&platform_bus_type, &shmobile_iommu_ops);

	return 0;
}
/*
* IPMMU/IPMMUI
* Copyright (C) 2012 Hideki EIRAKU
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; version 2 of the License.
*/
#include <linux/err.h>
#include <linux/export.h>
#include <linux/io.h>
#include <linux/platform_device.h>
#include <linux/slab.h>
#include <linux/platform_data/sh_ipmmu.h>
#include "shmobile-ipmmu.h"
#define IMCTR1 0x000
#define IMCTR2 0x004
#define IMASID 0x010
#define IMTTBR 0x014
#define IMTTBCR 0x018
#define IMCTR1_TLBEN (1 << 0)
#define IMCTR1_FLUSH (1 << 1)
static void ipmmu_reg_write(struct shmobile_ipmmu *ipmmu, unsigned long reg_off,
unsigned long data)
{
iowrite32(data, ipmmu->ipmmu_base + reg_off);
}
void ipmmu_tlb_flush(struct shmobile_ipmmu *ipmmu)
{
if (!ipmmu)
return;
spin_lock(&ipmmu->flush_lock);
if (ipmmu->tlb_enabled)
ipmmu_reg_write(ipmmu, IMCTR1, IMCTR1_FLUSH | IMCTR1_TLBEN);
else
ipmmu_reg_write(ipmmu, IMCTR1, IMCTR1_FLUSH);
spin_unlock(&ipmmu->flush_lock);
}
void ipmmu_tlb_set(struct shmobile_ipmmu *ipmmu, unsigned long phys, int size,
int asid)
{
if (!ipmmu)
return;
spin_lock(&ipmmu->flush_lock);
switch (size) {
default:
ipmmu->tlb_enabled = 0;
break;
case 0x2000:
ipmmu_reg_write(ipmmu, IMTTBCR, 1);
ipmmu->tlb_enabled = 1;
break;
case 0x1000:
ipmmu_reg_write(ipmmu, IMTTBCR, 2);
ipmmu->tlb_enabled = 1;
break;
case 0x800:
ipmmu_reg_write(ipmmu, IMTTBCR, 3);
ipmmu->tlb_enabled = 1;
break;
case 0x400:
ipmmu_reg_write(ipmmu, IMTTBCR, 4);
ipmmu->tlb_enabled = 1;
break;
case 0x200:
ipmmu_reg_write(ipmmu, IMTTBCR, 5);
ipmmu->tlb_enabled = 1;
break;
case 0x100:
ipmmu_reg_write(ipmmu, IMTTBCR, 6);
ipmmu->tlb_enabled = 1;
break;
case 0x80:
ipmmu_reg_write(ipmmu, IMTTBCR, 7);
ipmmu->tlb_enabled = 1;
break;
}
ipmmu_reg_write(ipmmu, IMTTBR, phys);
ipmmu_reg_write(ipmmu, IMASID, asid);
spin_unlock(&ipmmu->flush_lock);
}
static int ipmmu_probe(struct platform_device *pdev)
{
struct shmobile_ipmmu *ipmmu;
struct resource *res;
struct shmobile_ipmmu_platform_data *pdata = pdev->dev.platform_data;
ipmmu = devm_kzalloc(&pdev->dev, sizeof(*ipmmu), GFP_KERNEL);
if (!ipmmu) {
dev_err(&pdev->dev, "cannot allocate device data\n");
return -ENOMEM;
}
spin_lock_init(&ipmmu->flush_lock);
ipmmu->dev = &pdev->dev;
res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
ipmmu->ipmmu_base = devm_ioremap_resource(&pdev->dev, res);
if (IS_ERR(ipmmu->ipmmu_base))
return PTR_ERR(ipmmu->ipmmu_base);
ipmmu->dev_names = pdata->dev_names;
ipmmu->num_dev_names = pdata->num_dev_names;
platform_set_drvdata(pdev, ipmmu);
ipmmu_reg_write(ipmmu, IMCTR1, 0x0); /* disable TLB */
ipmmu_reg_write(ipmmu, IMCTR2, 0x0); /* disable PMB */
return ipmmu_iommu_init(ipmmu);
}
/*
 * Platform driver bound by name to "ipmmu" devices. Only a probe callback
 * is provided; no remove callback is set here.
 */
static struct platform_driver ipmmu_driver = {
.probe = ipmmu_probe,
.driver = {
.name = "ipmmu",
},
};
/* Initcall: register the IPMMU platform driver and report the result. */
static int __init ipmmu_init(void)
{
	int ret = platform_driver_register(&ipmmu_driver);

	return ret;
}
subsys_initcall(ipmmu_init);
/* shmobile-ipmmu.h
*
* Copyright (C) 2012 Hideki EIRAKU
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; version 2 of the License.
*/
#ifndef __SHMOBILE_IPMMU_H__
#define __SHMOBILE_IPMMU_H__
/* State of one IPMMU hardware instance, allocated in ipmmu_probe(). */
struct shmobile_ipmmu {
struct device *dev; /* the platform device's struct device */
void __iomem *ipmmu_base; /* mapped register block */
int tlb_enabled; /* set by ipmmu_tlb_set(), read by ipmmu_tlb_flush() */
spinlock_t flush_lock; /* serialises register writes in tlb_set/tlb_flush */
const char * const *dev_names; /* device names handled by this IPMMU */
unsigned int num_dev_names; /* number of entries in dev_names */
};
/*
 * With CONFIG_SHMOBILE_IPMMU_TLB the TLB helpers and the IOMMU init entry
 * point are declared here. Without it, ipmmu_iommu_init() is a stub that
 * fails with -EINVAL and the TLB helpers are not declared.
 */
#ifdef CONFIG_SHMOBILE_IPMMU_TLB
void ipmmu_tlb_flush(struct shmobile_ipmmu *ipmmu);
void ipmmu_tlb_set(struct shmobile_ipmmu *ipmmu, unsigned long phys, int size,
int asid);
int ipmmu_iommu_init(struct shmobile_ipmmu *ipmmu);
#else
/* Stub used when IOMMU support is not built. */
static inline int ipmmu_iommu_init(struct shmobile_ipmmu *ipmmu)
{
return -EINVAL;
}
#endif
#endif /* __SHMOBILE_IPMMU_H__ */
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment