Commit bb620c3d authored by Sowmini Varadhan, committed by David S. Miller

sparc: Make sparc64 use scalable lib/iommu-common.c functions

In iperf experiments, running Linux as the Tx side (TCP client) with
10 threads results in a severe performance drop when TSO is disabled,
indicating a weakness in the software that can be avoided by using
the scalable IOMMU arena DMA allocation.

Baseline numbers before this patch:
   with default settings (TSO enabled) :    9-9.5 Gbps
   Disable TSO using ethtool- drops badly:  2-3 Gbps.

After this patch, iperf client with 10 threads, can give a
throughput of at least 8.5 Gbps, even when TSO is disabled.
Signed-off-by: Sowmini Varadhan <sowmini.varadhan@oracle.com>
Acked-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent ff7d37a5
...@@ -16,6 +16,7 @@ ...@@ -16,6 +16,7 @@
#define IOPTE_WRITE 0x0000000000000002UL #define IOPTE_WRITE 0x0000000000000002UL
#define IOMMU_NUM_CTXS 4096 #define IOMMU_NUM_CTXS 4096
#include <linux/iommu-common.h>
struct iommu_arena { struct iommu_arena {
unsigned long *map; unsigned long *map;
...@@ -24,11 +25,10 @@ struct iommu_arena { ...@@ -24,11 +25,10 @@ struct iommu_arena {
}; };
struct iommu { struct iommu {
struct iommu_map_table tbl;
spinlock_t lock; spinlock_t lock;
struct iommu_arena arena; u32 dma_addr_mask;
void (*flush_all)(struct iommu *);
iopte_t *page_table; iopte_t *page_table;
u32 page_table_map_base;
unsigned long iommu_control; unsigned long iommu_control;
unsigned long iommu_tsbbase; unsigned long iommu_tsbbase;
unsigned long iommu_flush; unsigned long iommu_flush;
...@@ -40,7 +40,6 @@ struct iommu { ...@@ -40,7 +40,6 @@ struct iommu {
unsigned long dummy_page_pa; unsigned long dummy_page_pa;
unsigned long ctx_lowest_free; unsigned long ctx_lowest_free;
DECLARE_BITMAP(ctx_bitmap, IOMMU_NUM_CTXS); DECLARE_BITMAP(ctx_bitmap, IOMMU_NUM_CTXS);
u32 dma_addr_mask;
}; };
struct strbuf { struct strbuf {
......
...@@ -13,6 +13,7 @@ ...@@ -13,6 +13,7 @@
#include <linux/errno.h> #include <linux/errno.h>
#include <linux/iommu-helper.h> #include <linux/iommu-helper.h>
#include <linux/bitmap.h> #include <linux/bitmap.h>
#include <linux/iommu-common.h>
#ifdef CONFIG_PCI #ifdef CONFIG_PCI
#include <linux/pci.h> #include <linux/pci.h>
...@@ -45,8 +46,9 @@ ...@@ -45,8 +46,9 @@
"i" (ASI_PHYS_BYPASS_EC_E)) "i" (ASI_PHYS_BYPASS_EC_E))
/* Must be invoked under the IOMMU lock. */ /* Must be invoked under the IOMMU lock. */
static void iommu_flushall(struct iommu *iommu) static void iommu_flushall(struct iommu_map_table *iommu_map_table)
{ {
struct iommu *iommu = container_of(iommu_map_table, struct iommu, tbl);
if (iommu->iommu_flushinv) { if (iommu->iommu_flushinv) {
iommu_write(iommu->iommu_flushinv, ~(u64)0); iommu_write(iommu->iommu_flushinv, ~(u64)0);
} else { } else {
...@@ -87,94 +89,6 @@ static inline void iopte_make_dummy(struct iommu *iommu, iopte_t *iopte) ...@@ -87,94 +89,6 @@ static inline void iopte_make_dummy(struct iommu *iommu, iopte_t *iopte)
iopte_val(*iopte) = val; iopte_val(*iopte) = val;
} }
/* Allocate a run of 'npages' consecutive IOMMU entries from iommu->arena.
 *
 * Based almost entirely upon the ppc64 iommu allocator. If you use the 'handle'
 * facility it must all be done in one pass while under the iommu lock.
 *
 * On sun4u platforms, we only flush the IOMMU once every time we've passed
 * over the entire page table doing allocations. Therefore we only ever advance
 * the hint and cannot backtrack it.
 *
 * @dev:    device the mapping is for; may be NULL, in which case a 4GB
 *          (1UL << 32) segment boundary is used instead of the device's.
 * @iommu:  IOMMU whose arena bitmap is searched and updated.
 * @npages: number of IO pages requested; zero is rejected.
 * @handle: optional in/out search position. A non-zero *handle is used as
 *          the starting point instead of arena->hint, and on success it is
 *          set to the entry just past the new allocation.
 *
 * Returns the index of the first allocated entry, or DMA_ERROR_CODE when
 * npages is zero or no suitable range was found after two passes.
 */
unsigned long iommu_range_alloc(struct device *dev,
struct iommu *iommu,
unsigned long npages,
unsigned long *handle)
{
unsigned long n, end, start, limit, boundary_size;
struct iommu_arena *arena = &iommu->arena;
int pass = 0;
/* This allocator was derived from x86_64's bit string search */
/* Sanity check: a zero-page request is a caller bug; warn (rate limited)
 * and fail.
 */
if (unlikely(npages == 0)) {
if (printk_ratelimit())
WARN_ON(1);
return DMA_ERROR_CODE;
}
/* Resume from the caller's handle when one is supplied and non-zero,
 * otherwise from the arena-wide hint left by the previous allocation.
 */
if (handle && *handle)
start = *handle;
else
start = arena->hint;
limit = arena->limit;
/* The case below can happen if we have a small segment appended
* to a large, or when the previous alloc was at the very end of
* the available space. If so, go back to the beginning and flush.
*/
if (start >= limit) {
start = 0;
if (iommu->flush_all)
iommu->flush_all(iommu);
}
again:
/* Segment boundary in bytes, rounded up to a whole IO page. */
if (dev)
boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1,
1 << IO_PAGE_SHIFT);
else
boundary_size = ALIGN(1UL << 32, 1 << IO_PAGE_SHIFT);
/* Search the bitmap from 'start'; the map base and the boundary are
 * passed in units of IO pages.
 */
n = iommu_area_alloc(arena->map, limit, start, npages,
iommu->page_table_map_base >> IO_PAGE_SHIFT,
boundary_size >> IO_PAGE_SHIFT, 0);
if (n == -1) {
if (likely(pass < 1)) {
/* First failure, rescan from the beginning. */
start = 0;
if (iommu->flush_all)
iommu->flush_all(iommu);
pass++;
goto again;
} else {
/* Second failure, give up */
return DMA_ERROR_CODE;
}
}
/* Advance the hint past this allocation so the next search starts there. */
end = n + npages;
arena->hint = end;
/* Update handle for SG allocations */
if (handle)
*handle = end;
return n;
}
/* Release 'npages' arena entries starting at the entry that backs 'dma_addr'.
 *
 * Only the allocation bitmap in iommu->arena is updated here; this function
 * does not touch the IO page table entries themselves.
 */
void iommu_range_free(struct iommu *iommu, dma_addr_t dma_addr, unsigned long npages)
{
struct iommu_arena *arena = &iommu->arena;
unsigned long entry;
/* Convert the bus address into an entry index relative to the map base. */
entry = (dma_addr - iommu->page_table_map_base) >> IO_PAGE_SHIFT;
bitmap_clear(arena->map, entry, npages);
}
int iommu_table_init(struct iommu *iommu, int tsbsize, int iommu_table_init(struct iommu *iommu, int tsbsize,
u32 dma_offset, u32 dma_addr_mask, u32 dma_offset, u32 dma_addr_mask,
int numa_node) int numa_node)
...@@ -187,22 +101,20 @@ int iommu_table_init(struct iommu *iommu, int tsbsize, ...@@ -187,22 +101,20 @@ int iommu_table_init(struct iommu *iommu, int tsbsize,
/* Setup initial software IOMMU state. */ /* Setup initial software IOMMU state. */
spin_lock_init(&iommu->lock); spin_lock_init(&iommu->lock);
iommu->ctx_lowest_free = 1; iommu->ctx_lowest_free = 1;
iommu->page_table_map_base = dma_offset; iommu->tbl.table_map_base = dma_offset;
iommu->dma_addr_mask = dma_addr_mask; iommu->dma_addr_mask = dma_addr_mask;
/* Allocate and initialize the free area map. */ /* Allocate and initialize the free area map. */
sz = num_tsb_entries / 8; sz = num_tsb_entries / 8;
sz = (sz + 7UL) & ~7UL; sz = (sz + 7UL) & ~7UL;
iommu->arena.map = kmalloc_node(sz, GFP_KERNEL, numa_node); iommu->tbl.map = kmalloc_node(sz, GFP_KERNEL, numa_node);
if (!iommu->arena.map) { if (!iommu->tbl.map)
printk(KERN_ERR "IOMMU: Error, kmalloc(arena.map) failed.\n");
return -ENOMEM; return -ENOMEM;
} memset(iommu->tbl.map, 0, sz);
memset(iommu->arena.map, 0, sz);
iommu->arena.limit = num_tsb_entries;
if (tlb_type != hypervisor) iommu_tbl_pool_init(&iommu->tbl, num_tsb_entries, IO_PAGE_SHIFT,
iommu->flush_all = iommu_flushall; (tlb_type != hypervisor ? iommu_flushall : NULL),
false, 1, false);
/* Allocate and initialize the dummy page which we /* Allocate and initialize the dummy page which we
* set inactive IO PTEs to point to. * set inactive IO PTEs to point to.
...@@ -235,18 +147,20 @@ int iommu_table_init(struct iommu *iommu, int tsbsize, ...@@ -235,18 +147,20 @@ int iommu_table_init(struct iommu *iommu, int tsbsize,
iommu->dummy_page = 0UL; iommu->dummy_page = 0UL;
out_free_map: out_free_map:
kfree(iommu->arena.map); kfree(iommu->tbl.map);
iommu->arena.map = NULL; iommu->tbl.map = NULL;
return -ENOMEM; return -ENOMEM;
} }
static inline iopte_t *alloc_npages(struct device *dev, struct iommu *iommu, static inline iopte_t *alloc_npages(struct device *dev,
struct iommu *iommu,
unsigned long npages) unsigned long npages)
{ {
unsigned long entry; unsigned long entry;
entry = iommu_range_alloc(dev, iommu, npages, NULL); entry = iommu_tbl_range_alloc(dev, &iommu->tbl, npages, NULL,
(unsigned long)(-1), 0);
if (unlikely(entry == DMA_ERROR_CODE)) if (unlikely(entry == DMA_ERROR_CODE))
return NULL; return NULL;
...@@ -284,7 +198,7 @@ static void *dma_4u_alloc_coherent(struct device *dev, size_t size, ...@@ -284,7 +198,7 @@ static void *dma_4u_alloc_coherent(struct device *dev, size_t size,
dma_addr_t *dma_addrp, gfp_t gfp, dma_addr_t *dma_addrp, gfp_t gfp,
struct dma_attrs *attrs) struct dma_attrs *attrs)
{ {
unsigned long flags, order, first_page; unsigned long order, first_page;
struct iommu *iommu; struct iommu *iommu;
struct page *page; struct page *page;
int npages, nid; int npages, nid;
...@@ -306,16 +220,14 @@ static void *dma_4u_alloc_coherent(struct device *dev, size_t size, ...@@ -306,16 +220,14 @@ static void *dma_4u_alloc_coherent(struct device *dev, size_t size,
iommu = dev->archdata.iommu; iommu = dev->archdata.iommu;
spin_lock_irqsave(&iommu->lock, flags);
iopte = alloc_npages(dev, iommu, size >> IO_PAGE_SHIFT); iopte = alloc_npages(dev, iommu, size >> IO_PAGE_SHIFT);
spin_unlock_irqrestore(&iommu->lock, flags);
if (unlikely(iopte == NULL)) { if (unlikely(iopte == NULL)) {
free_pages(first_page, order); free_pages(first_page, order);
return NULL; return NULL;
} }
*dma_addrp = (iommu->page_table_map_base + *dma_addrp = (iommu->tbl.table_map_base +
((iopte - iommu->page_table) << IO_PAGE_SHIFT)); ((iopte - iommu->page_table) << IO_PAGE_SHIFT));
ret = (void *) first_page; ret = (void *) first_page;
npages = size >> IO_PAGE_SHIFT; npages = size >> IO_PAGE_SHIFT;
...@@ -336,16 +248,12 @@ static void dma_4u_free_coherent(struct device *dev, size_t size, ...@@ -336,16 +248,12 @@ static void dma_4u_free_coherent(struct device *dev, size_t size,
struct dma_attrs *attrs) struct dma_attrs *attrs)
{ {
struct iommu *iommu; struct iommu *iommu;
unsigned long flags, order, npages; unsigned long order, npages;
npages = IO_PAGE_ALIGN(size) >> IO_PAGE_SHIFT; npages = IO_PAGE_ALIGN(size) >> IO_PAGE_SHIFT;
iommu = dev->archdata.iommu; iommu = dev->archdata.iommu;
spin_lock_irqsave(&iommu->lock, flags); iommu_tbl_range_free(&iommu->tbl, dvma, npages, DMA_ERROR_CODE);
iommu_range_free(iommu, dvma, npages);
spin_unlock_irqrestore(&iommu->lock, flags);
order = get_order(size); order = get_order(size);
if (order < 10) if (order < 10)
...@@ -375,8 +283,8 @@ static dma_addr_t dma_4u_map_page(struct device *dev, struct page *page, ...@@ -375,8 +283,8 @@ static dma_addr_t dma_4u_map_page(struct device *dev, struct page *page,
npages = IO_PAGE_ALIGN(oaddr + sz) - (oaddr & IO_PAGE_MASK); npages = IO_PAGE_ALIGN(oaddr + sz) - (oaddr & IO_PAGE_MASK);
npages >>= IO_PAGE_SHIFT; npages >>= IO_PAGE_SHIFT;
spin_lock_irqsave(&iommu->lock, flags);
base = alloc_npages(dev, iommu, npages); base = alloc_npages(dev, iommu, npages);
spin_lock_irqsave(&iommu->lock, flags);
ctx = 0; ctx = 0;
if (iommu->iommu_ctxflush) if (iommu->iommu_ctxflush)
ctx = iommu_alloc_ctx(iommu); ctx = iommu_alloc_ctx(iommu);
...@@ -385,7 +293,7 @@ static dma_addr_t dma_4u_map_page(struct device *dev, struct page *page, ...@@ -385,7 +293,7 @@ static dma_addr_t dma_4u_map_page(struct device *dev, struct page *page,
if (unlikely(!base)) if (unlikely(!base))
goto bad; goto bad;
bus_addr = (iommu->page_table_map_base + bus_addr = (iommu->tbl.table_map_base +
((base - iommu->page_table) << IO_PAGE_SHIFT)); ((base - iommu->page_table) << IO_PAGE_SHIFT));
ret = bus_addr | (oaddr & ~IO_PAGE_MASK); ret = bus_addr | (oaddr & ~IO_PAGE_MASK);
base_paddr = __pa(oaddr & IO_PAGE_MASK); base_paddr = __pa(oaddr & IO_PAGE_MASK);
...@@ -496,7 +404,7 @@ static void dma_4u_unmap_page(struct device *dev, dma_addr_t bus_addr, ...@@ -496,7 +404,7 @@ static void dma_4u_unmap_page(struct device *dev, dma_addr_t bus_addr,
npages = IO_PAGE_ALIGN(bus_addr + sz) - (bus_addr & IO_PAGE_MASK); npages = IO_PAGE_ALIGN(bus_addr + sz) - (bus_addr & IO_PAGE_MASK);
npages >>= IO_PAGE_SHIFT; npages >>= IO_PAGE_SHIFT;
base = iommu->page_table + base = iommu->page_table +
((bus_addr - iommu->page_table_map_base) >> IO_PAGE_SHIFT); ((bus_addr - iommu->tbl.table_map_base) >> IO_PAGE_SHIFT);
bus_addr &= IO_PAGE_MASK; bus_addr &= IO_PAGE_MASK;
spin_lock_irqsave(&iommu->lock, flags); spin_lock_irqsave(&iommu->lock, flags);
...@@ -515,11 +423,10 @@ static void dma_4u_unmap_page(struct device *dev, dma_addr_t bus_addr, ...@@ -515,11 +423,10 @@ static void dma_4u_unmap_page(struct device *dev, dma_addr_t bus_addr,
for (i = 0; i < npages; i++) for (i = 0; i < npages; i++)
iopte_make_dummy(iommu, base + i); iopte_make_dummy(iommu, base + i);
iommu_range_free(iommu, bus_addr, npages);
iommu_free_ctx(iommu, ctx); iommu_free_ctx(iommu, ctx);
spin_unlock_irqrestore(&iommu->lock, flags); spin_unlock_irqrestore(&iommu->lock, flags);
iommu_tbl_range_free(&iommu->tbl, bus_addr, npages, DMA_ERROR_CODE);
} }
static int dma_4u_map_sg(struct device *dev, struct scatterlist *sglist, static int dma_4u_map_sg(struct device *dev, struct scatterlist *sglist,
...@@ -567,7 +474,7 @@ static int dma_4u_map_sg(struct device *dev, struct scatterlist *sglist, ...@@ -567,7 +474,7 @@ static int dma_4u_map_sg(struct device *dev, struct scatterlist *sglist,
max_seg_size = dma_get_max_seg_size(dev); max_seg_size = dma_get_max_seg_size(dev);
seg_boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1, seg_boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1,
IO_PAGE_SIZE) >> IO_PAGE_SHIFT; IO_PAGE_SIZE) >> IO_PAGE_SHIFT;
base_shift = iommu->page_table_map_base >> IO_PAGE_SHIFT; base_shift = iommu->tbl.table_map_base >> IO_PAGE_SHIFT;
for_each_sg(sglist, s, nelems, i) { for_each_sg(sglist, s, nelems, i) {
unsigned long paddr, npages, entry, out_entry = 0, slen; unsigned long paddr, npages, entry, out_entry = 0, slen;
iopte_t *base; iopte_t *base;
...@@ -581,7 +488,8 @@ static int dma_4u_map_sg(struct device *dev, struct scatterlist *sglist, ...@@ -581,7 +488,8 @@ static int dma_4u_map_sg(struct device *dev, struct scatterlist *sglist,
/* Allocate iommu entries for that segment */ /* Allocate iommu entries for that segment */
paddr = (unsigned long) SG_ENT_PHYS_ADDRESS(s); paddr = (unsigned long) SG_ENT_PHYS_ADDRESS(s);
npages = iommu_num_pages(paddr, slen, IO_PAGE_SIZE); npages = iommu_num_pages(paddr, slen, IO_PAGE_SIZE);
entry = iommu_range_alloc(dev, iommu, npages, &handle); entry = iommu_tbl_range_alloc(dev, &iommu->tbl, npages,
&handle, (unsigned long)(-1), 0);
/* Handle failure */ /* Handle failure */
if (unlikely(entry == DMA_ERROR_CODE)) { if (unlikely(entry == DMA_ERROR_CODE)) {
...@@ -594,7 +502,7 @@ static int dma_4u_map_sg(struct device *dev, struct scatterlist *sglist, ...@@ -594,7 +502,7 @@ static int dma_4u_map_sg(struct device *dev, struct scatterlist *sglist,
base = iommu->page_table + entry; base = iommu->page_table + entry;
/* Convert entry to a dma_addr_t */ /* Convert entry to a dma_addr_t */
dma_addr = iommu->page_table_map_base + dma_addr = iommu->tbl.table_map_base +
(entry << IO_PAGE_SHIFT); (entry << IO_PAGE_SHIFT);
dma_addr |= (s->offset & ~IO_PAGE_MASK); dma_addr |= (s->offset & ~IO_PAGE_MASK);
...@@ -654,15 +562,17 @@ static int dma_4u_map_sg(struct device *dev, struct scatterlist *sglist, ...@@ -654,15 +562,17 @@ static int dma_4u_map_sg(struct device *dev, struct scatterlist *sglist,
vaddr = s->dma_address & IO_PAGE_MASK; vaddr = s->dma_address & IO_PAGE_MASK;
npages = iommu_num_pages(s->dma_address, s->dma_length, npages = iommu_num_pages(s->dma_address, s->dma_length,
IO_PAGE_SIZE); IO_PAGE_SIZE);
iommu_range_free(iommu, vaddr, npages);
entry = (vaddr - iommu->page_table_map_base) entry = (vaddr - iommu->tbl.table_map_base)
>> IO_PAGE_SHIFT; >> IO_PAGE_SHIFT;
base = iommu->page_table + entry; base = iommu->page_table + entry;
for (j = 0; j < npages; j++) for (j = 0; j < npages; j++)
iopte_make_dummy(iommu, base + j); iopte_make_dummy(iommu, base + j);
iommu_tbl_range_free(&iommu->tbl, vaddr, npages,
DMA_ERROR_CODE);
s->dma_address = DMA_ERROR_CODE; s->dma_address = DMA_ERROR_CODE;
s->dma_length = 0; s->dma_length = 0;
} }
...@@ -684,10 +594,11 @@ static unsigned long fetch_sg_ctx(struct iommu *iommu, struct scatterlist *sg) ...@@ -684,10 +594,11 @@ static unsigned long fetch_sg_ctx(struct iommu *iommu, struct scatterlist *sg)
if (iommu->iommu_ctxflush) { if (iommu->iommu_ctxflush) {
iopte_t *base; iopte_t *base;
u32 bus_addr; u32 bus_addr;
struct iommu_map_table *tbl = &iommu->tbl;
bus_addr = sg->dma_address & IO_PAGE_MASK; bus_addr = sg->dma_address & IO_PAGE_MASK;
base = iommu->page_table + base = iommu->page_table +
((bus_addr - iommu->page_table_map_base) >> IO_PAGE_SHIFT); ((bus_addr - tbl->table_map_base) >> IO_PAGE_SHIFT);
ctx = (iopte_val(*base) & IOPTE_CONTEXT) >> 47UL; ctx = (iopte_val(*base) & IOPTE_CONTEXT) >> 47UL;
} }
...@@ -723,9 +634,8 @@ static void dma_4u_unmap_sg(struct device *dev, struct scatterlist *sglist, ...@@ -723,9 +634,8 @@ static void dma_4u_unmap_sg(struct device *dev, struct scatterlist *sglist,
if (!len) if (!len)
break; break;
npages = iommu_num_pages(dma_handle, len, IO_PAGE_SIZE); npages = iommu_num_pages(dma_handle, len, IO_PAGE_SIZE);
iommu_range_free(iommu, dma_handle, npages);
entry = ((dma_handle - iommu->page_table_map_base) entry = ((dma_handle - iommu->tbl.table_map_base)
>> IO_PAGE_SHIFT); >> IO_PAGE_SHIFT);
base = iommu->page_table + entry; base = iommu->page_table + entry;
...@@ -737,6 +647,8 @@ static void dma_4u_unmap_sg(struct device *dev, struct scatterlist *sglist, ...@@ -737,6 +647,8 @@ static void dma_4u_unmap_sg(struct device *dev, struct scatterlist *sglist,
for (i = 0; i < npages; i++) for (i = 0; i < npages; i++)
iopte_make_dummy(iommu, base + i); iopte_make_dummy(iommu, base + i);
iommu_tbl_range_free(&iommu->tbl, dma_handle, npages,
DMA_ERROR_CODE);
sg = sg_next(sg); sg = sg_next(sg);
} }
...@@ -770,9 +682,10 @@ static void dma_4u_sync_single_for_cpu(struct device *dev, ...@@ -770,9 +682,10 @@ static void dma_4u_sync_single_for_cpu(struct device *dev,
if (iommu->iommu_ctxflush && if (iommu->iommu_ctxflush &&
strbuf->strbuf_ctxflush) { strbuf->strbuf_ctxflush) {
iopte_t *iopte; iopte_t *iopte;
struct iommu_map_table *tbl = &iommu->tbl;
iopte = iommu->page_table + iopte = iommu->page_table +
((bus_addr - iommu->page_table_map_base)>>IO_PAGE_SHIFT); ((bus_addr - tbl->table_map_base)>>IO_PAGE_SHIFT);
ctx = (iopte_val(*iopte) & IOPTE_CONTEXT) >> 47UL; ctx = (iopte_val(*iopte) & IOPTE_CONTEXT) >> 47UL;
} }
...@@ -805,9 +718,10 @@ static void dma_4u_sync_sg_for_cpu(struct device *dev, ...@@ -805,9 +718,10 @@ static void dma_4u_sync_sg_for_cpu(struct device *dev,
if (iommu->iommu_ctxflush && if (iommu->iommu_ctxflush &&
strbuf->strbuf_ctxflush) { strbuf->strbuf_ctxflush) {
iopte_t *iopte; iopte_t *iopte;
struct iommu_map_table *tbl = &iommu->tbl;
iopte = iommu->page_table + iopte = iommu->page_table + ((sglist[0].dma_address -
((sglist[0].dma_address - iommu->page_table_map_base) >> IO_PAGE_SHIFT); tbl->table_map_base) >> IO_PAGE_SHIFT);
ctx = (iopte_val(*iopte) & IOPTE_CONTEXT) >> 47UL; ctx = (iopte_val(*iopte) & IOPTE_CONTEXT) >> 47UL;
} }
......
...@@ -48,12 +48,4 @@ static inline int is_span_boundary(unsigned long entry, ...@@ -48,12 +48,4 @@ static inline int is_span_boundary(unsigned long entry,
return iommu_is_span_boundary(entry, nr, shift, boundary_size); return iommu_is_span_boundary(entry, nr, shift, boundary_size);
} }
unsigned long iommu_range_alloc(struct device *dev,
struct iommu *iommu,
unsigned long npages,
unsigned long *handle);
void iommu_range_free(struct iommu *iommu,
dma_addr_t dma_addr,
unsigned long npages);
#endif /* _IOMMU_COMMON_H */ #endif /* _IOMMU_COMMON_H */
...@@ -15,6 +15,7 @@ ...@@ -15,6 +15,7 @@
#include <linux/export.h> #include <linux/export.h>
#include <linux/log2.h> #include <linux/log2.h>
#include <linux/of_device.h> #include <linux/of_device.h>
#include <linux/iommu-common.h>
#include <asm/iommu.h> #include <asm/iommu.h>
#include <asm/irq.h> #include <asm/irq.h>
...@@ -155,15 +156,13 @@ static void *dma_4v_alloc_coherent(struct device *dev, size_t size, ...@@ -155,15 +156,13 @@ static void *dma_4v_alloc_coherent(struct device *dev, size_t size,
iommu = dev->archdata.iommu; iommu = dev->archdata.iommu;
spin_lock_irqsave(&iommu->lock, flags); entry = iommu_tbl_range_alloc(dev, &iommu->tbl, npages, NULL,
entry = iommu_range_alloc(dev, iommu, npages, NULL); (unsigned long)(-1), 0);
spin_unlock_irqrestore(&iommu->lock, flags);
if (unlikely(entry == DMA_ERROR_CODE)) if (unlikely(entry == DMA_ERROR_CODE))
goto range_alloc_fail; goto range_alloc_fail;
*dma_addrp = (iommu->page_table_map_base + *dma_addrp = (iommu->tbl.table_map_base + (entry << IO_PAGE_SHIFT));
(entry << IO_PAGE_SHIFT));
ret = (void *) first_page; ret = (void *) first_page;
first_page = __pa(first_page); first_page = __pa(first_page);
...@@ -188,45 +187,46 @@ static void *dma_4v_alloc_coherent(struct device *dev, size_t size, ...@@ -188,45 +187,46 @@ static void *dma_4v_alloc_coherent(struct device *dev, size_t size,
return ret; return ret;
iommu_map_fail: iommu_map_fail:
/* Interrupts are disabled. */ iommu_tbl_range_free(&iommu->tbl, *dma_addrp, npages, DMA_ERROR_CODE);
spin_lock(&iommu->lock);
iommu_range_free(iommu, *dma_addrp, npages);
spin_unlock_irqrestore(&iommu->lock, flags);
range_alloc_fail: range_alloc_fail:
free_pages(first_page, order); free_pages(first_page, order);
return NULL; return NULL;
} }
/* Demap 'npages' IOMMU entries starting at 'entry' via the sun4v hypervisor.
 *
 * @demap_arg: pointer to a u32 holding the PCI devhandle.
 * @entry:     index of the first TSB entry to demap.
 * @npages:    number of entries to demap.
 *
 * The whole sequence runs with local interrupts disabled.
 */
static void dma_4v_iommu_demap(void *demap_arg, unsigned long entry,
unsigned long npages)
{
u32 devhandle = *(u32 *)demap_arg;
unsigned long num, flags;
local_irq_save(flags);
/* Each call may process only part of the range: advance by the count it
 * returns (presumably the number of entries actually demapped -- confirm
 * against pci_sun4v_iommu_demap) and repeat until none remain. As a
 * do/while, the call is issued at least once even when npages is 0.
 */
do {
num = pci_sun4v_iommu_demap(devhandle,
HV_PCI_TSBID(0, entry),
npages);
entry += num;
npages -= num;
} while (npages != 0);
local_irq_restore(flags);
}
static void dma_4v_free_coherent(struct device *dev, size_t size, void *cpu, static void dma_4v_free_coherent(struct device *dev, size_t size, void *cpu,
dma_addr_t dvma, struct dma_attrs *attrs) dma_addr_t dvma, struct dma_attrs *attrs)
{ {
struct pci_pbm_info *pbm; struct pci_pbm_info *pbm;
struct iommu *iommu; struct iommu *iommu;
unsigned long flags, order, npages, entry; unsigned long order, npages, entry;
u32 devhandle; u32 devhandle;
npages = IO_PAGE_ALIGN(size) >> IO_PAGE_SHIFT; npages = IO_PAGE_ALIGN(size) >> IO_PAGE_SHIFT;
iommu = dev->archdata.iommu; iommu = dev->archdata.iommu;
pbm = dev->archdata.host_controller; pbm = dev->archdata.host_controller;
devhandle = pbm->devhandle; devhandle = pbm->devhandle;
entry = ((dvma - iommu->page_table_map_base) >> IO_PAGE_SHIFT); entry = ((dvma - iommu->tbl.table_map_base) >> IO_PAGE_SHIFT);
dma_4v_iommu_demap(&devhandle, entry, npages);
spin_lock_irqsave(&iommu->lock, flags); iommu_tbl_range_free(&iommu->tbl, dvma, npages, DMA_ERROR_CODE);
iommu_range_free(iommu, dvma, npages);
do {
unsigned long num;
num = pci_sun4v_iommu_demap(devhandle, HV_PCI_TSBID(0, entry),
npages);
entry += num;
npages -= num;
} while (npages != 0);
spin_unlock_irqrestore(&iommu->lock, flags);
order = get_order(size); order = get_order(size);
if (order < 10) if (order < 10)
free_pages((unsigned long)cpu, order); free_pages((unsigned long)cpu, order);
...@@ -253,15 +253,13 @@ static dma_addr_t dma_4v_map_page(struct device *dev, struct page *page, ...@@ -253,15 +253,13 @@ static dma_addr_t dma_4v_map_page(struct device *dev, struct page *page,
npages = IO_PAGE_ALIGN(oaddr + sz) - (oaddr & IO_PAGE_MASK); npages = IO_PAGE_ALIGN(oaddr + sz) - (oaddr & IO_PAGE_MASK);
npages >>= IO_PAGE_SHIFT; npages >>= IO_PAGE_SHIFT;
spin_lock_irqsave(&iommu->lock, flags); entry = iommu_tbl_range_alloc(dev, &iommu->tbl, npages, NULL,
entry = iommu_range_alloc(dev, iommu, npages, NULL); (unsigned long)(-1), 0);
spin_unlock_irqrestore(&iommu->lock, flags);
if (unlikely(entry == DMA_ERROR_CODE)) if (unlikely(entry == DMA_ERROR_CODE))
goto bad; goto bad;
bus_addr = (iommu->page_table_map_base + bus_addr = (iommu->tbl.table_map_base + (entry << IO_PAGE_SHIFT));
(entry << IO_PAGE_SHIFT));
ret = bus_addr | (oaddr & ~IO_PAGE_MASK); ret = bus_addr | (oaddr & ~IO_PAGE_MASK);
base_paddr = __pa(oaddr & IO_PAGE_MASK); base_paddr = __pa(oaddr & IO_PAGE_MASK);
prot = HV_PCI_MAP_ATTR_READ; prot = HV_PCI_MAP_ATTR_READ;
...@@ -290,11 +288,7 @@ static dma_addr_t dma_4v_map_page(struct device *dev, struct page *page, ...@@ -290,11 +288,7 @@ static dma_addr_t dma_4v_map_page(struct device *dev, struct page *page,
return DMA_ERROR_CODE; return DMA_ERROR_CODE;
iommu_map_fail: iommu_map_fail:
/* Interrupts are disabled. */ iommu_tbl_range_free(&iommu->tbl, bus_addr, npages, DMA_ERROR_CODE);
spin_lock(&iommu->lock);
iommu_range_free(iommu, bus_addr, npages);
spin_unlock_irqrestore(&iommu->lock, flags);
return DMA_ERROR_CODE; return DMA_ERROR_CODE;
} }
...@@ -304,7 +298,7 @@ static void dma_4v_unmap_page(struct device *dev, dma_addr_t bus_addr, ...@@ -304,7 +298,7 @@ static void dma_4v_unmap_page(struct device *dev, dma_addr_t bus_addr,
{ {
struct pci_pbm_info *pbm; struct pci_pbm_info *pbm;
struct iommu *iommu; struct iommu *iommu;
unsigned long flags, npages; unsigned long npages;
long entry; long entry;
u32 devhandle; u32 devhandle;
...@@ -321,22 +315,9 @@ static void dma_4v_unmap_page(struct device *dev, dma_addr_t bus_addr, ...@@ -321,22 +315,9 @@ static void dma_4v_unmap_page(struct device *dev, dma_addr_t bus_addr,
npages = IO_PAGE_ALIGN(bus_addr + sz) - (bus_addr & IO_PAGE_MASK); npages = IO_PAGE_ALIGN(bus_addr + sz) - (bus_addr & IO_PAGE_MASK);
npages >>= IO_PAGE_SHIFT; npages >>= IO_PAGE_SHIFT;
bus_addr &= IO_PAGE_MASK; bus_addr &= IO_PAGE_MASK;
entry = (bus_addr - iommu->tbl.table_map_base) >> IO_PAGE_SHIFT;
spin_lock_irqsave(&iommu->lock, flags); dma_4v_iommu_demap(&devhandle, entry, npages);
iommu_tbl_range_free(&iommu->tbl, bus_addr, npages, DMA_ERROR_CODE);
iommu_range_free(iommu, bus_addr, npages);
entry = (bus_addr - iommu->page_table_map_base) >> IO_PAGE_SHIFT;
do {
unsigned long num;
num = pci_sun4v_iommu_demap(devhandle, HV_PCI_TSBID(0, entry),
npages);
entry += num;
npages -= num;
} while (npages != 0);
spin_unlock_irqrestore(&iommu->lock, flags);
} }
static int dma_4v_map_sg(struct device *dev, struct scatterlist *sglist, static int dma_4v_map_sg(struct device *dev, struct scatterlist *sglist,
...@@ -371,14 +352,14 @@ static int dma_4v_map_sg(struct device *dev, struct scatterlist *sglist, ...@@ -371,14 +352,14 @@ static int dma_4v_map_sg(struct device *dev, struct scatterlist *sglist,
/* Init first segment length for backout at failure */ /* Init first segment length for backout at failure */
outs->dma_length = 0; outs->dma_length = 0;
spin_lock_irqsave(&iommu->lock, flags); local_irq_save(flags);
iommu_batch_start(dev, prot, ~0UL); iommu_batch_start(dev, prot, ~0UL);
max_seg_size = dma_get_max_seg_size(dev); max_seg_size = dma_get_max_seg_size(dev);
seg_boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1, seg_boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1,
IO_PAGE_SIZE) >> IO_PAGE_SHIFT; IO_PAGE_SIZE) >> IO_PAGE_SHIFT;
base_shift = iommu->page_table_map_base >> IO_PAGE_SHIFT; base_shift = iommu->tbl.table_map_base >> IO_PAGE_SHIFT;
for_each_sg(sglist, s, nelems, i) { for_each_sg(sglist, s, nelems, i) {
unsigned long paddr, npages, entry, out_entry = 0, slen; unsigned long paddr, npages, entry, out_entry = 0, slen;
...@@ -391,7 +372,8 @@ static int dma_4v_map_sg(struct device *dev, struct scatterlist *sglist, ...@@ -391,7 +372,8 @@ static int dma_4v_map_sg(struct device *dev, struct scatterlist *sglist,
/* Allocate iommu entries for that segment */ /* Allocate iommu entries for that segment */
paddr = (unsigned long) SG_ENT_PHYS_ADDRESS(s); paddr = (unsigned long) SG_ENT_PHYS_ADDRESS(s);
npages = iommu_num_pages(paddr, slen, IO_PAGE_SIZE); npages = iommu_num_pages(paddr, slen, IO_PAGE_SIZE);
entry = iommu_range_alloc(dev, iommu, npages, &handle); entry = iommu_tbl_range_alloc(dev, &iommu->tbl, npages,
&handle, (unsigned long)(-1), 0);
/* Handle failure */ /* Handle failure */
if (unlikely(entry == DMA_ERROR_CODE)) { if (unlikely(entry == DMA_ERROR_CODE)) {
...@@ -404,8 +386,7 @@ static int dma_4v_map_sg(struct device *dev, struct scatterlist *sglist, ...@@ -404,8 +386,7 @@ static int dma_4v_map_sg(struct device *dev, struct scatterlist *sglist,
iommu_batch_new_entry(entry); iommu_batch_new_entry(entry);
/* Convert entry to a dma_addr_t */ /* Convert entry to a dma_addr_t */
dma_addr = iommu->page_table_map_base + dma_addr = iommu->tbl.table_map_base + (entry << IO_PAGE_SHIFT);
(entry << IO_PAGE_SHIFT);
dma_addr |= (s->offset & ~IO_PAGE_MASK); dma_addr |= (s->offset & ~IO_PAGE_MASK);
/* Insert into HW table */ /* Insert into HW table */
...@@ -451,7 +432,7 @@ static int dma_4v_map_sg(struct device *dev, struct scatterlist *sglist, ...@@ -451,7 +432,7 @@ static int dma_4v_map_sg(struct device *dev, struct scatterlist *sglist,
if (unlikely(err < 0L)) if (unlikely(err < 0L))
goto iommu_map_failed; goto iommu_map_failed;
spin_unlock_irqrestore(&iommu->lock, flags); local_irq_restore(flags);
if (outcount < incount) { if (outcount < incount) {
outs = sg_next(outs); outs = sg_next(outs);
...@@ -469,7 +450,8 @@ static int dma_4v_map_sg(struct device *dev, struct scatterlist *sglist, ...@@ -469,7 +450,8 @@ static int dma_4v_map_sg(struct device *dev, struct scatterlist *sglist,
vaddr = s->dma_address & IO_PAGE_MASK; vaddr = s->dma_address & IO_PAGE_MASK;
npages = iommu_num_pages(s->dma_address, s->dma_length, npages = iommu_num_pages(s->dma_address, s->dma_length,
IO_PAGE_SIZE); IO_PAGE_SIZE);
iommu_range_free(iommu, vaddr, npages); iommu_tbl_range_free(&iommu->tbl, vaddr, npages,
DMA_ERROR_CODE);
/* XXX demap? XXX */ /* XXX demap? XXX */
s->dma_address = DMA_ERROR_CODE; s->dma_address = DMA_ERROR_CODE;
s->dma_length = 0; s->dma_length = 0;
...@@ -477,7 +459,7 @@ static int dma_4v_map_sg(struct device *dev, struct scatterlist *sglist, ...@@ -477,7 +459,7 @@ static int dma_4v_map_sg(struct device *dev, struct scatterlist *sglist,
if (s == outs) if (s == outs)
break; break;
} }
spin_unlock_irqrestore(&iommu->lock, flags); local_irq_restore(flags);
return 0; return 0;
} }
...@@ -489,7 +471,7 @@ static void dma_4v_unmap_sg(struct device *dev, struct scatterlist *sglist, ...@@ -489,7 +471,7 @@ static void dma_4v_unmap_sg(struct device *dev, struct scatterlist *sglist,
struct pci_pbm_info *pbm; struct pci_pbm_info *pbm;
struct scatterlist *sg; struct scatterlist *sg;
struct iommu *iommu; struct iommu *iommu;
unsigned long flags; unsigned long flags, entry;
u32 devhandle; u32 devhandle;
BUG_ON(direction == DMA_NONE); BUG_ON(direction == DMA_NONE);
...@@ -498,33 +480,27 @@ static void dma_4v_unmap_sg(struct device *dev, struct scatterlist *sglist, ...@@ -498,33 +480,27 @@ static void dma_4v_unmap_sg(struct device *dev, struct scatterlist *sglist,
pbm = dev->archdata.host_controller; pbm = dev->archdata.host_controller;
devhandle = pbm->devhandle; devhandle = pbm->devhandle;
spin_lock_irqsave(&iommu->lock, flags); local_irq_save(flags);
sg = sglist; sg = sglist;
while (nelems--) { while (nelems--) {
dma_addr_t dma_handle = sg->dma_address; dma_addr_t dma_handle = sg->dma_address;
unsigned int len = sg->dma_length; unsigned int len = sg->dma_length;
unsigned long npages, entry; unsigned long npages;
struct iommu_map_table *tbl = &iommu->tbl;
unsigned long shift = IO_PAGE_SHIFT;
if (!len) if (!len)
break; break;
npages = iommu_num_pages(dma_handle, len, IO_PAGE_SIZE); npages = iommu_num_pages(dma_handle, len, IO_PAGE_SIZE);
iommu_range_free(iommu, dma_handle, npages); entry = ((dma_handle - tbl->table_map_base) >> shift);
dma_4v_iommu_demap(&devhandle, entry, npages);
entry = ((dma_handle - iommu->page_table_map_base) >> IO_PAGE_SHIFT); iommu_tbl_range_free(&iommu->tbl, dma_handle, npages,
while (npages) { DMA_ERROR_CODE);
unsigned long num;
num = pci_sun4v_iommu_demap(devhandle, HV_PCI_TSBID(0, entry),
npages);
entry += num;
npages -= num;
}
sg = sg_next(sg); sg = sg_next(sg);
} }
spin_unlock_irqrestore(&iommu->lock, flags); local_irq_restore(flags);
} }
static struct dma_map_ops sun4v_dma_ops = { static struct dma_map_ops sun4v_dma_ops = {
...@@ -550,30 +526,33 @@ static void pci_sun4v_scan_bus(struct pci_pbm_info *pbm, struct device *parent) ...@@ -550,30 +526,33 @@ static void pci_sun4v_scan_bus(struct pci_pbm_info *pbm, struct device *parent)
} }
static unsigned long probe_existing_entries(struct pci_pbm_info *pbm, static unsigned long probe_existing_entries(struct pci_pbm_info *pbm,
struct iommu *iommu) struct iommu_map_table *iommu)
{ {
struct iommu_arena *arena = &iommu->arena; struct iommu_pool *pool;
unsigned long i, cnt = 0; unsigned long i, pool_nr, cnt = 0;
u32 devhandle; u32 devhandle;
devhandle = pbm->devhandle; devhandle = pbm->devhandle;
for (i = 0; i < arena->limit; i++) { for (pool_nr = 0; pool_nr < iommu->nr_pools; pool_nr++) {
unsigned long ret, io_attrs, ra; pool = &(iommu->pools[pool_nr]);
for (i = pool->start; i <= pool->end; i++) {
ret = pci_sun4v_iommu_getmap(devhandle, unsigned long ret, io_attrs, ra;
HV_PCI_TSBID(0, i),
&io_attrs, &ra); ret = pci_sun4v_iommu_getmap(devhandle,
if (ret == HV_EOK) { HV_PCI_TSBID(0, i),
if (page_in_phys_avail(ra)) { &io_attrs, &ra);
pci_sun4v_iommu_demap(devhandle, if (ret == HV_EOK) {
HV_PCI_TSBID(0, i), 1); if (page_in_phys_avail(ra)) {
} else { pci_sun4v_iommu_demap(devhandle,
cnt++; HV_PCI_TSBID(0,
__set_bit(i, arena->map); i), 1);
} else {
cnt++;
__set_bit(i, iommu->map);
}
} }
} }
} }
return cnt; return cnt;
} }
...@@ -603,20 +582,22 @@ static int pci_sun4v_iommu_init(struct pci_pbm_info *pbm) ...@@ -603,20 +582,22 @@ static int pci_sun4v_iommu_init(struct pci_pbm_info *pbm)
/* Setup initial software IOMMU state. */ /* Setup initial software IOMMU state. */
spin_lock_init(&iommu->lock); spin_lock_init(&iommu->lock);
iommu->ctx_lowest_free = 1; iommu->ctx_lowest_free = 1;
iommu->page_table_map_base = dma_offset; iommu->tbl.table_map_base = dma_offset;
iommu->dma_addr_mask = dma_mask; iommu->dma_addr_mask = dma_mask;
/* Allocate and initialize the free area map. */ /* Allocate and initialize the free area map. */
sz = (num_tsb_entries + 7) / 8; sz = (num_tsb_entries + 7) / 8;
sz = (sz + 7UL) & ~7UL; sz = (sz + 7UL) & ~7UL;
iommu->arena.map = kzalloc(sz, GFP_KERNEL); iommu->tbl.map = kzalloc(sz, GFP_KERNEL);
if (!iommu->arena.map) { if (!iommu->tbl.map) {
printk(KERN_ERR PFX "Error, kmalloc(arena.map) failed.\n"); printk(KERN_ERR PFX "Error, kmalloc(arena.map) failed.\n");
return -ENOMEM; return -ENOMEM;
} }
iommu->arena.limit = num_tsb_entries; iommu_tbl_pool_init(&iommu->tbl, num_tsb_entries, IO_PAGE_SHIFT,
NULL, false /* no large_pool */,
sz = probe_existing_entries(pbm, iommu); 0 /* default npools */,
false /* want span boundary checking */);
sz = probe_existing_entries(pbm, &iommu->tbl);
if (sz) if (sz)
printk("%s: Imported %lu TSB entries from OBP\n", printk("%s: Imported %lu TSB entries from OBP\n",
pbm->name, sz); pbm->name, sz);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment