Commit 49cc0c43 authored by David S. Miller's avatar David S. Miller

Merge branch 'sun4v-64bit-DMA'

Tushar Dave says:

====================
sparc: Enable sun4v hypervisor PCI IOMMU v2 APIs and ATU

ATU (Address Translation Unit) is a new IOMMU in SPARC supported with
sun4v hypervisor PCI IOMMU v2 APIs.

Current SPARC IOMMU supports only 32bit address ranges and one TSB
per PCIe root complex that has a 2GB per root complex DVMA space
limit. The limit has become a scalability bottleneck nowadays that
a typical 10G/40G NIC can consume 500MB DVMA space per instance.
When DVMA resource is exhausted, devices will not be usable
since the driver can't allocate DVMA.

For example, we recently experienced legacy IOMMU limitation while
using i40e driver in system with large number of CPUs (e.g. 128).
Four ports of i40e, each request 128 QP (Queue Pairs). Each queue has
512 (default) descriptors. So considering only RX queues (because RX
premap DMA buffers), i40e takes 4*128*512 number of DMA entries in
IOMMU table. Legacy IOMMU can have at max (2G/8K)- 1 entries available
in table. So bringing up four instance of i40e alone saturate existing
IOMMU resource.

ATU removes bottleneck by allowing guest os to create IOTSB of size
32G (or more) with 64bit address ranges available in ATU HW. 32G is
more than enough DVMA space to be shared by all PCIe devices under
root complex contrast to 2G space provided by legacy IOMMU.

ATU allows PCIe devices to use 64bit DMA addressing. Devices
which choose to use 32bit DMA mask will continue to work with the
existing legacy IOMMU.

The patch set is tested on sun4v (T1000, T2000, T3, T4, T5, T7, S7)
and sun4u SPARC.

Thanks.
-Tushar

v2->v3:
- Patch #5 addresses comment by Joe Perches.
 -- use %s, __func__ instead of embedding the function name.

v1->v2:
- Patch #2 addresses comments by Dave M.
 -- use page allocator to allocate IOTSB.
 -- use true/false with boolean variables.
====================
Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parents 87a349f9 d30a6b84
......@@ -89,6 +89,14 @@ config ARCH_DEFCONFIG
config ARCH_PROC_KCORE_TEXT
def_bool y
config ARCH_ATU
bool
default y if SPARC64
config ARCH_DMA_ADDR_T_64BIT
bool
default y if ARCH_ATU
config IOMMU_HELPER
bool
default y if SPARC64
......@@ -304,6 +312,20 @@ config ARCH_SPARSEMEM_ENABLE
config ARCH_SPARSEMEM_DEFAULT
def_bool y if SPARC64
config FORCE_MAX_ZONEORDER
int "Maximum zone order"
default "13"
help
The kernel memory allocator divides physically contiguous memory
blocks into "zones", where each zone is a power of two number of
pages. This option selects the largest power of two that the kernel
keeps in the memory allocator. If you need to allocate very large
blocks of physically contiguous memory, then you may need to
increase this value.
This config option is actually maximum order plus one. For example,
a value of 13 means that the largest free memory block is 2^12 pages.
source "mm/Kconfig"
if SPARC64
......
This diff is collapsed.
......@@ -24,8 +24,36 @@ struct iommu_arena {
unsigned int limit;
};
#define ATU_64_SPACE_SIZE 0x800000000 /* 32G */
/* Data structures for SPARC ATU architecture */
struct atu_iotsb {
void *table; /* IOTSB table base virtual addr*/
u64 ra; /* IOTSB table real addr */
u64 dvma_size; /* ranges[3].size or OS slected 32G size */
u64 dvma_base; /* ranges[3].base */
u64 table_size; /* IOTSB table size */
u64 page_size; /* IO PAGE size for IOTSB */
u32 iotsb_num; /* tsbnum is same as iotsb_handle */
};
struct atu_ranges {
u64 base;
u64 size;
};
struct atu {
struct atu_ranges *ranges;
struct atu_iotsb *iotsb;
struct iommu_map_table tbl;
u64 base;
u64 size;
u64 dma_addr_mask;
};
struct iommu {
struct iommu_map_table tbl;
struct atu *atu;
spinlock_t lock;
u32 dma_addr_mask;
iopte_t *page_table;
......
......@@ -39,6 +39,7 @@ static struct api_info api_table[] = {
{ .group = HV_GRP_SDIO, },
{ .group = HV_GRP_SDIO_ERR, },
{ .group = HV_GRP_REBOOT_DATA, },
{ .group = HV_GRP_ATU, .flags = FLAG_PRE_API },
{ .group = HV_GRP_NIAG_PERF, .flags = FLAG_PRE_API },
{ .group = HV_GRP_FIRE_PERF, },
{ .group = HV_GRP_N2_CPU, },
......
......@@ -760,8 +760,12 @@ int dma_supported(struct device *dev, u64 device_mask)
struct iommu *iommu = dev->archdata.iommu;
u64 dma_addr_mask = iommu->dma_addr_mask;
if (device_mask >= (1UL << 32UL))
if (device_mask > DMA_BIT_MASK(32)) {
if (iommu->atu)
dma_addr_mask = iommu->atu->dma_addr_mask;
else
return 0;
}
if ((device_mask & dma_addr_mask) == dma_addr_mask)
return 1;
......
This diff is collapsed.
......@@ -89,4 +89,25 @@ unsigned long pci_sun4v_msg_setvalid(unsigned long devhandle,
unsigned long msinum,
unsigned long valid);
/* Sun4v HV IOMMU v2 APIs */
unsigned long pci_sun4v_iotsb_conf(unsigned long devhandle,
unsigned long ra,
unsigned long table_size,
unsigned long page_size,
unsigned long dvma_base,
u64 *iotsb_num);
unsigned long pci_sun4v_iotsb_bind(unsigned long devhandle,
unsigned long iotsb_num,
unsigned int pci_device);
unsigned long pci_sun4v_iotsb_map(unsigned long devhandle,
unsigned long iotsb_num,
unsigned long iotsb_index_iottes,
unsigned long io_attributes,
unsigned long io_page_list_pa,
long *mapped);
unsigned long pci_sun4v_iotsb_demap(unsigned long devhandle,
unsigned long iotsb_num,
unsigned long iotsb_index,
unsigned long iottes,
unsigned long *demapped);
#endif /* !(_PCI_SUN4V_H) */
......@@ -360,3 +360,71 @@ ENTRY(pci_sun4v_msg_setvalid)
mov %o0, %o0
ENDPROC(pci_sun4v_msg_setvalid)
/*
* %o0: devhandle
* %o1: r_addr
* %o2: size
* %o3: pagesize
* %o4: virt
* %o5: &iotsb_num/&iotsb_handle
*
* returns %o0: status
* %o1: iotsb_num/iotsb_handle
*/
ENTRY(pci_sun4v_iotsb_conf)
mov %o5, %g1
mov HV_FAST_PCI_IOTSB_CONF, %o5
ta HV_FAST_TRAP
retl
stx %o1, [%g1]
ENDPROC(pci_sun4v_iotsb_conf)
/*
* %o0: devhandle
* %o1: iotsb_num/iotsb_handle
* %o2: pci_device
*
* returns %o0: status
*/
ENTRY(pci_sun4v_iotsb_bind)
mov HV_FAST_PCI_IOTSB_BIND, %o5
ta HV_FAST_TRAP
retl
nop
ENDPROC(pci_sun4v_iotsb_bind)
/*
* %o0: devhandle
* %o1: iotsb_num/iotsb_handle
* %o2: index_count
* %o3: iotte_attributes
* %o4: io_page_list_p
* %o5: &mapped
*
* returns %o0: status
* %o1: #mapped
*/
ENTRY(pci_sun4v_iotsb_map)
mov %o5, %g1
mov HV_FAST_PCI_IOTSB_MAP, %o5
ta HV_FAST_TRAP
retl
stx %o1, [%g1]
ENDPROC(pci_sun4v_iotsb_map)
/*
* %o0: devhandle
* %o1: iotsb_num/iotsb_handle
* %o2: iotsb_index
* %o3: #iottes
* %o4: &demapped
*
* returns %o0: status
* %o1: #demapped
*/
ENTRY(pci_sun4v_iotsb_demap)
mov HV_FAST_PCI_IOTSB_DEMAP, %o5
ta HV_FAST_TRAP
retl
stx %o1, [%o4]
ENDPROC(pci_sun4v_iotsb_demap)
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment