Commit f0248c15, authored by Tushar Dave, committed by David S. Miller

sparc64: Add ATU (new IOMMU) support

ATU (Address Translation Unit) is a new IOMMU in SPARC supported with
Hypervisor IOMMU v2 APIs.

The current SPARC IOMMU supports only 32-bit address ranges and one TSB
per PCIe root complex, which limits DVMA space to 2GB per root complex.
This limit has become a scalability bottleneck now that a typical
10G/40G NIC can consume 300MB-500MB of DVMA space per instance. When the
DVMA resource is exhausted, devices become unusable because the driver
can't allocate DVMA.

ATU removes this bottleneck by allowing the guest OS to create an IOTSB
of size 32G (or more) with the 64-bit address ranges available in ATU HW.
32G is more than enough DVMA space to be shared by all PCIe devices under
a root complex, in contrast to the 2G space provided by the legacy IOMMU.

ATU allows PCIe devices to use 64bit DMA addressing. Devices
which choose to use 32bit DMA mask will continue to work with the
existing legacy IOMMU.
Signed-off-by: Tushar Dave <tushar.n.dave@oracle.com>
Reviewed-by: chris hyser <chris.hyser@oracle.com>
Acked-by: Sowmini Varadhan <sowmini.varadhan@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent c88c545b
This diff is collapsed.
...@@ -24,8 +24,34 @@ struct iommu_arena { ...@@ -24,8 +24,34 @@ struct iommu_arena {
unsigned int limit; unsigned int limit;
}; };
#define ATU_64_SPACE_SIZE 0x800000000 /* 32G */
/* Data structures for SPARC ATU architecture */
/* Bookkeeping for one IOTSB and the DVMA window it maps.  All fields are
 * filled in by pci_sun4v_atu_alloc_iotsb().
 */
struct atu_iotsb {
void *table; /* IOTSB table base virtual addr (from __get_free_pages) */
u64 ra; /* IOTSB table real addr, i.e. __pa(table) */
u64 dvma_size; /* ranges[3].size or OS selected 32G size */
u64 dvma_base; /* ranges[3].base */
u64 table_size; /* IOTSB table size in bytes: 8 bytes per IO page */
u64 page_size; /* IO PAGE size for IOTSB (IO_PAGE_SIZE) */
u32 iotsb_num; /* tsbnum is same as iotsb_handle; returned by
		* pci_sun4v_iotsb_conf()
		*/
};
/* One {base, size} pair as laid out in the "iommu-address-ranges"
 * device-tree property; pci_sun4v_atu_init() overlays an array of these
 * directly on the property data.
 */
struct atu_ranges {
u64 base; /* start of the address range */
u64 size; /* length of the address range */
};
/* Per-IOMMU ATU state, hung off struct iommu via its 'atu' pointer. */
struct atu {
struct atu_ranges *ranges; /* "iommu-address-ranges" property data */
struct atu_iotsb *iotsb; /* IOTSB created by pci_sun4v_atu_alloc_iotsb() */
u64 base; /* DVMA window base, taken from ranges[3].base */
u64 size; /* DVMA window size, set to ATU_64_SPACE_SIZE */
};
struct iommu { struct iommu {
struct iommu_map_table tbl; struct iommu_map_table tbl;
struct atu *atu;
spinlock_t lock; spinlock_t lock;
u32 dma_addr_mask; u32 dma_addr_mask;
iopte_t *page_table; iopte_t *page_table;
......
...@@ -39,6 +39,7 @@ static struct api_info api_table[] = { ...@@ -39,6 +39,7 @@ static struct api_info api_table[] = {
{ .group = HV_GRP_SDIO, }, { .group = HV_GRP_SDIO, },
{ .group = HV_GRP_SDIO_ERR, }, { .group = HV_GRP_SDIO_ERR, },
{ .group = HV_GRP_REBOOT_DATA, }, { .group = HV_GRP_REBOOT_DATA, },
{ .group = HV_GRP_ATU, .flags = FLAG_PRE_API },
{ .group = HV_GRP_NIAG_PERF, .flags = FLAG_PRE_API }, { .group = HV_GRP_NIAG_PERF, .flags = FLAG_PRE_API },
{ .group = HV_GRP_FIRE_PERF, }, { .group = HV_GRP_FIRE_PERF, },
{ .group = HV_GRP_N2_CPU, }, { .group = HV_GRP_N2_CPU, },
......
...@@ -44,6 +44,9 @@ static struct vpci_version vpci_versions[] = { ...@@ -44,6 +44,9 @@ static struct vpci_version vpci_versions[] = {
{ .major = 1, .minor = 1 }, { .major = 1, .minor = 1 },
}; };
/* ATU hypervisor API version handed to
 * sun4v_hvapi_register(HV_GRP_ATU, vatu_major, &vatu_minor) in
 * pci_sun4v_probe().  vatu_minor is passed by address, so the call can
 * presumably update it to the negotiated minor -- TODO confirm.
 */
static unsigned long vatu_major = 1;
static unsigned long vatu_minor = 1;
#define PGLIST_NENTS (PAGE_SIZE / sizeof(u64)) #define PGLIST_NENTS (PAGE_SIZE / sizeof(u64))
struct iommu_batch { struct iommu_batch {
...@@ -581,6 +584,107 @@ static unsigned long probe_existing_entries(struct pci_pbm_info *pbm, ...@@ -581,6 +584,107 @@ static unsigned long probe_existing_entries(struct pci_pbm_info *pbm,
return cnt; return cnt;
} }
/* Allocate the IOTSB for @pbm's ATU and register it with the hypervisor.
 *
 * The table holds one 8-byte entry per IO page of the ATU DVMA window
 * (atu->size) and is allocated zeroed.  On success the descriptor is
 * published through atu->iotsb.  On any failure everything allocated here
 * is released and atu->iotsb is left untouched, so callers never see a
 * pointer to freed memory.
 *
 * Returns 0 on success, -ENOMEM when an allocation fails, or the non-zero
 * status returned by pci_sun4v_iotsb_conf().
 */
static int pci_sun4v_atu_alloc_iotsb(struct pci_pbm_info *pbm)
{
	struct atu *atu = pbm->iommu->atu;
	struct atu_iotsb *iotsb;
	void *table;
	u64 table_size;
	u64 iotsb_num;
	unsigned long order;
	unsigned long err;

	iotsb = kzalloc(sizeof(*iotsb), GFP_KERNEL);
	if (!iotsb) {
		err = -ENOMEM;
		goto out_err;
	}

	/* calculate size of IOTSB */
	table_size = (atu->size / IO_PAGE_SIZE) * 8;
	order = get_order(table_size);
	table = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, order);
	if (!table) {
		err = -ENOMEM;
		goto table_failed;
	}

	iotsb->table = table;
	iotsb->ra = __pa(table);
	iotsb->dvma_size = atu->size;
	iotsb->dvma_base = atu->base;
	iotsb->table_size = table_size;
	iotsb->page_size = IO_PAGE_SIZE;

	/* configure and register IOTSB with HV */
	err = pci_sun4v_iotsb_conf(pbm->devhandle,
				   iotsb->ra,
				   iotsb->table_size,
				   iotsb->page_size,
				   iotsb->dvma_base,
				   &iotsb_num);
	if (err) {
		pr_err(PFX "pci_iotsb_conf failed error: %ld\n", err);
		goto iotsb_conf_failed;
	}
	iotsb->iotsb_num = iotsb_num;

	/* Publish only once the IOTSB is fully set up and registered, so
	 * the error paths below cannot leave atu->iotsb dangling.
	 */
	atu->iotsb = iotsb;

	return 0;

iotsb_conf_failed:
	free_pages((unsigned long)table, order);
table_failed:
	kfree(iotsb);
out_err:
	return err;
}
static int pci_sun4v_atu_init(struct pci_pbm_info *pbm)
{
struct atu *atu = pbm->iommu->atu;
unsigned long err;
const u64 *ranges;
const u32 *page_size;
int len;
ranges = of_get_property(pbm->op->dev.of_node, "iommu-address-ranges",
&len);
if (!ranges) {
pr_err(PFX "No iommu-address-ranges\n");
return -EINVAL;
}
page_size = of_get_property(pbm->op->dev.of_node, "iommu-pagesizes",
NULL);
if (!page_size) {
pr_err(PFX "No iommu-pagesizes\n");
return -EINVAL;
}
/* There are 4 iommu-address-ranges supported. Each range is pair of
* {base, size}. The ranges[0] and ranges[1] are 32bit address space
* while ranges[2] and ranges[3] are 64bit space. We want to use 64bit
* address ranges to support 64bit addressing. Because 'size' for
* address ranges[2] and ranges[3] are same we can select either of
* ranges[2] or ranges[3] for mapping. However due to 'size' is too
* large for OS to allocate IOTSB we are using fix size 32G
* (ATU_64_SPACE_SIZE) which is more than enough for all PCIe devices
* to share.
*/
atu->ranges = (struct atu_ranges *)ranges;
atu->base = atu->ranges[3].base;
atu->size = ATU_64_SPACE_SIZE;
/* Create IOTSB */
err = pci_sun4v_atu_alloc_iotsb(pbm);
if (err) {
pr_err(PFX "Error creating ATU IOTSB\n");
return err;
}
return 0;
}
static int pci_sun4v_iommu_init(struct pci_pbm_info *pbm) static int pci_sun4v_iommu_init(struct pci_pbm_info *pbm)
{ {
static const u32 vdma_default[] = { 0x80000000, 0x80000000 }; static const u32 vdma_default[] = { 0x80000000, 0x80000000 };
...@@ -918,6 +1022,18 @@ static int pci_sun4v_pbm_init(struct pci_pbm_info *pbm, ...@@ -918,6 +1022,18 @@ static int pci_sun4v_pbm_init(struct pci_pbm_info *pbm,
pci_sun4v_scan_bus(pbm, &op->dev); pci_sun4v_scan_bus(pbm, &op->dev);
/* if atu_init fails its not complete failure.
* we can still continue using legacy iommu.
*/
if (pbm->iommu->atu) {
err = pci_sun4v_atu_init(pbm);
if (err) {
kfree(pbm->iommu->atu);
pbm->iommu->atu = NULL;
pr_err(PFX "ATU init failed, err=%d\n", err);
}
}
pbm->next = pci_pbm_root; pbm->next = pci_pbm_root;
pci_pbm_root = pbm; pci_pbm_root = pbm;
...@@ -931,8 +1047,10 @@ static int pci_sun4v_probe(struct platform_device *op) ...@@ -931,8 +1047,10 @@ static int pci_sun4v_probe(struct platform_device *op)
struct pci_pbm_info *pbm; struct pci_pbm_info *pbm;
struct device_node *dp; struct device_node *dp;
struct iommu *iommu; struct iommu *iommu;
struct atu *atu;
u32 devhandle; u32 devhandle;
int i, err = -ENODEV; int i, err = -ENODEV;
static bool hv_atu = true;
dp = op->dev.of_node; dp = op->dev.of_node;
...@@ -954,6 +1072,19 @@ static int pci_sun4v_probe(struct platform_device *op) ...@@ -954,6 +1072,19 @@ static int pci_sun4v_probe(struct platform_device *op)
pr_info(PFX "Registered hvapi major[%lu] minor[%lu]\n", pr_info(PFX "Registered hvapi major[%lu] minor[%lu]\n",
vpci_major, vpci_minor); vpci_major, vpci_minor);
err = sun4v_hvapi_register(HV_GRP_ATU, vatu_major, &vatu_minor);
if (err) {
/* don't return an error if we fail to register the
* ATU group, but ATU hcalls won't be available.
*/
hv_atu = false;
pr_err(PFX "Could not register hvapi ATU err=%d\n",
err);
} else {
pr_info(PFX "Registered hvapi ATU major[%lu] minor[%lu]\n",
vatu_major, vatu_minor);
}
dma_ops = &sun4v_dma_ops; dma_ops = &sun4v_dma_ops;
} }
...@@ -991,6 +1122,14 @@ static int pci_sun4v_probe(struct platform_device *op) ...@@ -991,6 +1122,14 @@ static int pci_sun4v_probe(struct platform_device *op)
} }
pbm->iommu = iommu; pbm->iommu = iommu;
iommu->atu = NULL;
if (hv_atu) {
atu = kzalloc(sizeof(*atu), GFP_KERNEL);
if (!atu)
pr_err(PFX "Could not allocate atu\n");
else
iommu->atu = atu;
}
err = pci_sun4v_pbm_init(pbm, op, devhandle); err = pci_sun4v_pbm_init(pbm, op, devhandle);
if (err) if (err)
...@@ -1001,6 +1140,7 @@ static int pci_sun4v_probe(struct platform_device *op) ...@@ -1001,6 +1140,7 @@ static int pci_sun4v_probe(struct platform_device *op)
return 0; return 0;
out_free_iommu: out_free_iommu:
kfree(iommu->atu);
kfree(pbm->iommu); kfree(pbm->iommu);
out_free_controller: out_free_controller:
......
...@@ -89,4 +89,11 @@ unsigned long pci_sun4v_msg_setvalid(unsigned long devhandle, ...@@ -89,4 +89,11 @@ unsigned long pci_sun4v_msg_setvalid(unsigned long devhandle,
unsigned long msinum, unsigned long msinum,
unsigned long valid); unsigned long valid);
/* Sun4v HV IOMMU v2 APIs */
/* Configure and register an IOTSB with the hypervisor.
 *
 * @devhandle:  device handle of the PCI root complex (pbm->devhandle)
 * @ra:         real address of the IOTSB table
 * @table_size: size of the IOTSB table
 * @page_size:  IO page size used by the IOTSB
 * @dvma_base:  base DVMA address covered by the IOTSB
 * @iotsb_num:  out: IOTSB number/handle returned by the hypervisor
 *
 * Returns the hypervisor status; callers in this file treat non-zero as
 * failure.
 */
unsigned long pci_sun4v_iotsb_conf(unsigned long devhandle,
unsigned long ra,
unsigned long table_size,
unsigned long page_size,
unsigned long dvma_base,
u64 *iotsb_num);
#endif /* !(_PCI_SUN4V_H) */ #endif /* !(_PCI_SUN4V_H) */
...@@ -360,3 +360,21 @@ ENTRY(pci_sun4v_msg_setvalid) ...@@ -360,3 +360,21 @@ ENTRY(pci_sun4v_msg_setvalid)
mov %o0, %o0 mov %o0, %o0
ENDPROC(pci_sun4v_msg_setvalid) ENDPROC(pci_sun4v_msg_setvalid)
/*
 * pci_sun4v_iotsb_conf: issue the HV_FAST_PCI_IOTSB_CONF fast trap.
 *
 * %o0: devhandle
 * %o1: r_addr
 * %o2: size
 * %o3: pagesize
 * %o4: virt
 * %o5: &iotsb_num/&iotsb_handle
 *
 * returns %o0: status
 * %o1: iotsb_num/iotsb_handle
 *
 * The handle in %o1 is written through the caller's pointer
 * unconditionally; there is no check of the status in %o0 here.
 */
ENTRY(pci_sun4v_iotsb_conf)
/* %o5 is about to be reused for the function number, so keep the
 * caller's output pointer in %g1.
 */
mov %o5, %g1
mov HV_FAST_PCI_IOTSB_CONF, %o5
ta HV_FAST_TRAP
retl
/* executes in the retl delay slot: *iotsb_num = %o1 */
stx %o1, [%g1]
ENDPROC(pci_sun4v_iotsb_conf)
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment