Commit aa7bde1a authored by Chris Hyser, committed by David S. Miller

sparc64: Enable setting "relaxed ordering" in IOMMU mappings

Enable relaxed ordering for memory writes in IOMMU TSB entry from
dma_4v_alloc_coherent(), dma_4v_map_page() and dma_4v_map_sg() when
dma_attrs DMA_ATTR_WEAK_ORDERING is set. This requires PCI IOMMU I/O
Translation Services version 2.0 API.

Many PCIe devices allow enabling relaxed-ordering (memory writes bypassing
other memory writes) for various DMA buffers. A notable exception is the
Mellanox mlx4 IB adapter. Due to the nature of x86 HW this appears to have
little performance impact there. On SPARC HW, however, this results in major
performance degradation, getting only about 3 Gbps. Enabling RO in the IOMMU
entries corresponding to mlx4 data buffers increases the throughput to
about 13 Gbps.

Orabug: 19245907
Signed-off-by: Chris Hyser <chris.hyser@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent 8914391b
...@@ -1744,6 +1744,7 @@ unsigned long sun4v_vintr_set_target(unsigned long dev_handle, ...@@ -1744,6 +1744,7 @@ unsigned long sun4v_vintr_set_target(unsigned long dev_handle,
#define HV_PCI_MAP_ATTR_READ 0x01 #define HV_PCI_MAP_ATTR_READ 0x01
#define HV_PCI_MAP_ATTR_WRITE 0x02 #define HV_PCI_MAP_ATTR_WRITE 0x02
#define HV_PCI_MAP_ATTR_RELAXED_ORDER 0x04
#define HV_PCI_DEVICE_BUILD(b,d,f) \ #define HV_PCI_DEVICE_BUILD(b,d,f) \
((((b) & 0xff) << 16) | \ ((((b) & 0xff) << 16) | \
......
...@@ -78,6 +78,10 @@ static long iommu_batch_flush(struct iommu_batch *p) ...@@ -78,6 +78,10 @@ static long iommu_batch_flush(struct iommu_batch *p)
u64 *pglist = p->pglist; u64 *pglist = p->pglist;
unsigned long npages = p->npages; unsigned long npages = p->npages;
/* VPCI maj=1, min=[0,1] only supports read and write */
if (vpci_major < 2)
prot &= (HV_PCI_MAP_ATTR_READ | HV_PCI_MAP_ATTR_WRITE);
while (npages != 0) { while (npages != 0) {
long num; long num;
...@@ -144,6 +148,7 @@ static void *dma_4v_alloc_coherent(struct device *dev, size_t size, ...@@ -144,6 +148,7 @@ static void *dma_4v_alloc_coherent(struct device *dev, size_t size,
unsigned long attrs) unsigned long attrs)
{ {
unsigned long flags, order, first_page, npages, n; unsigned long flags, order, first_page, npages, n;
unsigned long prot = 0;
struct iommu *iommu; struct iommu *iommu;
struct page *page; struct page *page;
void *ret; void *ret;
...@@ -157,6 +162,9 @@ static void *dma_4v_alloc_coherent(struct device *dev, size_t size, ...@@ -157,6 +162,9 @@ static void *dma_4v_alloc_coherent(struct device *dev, size_t size,
npages = size >> IO_PAGE_SHIFT; npages = size >> IO_PAGE_SHIFT;
if (attrs & DMA_ATTR_WEAK_ORDERING)
prot = HV_PCI_MAP_ATTR_RELAXED_ORDER;
nid = dev->archdata.numa_node; nid = dev->archdata.numa_node;
page = alloc_pages_node(nid, gfp, order); page = alloc_pages_node(nid, gfp, order);
if (unlikely(!page)) if (unlikely(!page))
...@@ -180,7 +188,7 @@ static void *dma_4v_alloc_coherent(struct device *dev, size_t size, ...@@ -180,7 +188,7 @@ static void *dma_4v_alloc_coherent(struct device *dev, size_t size,
local_irq_save(flags); local_irq_save(flags);
iommu_batch_start(dev, iommu_batch_start(dev,
(HV_PCI_MAP_ATTR_READ | (HV_PCI_MAP_ATTR_READ | prot |
HV_PCI_MAP_ATTR_WRITE), HV_PCI_MAP_ATTR_WRITE),
entry); entry);
...@@ -277,6 +285,9 @@ static dma_addr_t dma_4v_map_page(struct device *dev, struct page *page, ...@@ -277,6 +285,9 @@ static dma_addr_t dma_4v_map_page(struct device *dev, struct page *page,
if (direction != DMA_TO_DEVICE) if (direction != DMA_TO_DEVICE)
prot |= HV_PCI_MAP_ATTR_WRITE; prot |= HV_PCI_MAP_ATTR_WRITE;
if (attrs & DMA_ATTR_WEAK_ORDERING)
prot |= HV_PCI_MAP_ATTR_RELAXED_ORDER;
local_irq_save(flags); local_irq_save(flags);
iommu_batch_start(dev, prot, entry); iommu_batch_start(dev, prot, entry);
...@@ -355,6 +366,9 @@ static int dma_4v_map_sg(struct device *dev, struct scatterlist *sglist, ...@@ -355,6 +366,9 @@ static int dma_4v_map_sg(struct device *dev, struct scatterlist *sglist,
if (direction != DMA_TO_DEVICE) if (direction != DMA_TO_DEVICE)
prot |= HV_PCI_MAP_ATTR_WRITE; prot |= HV_PCI_MAP_ATTR_WRITE;
if (attrs & DMA_ATTR_WEAK_ORDERING)
prot |= HV_PCI_MAP_ATTR_RELAXED_ORDER;
outs = s = segstart = &sglist[0]; outs = s = segstart = &sglist[0];
outcount = 1; outcount = 1;
incount = nelems; incount = nelems;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment