Commit 1f221a0d authored by Christoph Hellwig, committed by Konrad Rzeszutek Wilk

swiotlb: respect min_align_mask

Respect the min_align_mask in struct device_dma_parameters in swiotlb.

There are two parts to it (see the sketch after this list):
 1) for the lower bits of the alignment inside the io tlb slot, just
    extend the size of the allocation and leave the start of the slot
    empty
 2) for the high bits ensure we find a slot that matches the high bits
    of the alignment to avoid wasting too much memory
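
To make that two-part split concrete, here is a small user-space sketch (not part of the patch) that mirrors what the new swiotlb_align_offset() helper and the iotlb_align_mask in find_slots() compute. IO_TLB_SHIFT, the example mask and the addresses below are illustrative assumptions only.

/*
 * Illustrative sketch only -- not kernel code.  The low bits of
 * min_align_mask (below IO_TLB_SIZE) become an offset into the first
 * bounce slot; the remaining high bits restrict which slot addresses
 * may be used at all.
 */
#include <stdint.h>
#include <stdio.h>

#define IO_TLB_SHIFT	11
#define IO_TLB_SIZE	(1 << IO_TLB_SHIFT)	/* 2 KiB swiotlb slots */

/* mirrors swiotlb_align_offset(): low mask bits -> intra-slot offset */
static unsigned int align_offset(uint64_t min_align_mask, uint64_t orig_addr)
{
	return orig_addr & min_align_mask & (IO_TLB_SIZE - 1);
}

int main(void)
{
	uint64_t min_align_mask = 0xfff;	/* hypothetical 4 KiB - 1 mask */
	uint64_t orig_addr = 0x12345678;	/* original buffer address */
	uint64_t slot = 0x80005000;		/* candidate slot DMA address */

	/* part 1: preserve the low bits by starting the copy at an offset */
	unsigned int offset = align_offset(min_align_mask, orig_addr);

	/* part 2: only accept slots whose high mask bits match orig_addr */
	uint64_t iotlb_align_mask = min_align_mask & ~(uint64_t)(IO_TLB_SIZE - 1);
	int usable = (slot & iotlb_align_mask) == (orig_addr & iotlb_align_mask);

	printf("offset into slot: 0x%x, slot usable: %d\n", offset, usable);
	return 0;
}

In the patch itself, part 1 shows up as the offset added to tlb_addr in swiotlb_tbl_map_single(), and part 2 as the slot-address check at the top of the search loop in find_slots().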

Based on an earlier patch from Jianxiong Gao <jxgao@google.com>.
Signed-off-by: Christoph Hellwig <hch@lst.de>
Acked-by: Jianxiong Gao <jxgao@google.com>
Tested-by: Jianxiong Gao <jxgao@google.com>
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
parent 16fc3cef
@@ -470,6 +470,14 @@ static void swiotlb_bounce(phys_addr_t orig_addr, phys_addr_t tlb_addr,
 
 #define slot_addr(start, idx)	((start) + ((idx) << IO_TLB_SHIFT))
 
+/*
+ * Return the offset into a iotlb slot required to keep the device happy.
+ */
+static unsigned int swiotlb_align_offset(struct device *dev, u64 addr)
+{
+	return addr & dma_get_min_align_mask(dev) & (IO_TLB_SIZE - 1);
+}
+
 /*
  * Carefully handle integer overflow which can occur when boundary_mask == ~0UL.
  */
@@ -491,24 +499,29 @@ static unsigned int wrap_index(unsigned int index)
  * Find a suitable number of IO TLB entries size that will fit this request and
  * allocate a buffer from that IO TLB pool.
  */
-static int find_slots(struct device *dev, size_t alloc_size)
+static int find_slots(struct device *dev, phys_addr_t orig_addr,
+		size_t alloc_size)
 {
 	unsigned long boundary_mask = dma_get_seg_boundary(dev);
 	dma_addr_t tbl_dma_addr =
 		phys_to_dma_unencrypted(dev, io_tlb_start) & boundary_mask;
 	unsigned long max_slots = get_max_slots(boundary_mask);
-	unsigned int nslots = nr_slots(alloc_size), stride = 1;
+	unsigned int iotlb_align_mask =
+		dma_get_min_align_mask(dev) & ~(IO_TLB_SIZE - 1);
+	unsigned int nslots = nr_slots(alloc_size), stride;
 	unsigned int index, wrap, count = 0, i;
 	unsigned long flags;
 
 	BUG_ON(!nslots);
 
 	/*
-	 * For mappings greater than or equal to a page, we limit the stride
-	 * (and hence alignment) to a page size.
+	 * For mappings with an alignment requirement don't bother looping to
+	 * unaligned slots once we found an aligned one.  For allocations of
+	 * PAGE_SIZE or larger only look for page aligned allocations.
 	 */
+	stride = (iotlb_align_mask >> IO_TLB_SHIFT) + 1;
 	if (alloc_size >= PAGE_SIZE)
-		stride <<= (PAGE_SHIFT - IO_TLB_SHIFT);
+		stride = max(stride, stride << (PAGE_SHIFT - IO_TLB_SHIFT));
 
 	spin_lock_irqsave(&io_tlb_lock, flags);
 	if (unlikely(nslots > io_tlb_nslabs - io_tlb_used))
@@ -516,6 +529,12 @@ static int find_slots(struct device *dev, size_t alloc_size)
 
 	index = wrap = wrap_index(ALIGN(io_tlb_index, stride));
 	do {
+		if ((slot_addr(tbl_dma_addr, index) & iotlb_align_mask) !=
+		    (orig_addr & iotlb_align_mask)) {
+			index = wrap_index(index + 1);
+			continue;
+		}
+
 		/*
 		 * If we find a slot that indicates we have 'nslots' number of
 		 * contiguous buffers, we allocate the buffers from that slot
@@ -559,6 +578,7 @@ phys_addr_t swiotlb_tbl_map_single(struct device *dev, phys_addr_t orig_addr,
 		size_t mapping_size, size_t alloc_size,
 		enum dma_data_direction dir, unsigned long attrs)
 {
+	unsigned int offset = swiotlb_align_offset(dev, orig_addr);
 	unsigned int index, i;
 	phys_addr_t tlb_addr;
 
@@ -574,7 +594,7 @@ phys_addr_t swiotlb_tbl_map_single(struct device *dev, phys_addr_t orig_addr,
 		return (phys_addr_t)DMA_MAPPING_ERROR;
 	}
 
-	index = find_slots(dev, alloc_size);
+	index = find_slots(dev, orig_addr, alloc_size + offset);
 	if (index == -1) {
 		if (!(attrs & DMA_ATTR_NO_WARN))
 			dev_warn_ratelimited(dev,
@@ -588,10 +608,10 @@ phys_addr_t swiotlb_tbl_map_single(struct device *dev, phys_addr_t orig_addr,
 	 * This is needed when we sync the memory.  Then we sync the buffer if
 	 * needed.
 	 */
-	for (i = 0; i < nr_slots(alloc_size); i++)
+	for (i = 0; i < nr_slots(alloc_size + offset); i++)
 		io_tlb_orig_addr[index + i] = slot_addr(orig_addr, i);
-	tlb_addr = slot_addr(io_tlb_start, index);
+	tlb_addr = slot_addr(io_tlb_start, index) + offset;
 
 	if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) &&
 	    (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL))
 		swiotlb_bounce(orig_addr, tlb_addr, mapping_size, DMA_TO_DEVICE);
@@ -606,8 +626,9 @@ void swiotlb_tbl_unmap_single(struct device *hwdev, phys_addr_t tlb_addr,
 			      enum dma_data_direction dir, unsigned long attrs)
 {
 	unsigned long flags;
-	int i, count, nslots = nr_slots(alloc_size);
-	int index = (tlb_addr - io_tlb_start) >> IO_TLB_SHIFT;
+	unsigned int offset = swiotlb_align_offset(hwdev, tlb_addr);
+	int i, count, nslots = nr_slots(alloc_size + offset);
+	int index = (tlb_addr - offset - io_tlb_start) >> IO_TLB_SHIFT;
 	phys_addr_t orig_addr = io_tlb_orig_addr[index];
 
 	/*
...
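
For context, a consumer of this behaviour simply declares its alignment requirement through the DMA API and swiotlb then honours it on every bounce. The sketch below assumes the companion change in this series that adds a dma_set_min_align_mask() helper for min_align_mask in struct device_dma_parameters; the driver, probe signature and the 4 KiB requirement are hypothetical.

/* Hypothetical driver probe: ask for the low 12 address bits to be kept. */
static int foo_probe(struct device *dev)
{
	/*
	 * Hardware that derives an in-page offset from the DMA address
	 * needs bounce buffers to preserve bits [11:0] of the original
	 * buffer address.
	 */
	dma_set_min_align_mask(dev, SZ_4K - 1);
	return 0;
}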