Commit e269b085 authored by Anton Blanchard, committed by Linus Torvalds

iommu: inline iommu_num_pages

A profile of a network benchmark showed iommu_num_pages rather high up:

     0.52%  iommu_num_pages

Looking at the profile, an integer divide is taking almost all of the time:

      %
         :      c000000000376ea4 <.iommu_num_pages>:
    1.93 :      c000000000376ea4:       fb e1 ff f8     std     r31,-8(r1)
    0.00 :      c000000000376ea8:       f8 21 ff c1     stdu    r1,-64(r1)
    0.00 :      c000000000376eac:       7c 3f 0b 78     mr      r31,r1
    3.86 :      c000000000376eb0:       38 84 ff ff     addi    r4,r4,-1
    0.00 :      c000000000376eb4:       38 05 ff ff     addi    r0,r5,-1
    0.00 :      c000000000376eb8:       7c 84 2a 14     add     r4,r4,r5
   46.95 :      c000000000376ebc:       7c 00 18 38     and     r0,r0,r3
   45.66 :      c000000000376ec0:       7c 84 02 14     add     r4,r4,r0
    0.00 :      c000000000376ec4:       7c 64 2b 92     divdu   r3,r4,r5
    0.00 :      c000000000376ec8:       38 3f 00 40     addi    r1,r31,64
    0.00 :      c000000000376ecc:       eb e1 ff f8     ld      r31,-8(r1)
    1.61 :      c000000000376ed0:       4e 80 00 20     blr

Since every caller of iommu_num_pages passes in a constant power of two
we can inline this such that the divide is replaced by a shift. The
entire function is only a few instructions once optimised, so it is
a good candidate for inlining overall.
Signed-off-by: Anton Blanchard <anton@samba.org>
Cc: Akinobu Mita <akinobu.mita@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent 85c9fe8f
#ifndef _LINUX_IOMMU_HELPER_H #ifndef _LINUX_IOMMU_HELPER_H
#define _LINUX_IOMMU_HELPER_H #define _LINUX_IOMMU_HELPER_H
#include <linux/kernel.h>
static inline unsigned long iommu_device_max_index(unsigned long size, static inline unsigned long iommu_device_max_index(unsigned long size,
unsigned long offset, unsigned long offset,
u64 dma_mask) u64 dma_mask)
...@@ -20,7 +22,13 @@ extern unsigned long iommu_area_alloc(unsigned long *map, unsigned long size, ...@@ -20,7 +22,13 @@ extern unsigned long iommu_area_alloc(unsigned long *map, unsigned long size,
unsigned long boundary_size, unsigned long boundary_size,
unsigned long align_mask); unsigned long align_mask);
/**
 * iommu_num_pages - number of IO pages needed to map a buffer
 * @addr: start address of the buffer
 * @len: length of the buffer in bytes
 * @io_page_size: IO page size; callers pass a power of two, so the
 *                offset mask and the divide reduce to bit operations
 *
 * The buffer may start at an arbitrary offset within its first IO
 * page, so that offset is added to @len before rounding the total up
 * to whole IO pages. Defined static inline (this commit's change) so
 * the constant-power-of-two divide becomes a shift at each call site.
 */
static inline unsigned long iommu_num_pages(unsigned long addr,
					    unsigned long len,
					    unsigned long io_page_size)
{
	unsigned long span = len + (addr & (io_page_size - 1));

	return DIV_ROUND_UP(span, io_page_size);
}
#endif #endif
...@@ -38,12 +38,3 @@ unsigned long iommu_area_alloc(unsigned long *map, unsigned long size, ...@@ -38,12 +38,3 @@ unsigned long iommu_area_alloc(unsigned long *map, unsigned long size,
return -1; return -1;
} }
EXPORT_SYMBOL(iommu_area_alloc); EXPORT_SYMBOL(iommu_area_alloc);
/*
 * Out-of-line iommu_num_pages: round a buffer (start address plus
 * length) up to whole IO pages, including the offset of the start
 * address within its first IO page. This definition is removed by
 * the commit in favour of a static inline in iommu-helper.h.
 */
unsigned long iommu_num_pages(unsigned long addr, unsigned long len,
			      unsigned long io_page_size)
{
	return DIV_ROUND_UP(len + (addr & (io_page_size - 1)),
			    io_page_size);
}
EXPORT_SYMBOL(iommu_num_pages);
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment.