Commit 14001c60 authored by Nicholas Piggin, committed by Michael Ellerman

powerpc/64s/radix: Optimize TLB range flush barriers

Short range flushes issue a sequence of tlbie(l) instructions for
individual effective addresses. These do not all require individual
barrier sequences; a single barrier sequence covering all of the
tlbie(l) instructions is enough.

Commit f7327e0b ("powerpc/mm/radix: Remove unnecessary ptesync")
made a similar optimization for tlbiel PID flushes.

For tlbie, the ISA says:

    The tlbsync instruction provides an ordering function for the
    effects of all tlbie instructions executed by the thread executing
    the tlbsync instruction, with respect to the memory barrier
    created by a subsequent ptesync instruction executed by the same
    thread.
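
To make the barrier placement concrete, here is a minimal sketch (not
the kernel code itself) of the pattern the patch moves to, written as
freestanding powerpc C. issue_tlbie() and flush_range() are
hypothetical stand-ins for the PPC_TLBIEL/PPC_TLBIE_5 inline asm and
its caller; only the barrier placement is the point:

/*
 * Sketch only: one opening ptesync covers the whole batch of tlbie(l)
 * instructions, and one closing sequence completes it, instead of
 * bracketing every instruction individually.
 */
static inline void issue_tlbie(unsigned long va, int local)
{
        /* stand-in for a bare tlbie(l) of one effective address */
        (void)va;
        (void)local;
}

static void flush_range(unsigned long start, unsigned long end,
                        unsigned long page_size, int local)
{
        unsigned long addr;

        /* order all prior PTE updates before the whole batch */
        asm volatile("ptesync" : : : "memory");

        for (addr = start; addr < end; addr += page_size)
                issue_tlbie(addr, local);

        /*
         * complete the whole batch at once: a local tlbiel batch needs
         * only ptesync; a global tlbie batch needs tlbsync (ordered by
         * eieio) before the final ptesync, per the ISA text above.
         */
        if (local)
                asm volatile("ptesync" : : : "memory");
        else
                asm volatile("eieio; tlbsync; ptesync" : : : "memory");
}

For a local flush of 30 pages this replaces the roughly 60 ptesync
instructions of per-address bracketing with 2, which is consistent
with the measurements below.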

Time to munmap 30 pages of memory (after mmap, touch):
         local   global
vanilla  10.9us  22.3us
patched   3.4us  14.4us
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
parent a54c61f4
@@ -84,7 +84,7 @@ static inline void _tlbie_pid(unsigned long pid, unsigned long ric)
 	trace_tlbie(0, 0, rb, rs, ric, prs, r);
 }
 
-static inline void _tlbiel_va(unsigned long va, unsigned long pid,
+static inline void __tlbiel_va(unsigned long va, unsigned long pid,
 			      unsigned long ap, unsigned long ric)
 {
 	unsigned long rb,rs,prs,r;
@@ -95,14 +95,20 @@ static inline void _tlbiel_va(unsigned long va, unsigned long pid,
 	prs = 1; /* process scoped */
 	r = 1;   /* raidx format */
 
-	asm volatile("ptesync": : :"memory");
 	asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1)
 		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
-	asm volatile("ptesync": : :"memory");
 	trace_tlbie(0, 1, rb, rs, ric, prs, r);
 }
 
-static inline void _tlbie_va(unsigned long va, unsigned long pid,
+static inline void _tlbiel_va(unsigned long va, unsigned long pid,
+			      unsigned long ap, unsigned long ric)
+{
+	asm volatile("ptesync": : :"memory");
+	__tlbiel_va(va, pid, ap, ric);
+	asm volatile("ptesync": : :"memory");
+}
+
+static inline void __tlbie_va(unsigned long va, unsigned long pid,
 			     unsigned long ap, unsigned long ric)
 {
 	unsigned long rb,rs,prs,r;
@@ -113,13 +119,20 @@ static inline void _tlbie_va(unsigned long va, unsigned long pid,
 	prs = 1; /* process scoped */
 	r = 1;   /* raidx format */
 
-	asm volatile("ptesync": : :"memory");
 	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
 		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
-	asm volatile("eieio; tlbsync; ptesync": : :"memory");
 	trace_tlbie(0, 0, rb, rs, ric, prs, r);
 }
 
+static inline void _tlbie_va(unsigned long va, unsigned long pid,
+			    unsigned long ap, unsigned long ric)
+{
+	asm volatile("ptesync": : :"memory");
+	__tlbie_va(va, pid, ap, ric);
+	asm volatile("eieio; tlbsync; ptesync": : :"memory");
+}
+
 /*
  * Base TLB flushing operations:
  *
@@ -341,13 +354,17 @@ void radix__flush_tlb_range_psize(struct mm_struct *mm, unsigned long start,
 		else
 			_tlbie_pid(pid, RIC_FLUSH_TLB);
 	} else {
+		asm volatile("ptesync": : :"memory");
 		for (addr = start; addr < end; addr += page_size) {
 			if (local)
-				_tlbiel_va(addr, pid, ap, RIC_FLUSH_TLB);
+				__tlbiel_va(addr, pid, ap, RIC_FLUSH_TLB);
 			else
-				_tlbie_va(addr, pid, ap, RIC_FLUSH_TLB);
+				__tlbie_va(addr, pid, ap, RIC_FLUSH_TLB);
 		}
+		if (local)
+			asm volatile("ptesync": : :"memory");
+		else
+			asm volatile("eieio; tlbsync; ptesync": : :"memory");
 	}
 	preempt_enable();
 }
@@ -378,6 +395,7 @@ void radix__flush_tlb_collapsed_pmd(struct mm_struct *mm, unsigned long addr)
 		_tlbie_pid(pid, RIC_FLUSH_PWC);
 
 	/* Then iterate the pages */
+	asm volatile("ptesync": : :"memory");
 	end = addr + HPAGE_PMD_SIZE;
 	for (; addr < end; addr += PAGE_SIZE) {
 		if (local)
@@ -386,6 +404,11 @@ void radix__flush_tlb_collapsed_pmd(struct mm_struct *mm, unsigned long addr)
 			_tlbie_va(addr, pid, ap, RIC_FLUSH_TLB);
 	}
 
+	if (local)
+		asm volatile("ptesync": : :"memory");
+	else
+		asm volatile("eieio; tlbsync; ptesync": : :"memory");
+
 	preempt_enable();
 }
 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */