Commit c1a86d3b authored by Andrew Morton, committed by Linus Torvalds

[PATCH] ppc64: Add some POWER5 specific optimisations

From: Anton Blanchard <anton@samba.org>

Add some POWER5 specific optimisations:
- icache is coherent, no need to explicitly flush
- tlbie lock no longer required
parent 01007b4e
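Both optimisations below follow the same run-time pattern: a CPU feature bit (CPU_FTR_COHERENT_ICACHE, CPU_FTR_LOCKLESS_TLBIE) is tested and the now-unnecessary work is skipped on POWER5. The following stand-alone C sketch only illustrates that gating pattern; the bit values and the printf stand-ins for the real icache flush and tlbie/lock operations are illustrative, not kernel code.

/*
 * Minimal user-space sketch of the feature-bit gating used by this patch.
 * CPU_FTR_COHERENT_ICACHE short-circuits icache flushes; CPU_FTR_LOCKLESS_TLBIE
 * skips the global tlbie lock. Bit values and the cpu_features holder below
 * are assumptions for illustration only.
 */
#include <stdio.h>

#define CPU_FTR_COHERENT_ICACHE	(1UL << 0)	/* illustrative bit value */
#define CPU_FTR_LOCKLESS_TLBIE	(1UL << 1)	/* illustrative bit value */

static unsigned long cpu_features;	/* stands in for cur_cpu_spec->cpu_features */

static void flush_icache_range(unsigned long start, unsigned long stop)
{
	/* POWER5: icache is coherent with the dcache, nothing to do */
	if (cpu_features & CPU_FTR_COHERENT_ICACHE)
		return;
	printf("flushing icache %#lx..%#lx\n", start, stop);
}

static void invalidate_tlb_entry(unsigned long va)
{
	/* POWER5: tlbie no longer needs to be serialised across CPUs */
	int need_lock = !(cpu_features & CPU_FTR_LOCKLESS_TLBIE);

	if (need_lock)
		printf("take tlbie lock\n");
	printf("tlbie %#lx\n", va);
	if (need_lock)
		printf("release tlbie lock\n");
}

int main(void)
{
	/* pretend we are on a POWER5 with both features set */
	cpu_features = CPU_FTR_COHERENT_ICACHE | CPU_FTR_LOCKLESS_TLBIE;
	flush_icache_range(0x1000, 0x2000);
	invalidate_tlb_entry(0x1000);
	return 0;
}

In the patch itself the same checks wrap the existing spin_lock_irqsave/spin_unlock_irqrestore calls and the icache flush entry points, as shown in the hunks below.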
@@ -132,7 +132,7 @@ _GLOBAL(flush_instruction_cache)
  * flush all bytes from start through stop-1 inclusive
  */
-_GLOBAL(flush_icache_range)
+_GLOBAL(__flush_icache_range)
 	/*
 	 * Flush the data cache to memory
@@ -221,9 +221,11 @@ static long pSeries_hpte_updatepp(unsigned long slot, unsigned long newpp,
 	if ((cur_cpu_spec->cpu_features & CPU_FTR_TLBIEL) && !large && local) {
 		tlbiel(va);
 	} else {
-		spin_lock_irqsave(&pSeries_tlbie_lock, flags);
+		if (!(cur_cpu_spec->cpu_features & CPU_FTR_LOCKLESS_TLBIE))
+			spin_lock_irqsave(&pSeries_tlbie_lock, flags);
 		tlbie(va, large);
-		spin_unlock_irqrestore(&pSeries_tlbie_lock, flags);
+		if (!(cur_cpu_spec->cpu_features & CPU_FTR_LOCKLESS_TLBIE))
+			spin_unlock_irqrestore(&pSeries_tlbie_lock, flags);
 	}
 
 	return ret;
@@ -255,9 +257,11 @@ static void pSeries_hpte_updateboltedpp(unsigned long newpp, unsigned long ea)
 	set_pp_bit(newpp, hptep);
 
 	/* Ensure it is out of the tlb too */
-	spin_lock_irqsave(&pSeries_tlbie_lock, flags);
+	if (!(cur_cpu_spec->cpu_features & CPU_FTR_LOCKLESS_TLBIE))
+		spin_lock_irqsave(&pSeries_tlbie_lock, flags);
 	tlbie(va, 0);
-	spin_unlock_irqrestore(&pSeries_tlbie_lock, flags);
+	if (!(cur_cpu_spec->cpu_features & CPU_FTR_LOCKLESS_TLBIE))
+		spin_unlock_irqrestore(&pSeries_tlbie_lock, flags);
 }
 
 static void pSeries_hpte_invalidate(unsigned long slot, unsigned long va,
@@ -287,9 +291,11 @@ static void pSeries_hpte_invalidate(unsigned long slot, unsigned long va,
 	if ((cur_cpu_spec->cpu_features & CPU_FTR_TLBIEL) && !large && local) {
 		tlbiel(va);
 	} else {
-		spin_lock_irqsave(&pSeries_tlbie_lock, flags);
+		if (!(cur_cpu_spec->cpu_features & CPU_FTR_LOCKLESS_TLBIE))
+			spin_lock_irqsave(&pSeries_tlbie_lock, flags);
 		tlbie(va, large);
-		spin_unlock_irqrestore(&pSeries_tlbie_lock, flags);
+		if (!(cur_cpu_spec->cpu_features & CPU_FTR_LOCKLESS_TLBIE))
+			spin_unlock_irqrestore(&pSeries_tlbie_lock, flags);
 	}
 }
@@ -356,7 +362,8 @@ static void pSeries_flush_hash_range(unsigned long context,
 		asm volatile("ptesync":::"memory");
 	} else {
 		/* XXX double check that it is safe to take this late */
-		spin_lock_irqsave(&pSeries_tlbie_lock, flags);
+		if (!(cur_cpu_spec->cpu_features & CPU_FTR_LOCKLESS_TLBIE))
+			spin_lock_irqsave(&pSeries_tlbie_lock, flags);
 
 		asm volatile("ptesync":::"memory");
@@ -365,7 +372,8 @@ static void pSeries_flush_hash_range(unsigned long context,
 		asm volatile("eieio; tlbsync; ptesync":::"memory");
 
-		spin_unlock_irqrestore(&pSeries_tlbie_lock, flags);
+		if (!(cur_cpu_spec->cpu_features & CPU_FTR_LOCKLESS_TLBIE))
+			spin_unlock_irqrestore(&pSeries_tlbie_lock, flags);
 	}
 }
@@ -384,8 +392,12 @@ void hpte_init_pSeries(void)
 	root = of_find_node_by_path("/");
 	if (root) {
 		model = get_property(root, "model", NULL);
-		if (strcmp(model, "CHRP IBM,9076-N81"))
-			ppc_md.flush_hash_range = pSeries_flush_hash_range;
+		if (!strcmp(model, "CHRP IBM,9076-N81")) {
+			of_node_put(root);
+			return;
+		}
 		of_node_put(root);
 	}
+
+	ppc_md.flush_hash_range = pSeries_flush_hash_range;
 }
@@ -21,6 +21,7 @@
 #include <linux/config.h>
 #include <linux/kernel.h>
+#include <linux/pci.h>
 #include <asm/processor.h>
 #include <asm/mmu.h>
 #include <asm/page.h>
@@ -30,13 +31,13 @@
 #include <asm/mmu_context.h>
 #include <asm/ppcdebug.h>
 #include <asm/iommu.h>
-#include <linux/pci.h>
 #include <asm/naca.h>
 #include <asm/tlbflush.h>
 #include <asm/tlb.h>
 #include <asm/hvcall.h>
 #include <asm/prom.h>
 #include <asm/abs_addr.h>
+#include <asm/cputable.h>
 
 /* in pSeries_hvCall.S */
 EXPORT_SYMBOL(plpar_hcall);
@@ -146,7 +147,7 @@ static void tce_build_pSeriesLP(struct iommu_table *tbl, long tcenum, long npage
 			(u64)tcenum << 12,
 			tce.te_word );
 
-	if(rc && printk_ratelimit()) {
+	if (rc && printk_ratelimit()) {
 		printk("tce_build_pSeriesLP: plpar_tce_put failed. rc=%ld\n", rc);
 		printk("\tindex = 0x%lx\n", (u64)tbl->it_index);
 		printk("\ttcenum = 0x%lx\n", (u64)tcenum);
@@ -559,12 +560,14 @@ void pSeries_lpar_flush_hash_range(unsigned long context, unsigned long number,
 	unsigned long flags;
 	struct ppc64_tlb_batch *batch = &__get_cpu_var(ppc64_tlb_batch);
 
-	spin_lock_irqsave(&pSeries_lpar_tlbie_lock, flags);
+	if (!(cur_cpu_spec->cpu_features & CPU_FTR_LOCKLESS_TLBIE))
+		spin_lock_irqsave(&pSeries_lpar_tlbie_lock, flags);
 
 	for (i = 0; i < number; i++)
 		flush_hash_page(context, batch->addr[i], batch->pte[i], local);
 
-	spin_unlock_irqrestore(&pSeries_lpar_tlbie_lock, flags);
+	if (!(cur_cpu_spec->cpu_features & CPU_FTR_LOCKLESS_TLBIE))
+		spin_unlock_irqrestore(&pSeries_lpar_tlbie_lock, flags);
 }
 
 void pSeries_lpar_mm_init(void)
@@ -125,11 +125,13 @@ _GLOBAL(__hash_page)
 	/* We eventually do the icache sync here (maybe inline that
 	 * code rather than call a C function...)
 	 */
+BEGIN_FTR_SECTION
 BEGIN_FTR_SECTION
 	mr	r4,r30
 	mr	r5,r7
 	bl	.hash_page_do_lazy_icache
 END_FTR_SECTION_IFSET(CPU_FTR_NOEXECUTE)
+END_FTR_SECTION_IFCLR(CPU_FTR_COHERENT_ICACHE)
 
 	/* At this point, r3 contains new PP bits, save them in
 	 * place of "access" in the param area (sic)
@@ -696,6 +696,8 @@ void __init mem_init(void)
  */
 void flush_dcache_page(struct page *page)
 {
+	if (cur_cpu_spec->cpu_features & CPU_FTR_COHERENT_ICACHE)
+		return;
 	/* avoid an atomic op if possible */
 	if (test_bit(PG_arch_1, &page->flags))
 		clear_bit(PG_arch_1, &page->flags);
@@ -705,6 +707,8 @@ void clear_user_page(void *page, unsigned long vaddr, struct page *pg)
 {
 	clear_page(page);
 
+	if (cur_cpu_spec->cpu_features & CPU_FTR_COHERENT_ICACHE)
+		return;
 	/*
 	 * We shouldnt have to do this, but some versions of glibc
 	 * require it (ld.so assumes zero filled pages are icache clean)
@@ -736,6 +740,9 @@ void copy_user_page(void *vto, void *vfrom, unsigned long vaddr,
 		return;
 #endif
 
+	if (cur_cpu_spec->cpu_features & CPU_FTR_COHERENT_ICACHE)
+		return;
+
 	/* avoid an atomic op if possible */
 	if (test_bit(PG_arch_1, &pg->flags))
 		clear_bit(PG_arch_1, &pg->flags);
@@ -768,7 +775,8 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long ea,
 	cpumask_t tmp;
 
 	/* handle i-cache coherency */
-	if (!(cur_cpu_spec->cpu_features & CPU_FTR_NOEXECUTE)) {
+	if (!(cur_cpu_spec->cpu_features & CPU_FTR_COHERENT_ICACHE) &&
+	    !(cur_cpu_spec->cpu_features & CPU_FTR_NOEXECUTE)) {
 		unsigned long pfn = pte_pfn(pte);
 		if (pfn_valid(pfn)) {
 			struct page *page = pfn_to_page(pfn);
 #ifndef _PPC64_CACHEFLUSH_H
 #define _PPC64_CACHEFLUSH_H
 
-/* Keep includes the same across arches. */
 #include <linux/mm.h>
+#include <asm/cputable.h>
 
 /*
  * No cache flushing is required when address mappings are
@@ -18,7 +18,7 @@
 #define flush_cache_vunmap(start, end)	do { } while (0)
 
 extern void flush_dcache_page(struct page *page);
-extern void flush_icache_range(unsigned long, unsigned long);
+extern void __flush_icache_range(unsigned long, unsigned long);
 extern void flush_icache_user_range(struct vm_area_struct *vma,
 				    struct page *page, unsigned long addr,
 				    int len);
@@ -35,4 +35,10 @@ do { memcpy(dst, src, len); \
 
 extern void __flush_dcache_icache(void *page_va);
 
+static inline void flush_icache_range(unsigned long start, unsigned long stop)
+{
+	if (!(cur_cpu_spec->cpu_features & CPU_FTR_COHERENT_ICACHE))
+		__flush_icache_range(start, stop);
+}
+
 #endif /* _PPC64_CACHEFLUSH_H */