Commit c1a86d3b authored by Andrew Morton, committed by Linus Torvalds

[PATCH] ppc64: Add some POWER5 specific optimisations

From: Anton Blanchard <anton@samba.org>

Add some POWER5 specific optimisations:
- the icache is coherent with the dcache, so there is no need to flush it explicitly
- the global tlbie lock is no longer required
parent 01007b4e
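Both optimisations are gated at run time on CPU feature bits in cur_cpu_spec->cpu_features (CPU_FTR_COHERENT_ICACHE and CPU_FTR_LOCKLESS_TLBIE), which the hunks below test at each flush or invalidate site. The following is a minimal sketch of the two idioms, not part of the patch; the demo_* names are invented for illustration, and tlbie() stands in for the file-local invalidate helper that the patch wraps.

#include <linux/spinlock.h>
#include <linux/mm.h>
#include <asm/cputable.h>
#include <asm/cacheflush.h>

static spinlock_t demo_tlbie_lock = SPIN_LOCK_UNLOCKED;

/* tlbie idiom: only serialise the broadcast TLB invalidate on CPUs
 * that still need the global lock, i.e. lack CPU_FTR_LOCKLESS_TLBIE. */
static void demo_invalidate(unsigned long va, int large)
{
	unsigned long flags = 0;

	if (!(cur_cpu_spec->cpu_features & CPU_FTR_LOCKLESS_TLBIE))
		spin_lock_irqsave(&demo_tlbie_lock, flags);
	tlbie(va, large);
	if (!(cur_cpu_spec->cpu_features & CPU_FTR_LOCKLESS_TLBIE))
		spin_unlock_irqrestore(&demo_tlbie_lock, flags);
}

/* icache idiom: a coherent icache (POWER5) snoops stores, so the lazy
 * icache flush paths can return immediately. */
static void demo_flush_page(struct page *page)
{
	if (cur_cpu_spec->cpu_features & CPU_FTR_COHERENT_ICACHE)
		return;
	__flush_dcache_icache(page_address(page));
}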
@@ -132,7 +132,7 @@ _GLOBAL(flush_instruction_cache)
* flush all bytes from start through stop-1 inclusive
*/
_GLOBAL(flush_icache_range)
_GLOBAL(__flush_icache_range)
/*
* Flush the data cache to memory
......
@@ -221,8 +221,10 @@ static long pSeries_hpte_updatepp(unsigned long slot, unsigned long newpp,
if ((cur_cpu_spec->cpu_features & CPU_FTR_TLBIEL) && !large && local) {
tlbiel(va);
} else {
if (!(cur_cpu_spec->cpu_features & CPU_FTR_LOCKLESS_TLBIE))
spin_lock_irqsave(&pSeries_tlbie_lock, flags);
tlbie(va, large);
if (!(cur_cpu_spec->cpu_features & CPU_FTR_LOCKLESS_TLBIE))
spin_unlock_irqrestore(&pSeries_tlbie_lock, flags);
}
@@ -255,8 +257,10 @@ static void pSeries_hpte_updateboltedpp(unsigned long newpp, unsigned long ea)
set_pp_bit(newpp, hptep);
/* Ensure it is out of the tlb too */
if (!(cur_cpu_spec->cpu_features & CPU_FTR_LOCKLESS_TLBIE))
spin_lock_irqsave(&pSeries_tlbie_lock, flags);
tlbie(va, 0);
if (!(cur_cpu_spec->cpu_features & CPU_FTR_LOCKLESS_TLBIE))
spin_unlock_irqrestore(&pSeries_tlbie_lock, flags);
}
@@ -287,8 +291,10 @@ static void pSeries_hpte_invalidate(unsigned long slot, unsigned long va,
if ((cur_cpu_spec->cpu_features & CPU_FTR_TLBIEL) && !large && local) {
tlbiel(va);
} else {
if (!(cur_cpu_spec->cpu_features & CPU_FTR_LOCKLESS_TLBIE))
spin_lock_irqsave(&pSeries_tlbie_lock, flags);
tlbie(va, large);
if (!(cur_cpu_spec->cpu_features & CPU_FTR_LOCKLESS_TLBIE))
spin_unlock_irqrestore(&pSeries_tlbie_lock, flags);
}
}
@@ -356,6 +362,7 @@ static void pSeries_flush_hash_range(unsigned long context,
asm volatile("ptesync":::"memory");
} else {
/* XXX double check that it is safe to take this late */
if (!(cur_cpu_spec->cpu_features & CPU_FTR_LOCKLESS_TLBIE))
spin_lock_irqsave(&pSeries_tlbie_lock, flags);
asm volatile("ptesync":::"memory");
@@ -365,6 +372,7 @@ static void pSeries_flush_hash_range(unsigned long context,
asm volatile("eieio; tlbsync; ptesync":::"memory");
if (!(cur_cpu_spec->cpu_features & CPU_FTR_LOCKLESS_TLBIE))
spin_unlock_irqrestore(&pSeries_tlbie_lock, flags);
}
}
@@ -384,8 +392,12 @@ void hpte_init_pSeries(void)
root = of_find_node_by_path("/");
if (root) {
model = get_property(root, "model", NULL);
if (strcmp(model, "CHRP IBM,9076-N81"))
ppc_md.flush_hash_range = pSeries_flush_hash_range;
if (!strcmp(model, "CHRP IBM,9076-N81")) {
of_node_put(root);
return;
}
of_node_put(root);
}
ppc_md.flush_hash_range = pSeries_flush_hash_range;
}
@@ -21,6 +21,7 @@
#include <linux/config.h>
#include <linux/kernel.h>
#include <linux/pci.h>
#include <asm/processor.h>
#include <asm/mmu.h>
#include <asm/page.h>
@@ -30,13 +31,13 @@
#include <asm/mmu_context.h>
#include <asm/ppcdebug.h>
#include <asm/iommu.h>
#include <linux/pci.h>
#include <asm/naca.h>
#include <asm/tlbflush.h>
#include <asm/tlb.h>
#include <asm/hvcall.h>
#include <asm/prom.h>
#include <asm/abs_addr.h>
#include <asm/cputable.h>
/* in pSeries_hvCall.S */
EXPORT_SYMBOL(plpar_hcall);
@@ -146,7 +147,7 @@ static void tce_build_pSeriesLP(struct iommu_table *tbl, long tcenum, long npage
(u64)tcenum << 12,
tce.te_word );
if(rc && printk_ratelimit()) {
if (rc && printk_ratelimit()) {
printk("tce_build_pSeriesLP: plpar_tce_put failed. rc=%ld\n", rc);
printk("\tindex = 0x%lx\n", (u64)tbl->it_index);
printk("\ttcenum = 0x%lx\n", (u64)tcenum);
@@ -559,11 +560,13 @@ void pSeries_lpar_flush_hash_range(unsigned long context, unsigned long number,
unsigned long flags;
struct ppc64_tlb_batch *batch = &__get_cpu_var(ppc64_tlb_batch);
if (!(cur_cpu_spec->cpu_features & CPU_FTR_LOCKLESS_TLBIE))
spin_lock_irqsave(&pSeries_lpar_tlbie_lock, flags);
for (i = 0; i < number; i++)
flush_hash_page(context, batch->addr[i], batch->pte[i], local);
if (!(cur_cpu_spec->cpu_features & CPU_FTR_LOCKLESS_TLBIE))
spin_unlock_irqrestore(&pSeries_lpar_tlbie_lock, flags);
}
......
@@ -125,11 +125,13 @@ _GLOBAL(__hash_page)
/* We eventually do the icache sync here (maybe inline that
* code rather than call a C function...)
*/
BEGIN_FTR_SECTION
BEGIN_FTR_SECTION
mr r4,r30
mr r5,r7
bl .hash_page_do_lazy_icache
END_FTR_SECTION_IFSET(CPU_FTR_NOEXECUTE)
END_FTR_SECTION_IFCLR(CPU_FTR_COHERENT_ICACHE)
/* At this point, r3 contains new PP bits, save them in
* place of "access" in the param area (sic)
......
@@ -696,6 +696,8 @@ void __init mem_init(void)
*/
void flush_dcache_page(struct page *page)
{
if (cur_cpu_spec->cpu_features & CPU_FTR_COHERENT_ICACHE)
return;
/* avoid an atomic op if possible */
if (test_bit(PG_arch_1, &page->flags))
clear_bit(PG_arch_1, &page->flags);
@@ -705,6 +707,8 @@ void clear_user_page(void *page, unsigned long vaddr, struct page *pg)
{
clear_page(page);
if (cur_cpu_spec->cpu_features & CPU_FTR_COHERENT_ICACHE)
return;
/*
* We shouldnt have to do this, but some versions of glibc
* require it (ld.so assumes zero filled pages are icache clean)
@@ -736,6 +740,9 @@ void copy_user_page(void *vto, void *vfrom, unsigned long vaddr,
return;
#endif
if (cur_cpu_spec->cpu_features & CPU_FTR_COHERENT_ICACHE)
return;
/* avoid an atomic op if possible */
if (test_bit(PG_arch_1, &pg->flags))
clear_bit(PG_arch_1, &pg->flags);
@@ -768,7 +775,8 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long ea,
cpumask_t tmp;
/* handle i-cache coherency */
if (!(cur_cpu_spec->cpu_features & CPU_FTR_NOEXECUTE)) {
if (!(cur_cpu_spec->cpu_features & CPU_FTR_COHERENT_ICACHE) &&
!(cur_cpu_spec->cpu_features & CPU_FTR_NOEXECUTE)) {
unsigned long pfn = pte_pfn(pte);
if (pfn_valid(pfn)) {
struct page *page = pfn_to_page(pfn);
......
#ifndef _PPC64_CACHEFLUSH_H
#define _PPC64_CACHEFLUSH_H
/* Keep includes the same across arches. */
#include <linux/mm.h>
#include <asm/cputable.h>
/*
* No cache flushing is required when address mappings are
@@ -18,7 +18,7 @@
#define flush_cache_vunmap(start, end) do { } while (0)
extern void flush_dcache_page(struct page *page);
extern void flush_icache_range(unsigned long, unsigned long);
extern void __flush_icache_range(unsigned long, unsigned long);
extern void flush_icache_user_range(struct vm_area_struct *vma,
struct page *page, unsigned long addr,
int len);
@@ -35,4 +35,10 @@ do { memcpy(dst, src, len); \
extern void __flush_dcache_icache(void *page_va);
static inline void flush_icache_range(unsigned long start, unsigned long stop)
{
if (!(cur_cpu_spec->cpu_features & CPU_FTR_COHERENT_ICACHE))
__flush_icache_range(start, stop);
}
#endif /* _PPC64_CACHEFLUSH_H */
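At call sites nothing changes: generic code keeps calling flush_icache_range(), and the inline above reduces it to a feature-bit test on POWER5, while older CPUs fall through to __flush_icache_range(). A hypothetical caller, for illustration only and not part of the patch:

#include <asm/cacheflush.h>

/* Patch one instruction and make sure the icache sees it.  On
 * CPU_FTR_COHERENT_ICACHE parts the inline flush_icache_range()
 * returns straight away; elsewhere it calls __flush_icache_range(),
 * which performs the real dcbst/sync/icbi/isync sequence. */
static void demo_patch_insn(unsigned int *addr, unsigned int insn)
{
	*addr = insn;
	flush_icache_range((unsigned long)addr,
			   (unsigned long)(addr + 1));
}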