Commit 008d0de4 authored by Andrew Morton, committed by Linus Torvalds

[PATCH] ppc64: __hash_page rewrite, from Ben Herrenschmidt

From: Anton Blanchard <anton@samba.org>

Rewrite the __hash_page function in assembly in such a way that we no
longer need the page table lock. We now rely on a BUSY bit in the Linux
PTE, on which we spin when doing an update of the PTE.
parent d4361b6c
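The idea in a nutshell: _PAGE_BUSY acts as a per-PTE spinlock, taken and released with the CPU's load-reserve/store-conditional instructions, so concurrent hash faults serialize on the PTE itself rather than on mm->page_table_lock. Below is a minimal C sketch of the protocol that the pte_update() asm in include/asm-ppc64/pgtable.h (later in this diff) implements with ldarx/stdcx.; pte_load_reserve() and pte_store_cond() are hypothetical stand-ins for those instructions, not real kernel APIs.

	static unsigned long pte_update_sketch(pte_t *ptep, unsigned long clr,
					       unsigned long set)
	{
		unsigned long old;

		for (;;) {
			old = pte_load_reserve(ptep);	/* ldarx: load + reserve */
			if (old & _PAGE_BUSY)
				continue;		/* spin: another CPU owns this PTE */
			if (pte_store_cond(ptep, (old & ~clr) | set))
				break;			/* stdcx. succeeded */
		}
		return old;	/* caller inspects the prior bits */
	}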
@@ -7,7 +7,7 @@ extra-y := head.o vmlinux.lds.s
obj-y := setup.o entry.o traps.o irq.o idle.o \
time.o process.o signal.o syscalls.o misc.o ptrace.o \
align.o semaphore.o bitops.o stab.o htab.o pacaData.o \
align.o semaphore.o bitops.o stab.o pacaData.o \
udbg.o binfmt_elf32.o sys_ppc32.o ioctl32.o \
ptrace32.o signal32.o pmc.o rtc.o init_task.o \
lmb.o cputable.o
......
@@ -213,6 +213,10 @@ void setup_system(unsigned long r3, unsigned long r4, unsigned long r5,
#endif
}
#endif
/* Finish initializing the hash table (do the dynamic
* patching for the fast-path hashtable.S code)
*/
htab_finish_init();
printk("Starting Linux PPC64 %s\n", UTS_RELEASE);
......
@@ -53,7 +53,6 @@ SECTIONS
*(.data1)
*(.sdata)
*(.sdata2)
*(.got.plt) *(.got)
*(.dynamic)
CONSTRUCTORS
}
@@ -126,6 +125,7 @@ SECTIONS
/* freed after init ends here */
__toc_start = .;
.got : { *(.got.plt) *(.got) }
.toc : { *(.toc) }
. = ALIGN(4096);
__toc_end = .;
......
@@ -4,6 +4,6 @@
EXTRA_CFLAGS += -mno-minimal-toc
obj-y := fault.o init.o extable.o imalloc.o
obj-y := fault.o init.o extable.o imalloc.o hash_utils.o hash_low.o
obj-$(CONFIG_DISCONTIGMEM) += numa.o
obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
@@ -27,6 +27,7 @@
#include <linux/sysctl.h>
#include <linux/ctype.h>
#include <linux/cache.h>
#include <linux/init.h>
#include <asm/ppcdebug.h>
#include <asm/processor.h>
@@ -48,7 +49,6 @@
#include <asm/tlb.h>
#include <asm/cacheflush.h>
#include <asm/cputable.h>
/*
* Note: pte --> Linux PTE
* HPTE --> PowerPC Hashed Page Table Entry
@@ -68,8 +68,7 @@ extern unsigned long _SDR1;
#define KB (1024)
#define MB (1024*KB)
static inline void
loop_forever(void)
static inline void loop_forever(void)
{
volatile unsigned long x = 1;
for(;x;x|=1)
@@ -77,8 +76,7 @@ loop_forever(void)
}
#ifdef CONFIG_PPC_PSERIES
static inline void
create_pte_mapping(unsigned long start, unsigned long end,
static inline void create_pte_mapping(unsigned long start, unsigned long end,
unsigned long mode, int large)
{
unsigned long addr;
@@ -120,8 +118,7 @@ create_pte_mapping(unsigned long start, unsigned long end,
}
}
void
htab_initialize(void)
void __init htab_initialize(void)
{
unsigned long table, htab_size_bytes;
unsigned long pteg_count;
@@ -186,189 +183,36 @@ htab_initialize(void)
#endif
/*
* find_linux_pte returns the address of a linux pte for a given
* effective address and directory. If not found, it returns zero.
* Called by asm hashtable.S for doing lazy icache flush
*/
pte_t *find_linux_pte(pgd_t *pgdir, unsigned long ea)
{
pgd_t *pg;
pmd_t *pm;
pte_t *pt = NULL;
pte_t pte;
pg = pgdir + pgd_index(ea);
if (!pgd_none(*pg)) {
pm = pmd_offset(pg, ea);
if (pmd_present(*pm)) {
pt = pte_offset_kernel(pm, ea);
pte = *pt;
if (!pte_present(pte))
pt = NULL;
}
}
return pt;
}
static inline unsigned long computeHptePP(unsigned long pte)
unsigned int hash_page_do_lazy_icache(unsigned int pp, pte_t pte, int trap)
{
return (pte & _PAGE_USER) |
(((pte & _PAGE_USER) >> 1) &
((~((pte >> 2) & /* _PAGE_RW */
(pte >> 7))) & /* _PAGE_DIRTY */
1));
}
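Worked out against the bit definitions later in this diff (_PAGE_USER = 0x002, _PAGE_RW = 0x004, _PAGE_DIRTY = 0x080), the computeHptePP() expression above yields: 0 for kernel pages (no user access), 2 (user read/write) when a user page is both writable and dirty, and 3 (user read-only) otherwise. Mapping a clean writable page read-only is what makes the software DIRTY bit management work: the first store takes a protection fault and the fault path upgrades the HPTE, which is the "case 2" described below.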
/*
* Handle a fault by adding an HPTE. If the address can't be determined
* to be valid via Linux page tables, return 1. If handled return 0
*/
int __hash_page(unsigned long ea, unsigned long access, unsigned long vsid,
pte_t *ptep, unsigned long trap, int local)
{
unsigned long va, vpn;
unsigned long newpp, prpn;
unsigned long hpteflags;
long slot;
pte_t old_pte, new_pte;
/* XXX fix for large ptes */
int large = 0;
/* Search the Linux page table for a match with va */
va = (vsid << 28) | (ea & 0x0fffffff);
if (large)
vpn = va >> LARGE_PAGE_SHIFT;
else
vpn = va >> PAGE_SHIFT;
/*
* If no pte found or not present, send the problem up to
* do_page_fault
*/
if (unlikely(!ptep || !pte_present(*ptep)))
return 1;
/*
* Check the user's access rights to the page. If access should be
* prevented then send the problem up to do_page_fault.
*/
access |= _PAGE_PRESENT;
if (unlikely(access & ~(pte_val(*ptep))))
return 1;
/*
* At this point, we have a pte (old_pte) which can be used to build
* or update an HPTE. There are 2 cases:
*
* 1. There is a valid (present) pte with no associated HPTE (this is
* the most common case)
* 2. There is a valid (present) pte with an associated HPTE. The
* current values of the pp bits in the HPTE prevent access
* because we are doing software DIRTY bit management and the
* page is currently not DIRTY.
*/
old_pte = *ptep;
new_pte = old_pte;
/* If the attempted access was a store */
if (access & _PAGE_RW)
pte_val(new_pte) |= _PAGE_ACCESSED | _PAGE_DIRTY;
else
pte_val(new_pte) |= _PAGE_ACCESSED;
newpp = computeHptePP(pte_val(new_pte));
struct page *page;
#define PPC64_HWNOEXEC (1 << 2)
/* We do lazy icache flushing on cpus that support it */
if (unlikely((cur_cpu_spec->cpu_features & CPU_FTR_NOEXECUTE)
&& pfn_valid(pte_pfn(new_pte)))) {
struct page *page = pte_page(new_pte);
if (!pfn_valid(pte_pfn(pte)))
return pp;
page = pte_page(pte);
/* page is dirty */
if (!PageReserved(page) &&
!test_bit(PG_arch_1, &page->flags)) {
if (!test_bit(PG_arch_1, &page->flags) && !PageReserved(page)) {
if (trap == 0x400) {
__flush_dcache_icache(page_address(page));
set_bit(PG_arch_1, &page->flags);
} else {
newpp |= PPC64_HWNOEXEC;
}
}
}
/* Check if pte already has an hpte (case 2) */
if (unlikely(pte_val(old_pte) & _PAGE_HASHPTE)) {
/* There MIGHT be an HPTE for this pte */
unsigned long hash, slot, secondary;
hash = hpt_hash(vpn, large);
secondary = (pte_val(old_pte) & _PAGE_SECONDARY) >> 15;
if (secondary)
hash = ~hash;
slot = (hash & htab_data.htab_hash_mask) * HPTES_PER_GROUP;
slot += (pte_val(old_pte) & _PAGE_GROUP_IX) >> 12;
if (ppc_md.hpte_updatepp(slot, newpp, va, large, local) == -1)
pte_val(old_pte) &= ~_PAGE_HPTEFLAGS;
else
if (!pte_same(old_pte, new_pte))
*ptep = new_pte;
}
if (likely(!(pte_val(old_pte) & _PAGE_HASHPTE))) {
unsigned long hash = hpt_hash(vpn, large);
unsigned long hpte_group;
prpn = pte_val(old_pte) >> PTE_SHIFT;
repeat:
hpte_group = ((hash & htab_data.htab_hash_mask) *
HPTES_PER_GROUP) & ~0x7UL;
/* Update the linux pte with the HPTE slot */
pte_val(new_pte) &= ~_PAGE_HPTEFLAGS;
pte_val(new_pte) |= _PAGE_HASHPTE;
/* copy appropriate flags from linux pte */
hpteflags = (pte_val(new_pte) & 0x1f8) | newpp;
slot = ppc_md.hpte_insert(hpte_group, va, prpn, 0,
hpteflags, 0, large);
/* Primary is full, try the secondary */
if (unlikely(slot == -1)) {
pte_val(new_pte) |= 1 << 15;
hpte_group = ((~hash & htab_data.htab_hash_mask) *
HPTES_PER_GROUP) & ~0x7UL;
slot = ppc_md.hpte_insert(hpte_group, va, prpn,
1, hpteflags, 0, large);
if (slot == -1) {
if (mftb() & 0x1)
hpte_group = ((hash & htab_data.htab_hash_mask) * HPTES_PER_GROUP) & ~0x7UL;
ppc_md.hpte_remove(hpte_group);
goto repeat;
} else
pp |= PPC64_HWNOEXEC;
}
}
if (unlikely(slot == -2))
panic("hash_page: pte_insert failed\n");
pte_val(new_pte) |= (slot<<12) & _PAGE_GROUP_IX;
return pp;
}
/*
* No need to use ldarx/stdcx here because all who
* might be updating the pte will hold the
* page_table_lock or the hash_table_lock
* (we hold both)
*/
/*
* Called by asm hashtable.S in case of critical insert failure
*/
*ptep = new_pte;
}
return 0;
void htab_insert_failure(void)
{
panic("hash_page: pte_insert failed\n");
}
int hash_page(unsigned long ea, unsigned long access, unsigned long trap)
@@ -429,23 +273,20 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap)
if (pgdir == NULL)
return 1;
/*
* Lock the Linux page table to prevent mmap and kswapd
* from modifying entries while we search and update
*/
spin_lock(&mm->page_table_lock);
tmp = cpumask_of_cpu(smp_processor_id());
if (user_region && cpus_equal(mm->cpu_vm_mask, tmp))
local = 1;
/* Is this a huge page ? */
if (unlikely(in_hugepage_area(mm->context, ea)))
ret = hash_huge_page(mm, access, ea, vsid, local);
if (ret < 0) {
else {
ptep = find_linux_pte(pgdir, ea);
if (ptep == NULL)
return 1;
ret = __hash_page(ea, access, vsid, ptep, trap, local);
}
spin_unlock(&mm->page_table_lock);
return ret;
}
@@ -492,3 +333,26 @@ void flush_hash_range(unsigned long context, unsigned long number, int local)
local);
}
}
static inline void make_bl(unsigned int *insn_addr, void *func)
{
unsigned long funcp = *((unsigned long *)func);
int offset = funcp - (unsigned long)insn_addr;
*insn_addr = (unsigned int)(0x48000001 | (offset & 0x03fffffc));
flush_icache_range((unsigned long)insn_addr, 4+
(unsigned long)insn_addr);
}
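To make the patching concrete, here is the encoding worked through on hypothetical addresses. 0x48000001 is primary opcode 18 (branch) with the LK bit set, i.e. a bl instruction; the signed displacement occupies the bits masked by 0x03fffffc, so the target must be 4-byte aligned and within +/-32MB of the call site. The double indirection *((unsigned long *)func) is needed because a ppc64 ELF function pointer refers to a function descriptor whose first doubleword is the actual entry address.

	call site: 0xc000000000010000  (hypothetical)
	target:    0xc000000000014af8  (hypothetical)
	offset = 0x14af8 - 0x10000 = 0x4af8
	insn   = 0x48000001 | (0x4af8 & 0x03fffffc) = 0x48004af9  /* bl <target> */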
void __init htab_finish_init(void)
{
extern unsigned int *htab_call_hpte_insert1;
extern unsigned int *htab_call_hpte_insert2;
extern unsigned int *htab_call_hpte_remove;
extern unsigned int *htab_call_hpte_updatepp;
make_bl(htab_call_hpte_insert1, ppc_md.hpte_insert);
make_bl(htab_call_hpte_insert2, ppc_md.hpte_insert);
make_bl(htab_call_hpte_remove, ppc_md.hpte_remove);
make_bl(htab_call_hpte_updatepp, ppc_md.hpte_updatepp);
}
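The two hpte_insert call sites presumably correspond to the primary and secondary attempts visible in the removed C code above: try the primary PTEG first, retry the secondary group with _PAGE_SECONDARY set, and fall back to hpte_remove() plus a full retry when both groups are full.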
@@ -652,13 +652,9 @@ int hash_huge_page(struct mm_struct *mm, unsigned long access,
unsigned long va, vpn;
int is_write;
hugepte_t old_pte, new_pte;
unsigned long hpteflags, prpn;
unsigned long hpteflags, prpn, flags;
long slot;
/* Is this for us? */
if (!in_hugepage_area(mm->context, ea))
return -1;
ea &= ~(HPAGE_SIZE-1);
/* We have to find the first hugepte in the batch, since
@@ -698,6 +694,8 @@ int hash_huge_page(struct mm_struct *mm, unsigned long access,
* page is currently not DIRTY.
*/
spin_lock_irqsave(&mm->page_table_lock, flags);
old_pte = *ptep;
new_pte = old_pte;
@@ -769,6 +767,8 @@ int hash_huge_page(struct mm_struct *mm, unsigned long access,
*ptep = new_pte;
}
spin_unlock_irqrestore(&mm->page_table_lock, flags);
return 0;
}
......
@@ -901,10 +901,6 @@ void flush_icache_user_range(struct vm_area_struct *vma, struct page *page,
flush_icache_range(maddr, maddr + len);
}
extern pte_t *find_linux_pte(pgd_t *pgdir, unsigned long ea);
int __hash_page(unsigned long ea, unsigned long access, unsigned long vsid,
pte_t *ptep, unsigned long trap, int local);
/*
* This is called at the end of handling a user page fault, when the
* fault has been handled by updating a PTE in the linux page tables.
@@ -944,6 +940,9 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long ea,
return;
ptep = find_linux_pte(pgdir, ea);
if (!ptep)
return;
vsid = get_vsid(vma->vm_mm->context, ea);
tmp = cpumask_of_cpu(smp_processor_id());
......
@@ -13,6 +13,8 @@
#ifndef _PPC64_MMU_H_
#define _PPC64_MMU_H_
#include <asm/page.h>
#ifndef __ASSEMBLY__
/* Default "unsigned long" context */
@@ -245,6 +247,16 @@ static inline void tlbiel(unsigned long va)
asm volatile("ptesync": : :"memory");
}
/*
* Handle a fault by adding an HPTE. If the address can't be determined
* to be valid via Linux page tables, return 1. If handled return 0
*/
extern int __hash_page(unsigned long ea, unsigned long access,
unsigned long vsid, pte_t *ptep, unsigned long trap,
int local);
extern void htab_finish_init(void);
#endif /* __ASSEMBLY__ */
/*
......
@@ -7,6 +7,7 @@
*/
#ifndef __ASSEMBLY__
#include <linux/stddef.h>
#include <asm/processor.h> /* For TASK_SIZE */
#include <asm/mmu.h>
#include <asm/page.h>
@@ -74,22 +75,23 @@
* Bits in a linux-style PTE. These match the bits in the
* (hardware-defined) PowerPC PTE as closely as possible.
*/
#define _PAGE_PRESENT 0x001UL /* software: pte contains a translation */
#define _PAGE_USER 0x002UL /* matches one of the PP bits */
#define _PAGE_RW 0x004UL /* software: user write access allowed */
#define _PAGE_GUARDED 0x008UL
#define _PAGE_COHERENT 0x010UL /* M: enforce memory coherence (SMP systems) */
#define _PAGE_NO_CACHE 0x020UL /* I: cache inhibit */
#define _PAGE_WRITETHRU 0x040UL /* W: cache write-through */
#define _PAGE_DIRTY 0x080UL /* C: page changed */
#define _PAGE_ACCESSED 0x100UL /* R: page referenced */
#define _PAGE_FILE 0x200UL /* software: pte holds file offset */
#define _PAGE_HASHPTE 0x400UL /* software: pte has an associated HPTE */
#define _PAGE_EXEC 0x800UL /* software: i-cache coherence required */
#define _PAGE_SECONDARY 0x8000UL /* software: HPTE is in secondary group */
#define _PAGE_GROUP_IX 0x7000UL /* software: HPTE index within group */
#define _PAGE_PRESENT 0x0001 /* software: pte contains a translation */
#define _PAGE_USER 0x0002 /* matches one of the PP bits */
#define _PAGE_FILE 0x0002 /* (!present only) software: pte holds file offset */
#define _PAGE_RW 0x0004 /* software: user write access allowed */
#define _PAGE_GUARDED 0x0008
#define _PAGE_COHERENT 0x0010 /* M: enforce memory coherence (SMP systems) */
#define _PAGE_NO_CACHE 0x0020 /* I: cache inhibit */
#define _PAGE_WRITETHRU 0x0040 /* W: cache write-through */
#define _PAGE_DIRTY 0x0080 /* C: page changed */
#define _PAGE_ACCESSED 0x0100 /* R: page referenced */
#define _PAGE_EXEC 0x0200 /* software: i-cache coherence required */
#define _PAGE_HASHPTE 0x0400 /* software: pte has an associated HPTE */
#define _PAGE_BUSY 0x0800 /* software: PTE & hash are busy */
#define _PAGE_SECONDARY 0x8000 /* software: HPTE is in secondary group */
#define _PAGE_GROUP_IX 0x7000 /* software: HPTE index within group */
/* Bits 0x7000 identify the index within an HPT Group */
#define _PAGE_HPTEFLAGS (_PAGE_HASHPTE | _PAGE_SECONDARY | _PAGE_GROUP_IX)
#define _PAGE_HPTEFLAGS (_PAGE_BUSY | _PAGE_HASHPTE | _PAGE_SECONDARY | _PAGE_GROUP_IX)
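Worked out, the new mask is _PAGE_HPTEFLAGS = 0x0800 | 0x0400 | 0x8000 | 0x7000 = 0xfc00 (previously 0xf400, without _PAGE_BUSY), so all hash-table bookkeeping bits, including the busy bit, are cleared and propagated as a unit.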
/* PAGE_MASK gives the right answer below, but only by accident */
/* It should be preserving the high 48 bits and then specifically */
/* preserving _PAGE_SECONDARY | _PAGE_GROUP_IX */
@@ -157,8 +159,10 @@ extern unsigned long empty_zero_page[PAGE_SIZE/sizeof(unsigned long)];
#define _PMD_HUGEPAGE 0x00000001U
#define HUGEPTE_BATCH_SIZE (1<<(HPAGE_SHIFT-PMD_SHIFT))
#ifndef __ASSEMBLY__
int hash_huge_page(struct mm_struct *mm, unsigned long access,
unsigned long ea, unsigned long vsid, int local);
#endif /* __ASSEMBLY__ */
#define HAVE_ARCH_UNMAPPED_AREA
#else
@@ -291,12 +295,14 @@ static inline unsigned long pte_update( pte_t *p, unsigned long clr,
__asm__ __volatile__(
"1: ldarx %0,0,%3 # pte_update\n\
andi. %1,%0,%7\n\
bne- 1b \n\
andc %1,%0,%4 \n\
or %1,%1,%5 \n\
stdcx. %1,0,%3 \n\
bne- 1b"
: "=&r" (old), "=&r" (tmp), "=m" (*p)
: "r" (p), "r" (clr), "r" (set), "m" (*p)
: "r" (p), "r" (clr), "r" (set), "m" (*p), "i" (_PAGE_BUSY)
: "cc" );
return old;
}
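A usage sketch (hedged, not code from this patch): higher-level helpers reduce to a single atomic pte_update() call, with the busy-bit spin hidden inside. For example, a young-bit test could look like:

	/* Atomically clear R (accessed); nonzero return means it was set. */
	static inline int test_and_clear_young_sketch(pte_t *ptep)
	{
		return (pte_update(ptep, _PAGE_ACCESSED, 0) & _PAGE_ACCESSED) != 0;
	}

Note also the new andi. against operand %7: _PAGE_BUSY is passed as an "i" (immediate) constraint, which works because 0x0800 fits in andi.'s 16-bit unsigned immediate field.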
@@ -422,5 +428,31 @@ long pSeries_hpte_insert(unsigned long hpte_group, unsigned long va,
unsigned long prpn, int secondary,
unsigned long hpteflags, int bolted, int large);
/*
* find_linux_pte returns the address of a linux pte for a given
* effective address and directory. If not found, it returns zero.
*/
static inline pte_t *find_linux_pte(pgd_t *pgdir, unsigned long ea)
{
pgd_t *pg;
pmd_t *pm;
pte_t *pt = NULL;
pte_t pte;
pg = pgdir + pgd_index(ea);
if (!pgd_none(*pg)) {
pm = pmd_offset(pg, ea);
if (pmd_present(*pm)) {
pt = pte_offset_kernel(pm, ea);
pte = *pt;
if (!pte_present(pte))
pt = NULL;
}
}
return pt;
}
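A usage sketch matching the hash_page() hunk above: callers walk from the mm's pgd and treat a NULL result as "no translation", deferring to do_page_fault().

	pte_t *ptep = find_linux_pte(mm->pgd, ea);
	if (ptep == NULL)
		return 1;	/* send the fault up to do_page_fault() */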
#endif /* __ASSEMBLY__ */
#endif /* _PPC64_PGTABLE_H */