Commit 9b081e10 authored by Christophe Leroy's avatar Christophe Leroy Committed by Scott Wood

powerpc: port 64 bits pgtable_cache to 32 bits

Today powerpc64 uses a set of pgtable_caches while powerpc32 uses
standard pages when using 4k pages and a single pgtable_cache
if using other size pages.

In preparation of implementing huge pages on the 8xx, this patch
replaces the specific powerpc32 handling by the 64 bits approach.

This is done by:
* moving 64 bits pgtable_cache_add() and pgtable_cache_init()
in a new file called init-common.c
* modifying pgtable_cache_init() to also handle the case
without PMD
* removing the 32 bits version of pgtable_cache_add() and
pgtable_cache_init()
* copying related header contents from 64 bits into both the
book3s/32 and nohash/32 header files

On the 8xx, the following cache sizes will be used:
* 4k pages mode:
- PGT_CACHE(10) for PGD
- PGT_CACHE(3) for 512k hugepage tables
* 16k pages mode:
- PGT_CACHE(6) for PGD
- PGT_CACHE(7) for 512k hugepage tables
- PGT_CACHE(3) for 8M hugepage tables
Signed-off-by: default avatarChristophe Leroy <christophe.leroy@c-s.fr>
Reviewed-by: default avatarAneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: default avatarScott Wood <oss@buserror.net>
parent d7544424
......@@ -2,14 +2,42 @@
#define _ASM_POWERPC_BOOK3S_32_PGALLOC_H
#include <linux/threads.h>
#include <linux/slab.h>
/* For 32-bit, all levels of page tables are just drawn from get_free_page() */
#define MAX_PGTABLE_INDEX_SIZE 0
/*
* Functions that deal with pagetables that could be at any level of
* the table need to be passed an "index_size" so they know how to
* handle allocation. For PTE pages (which are linked to a struct
* page for now, and drawn from the main get_free_pages() pool), the
* allocation size will be (2^index_size * sizeof(pointer)) and
* allocations are drawn from the kmem_cache in PGT_CACHE(index_size).
*
* The maximum index size needs to be big enough to allow any
* pagetable sizes we need, but small enough to fit in the low bits of
* any page table pointer. In other words all pagetables, even tiny
* ones, must be aligned to allow at least enough low 0 bits to
* contain this value. This value is also used as a mask, so it must
* be one less than a power of two.
*/
#define MAX_PGTABLE_INDEX_SIZE 0xf
extern void __bad_pte(pmd_t *pmd);
extern pgd_t *pgd_alloc(struct mm_struct *mm);
extern void pgd_free(struct mm_struct *mm, pgd_t *pgd);
extern struct kmem_cache *pgtable_cache[];
#define PGT_CACHE(shift) ({ \
BUG_ON(!(shift)); \
pgtable_cache[(shift) - 1]; \
})
static inline pgd_t *pgd_alloc(struct mm_struct *mm)
{
return kmem_cache_alloc(PGT_CACHE(PGD_INDEX_SIZE), GFP_KERNEL);
}
static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
{
kmem_cache_free(PGT_CACHE(PGD_INDEX_SIZE), pgd);
}
/*
* We don't have any real pmd's, and this code never triggers because
......@@ -68,8 +96,12 @@ static inline void pte_free(struct mm_struct *mm, pgtable_t ptepage)
static inline void pgtable_free(void *table, unsigned index_size)
{
BUG_ON(index_size); /* 32-bit doesn't use this */
free_page((unsigned long)table);
if (!index_size) {
free_page((unsigned long)table);
} else {
BUG_ON(index_size > MAX_PGTABLE_INDEX_SIZE);
kmem_cache_free(PGT_CACHE(index_size), table);
}
}
#define check_pgt_cache() do { } while (0)
......
......@@ -8,6 +8,23 @@
/* And here we include common definitions */
#include <asm/pte-common.h>
#define PTE_INDEX_SIZE PTE_SHIFT
#define PMD_INDEX_SIZE 0
#define PUD_INDEX_SIZE 0
#define PGD_INDEX_SIZE (32 - PGDIR_SHIFT)
#define PMD_CACHE_INDEX PMD_INDEX_SIZE
#ifndef __ASSEMBLY__
#define PTE_TABLE_SIZE (sizeof(pte_t) << PTE_INDEX_SIZE)
#define PMD_TABLE_SIZE 0
#define PUD_TABLE_SIZE 0
#define PGD_TABLE_SIZE (sizeof(pgd_t) << PGD_INDEX_SIZE)
#endif /* __ASSEMBLY__ */
#define PTRS_PER_PTE (1 << PTE_INDEX_SIZE)
#define PTRS_PER_PGD (1 << PGD_INDEX_SIZE)
/*
* The normal case is that PTEs are 32-bits and we have a 1-page
* 1024-entry pgdir pointing to 1-page 1024-entry PTE pages. -- paulus
......@@ -19,14 +36,10 @@
* -Matt
*/
/* PGDIR_SHIFT determines what a top-level page table entry can map */
#define PGDIR_SHIFT (PAGE_SHIFT + PTE_SHIFT)
#define PGDIR_SHIFT (PAGE_SHIFT + PTE_INDEX_SIZE)
#define PGDIR_SIZE (1UL << PGDIR_SHIFT)
#define PGDIR_MASK (~(PGDIR_SIZE-1))
#define PTRS_PER_PTE (1 << PTE_SHIFT)
#define PTRS_PER_PMD 1
#define PTRS_PER_PGD (1 << (32 - PGDIR_SHIFT))
#define USER_PTRS_PER_PGD (TASK_SIZE / PGDIR_SIZE)
/*
* This is the bottom of the PKMAP area with HIGHMEM or an arbitrary
......@@ -82,12 +95,8 @@
extern unsigned long ioremap_bot;
/*
* entries per page directory level: our page-table tree is two-level, so
* we don't really have any PMD directory.
*/
#define PTE_TABLE_SIZE (sizeof(pte_t) << PTE_SHIFT)
#define PGD_TABLE_SIZE (sizeof(pgd_t) << (32 - PGDIR_SHIFT))
/* Bits to mask out from a PGD to get to the PUD page */
#define PGD_MASKED_BITS 0
#define pte_ERROR(e) \
pr_err("%s:%d: bad pte %llx.\n", __FILE__, __LINE__, \
......@@ -283,15 +292,6 @@ static inline void __ptep_set_access_flags(struct mm_struct *mm,
#define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) >> 3 })
#define __swp_entry_to_pte(x) ((pte_t) { (x).val << 3 })
#ifndef CONFIG_PPC_4K_PAGES
void pgtable_cache_init(void);
#else
/*
* No page table caches to initialise
*/
#define pgtable_cache_init() do { } while (0)
#endif
extern int get_pteptr(struct mm_struct *mm, unsigned long addr, pte_t **ptep,
pmd_t **pmdp);
......
......@@ -789,9 +789,6 @@ extern struct page *pgd_page(pgd_t pgd);
#define pgd_ERROR(e) \
pr_err("%s:%d: bad pgd %08lx.\n", __FILE__, __LINE__, pgd_val(e))
void pgtable_cache_add(unsigned shift, void (*ctor)(void *));
void pgtable_cache_init(void);
static inline int map_kernel_page(unsigned long ea, unsigned long pa,
unsigned long flags)
{
......
......@@ -2,14 +2,42 @@
#define _ASM_POWERPC_PGALLOC_32_H
#include <linux/threads.h>
#include <linux/slab.h>
/* For 32-bit, all levels of page tables are just drawn from get_free_page() */
#define MAX_PGTABLE_INDEX_SIZE 0
/*
* Functions that deal with pagetables that could be at any level of
* the table need to be passed an "index_size" so they know how to
* handle allocation. For PTE pages (which are linked to a struct
* page for now, and drawn from the main get_free_pages() pool), the
* allocation size will be (2^index_size * sizeof(pointer)) and
* allocations are drawn from the kmem_cache in PGT_CACHE(index_size).
*
* The maximum index size needs to be big enough to allow any
* pagetable sizes we need, but small enough to fit in the low bits of
* any page table pointer. In other words all pagetables, even tiny
* ones, must be aligned to allow at least enough low 0 bits to
* contain this value. This value is also used as a mask, so it must
* be one less than a power of two.
*/
#define MAX_PGTABLE_INDEX_SIZE 0xf
extern void __bad_pte(pmd_t *pmd);
extern pgd_t *pgd_alloc(struct mm_struct *mm);
extern void pgd_free(struct mm_struct *mm, pgd_t *pgd);
extern struct kmem_cache *pgtable_cache[];
#define PGT_CACHE(shift) ({ \
BUG_ON(!(shift)); \
pgtable_cache[(shift) - 1]; \
})
static inline pgd_t *pgd_alloc(struct mm_struct *mm)
{
return kmem_cache_alloc(PGT_CACHE(PGD_INDEX_SIZE), GFP_KERNEL);
}
static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
{
kmem_cache_free(PGT_CACHE(PGD_INDEX_SIZE), pgd);
}
/*
* We don't have any real pmd's, and this code never triggers because
......@@ -68,8 +96,12 @@ static inline void pte_free(struct mm_struct *mm, pgtable_t ptepage)
static inline void pgtable_free(void *table, unsigned index_size)
{
BUG_ON(index_size); /* 32-bit doesn't use this */
free_page((unsigned long)table);
if (!index_size) {
free_page((unsigned long)table);
} else {
BUG_ON(index_size > MAX_PGTABLE_INDEX_SIZE);
kmem_cache_free(PGT_CACHE(index_size), table);
}
}
#define check_pgt_cache() do { } while (0)
......
......@@ -16,6 +16,23 @@ extern int icache_44x_need_flush;
#endif /* __ASSEMBLY__ */
#define PTE_INDEX_SIZE PTE_SHIFT
#define PMD_INDEX_SIZE 0
#define PUD_INDEX_SIZE 0
#define PGD_INDEX_SIZE (32 - PGDIR_SHIFT)
#define PMD_CACHE_INDEX PMD_INDEX_SIZE
#ifndef __ASSEMBLY__
#define PTE_TABLE_SIZE (sizeof(pte_t) << PTE_INDEX_SIZE)
#define PMD_TABLE_SIZE 0
#define PUD_TABLE_SIZE 0
#define PGD_TABLE_SIZE (sizeof(pgd_t) << PGD_INDEX_SIZE)
#endif /* __ASSEMBLY__ */
#define PTRS_PER_PTE (1 << PTE_INDEX_SIZE)
#define PTRS_PER_PGD (1 << PGD_INDEX_SIZE)
/*
* The normal case is that PTEs are 32-bits and we have a 1-page
* 1024-entry pgdir pointing to 1-page 1024-entry PTE pages. -- paulus
......@@ -27,22 +44,12 @@ extern int icache_44x_need_flush;
* -Matt
*/
/* PGDIR_SHIFT determines what a top-level page table entry can map */
#define PGDIR_SHIFT (PAGE_SHIFT + PTE_SHIFT)
#define PGDIR_SHIFT (PAGE_SHIFT + PTE_INDEX_SIZE)
#define PGDIR_SIZE (1UL << PGDIR_SHIFT)
#define PGDIR_MASK (~(PGDIR_SIZE-1))
/*
* entries per page directory level: our page-table tree is two-level, so
* we don't really have any PMD directory.
*/
#ifndef __ASSEMBLY__
#define PTE_TABLE_SIZE (sizeof(pte_t) << PTE_SHIFT)
#define PGD_TABLE_SIZE (sizeof(pgd_t) << (32 - PGDIR_SHIFT))
#endif /* __ASSEMBLY__ */
#define PTRS_PER_PTE (1 << PTE_SHIFT)
#define PTRS_PER_PMD 1
#define PTRS_PER_PGD (1 << (32 - PGDIR_SHIFT))
/* Bits to mask out from a PGD to get to the PUD page */
#define PGD_MASKED_BITS 0
#define USER_PTRS_PER_PGD (TASK_SIZE / PGDIR_SIZE)
#define FIRST_USER_ADDRESS 0UL
......@@ -328,15 +335,6 @@ static inline void __ptep_set_access_flags(struct mm_struct *mm,
#define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) >> 3 })
#define __swp_entry_to_pte(x) ((pte_t) { (x).val << 3 })
#ifndef CONFIG_PPC_4K_PAGES
void pgtable_cache_init(void);
#else
/*
* No page table caches to initialise
*/
#define pgtable_cache_init() do { } while (0)
#endif
extern int get_pteptr(struct mm_struct *mm, unsigned long addr, pte_t **ptep,
pmd_t **pmdp);
......
......@@ -346,8 +346,6 @@ static inline void __ptep_set_access_flags(struct mm_struct *mm,
#define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val((pte)) })
#define __swp_entry_to_pte(x) __pte((x).val)
void pgtable_cache_add(unsigned shift, void (*ctor)(void *));
void pgtable_cache_init(void);
extern int map_kernel_page(unsigned long ea, unsigned long pa,
unsigned long flags);
extern int __meminit vmemmap_create_mapping(unsigned long start,
......
......@@ -78,6 +78,8 @@ static inline pte_t *find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea,
unsigned long vmalloc_to_phys(void *vmalloc_addr);
void pgtable_cache_add(unsigned shift, void (*ctor)(void *));
void pgtable_cache_init(void);
#endif /* __ASSEMBLY__ */
#endif /* _ASM_POWERPC_PGTABLE_H */
......@@ -7,7 +7,8 @@ subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror
ccflags-$(CONFIG_PPC64) := $(NO_MINIMAL_TOC)
obj-y := fault.o mem.o pgtable.o mmap.o \
init_$(BITS).o pgtable_$(BITS).o
init_$(BITS).o pgtable_$(BITS).o \
init-common.o
obj-$(CONFIG_PPC_MMU_NOHASH) += mmu_context_nohash.o tlb_nohash.o \
tlb_nohash_low.o
obj-$(CONFIG_PPC_BOOK3E) += tlb_low_$(BITS)e.o
......
/*
* PowerPC version
* Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
*
* Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au)
* and Cort Dougan (PReP) (cort@cs.nmt.edu)
* Copyright (C) 1996 Paul Mackerras
*
* Derived from "arch/i386/mm/init.c"
* Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds
*
* Dave Engebretsen <engebret@us.ibm.com>
* Rework for PPC64 port.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*
*/
#undef DEBUG
#include <linux/string.h>
#include <asm/pgalloc.h>
#include <asm/pgtable.h>
static void pgd_ctor(void *addr)
{
memset(addr, 0, PGD_TABLE_SIZE);
}
static void pud_ctor(void *addr)
{
memset(addr, 0, PUD_TABLE_SIZE);
}
static void pmd_ctor(void *addr)
{
memset(addr, 0, PMD_TABLE_SIZE);
}
struct kmem_cache *pgtable_cache[MAX_PGTABLE_INDEX_SIZE];
/*
* Create a kmem_cache() for pagetables. This is not used for PTE
* pages - they're linked to struct page, come from the normal free
* pages pool and have a different entry size (see real_pte_t) to
* everything else. Caches created by this function are used for all
* the higher level pagetables, and for hugepage pagetables.
*/
void pgtable_cache_add(unsigned shift, void (*ctor)(void *))
{
char *name;
unsigned long table_size = sizeof(void *) << shift;
unsigned long align = table_size;
/* When batching pgtable pointers for RCU freeing, we store
* the index size in the low bits. Table alignment must be
* big enough to fit it.
*
* Likewise, hugeapge pagetable pointers contain a (different)
* shift value in the low bits. All tables must be aligned so
* as to leave enough 0 bits in the address to contain it. */
unsigned long minalign = max(MAX_PGTABLE_INDEX_SIZE + 1,
HUGEPD_SHIFT_MASK + 1);
struct kmem_cache *new;
/* It would be nice if this was a BUILD_BUG_ON(), but at the
* moment, gcc doesn't seem to recognize is_power_of_2 as a
* constant expression, so so much for that. */
BUG_ON(!is_power_of_2(minalign));
BUG_ON((shift < 1) || (shift > MAX_PGTABLE_INDEX_SIZE));
if (PGT_CACHE(shift))
return; /* Already have a cache of this size */
align = max_t(unsigned long, align, minalign);
name = kasprintf(GFP_KERNEL, "pgtable-2^%d", shift);
new = kmem_cache_create(name, table_size, align, 0, ctor);
kfree(name);
pgtable_cache[shift - 1] = new;
pr_debug("Allocated pgtable cache for order %d\n", shift);
}
void pgtable_cache_init(void)
{
pgtable_cache_add(PGD_INDEX_SIZE, pgd_ctor);
if (PMD_INDEX_SIZE && !PGT_CACHE(PMD_INDEX_SIZE))
pgtable_cache_add(PMD_CACHE_INDEX, pmd_ctor);
/*
* In all current configs, when the PUD index exists it's the
* same size as either the pgd or pmd index except with THP enabled
* on book3s 64
*/
if (PUD_INDEX_SIZE && !PGT_CACHE(PUD_INDEX_SIZE))
pgtable_cache_add(PUD_INDEX_SIZE, pud_ctor);
if (!PGT_CACHE(PGD_INDEX_SIZE))
panic("Couldn't allocate pgd cache");
if (PMD_INDEX_SIZE && !PGT_CACHE(PMD_INDEX_SIZE))
panic("Couldn't allocate pmd pgtable caches");
if (PUD_INDEX_SIZE && !PGT_CACHE(PUD_INDEX_SIZE))
panic("Couldn't allocate pud pgtable caches");
}
......@@ -80,83 +80,6 @@ EXPORT_SYMBOL_GPL(memstart_addr);
phys_addr_t kernstart_addr;
EXPORT_SYMBOL_GPL(kernstart_addr);
static void pgd_ctor(void *addr)
{
memset(addr, 0, PGD_TABLE_SIZE);
}
static void pud_ctor(void *addr)
{
memset(addr, 0, PUD_TABLE_SIZE);
}
static void pmd_ctor(void *addr)
{
memset(addr, 0, PMD_TABLE_SIZE);
}
struct kmem_cache *pgtable_cache[MAX_PGTABLE_INDEX_SIZE];
/*
* Create a kmem_cache() for pagetables. This is not used for PTE
* pages - they're linked to struct page, come from the normal free
* pages pool and have a different entry size (see real_pte_t) to
* everything else. Caches created by this function are used for all
* the higher level pagetables, and for hugepage pagetables.
*/
void pgtable_cache_add(unsigned shift, void (*ctor)(void *))
{
char *name;
unsigned long table_size = sizeof(void *) << shift;
unsigned long align = table_size;
/* When batching pgtable pointers for RCU freeing, we store
* the index size in the low bits. Table alignment must be
* big enough to fit it.
*
* Likewise, hugeapge pagetable pointers contain a (different)
* shift value in the low bits. All tables must be aligned so
* as to leave enough 0 bits in the address to contain it. */
unsigned long minalign = max(MAX_PGTABLE_INDEX_SIZE + 1,
HUGEPD_SHIFT_MASK + 1);
struct kmem_cache *new;
/* It would be nice if this was a BUILD_BUG_ON(), but at the
* moment, gcc doesn't seem to recognize is_power_of_2 as a
* constant expression, so so much for that. */
BUG_ON(!is_power_of_2(minalign));
BUG_ON((shift < 1) || (shift > MAX_PGTABLE_INDEX_SIZE));
if (PGT_CACHE(shift))
return; /* Already have a cache of this size */
align = max_t(unsigned long, align, minalign);
name = kasprintf(GFP_KERNEL, "pgtable-2^%d", shift);
new = kmem_cache_create(name, table_size, align, 0, ctor);
kfree(name);
pgtable_cache[shift - 1] = new;
pr_debug("Allocated pgtable cache for order %d\n", shift);
}
void pgtable_cache_init(void)
{
pgtable_cache_add(PGD_INDEX_SIZE, pgd_ctor);
pgtable_cache_add(PMD_CACHE_INDEX, pmd_ctor);
/*
* In all current configs, when the PUD index exists it's the
* same size as either the pgd or pmd index except with THP enabled
* on book3s 64
*/
if (PUD_INDEX_SIZE && !PGT_CACHE(PUD_INDEX_SIZE))
pgtable_cache_add(PUD_INDEX_SIZE, pud_ctor);
if (!PGT_CACHE(PGD_INDEX_SIZE) || !PGT_CACHE(PMD_CACHE_INDEX))
panic("Couldn't allocate pgtable caches");
if (PUD_INDEX_SIZE && !PGT_CACHE(PUD_INDEX_SIZE))
panic("Couldn't allocate pud pgtable caches");
}
#ifdef CONFIG_SPARSEMEM_VMEMMAP
/*
* Given an address within the vmemmap, determine the pfn of the page that
......
......@@ -42,43 +42,6 @@ EXPORT_SYMBOL(ioremap_bot); /* aka VMALLOC_END */
extern char etext[], _stext[], _sinittext[], _einittext[];
#define PGDIR_ORDER (32 + PGD_T_LOG2 - PGDIR_SHIFT)
#ifndef CONFIG_PPC_4K_PAGES
static struct kmem_cache *pgtable_cache;
void pgtable_cache_init(void)
{
pgtable_cache = kmem_cache_create("PGDIR cache", 1 << PGDIR_ORDER,
1 << PGDIR_ORDER, 0, NULL);
if (pgtable_cache == NULL)
panic("Couldn't allocate pgtable caches");
}
#endif
pgd_t *pgd_alloc(struct mm_struct *mm)
{
pgd_t *ret;
/* pgdir take page or two with 4K pages and a page fraction otherwise */
#ifndef CONFIG_PPC_4K_PAGES
ret = kmem_cache_alloc(pgtable_cache, GFP_KERNEL | __GFP_ZERO);
#else
ret = (pgd_t *)__get_free_pages(GFP_KERNEL|__GFP_ZERO,
PGDIR_ORDER - PAGE_SHIFT);
#endif
return ret;
}
void pgd_free(struct mm_struct *mm, pgd_t *pgd)
{
#ifndef CONFIG_PPC_4K_PAGES
kmem_cache_free(pgtable_cache, (void *)pgd);
#else
free_pages((unsigned long)pgd, PGDIR_ORDER - PAGE_SHIFT);
#endif
}
__ref pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
{
pte_t *pte;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment