Commit c2bd1247 authored by Andrew Morton, committed by Linus Torvalds

[PATCH] put ia32 pgds and pmds back into slab

From: William Lee Irwin III <wli@holomorphy.com>

This optimisation was reverted when I was removing all users of page->list.
Bill fixed it up, so unrevert it.
parent 24b5a6a3
@@ -523,20 +523,30 @@ void __init mem_init(void)
 #endif
 }
 
-#ifdef CONFIG_X86_PAE
-struct kmem_cache_s *pae_pgd_cachep;
+kmem_cache_t *pgd_cache;
+kmem_cache_t *pmd_cache;
 
 void __init pgtable_cache_init(void)
 {
-	/*
-	 * PAE pgds must be 16-byte aligned:
-	 */
-	pae_pgd_cachep = kmem_cache_create("pae_pgd", 32, 0,
-		SLAB_HWCACHE_ALIGN | SLAB_MUST_HWCACHE_ALIGN, NULL, NULL);
-	if (!pae_pgd_cachep)
-		panic("init_pae(): Cannot alloc pae_pgd SLAB cache");
+	if (PTRS_PER_PMD > 1) {
+		pmd_cache = kmem_cache_create("pmd",
+					PTRS_PER_PMD*sizeof(pmd_t),
+					PTRS_PER_PMD*sizeof(pmd_t),
+					0,
+					pmd_ctor,
+					NULL);
+		if (!pmd_cache)
+			panic("pgtable_cache_init(): cannot create pmd cache");
+	}
+	pgd_cache = kmem_cache_create("pgd",
+				PTRS_PER_PGD*sizeof(pgd_t),
+				PTRS_PER_PGD*sizeof(pgd_t),
+				0,
+				pgd_ctor,
+				PTRS_PER_PMD == 1 ? pgd_dtor : NULL);
+	if (!pgd_cache)
+		panic("pgtable_cache_init(): Cannot create pgd cache");
 }
-#endif
 
 /*
  * This function cannot be __init, since exceptions don't work in that
...
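The hunk above is the heart of the change: both caches are created with constructors (pmd_ctor, pgd_ctor), so a pmd or pgd comes out of the slab already formatted and pgd_alloc()/pgd_free() no longer pay for clear_page() and the kernel-area memcpy on every call. As a rough userspace analogue of that constructor idea (a sketch with illustrative names, not the kernel slab API):

	#include <stdio.h>
	#include <stdlib.h>
	#include <string.h>

	#define OBJ_BYTES 64
	#define POOL_MAX  16

	/* A cache that runs its constructor once, when backing memory
	 * first enters the pool, not on every allocation. Freed objects
	 * must be left "formatted" so the hot path can hand them out
	 * untouched. */
	struct toy_cache {
		void *free_stack[POOL_MAX];
		int nfree;
		void (*ctor)(void *);
	};

	static void *toy_alloc(struct toy_cache *c)
	{
		if (c->nfree > 0)		/* hot path: still constructed */
			return c->free_stack[--c->nfree];
		void *obj = malloc(OBJ_BYTES);	/* cold path: construct once */
		if (obj)
			c->ctor(obj);
		return obj;
	}

	static void toy_free(struct toy_cache *c, void *obj)
	{
		if (c->nfree < POOL_MAX)	/* object stays formatted */
			c->free_stack[c->nfree++] = obj;
		else
			free(obj);
	}

	static void zero_ctor(void *obj)
	{
		memset(obj, 0, OBJ_BYTES);	/* what pmd_ctor does for a pmd */
	}

	int main(void)
	{
		struct toy_cache cache = { .nfree = 0, .ctor = zero_ctor };
		void *a = toy_alloc(&cache);	/* malloc + ctor */
		toy_free(&cache, a);
		void *b = toy_alloc(&cache);	/* recycled, no memset */
		printf("recycled: %s\n", a == b ? "yes" : "no");
		toy_free(&cache, b);
		return 0;
	}

pgd_dtor plays the inverse role: it runs when memory leaves the cache, which is why it is the right single place to unlink a pgd's page from pgd_list.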
@@ -67,19 +67,22 @@ static void flush_kernel_map(void *dummy)
 
 static void set_pmd_pte(pte_t *kpte, unsigned long address, pte_t pte)
 {
+	struct page *page;
+	unsigned long flags;
+
 	set_pte_atomic(kpte, pte);	/* change init_mm */
-#ifndef CONFIG_X86_PAE
-	{
-		struct list_head *l;
-		spin_lock(&mmlist_lock);
-		list_for_each(l, &init_mm.mmlist) {
-			struct mm_struct *mm = list_entry(l, struct mm_struct, mmlist);
-			pmd_t *pmd = pmd_offset(pgd_offset(mm, address), address);
-			set_pte_atomic((pte_t *)pmd, pte);
-		}
-		spin_unlock(&mmlist_lock);
-	}
-#endif
+	if (PTRS_PER_PMD > 1)
+		return;
+
+	spin_lock_irqsave(&pgd_lock, flags);
+	for (page = pgd_list; page; page = (struct page *)page->index) {
+		pgd_t *pgd;
+		pmd_t *pmd;
+		pgd = (pgd_t *)page_address(page) + pgd_index(address);
+		pmd = pmd_offset(pgd, address);
+		set_pte_atomic((pte_t *)pmd, pte);
+	}
+	spin_unlock_irqrestore(&pgd_lock, flags);
 }
 
 /*
...
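The rewritten walk closes a real hole: the old loop visited only the mm's on init_mm.mmlist, but once pgds live in a slab, a fully formatted pgd can sit in the cache attached to no mm at all, and it too must see kernel pagetable attribute changes. A minimal userspace model of that requirement (hypothetical names, not kernel code):

	#include <stdio.h>

	#define KERNEL_SLOT 768		/* pgd index of a kernel mapping */

	struct dir {
		unsigned long entry[1024];
		struct dir *next;	/* registry link, like pgd_list */
	};

	static struct dir *dir_list;	/* every constructed directory */

	static void dir_register(struct dir *d)
	{
		d->next = dir_list;
		dir_list = d;
	}

	/* analogue of set_pmd_pte(): patch the shared kernel slot in
	 * every directory, attached to a process or idle in the cache */
	static void update_kernel_entry(unsigned long val)
	{
		for (struct dir *d = dir_list; d; d = d->next)
			d->entry[KERNEL_SLOT] = val;
	}

	int main(void)
	{
		static struct dir attached, cached;	/* one in use, one idle */
		dir_register(&attached);
		dir_register(&cached);
		update_kernel_entry(0xdeadbeef);
		/* both copies agree, so reusing "cached" later is safe */
		printf("%lx %lx\n", attached.entry[KERNEL_SLOT],
				    cached.entry[KERNEL_SLOT]);
		return 0;
	}

Walking only "active" directories would leave the idle one holding a stale kernel entry, which is exactly what the pgd_list registry prevents.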
@@ -12,6 +12,7 @@
 #include <linux/highmem.h>
 #include <linux/slab.h>
 #include <linux/pagemap.h>
+#include <linux/spinlock.h>
 
 #include <asm/system.h>
 #include <asm/pgtable.h>
@@ -151,61 +152,108 @@ struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address)
 	return pte;
 }
 
-#ifdef CONFIG_X86_PAE
-
-pgd_t *pgd_alloc(struct mm_struct *mm)
-{
-	int i;
-	pgd_t *pgd = kmem_cache_alloc(pae_pgd_cachep, GFP_KERNEL);
-
-	if (pgd) {
-		for (i = 0; i < USER_PTRS_PER_PGD; i++) {
-			unsigned long pmd = __get_free_page(GFP_KERNEL);
-			if (!pmd)
-				goto out_oom;
-			clear_page(pmd);
-			set_pgd(pgd + i, __pgd(1 + __pa(pmd)));
-		}
-		memcpy(pgd + USER_PTRS_PER_PGD,
-			swapper_pg_dir + USER_PTRS_PER_PGD,
-			(PTRS_PER_PGD - USER_PTRS_PER_PGD) * sizeof(pgd_t));
-	}
-	return pgd;
-
-out_oom:
-	for (i--; i >= 0; i--)
-		free_page((unsigned long)__va(pgd_val(pgd[i])-1));
-	kmem_cache_free(pae_pgd_cachep, pgd);
-	return NULL;
-}
-
-void pgd_free(pgd_t *pgd)
-{
-	int i;
-
-	for (i = 0; i < USER_PTRS_PER_PGD; i++)
-		free_page((unsigned long)__va(pgd_val(pgd[i])-1));
-	kmem_cache_free(pae_pgd_cachep, pgd);
-}
-
-#else
-
-pgd_t *pgd_alloc(struct mm_struct *mm)
-{
-	pgd_t *pgd = (pgd_t *)__get_free_page(GFP_KERNEL);
-
-	if (pgd) {
-		memset(pgd, 0, USER_PTRS_PER_PGD * sizeof(pgd_t));
-		memcpy(pgd + USER_PTRS_PER_PGD,
-			swapper_pg_dir + USER_PTRS_PER_PGD,
-			(PTRS_PER_PGD - USER_PTRS_PER_PGD) * sizeof(pgd_t));
-	}
-	return pgd;
-}
-
-void pgd_free(pgd_t *pgd)
-{
-	free_page((unsigned long)pgd);
-}
-
-#endif /* CONFIG_X86_PAE */
+void pmd_ctor(void *pmd, kmem_cache_t *cache, unsigned long flags)
+{
+	memset(pmd, 0, PTRS_PER_PMD*sizeof(pmd_t));
+}
+
+/*
+ * List of all pgd's needed for non-PAE so it can invalidate entries
+ * in both cached and uncached pgd's; not needed for PAE since the
+ * kernel pmd is shared. If PAE were not to share the pmd a similar
+ * tactic would be needed. This is essentially codepath-based locking
+ * against pageattr.c; it is the unique case in which a valid change
+ * of kernel pagetables can't be lazily synchronized by vmalloc faults.
+ * vmalloc faults work because attached pagetables are never freed.
+ * If the locking proves to be non-performant, a ticketing scheme with
+ * checks at dup_mmap(), exec(), and other mmlist addition points
+ * could be used. The locking scheme was chosen on the basis of
+ * manfred's recommendations and having no core impact whatsoever.
+ * -- wli
+ */
+spinlock_t pgd_lock = SPIN_LOCK_UNLOCKED;
+struct page *pgd_list;
+
+static inline void pgd_list_add(pgd_t *pgd)
+{
+	struct page *page = virt_to_page(pgd);
+	page->index = (unsigned long)pgd_list;
+	if (pgd_list)
+		pgd_list->private = (unsigned long)&page->index;
+	pgd_list = page;
+	page->private = (unsigned long)&pgd_list;
+}
+
+static inline void pgd_list_del(pgd_t *pgd)
+{
+	struct page *next, **pprev, *page = virt_to_page(pgd);
+	next = (struct page *)page->index;
+	pprev = (struct page **)page->private;
+	*pprev = next;
+	if (next)
+		next->private = (unsigned long)pprev;
+}
+
+void pgd_ctor(void *pgd, kmem_cache_t *cache, unsigned long unused)
+{
+	unsigned long flags;
+
+	if (PTRS_PER_PMD == 1)
+		spin_lock_irqsave(&pgd_lock, flags);
+
+	memcpy((pgd_t *)pgd + USER_PTRS_PER_PGD,
+			swapper_pg_dir + USER_PTRS_PER_PGD,
+			(PTRS_PER_PGD - USER_PTRS_PER_PGD) * sizeof(pgd_t));
+
+	if (PTRS_PER_PMD > 1)
+		return;
+
+	pgd_list_add(pgd);
+	spin_unlock_irqrestore(&pgd_lock, flags);
+	memset(pgd, 0, USER_PTRS_PER_PGD*sizeof(pgd_t));
+}
+
+/* never called when PTRS_PER_PMD > 1 */
+void pgd_dtor(void *pgd, kmem_cache_t *cache, unsigned long unused)
+{
+	unsigned long flags; /* can be called from interrupt context */
+
+	spin_lock_irqsave(&pgd_lock, flags);
+	pgd_list_del(pgd);
+	spin_unlock_irqrestore(&pgd_lock, flags);
+}
+
+pgd_t *pgd_alloc(struct mm_struct *mm)
+{
+	int i;
+	pgd_t *pgd = kmem_cache_alloc(pgd_cache, GFP_KERNEL);
+
+	if (PTRS_PER_PMD == 1 || !pgd)
+		return pgd;
+
+	for (i = 0; i < USER_PTRS_PER_PGD; ++i) {
+		pmd_t *pmd = kmem_cache_alloc(pmd_cache, GFP_KERNEL);
+		if (!pmd)
+			goto out_oom;
+		set_pgd(&pgd[i], __pgd(1 + __pa((u64)((u32)pmd))));
+	}
+	return pgd;
+
+out_oom:
+	for (i--; i >= 0; i--)
+		kmem_cache_free(pmd_cache, (void *)__va(pgd_val(pgd[i])-1));
+	kmem_cache_free(pgd_cache, pgd);
+	return NULL;
+}
+
+void pgd_free(pgd_t *pgd)
+{
+	int i;
+
+	/* in the PAE case user pgd entries are overwritten before usage */
+	if (PTRS_PER_PMD > 1)
+		for (i = 0; i < USER_PTRS_PER_PGD; ++i)
+			kmem_cache_free(pmd_cache, (void *)__va(pgd_val(pgd[i])-1));
+	/* in the non-PAE case, clear_page_tables() clears user pgd entries */
+	kmem_cache_free(pgd_cache, pgd);
+}
...
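pgd_list_add()/pgd_list_del() above implement a singly linked list with a back pointer to the link that points at you ("pprev"), threaded through the otherwise unused page->index and page->private fields of the pgd's struct page, so removal is O(1) from any position with no list walk and no container node. The same pattern in standalone form (field names are illustrative, not struct page):

	#include <stdio.h>

	struct node {
		struct node *next;	/* plays the role of page->index */
		struct node **pprev;	/* plays the role of page->private */
		int id;
	};

	static struct node *head;

	static void node_add(struct node *n)
	{
		n->next = head;
		if (head)
			head->pprev = &n->next;
		head = n;
		n->pprev = &head;	/* the link that points at me */
	}

	static void node_del(struct node *n)
	{
		*n->pprev = n->next;	/* O(1) unlink, even mid-list */
		if (n->next)
			n->next->pprev = n->pprev;
	}

	int main(void)
	{
		struct node a = { .id = 1 }, b = { .id = 2 }, c = { .id = 3 };
		node_add(&a); node_add(&b); node_add(&c);
		node_del(&b);			/* middle delete, no walk */
		for (struct node *n = head; n; n = n->next)
			printf("%d ", n->id);	/* prints: 3 1 */
		printf("\n");
		return 0;
	}

Reusing struct page fields keeps the bookkeeping at zero extra memory, which is part of why the scheme has "no core impact whatsoever".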
@@ -123,6 +123,4 @@ static inline pmd_t pfn_pmd(unsigned long page_nr, pgprot_t pgprot)
 #define pgoff_to_pte(off) ((pte_t) { _PAGE_FILE, (off) })
 #define PTE_FILE_MAX_BITS	32
 
-extern struct kmem_cache_s *pae_pgd_cachep;
-
 #endif /* _I386_PGTABLE_3LEVEL_H */
@@ -21,15 +21,27 @@
 #include <asm/bitops.h>
 #endif
 
-extern pgd_t swapper_pg_dir[1024];
-extern void paging_init(void);
+#include <linux/slab.h>
+#include <linux/list.h>
+#include <linux/spinlock.h>
 
 /*
  * ZERO_PAGE is a global shared page that is always zero: used
  * for zero-mapped memory areas etc..
  */
-extern unsigned long empty_zero_page[1024];
 #define ZERO_PAGE(vaddr) (virt_to_page(empty_zero_page))
+extern unsigned long empty_zero_page[1024];
+extern pgd_t swapper_pg_dir[1024];
+extern kmem_cache_t *pgd_cache;
+extern kmem_cache_t *pmd_cache;
+extern spinlock_t pgd_lock;
+extern struct page *pgd_list;
+
+void pmd_ctor(void *, kmem_cache_t *, unsigned long);
+void pgd_ctor(void *, kmem_cache_t *, unsigned long);
+void pgd_dtor(void *, kmem_cache_t *, unsigned long);
+void pgtable_cache_init(void);
+void paging_init(void);
 
 #endif /* !__ASSEMBLY__ */
@@ -41,20 +53,8 @@ extern unsigned long empty_zero_page[1024];
 #ifndef __ASSEMBLY__
 #ifdef CONFIG_X86_PAE
 # include <asm/pgtable-3level.h>
-
-/*
- * Need to initialise the X86 PAE caches
- */
-extern void pgtable_cache_init(void);
-
 #else
 # include <asm/pgtable-2level.h>
-
-/*
- * No page table caches to initialise
- */
-#define pgtable_cache_init() do { } while (0)
 #endif
 #endif
...