Commit 7c9d187e authored by Linus Torvalds

First cut at proper TLB shootdown for page directory entries.

parent 40f53750
......@@ -16,7 +16,6 @@
#include <linux/config.h>
#include <asm/tlbflush.h>
#ifdef CONFIG_SMP
/* aim for something that fits in the L1 cache */
#define FREE_PTE_NR 508
......@@ -26,90 +25,100 @@
* shootdown.
*/
/*
 * Gather context for batched TLB shootdown.  One instance exists per CPU
 * (see mmu_gathers[NR_CPUS]); cleared page table entries are queued in
 * ptes[] and released with __free_pte() only after the TLB flush.
 *
 * NOTE(review): this span shows `vma` and two identical
 * `start_addr, end_addr` declarations; it looks like both sides of the
 * diff rendered without -/+ markers -- confirm against the applied file.
 */
typedef struct free_pte_ctx {
struct vm_area_struct *vma; /* vma handed to flush_tlb_range() by the vma-based variant */
struct mm_struct *mm; /* address space whose TLB entries and rss are being updated */
unsigned long nr; /* number of queued ptes; ~0UL means fast mode (nothing is queued) */
unsigned long start_addr, end_addr;
unsigned long freed; /* count of removed entries whose page is valid and not reserved */
unsigned long start_addr, end_addr; /* address of first queued entry / end of last queued entry */
pte_t ptes[FREE_PTE_NR]; /* cleared entries waiting for the flush before being freed */
} mmu_gather_t;
/* Users of the generic TLB shootdown code must declare this storage space. */
extern mmu_gather_t mmu_gathers[NR_CPUS];
/* Do me later */
#define tlb_start_vma(tlb, vma) do { } while (0)
#define tlb_end_vma(tlb, vma) do { } while (0)
/* tlb_gather_mmu
* Return a pointer to an initialized mmu_gather_t.
*
* The context returned is the current CPU's slot in mmu_gathers[]. The
* freed counter is reset and the batching mode is chosen here: fast mode
* (nr == ~0UL) when mm_users is 1, otherwise an empty batch (nr == 0).
*
* NOTE(review): two signatures appear back to back (vma-based and mm-based),
* and both `tlb->vma` and `tlb->mm` are assigned; this looks like both sides
* of the diff shown without -/+ markers -- confirm which lines apply.
*/
static inline mmu_gather_t *tlb_gather_mmu(struct vm_area_struct *vma)
static inline mmu_gather_t *tlb_gather_mmu(struct mm_struct *mm)
{
/* Per-CPU storage, indexed by the CPU we are running on. */
mmu_gather_t *tlb = &mmu_gathers[smp_processor_id()];
struct mm_struct *mm = vma->vm_mm;
tlb->vma = vma;
tlb->mm = mm;
/* Start counting removed pages from zero for this gather. */
tlb->freed = 0;
/* Use fast mode if there is only one user of this mm (this process) */
tlb->nr = (atomic_read(&(mm)->mm_users) == 1) ? ~0UL : 0UL;
return tlb;
}
/* void tlb_remove_page(mmu_gather_t *tlb, pte_t *ptep, unsigned long addr)
* Must perform the equivalent to __free_pte(pte_get_and_clear(ptep)), while
* handling the additional races in SMP caused by other CPUs caching valid
* mappings in their TLBs.
*
* Macro form. In fast mode (nr == ~0UL) the entry is freed and cleared on
* the spot and `break` leaves the do/while(0) wrapper. Otherwise the cleared
* entry is appended to ptes[], start_addr is recorded for the first entry of
* a batch, end_addr is advanced past this page, and tlb_finish_mmu() is
* invoked once FREE_PTE_NR entries are queued.
*
* Note that the arguments are evaluated more than once.
*/
#define tlb_remove_page(ctxp, pte, addr) do {\
/* Handle the common case fast, first. */\
if ((ctxp)->nr == ~0UL) {\
__free_pte(*(pte));\
pte_clear((pte));\
break;\
}\
if (!(ctxp)->nr) \
(ctxp)->start_addr = (addr);\
(ctxp)->ptes[(ctxp)->nr++] = ptep_get_and_clear(pte);\
(ctxp)->end_addr = (addr) + PAGE_SIZE;\
if ((ctxp)->nr >= FREE_PTE_NR)\
tlb_finish_mmu((ctxp), 0, 0);\
} while (0)
/* tlb_finish_mmu / tlb_flush_mmu
* Called at the end of the shootdown operation to free up any resources
* that were required. The page table lock is still held at this point.
*
* Flushes the TLB and then releases every queued entry with __free_pte().
* In fast mode (nr == ~0UL) nothing is queued, so only the flush is done.
* In batched mode nr is reset to 0 before the queued entries are freed, and
* the flush is skipped when the batch is empty.
*
* NOTE(review): lines using `ctx` / flush_tlb_range() and lines using
* `tlb` / flush_tlb_mm() are interleaved here, along with two signatures;
* this looks like both sides of the diff shown without -/+ markers --
* confirm which lines apply.
*/
static inline void tlb_finish_mmu(struct free_pte_ctx *ctx, unsigned long start, unsigned long end)
static inline void tlb_flush_mmu(mmu_gather_t *tlb, unsigned long start, unsigned long end)
{
unsigned long i, nr;
/* Handle the fast case first. */
if (ctx->nr == ~0UL) {
flush_tlb_range(ctx->vma, start, end);
if (tlb->nr == ~0UL) {
flush_tlb_mm(tlb->mm);
return;
}
/* Take the batch size and mark the batch empty before freeing. */
nr = ctx->nr;
ctx->nr = 0;
nr = tlb->nr;
tlb->nr = 0;
/* Only flush when something was actually queued. */
if (nr)
flush_tlb_range(ctx->vma, ctx->start_addr, ctx->end_addr);
flush_tlb_mm(tlb->mm);
/* The flush is done; the queued entries can now be released. */
for (i=0; i < nr; i++) {
pte_t pte = ctx->ptes[i];
pte_t pte = tlb->ptes[i];
__free_pte(pte);
}
}
#else
/* The uniprocessor functions are quite simple and are inline macros in an
* attempt to get gcc to generate optimal code since this code is run on each
* page in a process at exit.
/* tlb_finish_mmu
* Called at the end of the shootdown operation to free up any resources
* that were required. The page table lock is still held at this point.
*/
typedef struct vm_area_struct mmu_gather_t;
/*
 * Finish a gather: subtract the number of removed pages (tlb->freed) from
 * mm->rss, then flush the TLB and free any queued entries through
 * tlb_flush_mmu().  start/end are passed through unchanged.
 */
static inline void tlb_finish_mmu(mmu_gather_t *tlb, unsigned long start, unsigned long end)
{
int freed = tlb->freed;
struct mm_struct *mm = tlb->mm;
int rss = mm->rss;
/* Clamp so that rss never drops below zero. */
if (rss < freed)
freed = rss;
mm->rss = rss - freed;
/*
 * NOTE(review): the #define lines below sit in the middle of this function
 * body; they look like the former uniprocessor macro variants from the other
 * side of the diff, shown without -/+ markers -- confirm against the applied
 * file.
 */
#define tlb_gather_mmu(vma) (vma)
#define tlb_finish_mmu(tlb, start, end) flush_tlb_range(tlb, start, end)
#define tlb_remove_page(tlb, ptep, addr) do {\
pte_t __pte = *(ptep);\
pte_clear(ptep);\
__free_pte(__pte);\
} while (0)
tlb_flush_mmu(tlb, start, end);
}
#endif
/* void tlb_remove_page(mmu_gather_t *tlb, pte_t *pte, unsigned long addr)
 *	Must perform the equivalent to __free_pte(pte_get_and_clear(pte)), while
 *	handling the additional races in SMP caused by other CPUs caching valid
 *	mappings in their TLBs.
 *
 *	tlb:  gather context obtained from tlb_gather_mmu()
 *	pte:  page table entry to clear
 *	addr: virtual address mapped by that entry
 *
 *	In fast mode (tlb->nr == ~0UL) the entry is freed and cleared at once.
 *	Otherwise the cleared entry is queued in tlb->ptes[] and the queue is
 *	flushed as soon as FREE_PTE_NR entries have accumulated.
 */
static inline void tlb_remove_page(mmu_gather_t *tlb, pte_t *pte, unsigned long addr)
{
	struct page *page;
	unsigned long pfn = pte_pfn(*pte);

	/*
	 * Count entries that map a valid, non-reserved page; tlb_finish_mmu()
	 * later takes this count off mm->rss.
	 */
	if (pfn_valid(pfn)) {
		page = pfn_to_page(pfn);
		if (!PageReserved(page))
			tlb->freed++;
	}

	/* Handle the common case fast, first. */
	if (tlb->nr == ~0UL) {
		__free_pte(*pte);
		pte_clear(pte);
		return;
	}

	/* The first entry of a batch records where the batch starts. */
	if (!tlb->nr)
		tlb->start_addr = addr;
	tlb->ptes[tlb->nr++] = ptep_get_and_clear(pte);
	tlb->end_addr = addr + PAGE_SIZE;

	/*
	 * Batch full: flush the TLB and free the queued entries only.
	 * tlb->freed is cumulative over the whole gather, so going through
	 * tlb_finish_mmu() here would subtract it from mm->rss once per full
	 * batch and then again when the caller finishes the gather.
	 */
	if (tlb->nr >= FREE_PTE_NR)
		tlb_flush_mmu(tlb, 0, 0);
}
#endif /* _ASM_GENERIC__TLB_H */
......@@ -133,18 +133,18 @@ static inline void free_one_pgd(pgd_t * dir)
/*
* This function clears all user-level page tables of a process - this
* is needed by execve(), so that old pages aren't in the way.
*
* Must be called with pagetable lock held.
*/
void clear_page_tables(struct mm_struct *mm, unsigned long first, int nr)
{
pgd_t * page_dir = mm->pgd;
spin_lock(&mm->page_table_lock);
page_dir += first;
do {
free_one_pgd(page_dir);
page_dir++;
} while (--nr);
spin_unlock(&mm->page_table_lock);
/* keep the page table cache within bounds */
check_pgt_cache();
......@@ -340,18 +340,17 @@ static inline void forget_pte(pte_t page)
}
}
static inline int zap_pte_range(mmu_gather_t *tlb, pmd_t * pmd, unsigned long address, unsigned long size)
static void zap_pte_range(mmu_gather_t *tlb, pmd_t * pmd, unsigned long address, unsigned long size)
{
unsigned long offset;
pte_t *ptep;
int freed = 0;
if (pmd_none(*pmd))
return 0;
return;
if (pmd_bad(*pmd)) {
pmd_ERROR(*pmd);
pmd_clear(pmd);
return 0;
return;
}
ptep = pte_offset_map(pmd, address);
offset = address & ~PMD_MASK;
......@@ -363,13 +362,6 @@ static inline int zap_pte_range(mmu_gather_t *tlb, pmd_t * pmd, unsigned long ad
if (pte_none(pte))
continue;
if (pte_present(pte)) {
struct page *page;
unsigned long pfn = pte_pfn(pte);
if (pfn_valid(pfn)) {
page = pfn_to_page(pfn);
if (!PageReserved(page))
freed++;
}
/* This will eventually call __free_pte on the pte. */
tlb_remove_page(tlb, ptep, address + offset);
} else {
......@@ -378,34 +370,45 @@ static inline int zap_pte_range(mmu_gather_t *tlb, pmd_t * pmd, unsigned long ad
}
}
pte_unmap(ptep-1);
return freed;
}
/*
 * Zap the part of [address, address + size) that is covered by the single
 * page directory entry `dir`, by calling zap_pte_range() for each pmd entry
 * in that range.  An empty pgd entry is skipped; a bad one is reported with
 * pgd_ERROR() and cleared.
 *
 * NOTE(review): an `int`-returning variant that sums a `freed` count and a
 * `void` variant are interleaved below (two signatures, paired return
 * statements); this looks like both sides of the diff shown without -/+
 * markers -- confirm which lines apply.
 */
static inline int zap_pmd_range(mmu_gather_t *tlb, pgd_t * dir, unsigned long address, unsigned long size)
static void zap_pmd_range(mmu_gather_t *tlb, pgd_t * dir, unsigned long address, unsigned long size)
{
pmd_t * pmd;
unsigned long end;
int freed;
if (pgd_none(*dir))
return 0;
return;
if (pgd_bad(*dir)) {
pgd_ERROR(*dir);
pgd_clear(dir);
return 0;
return;
}
pmd = pmd_offset(dir, address);
end = address + size;
/* Never walk past the end of the region covered by this pgd entry. */
if (end > ((address + PGDIR_SIZE) & PGDIR_MASK))
end = ((address + PGDIR_SIZE) & PGDIR_MASK);
freed = 0;
do {
freed += zap_pte_range(tlb, pmd, address, end - address);
zap_pte_range(tlb, pmd, address, end - address);
/* Step to the start of the next pmd-sized region. */
address = (address + PMD_SIZE) & PMD_MASK;
pmd++;
} while (address < end);
return freed;
}
/*
 * Walk the page directory entries covering [address, end) in vma's address
 * space and hand each one to zap_pmd_range(), bracketed by the
 * tlb_start_vma()/tlb_end_vma() hooks.
 *
 *	tlb:     gather context the removed entries are reported to
 *	vma:     area being unmapped; supplies the mm for the pgd lookup
 *	address: first address to unmap
 *	end:     address just past the range; must be greater than address
 */
void unmap_page_range(mmu_gather_t *tlb, struct vm_area_struct *vma, unsigned long address, unsigned long end)
{
	pgd_t *pgd;

	/* An empty or inverted range is a caller bug. */
	if (end <= address)
		BUG();

	pgd = pgd_offset(vma->vm_mm, address);

	tlb_start_vma(tlb, vma);
	for (;;) {
		zap_pmd_range(tlb, pgd, address, end - address);

		/* Advance to the start of the next pgd-sized region. */
		address = (address + PGDIR_SIZE) & PGDIR_MASK;
		pgd++;

		/* Stop when the address wrapped to zero or the range is done. */
		if (!address || address >= end)
			break;
	}
	tlb_end_vma(tlb, vma);
}
/*
......@@ -417,7 +420,6 @@ void zap_page_range(struct vm_area_struct *vma, unsigned long address, unsigned
mmu_gather_t *tlb;
pgd_t * dir;
unsigned long start = address, end = address + size;
int freed = 0;
dir = pgd_offset(mm, address);
......@@ -432,25 +434,10 @@ void zap_page_range(struct vm_area_struct *vma, unsigned long address, unsigned
BUG();
spin_lock(&mm->page_table_lock);
flush_cache_range(vma, address, end);
tlb = tlb_gather_mmu(vma);
do {
freed += zap_pmd_range(tlb, dir, address, end - address);
address = (address + PGDIR_SIZE) & PGDIR_MASK;
dir++;
} while (address && (address < end));
/* this will flush any remaining tlb entries */
tlb = tlb_gather_mmu(mm);
unmap_page_range(tlb, vma, address, end);
tlb_finish_mmu(tlb, start, end);
/*
* Update rss for the mm_struct (not necessarily current->mm)
* Notice that rss is an unsigned long.
*/
if (mm->rss > freed)
mm->rss -= freed;
else
mm->rss = 0;
spin_unlock(&mm->page_table_lock);
}
......
......@@ -17,7 +17,9 @@
#include <asm/uaccess.h>
#include <asm/pgalloc.h>
#include <asm/tlbflush.h>
#include <asm/tlb.h>
extern void unmap_page_range(mmu_gather_t *,struct vm_area_struct *vma, unsigned long address, unsigned long size);
/*
* WARNING: the debugging will use recursive algorithms so never enable this
......@@ -329,11 +331,11 @@ static void __vma_link(struct mm_struct * mm, struct vm_area_struct * vma, stru
static inline void vma_link(struct mm_struct * mm, struct vm_area_struct * vma, struct vm_area_struct * prev,
rb_node_t ** rb_link, rb_node_t * rb_parent)
{
lock_vma_mappings(vma);
spin_lock(&mm->page_table_lock);
lock_vma_mappings(vma);
__vma_link(mm, vma, prev, rb_link, rb_parent);
spin_unlock(&mm->page_table_lock);
unlock_vma_mappings(vma);
spin_unlock(&mm->page_table_lock);
mm->map_count++;
validate_mm(mm);
......@@ -781,13 +783,11 @@ static struct vm_area_struct * unmap_fixup(struct mm_struct *mm,
*/
area->vm_end = addr;
lock_vma_mappings(area);
spin_lock(&mm->page_table_lock);
} else if (addr == area->vm_start) {
area->vm_pgoff += (end - area->vm_start) >> PAGE_SHIFT;
/* same locking considerations of the above case */
area->vm_start = end;
lock_vma_mappings(area);
spin_lock(&mm->page_table_lock);
} else {
/* Unmapping a hole: area->vm_start < addr <= end < area->vm_end */
/* Add end mapping -- leave beginning for below */
......@@ -814,12 +814,10 @@ static struct vm_area_struct * unmap_fixup(struct mm_struct *mm,
* things correctly.
*/
lock_vma_mappings(area);
spin_lock(&mm->page_table_lock);
__insert_vm_struct(mm, mpnt);
}
__insert_vm_struct(mm, area);
spin_unlock(&mm->page_table_lock);
unlock_vma_mappings(area);
return extra;
}
......@@ -889,6 +887,7 @@ static void free_pgtables(struct mm_struct * mm, struct vm_area_struct *prev,
*/
int do_munmap(struct mm_struct *mm, unsigned long addr, size_t len)
{
mmu_gather_t *tlb;
struct vm_area_struct *mpnt, *prev, **npp, *free, *extra;
if ((addr & ~PAGE_MASK) || addr > TASK_SIZE || len > TASK_SIZE-addr)
......@@ -933,7 +932,8 @@ int do_munmap(struct mm_struct *mm, unsigned long addr, size_t len)
rb_erase(&mpnt->vm_rb, &mm->mm_rb);
}
mm->mmap_cache = NULL; /* Kill the cache. */
spin_unlock(&mm->page_table_lock);
tlb = tlb_gather_mmu(mm);
/* Ok - we have the memory areas we should free on the 'free' list,
* so release them, and unmap the page range..
......@@ -942,7 +942,7 @@ int do_munmap(struct mm_struct *mm, unsigned long addr, size_t len)
* In that case we have to be careful with VM_DENYWRITE.
*/
while ((mpnt = free) != NULL) {
unsigned long st, end, size;
unsigned long st, end;
struct file *file = NULL;
free = free->vm_next;
......@@ -950,7 +950,6 @@ int do_munmap(struct mm_struct *mm, unsigned long addr, size_t len)
st = addr < mpnt->vm_start ? mpnt->vm_start : addr;
end = addr+len;
end = end > mpnt->vm_end ? mpnt->vm_end : end;
size = end - st;
if (mpnt->vm_flags & VM_DENYWRITE &&
(st != mpnt->vm_start || end != mpnt->vm_end) &&
......@@ -960,12 +959,12 @@ int do_munmap(struct mm_struct *mm, unsigned long addr, size_t len)
remove_shared_vm_struct(mpnt);
mm->map_count--;
zap_page_range(mpnt, st, size);
unmap_page_range(tlb, mpnt, st, end);
/*
* Fix the mapping, and free the old area if it wasn't reused.
*/
extra = unmap_fixup(mm, mpnt, st, size, extra);
extra = unmap_fixup(mm, mpnt, st, end-st, extra);
if (file)
atomic_inc(&file->f_dentry->d_inode->i_writecount);
}
......@@ -976,6 +975,8 @@ int do_munmap(struct mm_struct *mm, unsigned long addr, size_t len)
kmem_cache_free(vm_area_cachep, extra);
free_pgtables(mm, prev, addr, addr+len);
tlb_finish_mmu(tlb, addr, addr+len);
spin_unlock(&mm->page_table_lock);
return 0;
}
......@@ -1092,6 +1093,7 @@ void build_mmap_rb(struct mm_struct * mm)
/* Release all mmaps. */
void exit_mmap(struct mm_struct * mm)
{
mmu_gather_t *tlb;
struct vm_area_struct * mpnt;
release_segments(mm);
......@@ -1100,16 +1102,16 @@ void exit_mmap(struct mm_struct * mm)
mm->mmap = mm->mmap_cache = NULL;
mm->mm_rb = RB_ROOT;
mm->rss = 0;
spin_unlock(&mm->page_table_lock);
mm->total_vm = 0;
mm->locked_vm = 0;
tlb = tlb_gather_mmu(mm);
flush_cache_mm(mm);
while (mpnt) {
struct vm_area_struct * next = mpnt->vm_next;
unsigned long start = mpnt->vm_start;
unsigned long end = mpnt->vm_end;
unsigned long size = end - start;
if (mpnt->vm_ops) {
if (mpnt->vm_ops->close)
......@@ -1117,19 +1119,20 @@ void exit_mmap(struct mm_struct * mm)
}
mm->map_count--;
remove_shared_vm_struct(mpnt);
zap_page_range(mpnt, start, size);
unmap_page_range(tlb, mpnt, start, end);
if (mpnt->vm_file)
fput(mpnt->vm_file);
kmem_cache_free(vm_area_cachep, mpnt);
mpnt = next;
}
flush_tlb_mm(mm);
/* This is just debugging */
if (mm->map_count)
BUG();
clear_page_tables(mm, FIRST_USER_PGD_NR, USER_PTRS_PER_PGD);
tlb_finish_mmu(tlb, FIRST_USER_PGD_NR*PGDIR_SIZE, USER_PTRS_PER_PGD*PGDIR_SIZE);
spin_unlock(&mm->page_table_lock);
}
/* Insert vm structure into process list sorted by address
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment