Commit 7c9d187e authored by Linus Torvalds

First cut at proper TLB shootdown for page directory entries.

parent 40f53750
...@@ -16,7 +16,6 @@ ...@@ -16,7 +16,6 @@
#include <linux/config.h> #include <linux/config.h>
#include <asm/tlbflush.h> #include <asm/tlbflush.h>
#ifdef CONFIG_SMP
/* aim for something that fits in the L1 cache */ /* aim for something that fits in the L1 cache */
#define FREE_PTE_NR 508 #define FREE_PTE_NR 508
...@@ -26,90 +25,100 @@ ...@@ -26,90 +25,100 @@
* shootdown. * shootdown.
*/ */
typedef struct free_pte_ctx { typedef struct free_pte_ctx {
struct vm_area_struct *vma; struct mm_struct *mm;
unsigned long nr; /* set to ~0UL means fast mode */ unsigned long nr; /* set to ~0UL means fast mode */
unsigned long start_addr, end_addr; unsigned long freed;
unsigned long start_addr, end_addr;
pte_t ptes[FREE_PTE_NR]; pte_t ptes[FREE_PTE_NR];
} mmu_gather_t; } mmu_gather_t;
/* Users of the generic TLB shootdown code must declare this storage space. */ /* Users of the generic TLB shootdown code must declare this storage space. */
extern mmu_gather_t mmu_gathers[NR_CPUS]; extern mmu_gather_t mmu_gathers[NR_CPUS];
/*
 * Per-VMA hooks that unmap_page_range() invokes before and after it walks
 * the page tables of a VMA.  Do me later: for now both expand to an empty
 * statement, so they have no effect.
 */
#define tlb_start_vma(tlb, vma) do { } while (0)
#define tlb_end_vma(tlb, vma) do { } while (0)
/* tlb_gather_mmu /* tlb_gather_mmu
* Return a pointer to an initialized mmu_gather_t. * Return a pointer to an initialized mmu_gather_t.
*/ */
static inline mmu_gather_t *tlb_gather_mmu(struct vm_area_struct *vma) static inline mmu_gather_t *tlb_gather_mmu(struct mm_struct *mm)
{ {
mmu_gather_t *tlb = &mmu_gathers[smp_processor_id()]; mmu_gather_t *tlb = &mmu_gathers[smp_processor_id()];
struct mm_struct *mm = vma->vm_mm;
tlb->vma = vma; tlb->mm = mm;
tlb->freed = 0;
/* Use fast mode if there is only one user of this mm (this process) */ /* Use fast mode if there is only one user of this mm (this process) */
tlb->nr = (atomic_read(&(mm)->mm_users) == 1) ? ~0UL : 0UL; tlb->nr = (atomic_read(&(mm)->mm_users) == 1) ? ~0UL : 0UL;
return tlb; return tlb;
} }
/* void tlb_remove_page(mmu_gather_t *tlb, pte_t *ptep, unsigned long addr) static inline void tlb_flush_mmu(mmu_gather_t *tlb, unsigned long start, unsigned long end)
* Must perform the equivalent to __free_pte(pte_get_and_clear(ptep)), while
* handling the additional races in SMP caused by other CPUs caching valid
* mappings in their TLBs.
*/
#define tlb_remove_page(ctxp, pte, addr) do {\
/* Handle the common case fast, first. */\
if ((ctxp)->nr == ~0UL) {\
__free_pte(*(pte));\
pte_clear((pte));\
break;\
}\
if (!(ctxp)->nr) \
(ctxp)->start_addr = (addr);\
(ctxp)->ptes[(ctxp)->nr++] = ptep_get_and_clear(pte);\
(ctxp)->end_addr = (addr) + PAGE_SIZE;\
if ((ctxp)->nr >= FREE_PTE_NR)\
tlb_finish_mmu((ctxp), 0, 0);\
} while (0)
/* tlb_finish_mmu
* Called at the end of the shootdown operation to free up any resources
* that were required. The page table lock is still held at this point.
*/
static inline void tlb_finish_mmu(struct free_pte_ctx *ctx, unsigned long start, unsigned long end)
{ {
unsigned long i, nr; unsigned long i, nr;
/* Handle the fast case first. */ /* Handle the fast case first. */
if (ctx->nr == ~0UL) { if (tlb->nr == ~0UL) {
flush_tlb_range(ctx->vma, start, end); flush_tlb_mm(tlb->mm);
return; return;
} }
nr = ctx->nr; nr = tlb->nr;
ctx->nr = 0; tlb->nr = 0;
if (nr) if (nr)
flush_tlb_range(ctx->vma, ctx->start_addr, ctx->end_addr); flush_tlb_mm(tlb->mm);
for (i=0; i < nr; i++) { for (i=0; i < nr; i++) {
pte_t pte = ctx->ptes[i]; pte_t pte = tlb->ptes[i];
__free_pte(pte); __free_pte(pte);
} }
} }
#else /* tlb_finish_mmu
* Called at the end of the shootdown operation to free up any resources
/* The uniprocessor functions are quite simple and are inline macros in an * that were required. The page table lock is still held at this point.
* attempt to get gcc to generate optimal code since this code is run on each
* page in a process at exit.
*/ */
typedef struct vm_area_struct mmu_gather_t; static inline void tlb_finish_mmu(mmu_gather_t *tlb, unsigned long start, unsigned long end)
{
int freed = tlb->freed;
struct mm_struct *mm = tlb->mm;
int rss = mm->rss;
if (rss < freed)
freed = rss;
mm->rss = rss - freed;
#define tlb_gather_mmu(vma) (vma) tlb_flush_mmu(tlb, start, end);
#define tlb_finish_mmu(tlb, start, end) flush_tlb_range(tlb, start, end) }
#define tlb_remove_page(tlb, ptep, addr) do {\
pte_t __pte = *(ptep);\
pte_clear(ptep);\
__free_pte(__pte);\
} while (0)
#endif
/* tlb_remove_page
 *	Must perform the equivalent to __free_pte(pte_get_and_clear(ptep)), while
 *	handling the additional races in SMP caused by other CPUs caching valid
 *	mappings in their TLBs.
 *
 *	tlb:  gather context obtained from tlb_gather_mmu()
 *	pte:  page table entry to clear and release
 *	addr: virtual address mapped by pte (used to track the batched range)
 *
 *	Every entry whose pfn is valid and whose page is not reserved bumps
 *	tlb->freed; tlb_finish_mmu() later subtracts that total from mm->rss.
 */
static inline void tlb_remove_page(mmu_gather_t *tlb, pte_t *pte, unsigned long addr)
{
	unsigned long pfn = pte_pfn(*pte);

	/* Only valid, non-reserved pages count towards the freed total. */
	if (pfn_valid(pfn)) {
		struct page *page = pfn_to_page(pfn);

		if (!PageReserved(page))
			tlb->freed++;
	}

	/* Handle the common case fast, first. */
	if (tlb->nr == ~0UL) {
		__free_pte(*pte);
		pte_clear(pte);
		return;
	}

	/* Slow mode: clear the entry now, free the page after the flush. */
	if (!tlb->nr)
		tlb->start_addr = addr;
	tlb->ptes[tlb->nr++] = ptep_get_and_clear(pte);
	tlb->end_addr = addr + PAGE_SIZE;

	/*
	 * Batch is full: flush the TLB and release the gathered pages.
	 * This must be tlb_flush_mmu(), not tlb_finish_mmu(): the latter
	 * subtracts the running tlb->freed total from mm->rss without
	 * resetting it, so calling it once per batch would subtract the
	 * same pages from rss over and over.
	 */
	if (tlb->nr >= FREE_PTE_NR)
		tlb_flush_mmu(tlb, 0, 0);
}
#endif /* _ASM_GENERIC__TLB_H */ #endif /* _ASM_GENERIC__TLB_H */
...@@ -133,18 +133,18 @@ static inline void free_one_pgd(pgd_t * dir) ...@@ -133,18 +133,18 @@ static inline void free_one_pgd(pgd_t * dir)
/* /*
* This function clears all user-level page tables of a process - this * This function clears all user-level page tables of a process - this
* is needed by execve(), so that old pages aren't in the way. * is needed by execve(), so that old pages aren't in the way.
*
* Must be called with pagetable lock held.
*/ */
void clear_page_tables(struct mm_struct *mm, unsigned long first, int nr) void clear_page_tables(struct mm_struct *mm, unsigned long first, int nr)
{ {
pgd_t * page_dir = mm->pgd; pgd_t * page_dir = mm->pgd;
spin_lock(&mm->page_table_lock);
page_dir += first; page_dir += first;
do { do {
free_one_pgd(page_dir); free_one_pgd(page_dir);
page_dir++; page_dir++;
} while (--nr); } while (--nr);
spin_unlock(&mm->page_table_lock);
/* keep the page table cache within bounds */ /* keep the page table cache within bounds */
check_pgt_cache(); check_pgt_cache();
...@@ -340,18 +340,17 @@ static inline void forget_pte(pte_t page) ...@@ -340,18 +340,17 @@ static inline void forget_pte(pte_t page)
} }
} }
static inline int zap_pte_range(mmu_gather_t *tlb, pmd_t * pmd, unsigned long address, unsigned long size) static void zap_pte_range(mmu_gather_t *tlb, pmd_t * pmd, unsigned long address, unsigned long size)
{ {
unsigned long offset; unsigned long offset;
pte_t *ptep; pte_t *ptep;
int freed = 0;
if (pmd_none(*pmd)) if (pmd_none(*pmd))
return 0; return;
if (pmd_bad(*pmd)) { if (pmd_bad(*pmd)) {
pmd_ERROR(*pmd); pmd_ERROR(*pmd);
pmd_clear(pmd); pmd_clear(pmd);
return 0; return;
} }
ptep = pte_offset_map(pmd, address); ptep = pte_offset_map(pmd, address);
offset = address & ~PMD_MASK; offset = address & ~PMD_MASK;
...@@ -363,13 +362,6 @@ static inline int zap_pte_range(mmu_gather_t *tlb, pmd_t * pmd, unsigned long ad ...@@ -363,13 +362,6 @@ static inline int zap_pte_range(mmu_gather_t *tlb, pmd_t * pmd, unsigned long ad
if (pte_none(pte)) if (pte_none(pte))
continue; continue;
if (pte_present(pte)) { if (pte_present(pte)) {
struct page *page;
unsigned long pfn = pte_pfn(pte);
if (pfn_valid(pfn)) {
page = pfn_to_page(pfn);
if (!PageReserved(page))
freed++;
}
/* This will eventually call __free_pte on the pte. */ /* This will eventually call __free_pte on the pte. */
tlb_remove_page(tlb, ptep, address + offset); tlb_remove_page(tlb, ptep, address + offset);
} else { } else {
...@@ -378,34 +370,45 @@ static inline int zap_pte_range(mmu_gather_t *tlb, pmd_t * pmd, unsigned long ad ...@@ -378,34 +370,45 @@ static inline int zap_pte_range(mmu_gather_t *tlb, pmd_t * pmd, unsigned long ad
} }
} }
pte_unmap(ptep-1); pte_unmap(ptep-1);
return freed;
} }
static inline int zap_pmd_range(mmu_gather_t *tlb, pgd_t * dir, unsigned long address, unsigned long size) static void zap_pmd_range(mmu_gather_t *tlb, pgd_t * dir, unsigned long address, unsigned long size)
{ {
pmd_t * pmd; pmd_t * pmd;
unsigned long end; unsigned long end;
int freed;
if (pgd_none(*dir)) if (pgd_none(*dir))
return 0; return;
if (pgd_bad(*dir)) { if (pgd_bad(*dir)) {
pgd_ERROR(*dir); pgd_ERROR(*dir);
pgd_clear(dir); pgd_clear(dir);
return 0; return;
} }
pmd = pmd_offset(dir, address); pmd = pmd_offset(dir, address);
end = address + size; end = address + size;
if (end > ((address + PGDIR_SIZE) & PGDIR_MASK)) if (end > ((address + PGDIR_SIZE) & PGDIR_MASK))
end = ((address + PGDIR_SIZE) & PGDIR_MASK); end = ((address + PGDIR_SIZE) & PGDIR_MASK);
freed = 0;
do { do {
freed += zap_pte_range(tlb, pmd, address, end - address); zap_pte_range(tlb, pmd, address, end - address);
address = (address + PMD_SIZE) & PMD_MASK; address = (address + PMD_SIZE) & PMD_MASK;
pmd++; pmd++;
} while (address < end); } while (address < end);
return freed; }
void unmap_page_range(mmu_gather_t *tlb, struct vm_area_struct *vma, unsigned long address, unsigned long end)
{
pgd_t * dir;
if (address >= end)
BUG();
dir = pgd_offset(vma->vm_mm, address);
tlb_start_vma(tlb, vma);
do {
zap_pmd_range(tlb, dir, address, end - address);
address = (address + PGDIR_SIZE) & PGDIR_MASK;
dir++;
} while (address && (address < end));
tlb_end_vma(tlb, vma);
} }
/* /*
...@@ -417,7 +420,6 @@ void zap_page_range(struct vm_area_struct *vma, unsigned long address, unsigned ...@@ -417,7 +420,6 @@ void zap_page_range(struct vm_area_struct *vma, unsigned long address, unsigned
mmu_gather_t *tlb; mmu_gather_t *tlb;
pgd_t * dir; pgd_t * dir;
unsigned long start = address, end = address + size; unsigned long start = address, end = address + size;
int freed = 0;
dir = pgd_offset(mm, address); dir = pgd_offset(mm, address);
...@@ -432,25 +434,10 @@ void zap_page_range(struct vm_area_struct *vma, unsigned long address, unsigned ...@@ -432,25 +434,10 @@ void zap_page_range(struct vm_area_struct *vma, unsigned long address, unsigned
BUG(); BUG();
spin_lock(&mm->page_table_lock); spin_lock(&mm->page_table_lock);
flush_cache_range(vma, address, end); flush_cache_range(vma, address, end);
tlb = tlb_gather_mmu(vma);
do { tlb = tlb_gather_mmu(mm);
freed += zap_pmd_range(tlb, dir, address, end - address); unmap_page_range(tlb, vma, address, end);
address = (address + PGDIR_SIZE) & PGDIR_MASK;
dir++;
} while (address && (address < end));
/* this will flush any remaining tlb entries */
tlb_finish_mmu(tlb, start, end); tlb_finish_mmu(tlb, start, end);
/*
* Update rss for the mm_struct (not necessarily current->mm)
* Notice that rss is an unsigned long.
*/
if (mm->rss > freed)
mm->rss -= freed;
else
mm->rss = 0;
spin_unlock(&mm->page_table_lock); spin_unlock(&mm->page_table_lock);
} }
......
...@@ -17,7 +17,9 @@ ...@@ -17,7 +17,9 @@
#include <asm/uaccess.h> #include <asm/uaccess.h>
#include <asm/pgalloc.h> #include <asm/pgalloc.h>
#include <asm/tlbflush.h> #include <asm/tlb.h>
extern void unmap_page_range(mmu_gather_t *,struct vm_area_struct *vma, unsigned long address, unsigned long size);
/* /*
* WARNING: the debugging will use recursive algorithms so never enable this * WARNING: the debugging will use recursive algorithms so never enable this
...@@ -329,11 +331,11 @@ static void __vma_link(struct mm_struct * mm, struct vm_area_struct * vma, stru ...@@ -329,11 +331,11 @@ static void __vma_link(struct mm_struct * mm, struct vm_area_struct * vma, stru
static inline void vma_link(struct mm_struct * mm, struct vm_area_struct * vma, struct vm_area_struct * prev, static inline void vma_link(struct mm_struct * mm, struct vm_area_struct * vma, struct vm_area_struct * prev,
rb_node_t ** rb_link, rb_node_t * rb_parent) rb_node_t ** rb_link, rb_node_t * rb_parent)
{ {
lock_vma_mappings(vma);
spin_lock(&mm->page_table_lock); spin_lock(&mm->page_table_lock);
lock_vma_mappings(vma);
__vma_link(mm, vma, prev, rb_link, rb_parent); __vma_link(mm, vma, prev, rb_link, rb_parent);
spin_unlock(&mm->page_table_lock);
unlock_vma_mappings(vma); unlock_vma_mappings(vma);
spin_unlock(&mm->page_table_lock);
mm->map_count++; mm->map_count++;
validate_mm(mm); validate_mm(mm);
...@@ -781,13 +783,11 @@ static struct vm_area_struct * unmap_fixup(struct mm_struct *mm, ...@@ -781,13 +783,11 @@ static struct vm_area_struct * unmap_fixup(struct mm_struct *mm,
*/ */
area->vm_end = addr; area->vm_end = addr;
lock_vma_mappings(area); lock_vma_mappings(area);
spin_lock(&mm->page_table_lock);
} else if (addr == area->vm_start) { } else if (addr == area->vm_start) {
area->vm_pgoff += (end - area->vm_start) >> PAGE_SHIFT; area->vm_pgoff += (end - area->vm_start) >> PAGE_SHIFT;
/* same locking considerations of the above case */ /* same locking considerations of the above case */
area->vm_start = end; area->vm_start = end;
lock_vma_mappings(area); lock_vma_mappings(area);
spin_lock(&mm->page_table_lock);
} else { } else {
/* Unmapping a hole: area->vm_start < addr <= end < area->vm_end */ /* Unmapping a hole: area->vm_start < addr <= end < area->vm_end */
/* Add end mapping -- leave beginning for below */ /* Add end mapping -- leave beginning for below */
...@@ -814,12 +814,10 @@ static struct vm_area_struct * unmap_fixup(struct mm_struct *mm, ...@@ -814,12 +814,10 @@ static struct vm_area_struct * unmap_fixup(struct mm_struct *mm,
* things correctly. * things correctly.
*/ */
lock_vma_mappings(area); lock_vma_mappings(area);
spin_lock(&mm->page_table_lock);
__insert_vm_struct(mm, mpnt); __insert_vm_struct(mm, mpnt);
} }
__insert_vm_struct(mm, area); __insert_vm_struct(mm, area);
spin_unlock(&mm->page_table_lock);
unlock_vma_mappings(area); unlock_vma_mappings(area);
return extra; return extra;
} }
...@@ -889,6 +887,7 @@ static void free_pgtables(struct mm_struct * mm, struct vm_area_struct *prev, ...@@ -889,6 +887,7 @@ static void free_pgtables(struct mm_struct * mm, struct vm_area_struct *prev,
*/ */
int do_munmap(struct mm_struct *mm, unsigned long addr, size_t len) int do_munmap(struct mm_struct *mm, unsigned long addr, size_t len)
{ {
mmu_gather_t *tlb;
struct vm_area_struct *mpnt, *prev, **npp, *free, *extra; struct vm_area_struct *mpnt, *prev, **npp, *free, *extra;
if ((addr & ~PAGE_MASK) || addr > TASK_SIZE || len > TASK_SIZE-addr) if ((addr & ~PAGE_MASK) || addr > TASK_SIZE || len > TASK_SIZE-addr)
...@@ -933,7 +932,8 @@ int do_munmap(struct mm_struct *mm, unsigned long addr, size_t len) ...@@ -933,7 +932,8 @@ int do_munmap(struct mm_struct *mm, unsigned long addr, size_t len)
rb_erase(&mpnt->vm_rb, &mm->mm_rb); rb_erase(&mpnt->vm_rb, &mm->mm_rb);
} }
mm->mmap_cache = NULL; /* Kill the cache. */ mm->mmap_cache = NULL; /* Kill the cache. */
spin_unlock(&mm->page_table_lock);
tlb = tlb_gather_mmu(mm);
/* Ok - we have the memory areas we should free on the 'free' list, /* Ok - we have the memory areas we should free on the 'free' list,
* so release them, and unmap the page range.. * so release them, and unmap the page range..
...@@ -942,7 +942,7 @@ int do_munmap(struct mm_struct *mm, unsigned long addr, size_t len) ...@@ -942,7 +942,7 @@ int do_munmap(struct mm_struct *mm, unsigned long addr, size_t len)
* In that case we have to be careful with VM_DENYWRITE. * In that case we have to be careful with VM_DENYWRITE.
*/ */
while ((mpnt = free) != NULL) { while ((mpnt = free) != NULL) {
unsigned long st, end, size; unsigned long st, end;
struct file *file = NULL; struct file *file = NULL;
free = free->vm_next; free = free->vm_next;
...@@ -950,7 +950,6 @@ int do_munmap(struct mm_struct *mm, unsigned long addr, size_t len) ...@@ -950,7 +950,6 @@ int do_munmap(struct mm_struct *mm, unsigned long addr, size_t len)
st = addr < mpnt->vm_start ? mpnt->vm_start : addr; st = addr < mpnt->vm_start ? mpnt->vm_start : addr;
end = addr+len; end = addr+len;
end = end > mpnt->vm_end ? mpnt->vm_end : end; end = end > mpnt->vm_end ? mpnt->vm_end : end;
size = end - st;
if (mpnt->vm_flags & VM_DENYWRITE && if (mpnt->vm_flags & VM_DENYWRITE &&
(st != mpnt->vm_start || end != mpnt->vm_end) && (st != mpnt->vm_start || end != mpnt->vm_end) &&
...@@ -960,12 +959,12 @@ int do_munmap(struct mm_struct *mm, unsigned long addr, size_t len) ...@@ -960,12 +959,12 @@ int do_munmap(struct mm_struct *mm, unsigned long addr, size_t len)
remove_shared_vm_struct(mpnt); remove_shared_vm_struct(mpnt);
mm->map_count--; mm->map_count--;
zap_page_range(mpnt, st, size); unmap_page_range(tlb, mpnt, st, end);
/* /*
* Fix the mapping, and free the old area if it wasn't reused. * Fix the mapping, and free the old area if it wasn't reused.
*/ */
extra = unmap_fixup(mm, mpnt, st, size, extra); extra = unmap_fixup(mm, mpnt, st, end-st, extra);
if (file) if (file)
atomic_inc(&file->f_dentry->d_inode->i_writecount); atomic_inc(&file->f_dentry->d_inode->i_writecount);
} }
...@@ -976,6 +975,8 @@ int do_munmap(struct mm_struct *mm, unsigned long addr, size_t len) ...@@ -976,6 +975,8 @@ int do_munmap(struct mm_struct *mm, unsigned long addr, size_t len)
kmem_cache_free(vm_area_cachep, extra); kmem_cache_free(vm_area_cachep, extra);
free_pgtables(mm, prev, addr, addr+len); free_pgtables(mm, prev, addr, addr+len);
tlb_finish_mmu(tlb, addr, addr+len);
spin_unlock(&mm->page_table_lock);
return 0; return 0;
} }
...@@ -1092,6 +1093,7 @@ void build_mmap_rb(struct mm_struct * mm) ...@@ -1092,6 +1093,7 @@ void build_mmap_rb(struct mm_struct * mm)
/* Release all mmaps. */ /* Release all mmaps. */
void exit_mmap(struct mm_struct * mm) void exit_mmap(struct mm_struct * mm)
{ {
mmu_gather_t *tlb;
struct vm_area_struct * mpnt; struct vm_area_struct * mpnt;
release_segments(mm); release_segments(mm);
...@@ -1100,16 +1102,16 @@ void exit_mmap(struct mm_struct * mm) ...@@ -1100,16 +1102,16 @@ void exit_mmap(struct mm_struct * mm)
mm->mmap = mm->mmap_cache = NULL; mm->mmap = mm->mmap_cache = NULL;
mm->mm_rb = RB_ROOT; mm->mm_rb = RB_ROOT;
mm->rss = 0; mm->rss = 0;
spin_unlock(&mm->page_table_lock);
mm->total_vm = 0; mm->total_vm = 0;
mm->locked_vm = 0; mm->locked_vm = 0;
tlb = tlb_gather_mmu(mm);
flush_cache_mm(mm); flush_cache_mm(mm);
while (mpnt) { while (mpnt) {
struct vm_area_struct * next = mpnt->vm_next; struct vm_area_struct * next = mpnt->vm_next;
unsigned long start = mpnt->vm_start; unsigned long start = mpnt->vm_start;
unsigned long end = mpnt->vm_end; unsigned long end = mpnt->vm_end;
unsigned long size = end - start;
if (mpnt->vm_ops) { if (mpnt->vm_ops) {
if (mpnt->vm_ops->close) if (mpnt->vm_ops->close)
...@@ -1117,19 +1119,20 @@ void exit_mmap(struct mm_struct * mm) ...@@ -1117,19 +1119,20 @@ void exit_mmap(struct mm_struct * mm)
} }
mm->map_count--; mm->map_count--;
remove_shared_vm_struct(mpnt); remove_shared_vm_struct(mpnt);
zap_page_range(mpnt, start, size); unmap_page_range(tlb, mpnt, start, end);
if (mpnt->vm_file) if (mpnt->vm_file)
fput(mpnt->vm_file); fput(mpnt->vm_file);
kmem_cache_free(vm_area_cachep, mpnt); kmem_cache_free(vm_area_cachep, mpnt);
mpnt = next; mpnt = next;
} }
flush_tlb_mm(mm);
/* This is just debugging */ /* This is just debugging */
if (mm->map_count) if (mm->map_count)
BUG(); BUG();
clear_page_tables(mm, FIRST_USER_PGD_NR, USER_PTRS_PER_PGD); clear_page_tables(mm, FIRST_USER_PGD_NR, USER_PTRS_PER_PGD);
tlb_finish_mmu(tlb, FIRST_USER_PGD_NR*PGDIR_SIZE, USER_PTRS_PER_PGD*PGDIR_SIZE);
spin_unlock(&mm->page_table_lock);
} }
/* Insert vm structure into process list sorted by address /* Insert vm structure into process list sorted by address
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment