First cut at proper TLB shootdown for page directory entries.

7c9d187e · Linus Torvalds · 40f53750 · 7c9d187e · 7c9d187e · 7c9d187e
Commit 7c9d187e authored May 15, 2002 by Linus Torvalds
Hide whitespace changes
Inline Side-by-side

Showing with 106 additions and 107 deletions

include/asm-generic/tlb.h include/asm-generic/tlb.h +60 -51

mm/memory.c mm/memory.c +27 -40

mm/mmap.c mm/mmap.c +19 -16

No files found.
--- a/include/asm-generic/tlb.h
+++ b/include/asm-generic/tlb.h
@@ -16,7 +16,6 @@
 #include <linux/config.h>
 #include <asm/tlbflush.h>

-#ifdef CONFIG_SMP
 /* aim for something that fits in the L1 cache */
 #define FREE_PTE_NR	508

@@ -26,90 +25,100 @@
 * shootdown.
 */
 typedef struct free_pte_ctx {
-	struct vm_area_struct	*vma;
+	struct mm_struct	*mm;
 	unsigned long		nr;	/* set to ~0UL means fast mode */
-	unsigned long	start_addr, end_addr;
+	unsigned long		freed;
+	unsigned long		start_addr, end_addr;
 	pte_t	ptes[FREE_PTE_NR];
 } mmu_gather_t;

 /* Users of the generic TLB shootdown code must declare this storage space. */
 extern mmu_gather_t	mmu_gathers[NR_CPUS];

+/* Do me later */
+#define tlb_start_vma(tlb, vma) do { } while (0)
+#define tlb_end_vma(tlb, vma) do { } while (0)
+
 /* tlb_gather_mmu
 *	Return a pointer to an initialized mmu_gather_t.
 */
-static inline mmu_gather_t *tlb_gather_mmu(struct vm_area_struct *vma)
+static inline mmu_gather_t *tlb_gather_mmu(struct mm_struct *mm)
 {
 	mmu_gather_t *tlb = &mmu_gathers[smp_processor_id()];
-	struct mm_struct *mm = vma->vm_mm;

-	tlb->vma = vma;
+	tlb->mm = mm;
+	tlb->freed = 0;
 	/* Use fast mode if there is only one user of this mm (this process) */
 	tlb->nr = (atomic_read(&(mm)->mm_users) == 1) ? ~0UL : 0UL;
 	return tlb;
 }

-/* void tlb_remove_page(mmu_gather_t *tlb, pte_t *ptep, unsigned long addr)
- *	Must perform the equivalent to __free_pte(pte_get_and_clear(ptep)), while
- *	handling the additional races in SMP caused by other CPUs caching valid
- *	mappings in their TLBs.
- */
-#define tlb_remove_page(ctxp, pte, addr) do {\
-		/* Handle the common case fast, first. */\
-		if ((ctxp)->nr == ~0UL) {\
-			__free_pte(*(pte));\
-			pte_clear((pte));\
-			break;\
-		}\
-		if (!(ctxp)->nr) \
-			(ctxp)->start_addr = (addr);\
-		(ctxp)->ptes[(ctxp)->nr++] = ptep_get_and_clear(pte);\
-		(ctxp)->end_addr = (addr) + PAGE_SIZE;\
-		if ((ctxp)->nr >= FREE_PTE_NR)\
-			tlb_finish_mmu((ctxp), 0, 0);\
-	} while (0)
-
-/* tlb_finish_mmu
- *	Called at the end of the shootdown operation to free up any resources
- *	that were required.  The page table lock is still held at this point.
- */
-static inline void tlb_finish_mmu(struct free_pte_ctx *ctx, unsigned long start, unsigned long end)
+static inline void tlb_flush_mmu(mmu_gather_t *tlb, unsigned long start, unsigned long end)
 {
 	unsigned long i, nr;

 	/* Handle the fast case first. */
-	if (ctx->nr == ~0UL) {
-		flush_tlb_range(ctx->vma, start, end);
+	if (tlb->nr == ~0UL) {
+		flush_tlb_mm(tlb->mm);
 		return;
 	}
-	nr = ctx->nr;
-	ctx->nr = 0;
+	nr = tlb->nr;
+	tlb->nr = 0;
 	if (nr)
-		flush_tlb_range(ctx->vma, ctx->start_addr, ctx->end_addr);
+		flush_tlb_mm(tlb->mm);
 	for (i=0; i < nr; i++) {
-		pte_t pte = ctx->ptes[i];
+		pte_t pte = tlb->ptes[i];
 		__free_pte(pte);
 	}
 }

-#else
-
-/* The uniprocessor functions are quite simple and are inline macros in an
- * attempt to get gcc to generate optimal code since this code is run on each
- * page in a process at exit.
+/* tlb_finish_mmu
+ *	Called at the end of the shootdown operation to free up any resources
+ *	that were required.  The page table lock is still held at this point.
 */
-typedef struct vm_area_struct mmu_gather_t;
+static inline void tlb_finish_mmu(mmu_gather_t *tlb, unsigned long start, unsigned long end)
+{
+	int freed = tlb->freed;
+	struct mm_struct *mm = tlb->mm;
+	int rss = mm->rss;
+
+	if (rss < freed)
+		freed = rss;
+	mm->rss = rss - freed;

-#define tlb_gather_mmu(vma)	(vma)
-#define tlb_finish_mmu(tlb, start, end)	flush_tlb_range(tlb, start, end)
-#define tlb_remove_page(tlb, ptep, addr)	do {\
-		pte_t __pte = *(ptep);\
-		pte_clear(ptep);\
-		__free_pte(__pte);\
-	} while (0)
+	tlb_flush_mmu(tlb, start, end);
+}

-#endif

+/* void tlb_remove_page(mmu_gather_t *tlb, pte_t *ptep, unsigned long addr)
+ *	Must perform the equivalent to __free_pte(pte_get_and_clear(ptep)), while
+ *	handling the additional races in SMP caused by other CPUs caching valid
+ *	mappings in their TLBs.
+ */
+static inline void tlb_remove_page(mmu_gather_t *tlb, pte_t *pte, unsigned long addr)
+{
+	struct page *page;
+	unsigned long pfn = pte_pfn(*pte);
+
+	if (pfn_valid(pfn)) {
+		page = pfn_to_page(pfn);
+		if (!PageReserved(page))
+			tlb->freed++;	/* cumulative count; applied to mm->rss by tlb_finish_mmu() */
+	}
+
+	/* Handle the common case fast, first. */
+	if (tlb->nr == ~0UL) {
+		__free_pte(*pte);
+		pte_clear(pte);
+		return;
+	}
+	if (!tlb->nr)
+		tlb->start_addr = addr;	/* first pte gathered in this batch */
+	tlb->ptes[tlb->nr++] = ptep_get_and_clear(pte);
+	tlb->end_addr = addr + PAGE_SIZE;
+	if (tlb->nr >= FREE_PTE_NR)
+		tlb_flush_mmu(tlb, 0, 0);	/* batch full: flush+free only; finish_mmu here would re-subtract ->freed from rss */
+}

 #endif /* _ASM_GENERIC__TLB_H */

--- a/mm/memory.c
+++ b/mm/memory.c
@@ -133,18 +133,18 @@ static inline void free_one_pgd(pgd_t * dir)
 /*
 * This function clears all user-level page tables of a process - this
 * is needed by execve(), so that old pages aren't in the way.
+ *
+ * Must be called with pagetable lock held.
 */
 void clear_page_tables(struct mm_struct *mm, unsigned long first, int nr)
 {
 	pgd_t * page_dir = mm->pgd;

-	spin_lock(&mm->page_table_lock);
 	page_dir += first;
 	do {
 		free_one_pgd(page_dir);
 		page_dir++;
 	} while (--nr);
-	spin_unlock(&mm->page_table_lock);

 	/* keep the page table cache within bounds */
 	check_pgt_cache();
@@ -340,18 +340,17 @@ static inline void forget_pte(pte_t page)
 	}
 }

-static inline int zap_pte_range(mmu_gather_t *tlb, pmd_t * pmd, unsigned long address, unsigned long size)
+static void zap_pte_range(mmu_gather_t *tlb, pmd_t * pmd, unsigned long address, unsigned long size)
 {
 	unsigned long offset;
 	pte_t *ptep;
-	int freed = 0;

 	if (pmd_none(*pmd))
-		return 0;
+		return;
 	if (pmd_bad(*pmd)) {
 		pmd_ERROR(*pmd);
 		pmd_clear(pmd);
-		return 0;
+		return;
 	}
 	ptep = pte_offset_map(pmd, address);
 	offset = address & ~PMD_MASK;
@@ -363,13 +362,6 @@ static inline int zap_pte_range(mmu_gather_t *tlb, pmd_t * pmd, unsigned long ad
 		if (pte_none(pte))
 			continue;
 		if (pte_present(pte)) {
-			struct page *page;
-			unsigned long pfn = pte_pfn(pte);
-			if (pfn_valid(pfn)) {
-				page = pfn_to_page(pfn);
-				if (!PageReserved(page))
-					freed++;
-			}
 			/* This will eventually call __free_pte on the pte. */
 			tlb_remove_page(tlb, ptep, address + offset);
 		} else {
@@ -378,34 +370,45 @@ static inline int zap_pte_range(mmu_gather_t *tlb, pmd_t * pmd, unsigned long ad
 		}
 	}
 	pte_unmap(ptep-1);
-
-	return freed;
 }

-static inline int zap_pmd_range(mmu_gather_t *tlb, pgd_t * dir, unsigned long address, unsigned long size)
+static void zap_pmd_range(mmu_gather_t *tlb, pgd_t * dir, unsigned long address, unsigned long size)
 {
 	pmd_t * pmd;
 	unsigned long end;
-	int freed;

 	if (pgd_none(*dir))
-		return 0;
+		return;
 	if (pgd_bad(*dir)) {
 		pgd_ERROR(*dir);
 		pgd_clear(dir);
-		return 0;
+		return;
 	}
 	pmd = pmd_offset(dir, address);
 	end = address + size;
 	if (end > ((address + PGDIR_SIZE) & PGDIR_MASK))
 		end = ((address + PGDIR_SIZE) & PGDIR_MASK);
-	freed = 0;
 	do {
-		freed += zap_pte_range(tlb, pmd, address, end - address);
+		zap_pte_range(tlb, pmd, address, end - address);
 		address = (address + PMD_SIZE) & PMD_MASK; 
 		pmd++;
 	} while (address < end);
-	return freed;
+}
+
+void unmap_page_range(mmu_gather_t *tlb, struct vm_area_struct *vma, unsigned long address, unsigned long end)
+{	/* Zap the ptes of [address, end) in vma's mm into the gather 'tlb'; no tlb_finish_mmu() is issued here. */
+	pgd_t * dir;
+
+	if (address >= end)	/* empty or inverted range is treated as a caller bug */
+		BUG();
+	dir = pgd_offset(vma->vm_mm, address);
+	tlb_start_vma(tlb, vma);
+	do {	/* one page-directory entry per iteration */
+		zap_pmd_range(tlb, dir, address, end - address);
+		address = (address + PGDIR_SIZE) & PGDIR_MASK;	/* advance to the start of the next pgd slot */
+		dir++;
+	} while (address && (address < end));	/* address == 0 means the increment wrapped around */
+	tlb_end_vma(tlb, vma);
 }

 /*
@@ -417,7 +420,6 @@ void zap_page_range(struct vm_area_struct *vma, unsigned long address, unsigned
 	mmu_gather_t *tlb;
 	pgd_t * dir;
 	unsigned long start = address, end = address + size;
-	int freed = 0;

 	dir = pgd_offset(mm, address);

@@ -432,25 +434,10 @@ void zap_page_range(struct vm_area_struct *vma, unsigned long address, unsigned
 		BUG();
 	spin_lock(&mm->page_table_lock);
 	flush_cache_range(vma, address, end);
-	tlb = tlb_gather_mmu(vma);

-	do {
-		freed += zap_pmd_range(tlb, dir, address, end - address);
-		address = (address + PGDIR_SIZE) & PGDIR_MASK;
-		dir++;
-	} while (address && (address < end));
-
-	/* this will flush any remaining tlb entries */
+	tlb = tlb_gather_mmu(mm);
+	unmap_page_range(tlb, vma, address, end);
 	tlb_finish_mmu(tlb, start, end);
-
-	/*
-	 * Update rss for the mm_struct (not necessarily current->mm)
-	 * Notice that rss is an unsigned long.
-	 */
-	if (mm->rss > freed)
-		mm->rss -= freed;
-	else
-		mm->rss = 0;
 	spin_unlock(&mm->page_table_lock);
 }


--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -17,7 +17,9 @@

 #include <asm/uaccess.h>
 #include <asm/pgalloc.h>
-#include <asm/tlbflush.h>
+#include <asm/tlb.h>
+
+extern void unmap_page_range(mmu_gather_t *tlb, struct vm_area_struct *vma, unsigned long address, unsigned long end);	/* last argument is the end address, not a length */

 /*
 * WARNING: the debugging will use recursive algorithms so never enable this
@@ -329,11 +331,11 @@ static void __vma_link(struct mm_struct * mm, struct vm_area_struct * vma,  stru
 static inline void vma_link(struct mm_struct * mm, struct vm_area_struct * vma, struct vm_area_struct * prev,
 			    rb_node_t ** rb_link, rb_node_t * rb_parent)
 {
-	lock_vma_mappings(vma);
 	spin_lock(&mm->page_table_lock);
+	lock_vma_mappings(vma);
 	__vma_link(mm, vma, prev, rb_link, rb_parent);
-	spin_unlock(&mm->page_table_lock);
 	unlock_vma_mappings(vma);
+	spin_unlock(&mm->page_table_lock);

 	mm->map_count++;
 	validate_mm(mm);
@@ -781,13 +783,11 @@ static struct vm_area_struct * unmap_fixup(struct mm_struct *mm,
 		 */
 		area->vm_end = addr;
 		lock_vma_mappings(area);
-		spin_lock(&mm->page_table_lock);
 	} else if (addr == area->vm_start) {
 		area->vm_pgoff += (end - area->vm_start) >> PAGE_SHIFT;
 		/* same locking considerations of the above case */
 		area->vm_start = end;
 		lock_vma_mappings(area);
-		spin_lock(&mm->page_table_lock);
 	} else {
 	/* Unmapping a hole: area->vm_start < addr <= end < area->vm_end */
 		/* Add end mapping -- leave beginning for below */
@@ -814,12 +814,10 @@ static struct vm_area_struct * unmap_fixup(struct mm_struct *mm,
 		 * things correctly.
 		 */
 		lock_vma_mappings(area);
-		spin_lock(&mm->page_table_lock);
 		__insert_vm_struct(mm, mpnt);
 	}

 	__insert_vm_struct(mm, area);
-	spin_unlock(&mm->page_table_lock);
 	unlock_vma_mappings(area);
 	return extra;
 }
@@ -889,6 +887,7 @@ static void free_pgtables(struct mm_struct * mm, struct vm_area_struct *prev,
 */
 int do_munmap(struct mm_struct *mm, unsigned long addr, size_t len)
 {
+	mmu_gather_t *tlb;
 	struct vm_area_struct *mpnt, *prev, **npp, *free, *extra;

 	if ((addr & ~PAGE_MASK) || addr > TASK_SIZE || len > TASK_SIZE-addr)
@@ -933,7 +932,8 @@ int do_munmap(struct mm_struct *mm, unsigned long addr, size_t len)
 		rb_erase(&mpnt->vm_rb, &mm->mm_rb);
 	}
 	mm->mmap_cache = NULL;	/* Kill the cache. */
-	spin_unlock(&mm->page_table_lock);
+
+	tlb = tlb_gather_mmu(mm);

 	/* Ok - we have the memory areas we should free on the 'free' list,
 	 * so release them, and unmap the page range..
@@ -942,7 +942,7 @@ int do_munmap(struct mm_struct *mm, unsigned long addr, size_t len)
 	 * In that case we have to be careful with VM_DENYWRITE.
 	 */
 	while ((mpnt = free) != NULL) {
-		unsigned long st, end, size;
+		unsigned long st, end;
 		struct file *file = NULL;

 		free = free->vm_next;
@@ -950,7 +950,6 @@ int do_munmap(struct mm_struct *mm, unsigned long addr, size_t len)
 		st = addr < mpnt->vm_start ? mpnt->vm_start : addr;
 		end = addr+len;
 		end = end > mpnt->vm_end ? mpnt->vm_end : end;
-		size = end - st;

 		if (mpnt->vm_flags & VM_DENYWRITE &&
 		    (st != mpnt->vm_start || end != mpnt->vm_end) &&
@@ -960,12 +959,12 @@ int do_munmap(struct mm_struct *mm, unsigned long addr, size_t len)
 		remove_shared_vm_struct(mpnt);
 		mm->map_count--;

-		zap_page_range(mpnt, st, size);
+		unmap_page_range(tlb, mpnt, st, end);

 		/*
 		 * Fix the mapping, and free the old area if it wasn't reused.
 		 */
-		extra = unmap_fixup(mm, mpnt, st, size, extra);
+		extra = unmap_fixup(mm, mpnt, st, end-st, extra);
 		if (file)
 			atomic_inc(&file->f_dentry->d_inode->i_writecount);
 	}
@@ -976,6 +975,8 @@ int do_munmap(struct mm_struct *mm, unsigned long addr, size_t len)
 		kmem_cache_free(vm_area_cachep, extra);

 	free_pgtables(mm, prev, addr, addr+len);
+	tlb_finish_mmu(tlb, addr, addr+len);
+	spin_unlock(&mm->page_table_lock);

 	return 0;
 }
@@ -1092,6 +1093,7 @@ void build_mmap_rb(struct mm_struct * mm)
 /* Release all mmaps. */
 void exit_mmap(struct mm_struct * mm)
 {
+	mmu_gather_t *tlb;
 	struct vm_area_struct * mpnt;

 	release_segments(mm);
@@ -1100,16 +1102,16 @@ void exit_mmap(struct mm_struct * mm)
 	mm->mmap = mm->mmap_cache = NULL;
 	mm->mm_rb = RB_ROOT;
 	mm->rss = 0;
-	spin_unlock(&mm->page_table_lock);
 	mm->total_vm = 0;
 	mm->locked_vm = 0;

+	tlb = tlb_gather_mmu(mm);
+
 	flush_cache_mm(mm);
 	while (mpnt) {
 		struct vm_area_struct * next = mpnt->vm_next;
 		unsigned long start = mpnt->vm_start;
 		unsigned long end = mpnt->vm_end;
-		unsigned long size = end - start;

 		if (mpnt->vm_ops) {
 			if (mpnt->vm_ops->close)
@@ -1117,19 +1119,20 @@ void exit_mmap(struct mm_struct * mm)
 		}
 		mm->map_count--;
 		remove_shared_vm_struct(mpnt);
-		zap_page_range(mpnt, start, size);
+		unmap_page_range(tlb, mpnt, start, end);
 		if (mpnt->vm_file)
 			fput(mpnt->vm_file);
 		kmem_cache_free(vm_area_cachep, mpnt);
 		mpnt = next;
 	}
-	flush_tlb_mm(mm);

 	/* This is just debugging */
 	if (mm->map_count)
 		BUG();

 	clear_page_tables(mm, FIRST_USER_PGD_NR, USER_PTRS_PER_PGD);
+	tlb_finish_mmu(tlb, FIRST_USER_PGD_NR*PGDIR_SIZE, USER_PTRS_PER_PGD*PGDIR_SIZE);
+	spin_unlock(&mm->page_table_lock);
 }

 /* Insert vm structure into process list sorted by address