Commit 5200ffe8 authored by Linus Torvalds

Cleanup munmap a lot. Fix Intel P4 TLB corruptions on SMP.

Special thanks to Intel for support and traces.
parent 4cc4c697
@@ -44,18 +44,12 @@ extern mmu_gather_t mmu_gathers[NR_CPUS];
 static inline mmu_gather_t *tlb_gather_mmu(struct mm_struct *mm)
 {
         mmu_gather_t *tlb = &mmu_gathers[smp_processor_id()];
-        unsigned long nr;

         tlb->mm = mm;
         tlb->freed = 0;

-        /* Use fast mode if this MM only exists on this CPU */
-        nr = ~0UL;
-#ifdef CONFIG_SMP
-        if (mm->cpu_vm_mask != (1<<smp_processor_id()))
-                nr = 0UL;
-#endif
-        tlb->nr = nr;
+        /* Use fast mode if only one CPU is online */
+        tlb->nr = smp_num_cpus > 1 ? 0UL : ~0UL;
         return tlb;
 }
...
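The hunk above narrows the condition for the mmu_gather "fast" path: instead of trusting mm->cpu_vm_mask to prove the mm is live only on the current CPU, the new code takes the fast path only when a single CPU is online at all. A rough user-space model of just that predicate change (all names below -- TLB_FAST, TLB_SLOW, old_mode(), new_mode() -- are illustrative stand-ins, not kernel code):

/* User-space sketch of the fast-mode decision; illustrative only. */
#include <stdio.h>

#define TLB_FAST (~0UL)  /* mirrors tlb->nr = ~0UL: no deferred batching */
#define TLB_SLOW (0UL)   /* mirrors tlb->nr = 0UL: batch pages until the flush */

/* Old heuristic: fast if this mm appears to live only on the current CPU. */
static unsigned long old_mode(unsigned long cpu_vm_mask, int this_cpu)
{
        return cpu_vm_mask == (1UL << this_cpu) ? TLB_FAST : TLB_SLOW;
}

/* New heuristic: fast only when there is just one CPU online at all. */
static unsigned long new_mode(int num_online_cpus)
{
        return num_online_cpus > 1 ? TLB_SLOW : TLB_FAST;
}

int main(void)
{
        /* Two CPUs online, mm currently visible only on CPU 0:
         * the old test picked the fast path, the new one does not. */
        printf("old: %s\n", old_mode(1UL << 0, 0) == TLB_FAST ? "fast" : "slow");
        printf("new: %s\n", new_mode(2) == TLB_FAST ? "fast" : "slow");
        return 0;
}

The slow (batched) path is the conservative choice; the change simply stops taking the shortcut whenever more than one CPU is online.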
@@ -732,97 +732,6 @@ struct vm_area_struct * find_extend_vma(struct mm_struct * mm, unsigned long add
         return vma;
 }

-/* Normal function to fix up a mapping
- * This function is the default for when an area has no specific
- * function.  This may be used as part of a more specific routine.
- * This function works out what part of an area is affected and
- * adjusts the mapping information.  Since the actual page
- * manipulation is done in do_mmap(), none need be done here,
- * though it would probably be more appropriate.
- *
- * By the time this function is called, the area struct has been
- * removed from the process mapping list, so it needs to be
- * reinserted if necessary.
- *
- * The 4 main cases are:
- *    Unmapping the whole area
- *    Unmapping from the start of the segment to a point in it
- *    Unmapping from an intermediate point to the end
- *    Unmapping between to intermediate points, making a hole.
- *
- * Case 4 involves the creation of 2 new areas, for each side of
- * the hole.  If possible, we reuse the existing area rather than
- * allocate a new one, and the return indicates whether the old
- * area was reused.
- */
-static struct vm_area_struct * unmap_fixup(struct mm_struct *mm,
-        struct vm_area_struct *area, unsigned long addr, size_t len,
-        struct vm_area_struct *extra)
-{
-        struct vm_area_struct *mpnt;
-        unsigned long end = addr + len;
-
-        area->vm_mm->total_vm -= len >> PAGE_SHIFT;
-        if (area->vm_flags & VM_LOCKED)
-                area->vm_mm->locked_vm -= len >> PAGE_SHIFT;
-
-        /* Unmapping the whole area. */
-        if (addr == area->vm_start && end == area->vm_end) {
-                if (area->vm_ops && area->vm_ops->close)
-                        area->vm_ops->close(area);
-                if (area->vm_file)
-                        fput(area->vm_file);
-                kmem_cache_free(vm_area_cachep, area);
-                return extra;
-        }
-
-        /* Work out to one of the ends. */
-        if (end == area->vm_end) {
-                /*
-                 * here area isn't visible to the semaphore-less readers
-                 * so we don't need to update it under the spinlock.
-                 */
-                area->vm_end = addr;
-                lock_vma_mappings(area);
-        } else if (addr == area->vm_start) {
-                area->vm_pgoff += (end - area->vm_start) >> PAGE_SHIFT;
-                /* same locking considerations of the above case */
-                area->vm_start = end;
-                lock_vma_mappings(area);
-        } else {
-                /* Unmapping a hole: area->vm_start < addr <= end < area->vm_end */
-                /* Add end mapping -- leave beginning for below */
-                mpnt = extra;
-                extra = NULL;
-
-                mpnt->vm_mm = area->vm_mm;
-                mpnt->vm_start = end;
-                mpnt->vm_end = area->vm_end;
-                mpnt->vm_page_prot = area->vm_page_prot;
-                mpnt->vm_flags = area->vm_flags;
-                mpnt->vm_raend = 0;
-                mpnt->vm_ops = area->vm_ops;
-                mpnt->vm_pgoff = area->vm_pgoff + ((end - area->vm_start) >> PAGE_SHIFT);
-                mpnt->vm_file = area->vm_file;
-                mpnt->vm_private_data = area->vm_private_data;
-                if (mpnt->vm_file)
-                        get_file(mpnt->vm_file);
-                if (mpnt->vm_ops && mpnt->vm_ops->open)
-                        mpnt->vm_ops->open(mpnt);
-                area->vm_end = addr;        /* Truncate area */
-
-                /* Because mpnt->vm_file == area->vm_file this locks
-                 * things correctly.
-                 */
-                lock_vma_mappings(area);
-                __insert_vm_struct(mm, mpnt);
-        }
-
-        __insert_vm_struct(mm, area);
-        unlock_vma_mappings(area);
-        return extra;
-}
-
 /*
  * Try to free as many page directory entries as we can,
  * without having to work very hard at actually scanning
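For reference, the long comment deleted above enumerated the four ways an unmap range can cut a single area: the whole area, its head, its tail, or a hole in the middle. A minimal user-space classifier for those cases, purely illustrative and independent of the kernel code (enum and function names are made up):

/* Classify how [addr, addr+len) intersects a mapping [vm_start, vm_end),
 * mirroring the four cases the removed unmap_fixup() comment described.
 * Assumes the range is already known to overlap the area. */
#include <stdio.h>

enum unmap_case { WHOLE, HEAD, TAIL, HOLE };

static enum unmap_case classify(unsigned long vm_start, unsigned long vm_end,
                                unsigned long addr, unsigned long len)
{
        unsigned long end = addr + len;

        if (addr <= vm_start && end >= vm_end)
                return WHOLE;   /* case 1: the whole area goes away */
        if (addr <= vm_start)
                return HEAD;    /* case 2: from the start to a point in it */
        if (end >= vm_end)
                return TAIL;    /* case 3: from an intermediate point to the end */
        return HOLE;            /* case 4: a hole, two pieces remain */
}

int main(void)
{
        /* A hole forces the old code to carve one area into two,
         * which is why it pre-allocated the 'extra' vma. */
        static const char *name[] = { "whole", "head", "tail", "hole" };
        printf("%s\n", name[classify(0x1000, 0x9000, 0x3000, 0x2000)]);
        return 0;
}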
@@ -882,104 +791,201 @@ static void free_pgtables(mmu_gather_t *tlb, struct vm_area_struct *prev,
         }
 }

-/* Munmap is split into 2 main parts -- this part which finds
- * what needs doing, and the areas themselves, which do the
- * work.  This now handles partial unmappings.
- * Jeremy Fitzhardine <jeremy@sw.oz.au>
- */
-int do_munmap(struct mm_struct *mm, unsigned long addr, size_t len)
-{
-        mmu_gather_t *tlb;
-        struct vm_area_struct *mpnt, *prev, **npp, *free, *extra;
-
-        if ((addr & ~PAGE_MASK) || addr > TASK_SIZE || len > TASK_SIZE-addr)
-                return -EINVAL;
-
-        if ((len = PAGE_ALIGN(len)) == 0)
-                return -EINVAL;
-
-        /* Check if this memory area is ok - put it on the temporary
-         * list if so..  The checks here are pretty simple --
-         * every area affected in some way (by any overlap) is put
-         * on the list.  If nothing is put on, nothing is affected.
-         */
-        mpnt = find_vma_prev(mm, addr, &prev);
-        if (!mpnt)
-                return 0;
-        /* we have addr < mpnt->vm_end  */
-
-        if (mpnt->vm_start >= addr+len)
-                return 0;
-
-        /* If we'll make "hole", check the vm areas limit */
-        if ((mpnt->vm_start < addr && mpnt->vm_end > addr+len)
-            && mm->map_count >= MAX_MAP_COUNT)
-                return -ENOMEM;
-
-        /*
-         * We may need one additional vma to fix up the mappings ...
-         * and this is the last chance for an easy error exit.
-         */
-        extra = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
-        if (!extra)
-                return -ENOMEM;
-
-        npp = (prev ? &prev->vm_next : &mm->mmap);
-        free = NULL;
-        spin_lock(&mm->page_table_lock);
-        for ( ; mpnt && mpnt->vm_start < addr+len; mpnt = *npp) {
-                *npp = mpnt->vm_next;
-                mpnt->vm_next = free;
-                free = mpnt;
-                rb_erase(&mpnt->vm_rb, &mm->mm_rb);
-        }
-        mm->mmap_cache = NULL;        /* Kill the cache. */
-
-        tlb = tlb_gather_mmu(mm);
-
-        /* Ok - we have the memory areas we should free on the 'free' list,
-         * so release them, and unmap the page range..
-         * If the one of the segments is only being partially unmapped,
-         * it will put new vm_area_struct(s) into the address space.
-         * In that case we have to be careful with VM_DENYWRITE.
-         */
-        while ((mpnt = free) != NULL) {
-                unsigned long st, end;
-                struct file *file = NULL;
-
-                free = free->vm_next;
-
-                st = addr < mpnt->vm_start ? mpnt->vm_start : addr;
-                end = addr+len;
-                end = end > mpnt->vm_end ? mpnt->vm_end : end;
-
-                if (mpnt->vm_flags & VM_DENYWRITE &&
-                    (st != mpnt->vm_start || end != mpnt->vm_end) &&
-                    (file = mpnt->vm_file) != NULL) {
-                        atomic_dec(&file->f_dentry->d_inode->i_writecount);
-                }
-                remove_shared_vm_struct(mpnt);
-                mm->map_count--;
-
-                unmap_page_range(tlb, mpnt, st, end);
-
-                /*
-                 * Fix the mapping, and free the old area if it wasn't reused.
-                 */
-                extra = unmap_fixup(mm, mpnt, st, end-st, extra);
-                if (file)
-                        atomic_inc(&file->f_dentry->d_inode->i_writecount);
-        }
-        validate_mm(mm);
-
-        /* Release the extra vma struct if it wasn't used */
-        if (extra)
-                kmem_cache_free(vm_area_cachep, extra);
-
-        free_pgtables(tlb, prev, addr, addr+len);
-        tlb_finish_mmu(tlb, addr, addr+len);
-
-        spin_unlock(&mm->page_table_lock);
-        return 0;
-}
+/* Normal function to fix up a mapping
+ * This function is the default for when an area has no specific
+ * function.  This may be used as part of a more specific routine.
+ *
+ * By the time this function is called, the area struct has been
+ * removed from the process mapping list.
+ */
+static void unmap_vma(struct mm_struct *mm, struct vm_area_struct *area)
+{
+        size_t len = area->vm_end - area->vm_start;
+
+        area->vm_mm->total_vm -= len >> PAGE_SHIFT;
+        if (area->vm_flags & VM_LOCKED)
+                area->vm_mm->locked_vm -= len >> PAGE_SHIFT;
+        remove_shared_vm_struct(area);
+        if (area->vm_ops && area->vm_ops->close)
+                area->vm_ops->close(area);
+        if (area->vm_file)
+                fput(area->vm_file);
+        kmem_cache_free(vm_area_cachep, area);
+}
+
+/*
+ * Update the VMA and inode share lists.
+ *
+ * Ok - we have the memory areas we should free on the 'free' list,
+ * so release them, and do the vma updates.
+ */
+static void unmap_vma_list(struct mm_struct *mm,
+        struct vm_area_struct *mpnt)
+{
+        do {
+                struct vm_area_struct *next = mpnt->vm_next;
+                unmap_vma(mm, mpnt);
+                mpnt = next;
+        } while (mpnt != NULL);
+        validate_mm(mm);
+}
+
+/*
+ * Get rid of page table information in the indicated region.
+ *
+ * Called with the page table lock held.
+ */
+static void unmap_region(struct mm_struct *mm,
+        struct vm_area_struct *mpnt,
+        struct vm_area_struct *prev,
+        unsigned long start,
+        unsigned long end)
+{
+        mmu_gather_t *tlb;
+
+        tlb = tlb_gather_mmu(mm);
+
+        do {
+                unsigned long from, to;
+
+                from = start < mpnt->vm_start ? mpnt->vm_start : start;
+                to = end > mpnt->vm_end ? mpnt->vm_end : end;
+
+                unmap_page_range(tlb, mpnt, from, to);
+        } while ((mpnt = mpnt->vm_next) != NULL);
+
+        free_pgtables(tlb, prev, start, end);
+        tlb_finish_mmu(tlb, start, end);
+}
+
+/*
+ * Create a list of vma's touched by the unmap,
+ * removing them from the VM lists as we go..
+ *
+ * Called with the page_table_lock held.
+ */
+static struct vm_area_struct *touched_by_munmap(struct mm_struct *mm,
+        struct vm_area_struct *mpnt,
+        struct vm_area_struct *prev,
+        unsigned long end)
+{
+        struct vm_area_struct **npp, *touched;
+
+        npp = (prev ? &prev->vm_next : &mm->mmap);
+
+        touched = NULL;
+        do {
+                struct vm_area_struct *next = mpnt->vm_next;
+                mpnt->vm_next = touched;
+                touched = mpnt;
+                mm->map_count--;
+                rb_erase(&mpnt->vm_rb, &mm->mm_rb);
+                mpnt = next;
+        } while (mpnt && mpnt->vm_start < end);
+        *npp = mpnt;
+        mm->mmap_cache = NULL;        /* Kill the cache. */
+        return touched;
+}
+
+/*
+ * Split a vma into two pieces at address 'addr', the original vma
+ * will contain the first part, a new vma is allocated for the tail.
+ */
+static int splitvma(struct mm_struct *mm, struct vm_area_struct *mpnt, unsigned long addr)
+{
+        struct vm_area_struct *new;
+
+        if (mm->map_count >= MAX_MAP_COUNT)
+                return -ENOMEM;
+
+        new = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
+        if (!new)
+                return -ENOMEM;
+
+        /* most fields are the same, copy all, and then fixup */
+        *new = *mpnt;
+
+        new->vm_start = addr;
+        new->vm_pgoff = mpnt->vm_pgoff + ((addr - mpnt->vm_start) >> PAGE_SHIFT);
+        new->vm_raend = 0;
+
+        if (mpnt->vm_file) {
+                struct file *file = mpnt->vm_file;
+                get_file(file);
+                if (mpnt->vm_flags & VM_DENYWRITE)
+                        atomic_dec(&file->f_dentry->d_inode->i_writecount);
+        }
+
+        if (mpnt->vm_ops && mpnt->vm_ops->open)
+                mpnt->vm_ops->open(mpnt);
+        mpnt->vm_end = addr;        /* Truncate area */
+
+        spin_lock(&mm->page_table_lock);
+        lock_vma_mappings(mpnt);
+        __insert_vm_struct(mm, new);
+        unlock_vma_mappings(mpnt);
+        spin_unlock(&mm->page_table_lock);
+
+        return 0;
+}
+
+/* Munmap is split into 2 main parts -- this part which finds
+ * what needs doing, and the areas themselves, which do the
+ * work.  This now handles partial unmappings.
+ * Jeremy Fitzhardine <jeremy@sw.oz.au>
+ */
+int do_munmap(struct mm_struct *mm, unsigned long start, size_t len)
+{
+        unsigned long end;
+        struct vm_area_struct *mpnt, *prev, *last;
+
+        if ((start & ~PAGE_MASK) || start > TASK_SIZE || len > TASK_SIZE-start)
+                return -EINVAL;
+
+        if ((len = PAGE_ALIGN(len)) == 0)
+                return -EINVAL;
+
+        /* Find the first overlapping VMA */
+        mpnt = find_vma_prev(mm, start, &prev);
+        if (!mpnt)
+                return 0;
+        /* we have start < mpnt->vm_end  */
+
+        /* if it doesn't overlap, we have nothing.. */
+        end = start + len;
+        if (mpnt->vm_start >= end)
+                return 0;
+
+        /*
+         * If we need to split any vma, do it now to save pain later.
+         */
+        if (start > mpnt->vm_start) {
+                if (splitvma(mm, mpnt, start))
+                        return -ENOMEM;
+                prev = mpnt;
+                mpnt = mpnt->vm_next;
+        }
+
+        /* Does it split the last one? */
+        last = find_vma(mm, end);
+        if (last && end > last->vm_start) {
+                if (splitvma(mm, last, end))
+                        return -ENOMEM;
+        }
+
+        /*
+         * Remove the vma's, and unmap the actual pages
+         */
+        spin_lock(&mm->page_table_lock);
+        mpnt = touched_by_munmap(mm, mpnt, prev, end);
+        unmap_region(mm, mpnt, prev, start, end);
+        spin_unlock(&mm->page_table_lock);
+
+        /* Fix up all other VM information */
+        unmap_vma_list(mm, mpnt);
+
+        return 0;
+}
...
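Taken together, the new do_munmap() replaces the old per-area fix-up with a simpler invariant: first split any vma that straddles a boundary of [start, end), so that every affected vma then lies wholly inside the range and can be detached and destroyed as a unit. A compact user-space sketch of that split-then-detach idea over a plain sorted interval list (hypothetical types, no locking, refcounting, or page-table work):

/* Sketch of the split-then-detach munmap flow; purely illustrative. */
#include <stdio.h>
#include <stdlib.h>

struct vma {
        unsigned long start, end;       /* [start, end) */
        struct vma *next;
};

/* Split 'v' at 'addr' (start < addr < end): 'v' keeps the head,
 * a new node holds the tail -- the analogue of splitvma(). */
static int split(struct vma *v, unsigned long addr)
{
        struct vma *tail = malloc(sizeof(*tail));
        if (!tail)
                return -1;
        *tail = *v;
        tail->start = addr;
        v->end = addr;
        v->next = tail;
        return 0;
}

/* Unmap [start, end): split straddling vmas, then unlink every vma
 * that now lies wholly inside the range -- cf. touched_by_munmap(). */
static int do_unmap(struct vma **head, unsigned long start, unsigned long end)
{
        struct vma **link = head, *v;

        while ((v = *link) != NULL && v->end <= start)
                link = &v->next;

        if (v && v->start < start && split(v, start))
                return -1;
        /* Re-walk from the same link: the head piece (if any) is skipped. */
        while ((v = *link) != NULL && v->end <= start)
                link = &v->next;

        while ((v = *link) != NULL && v->start < end) {
                if (v->end > end && split(v, end))
                        return -1;
                v = *link;
                *link = v->next;        /* detach the fully-covered piece */
                free(v);                /* stands in for unmap_vma() */
        }
        return 0;
}

int main(void)
{
        /* One mapping [0x1000, 0x9000); punch a hole at [0x3000, 0x5000). */
        struct vma *head = malloc(sizeof(*head));
        if (!head)
                return 1;
        head->start = 0x1000;
        head->end = 0x9000;
        head->next = NULL;

        if (do_unmap(&head, 0x3000, 0x5000) == 0)
                for (struct vma *v = head; v; v = v->next)
                        printf("[%#lx, %#lx)\n", v->start, v->end);
        return 0;
}

Running it leaves [0x1000, 0x3000) and [0x5000, 0x9000), the hole-punch case for which the old code had to pre-allocate the 'extra' vma and thread it through unmap_fixup().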