Commit c0868962 authored by Andrew Morton's avatar Andrew Morton Committed by Linus Torvalds

[PATCH] Convert i_shared_sem back to a spinlock

Having a semaphore in there causes modest performance regressions on heavily
mmap-intensive workloads on some hardware.  Specifically, up to 30% in SDET on
NUMAQ and big PPC64.

So switch it back to being a spinlock.  This does mean that unmap_vmas() needs
to be told whether or not it is allowed to schedule away; that's simple to do
via the zap_details structure.

This change means that there will be high scheuling latencies when someone
truncates a large file which is currently mmapped, but nobody does that
anyway.  The scheduling points in unmap_vmas() are mainly for munmap() and
exit(), and they still will work OK for that.

From: Hugh Dickins <hugh@veritas.com>

  Sorry, my premature optimizations (trying to pass down NULL zap_details
  except when needed) have caught you out doubly: unmap_mapping_range_list was
  NULLing the details even though atomic was set; and if it hadn't, then
  zap_pte_range would have missed free_swap_and_cache and pte_clear when pte
  not present.  Moved the optimization into zap_pte_range itself.  Plus
  massive documentation update.

From: Hugh Dickins <hugh@veritas.com>

  Here's a second patch to add to the first: mremap's cows can't come home
  without releasing the i_mmap_lock, better move the whole "Subtle point"
  locking from move_vma into move_page_tables.  And it's possible for the file
  that was behind an anonymous page to be truncated while we drop that lock,
  don't want to abort mremap because of VM_FAULT_SIGBUS.

  (Eek, should we be checking do_swap_page of a vm_file area against the
  truncate_count sequence?  Technically yes, but I doubt we need bother.)


- We cannot hold i_mmap_lock across move_one_page() because
  move_one_page() needs to perform __GFP_WAIT allocations of pagetable pages.

- Move the cond_resched() out so we test it once per page rather than only
  when move_one_page() returns -EAGAIN.
parent 71a18745
...@@ -66,7 +66,7 @@ in some cases it is not really needed. Eg, vm_start is modified by ...@@ -66,7 +66,7 @@ in some cases it is not really needed. Eg, vm_start is modified by
expand_stack(), it is hard to come up with a destructive scenario without expand_stack(), it is hard to come up with a destructive scenario without
having the vmlist protection in this case. having the vmlist protection in this case.
The page_table_lock nests with the inode i_shared_sem and the kmem cache The page_table_lock nests with the inode i_mmap_lock and the kmem cache
c_spinlock spinlocks. This is okay, since the kmem code asks for pages after c_spinlock spinlocks. This is okay, since the kmem code asks for pages after
dropping c_spinlock. The page_table_lock also nests with pagecache_lock and dropping c_spinlock. The page_table_lock also nests with pagecache_lock and
pagemap_lru_lock spinlocks, and no code asks for memory with these locks pagemap_lru_lock spinlocks, and no code asks for memory with these locks
......
...@@ -321,12 +321,12 @@ static int hugetlb_vmtruncate(struct inode *inode, loff_t offset) ...@@ -321,12 +321,12 @@ static int hugetlb_vmtruncate(struct inode *inode, loff_t offset)
pgoff = offset >> HPAGE_SHIFT; pgoff = offset >> HPAGE_SHIFT;
inode->i_size = offset; inode->i_size = offset;
down(&mapping->i_shared_sem); spin_lock(&mapping->i_mmap_lock);
if (!list_empty(&mapping->i_mmap)) if (!list_empty(&mapping->i_mmap))
hugetlb_vmtruncate_list(&mapping->i_mmap, pgoff); hugetlb_vmtruncate_list(&mapping->i_mmap, pgoff);
if (!list_empty(&mapping->i_mmap_shared)) if (!list_empty(&mapping->i_mmap_shared))
hugetlb_vmtruncate_list(&mapping->i_mmap_shared, pgoff); hugetlb_vmtruncate_list(&mapping->i_mmap_shared, pgoff);
up(&mapping->i_shared_sem); spin_unlock(&mapping->i_mmap_lock);
truncate_hugepages(mapping, offset); truncate_hugepages(mapping, offset);
return 0; return 0;
} }
......
...@@ -196,7 +196,7 @@ void inode_init_once(struct inode *inode) ...@@ -196,7 +196,7 @@ void inode_init_once(struct inode *inode)
init_rwsem(&inode->i_alloc_sem); init_rwsem(&inode->i_alloc_sem);
INIT_RADIX_TREE(&inode->i_data.page_tree, GFP_ATOMIC); INIT_RADIX_TREE(&inode->i_data.page_tree, GFP_ATOMIC);
spin_lock_init(&inode->i_data.tree_lock); spin_lock_init(&inode->i_data.tree_lock);
init_MUTEX(&inode->i_data.i_shared_sem); spin_lock_init(&inode->i_data.i_mmap_lock);
atomic_set(&inode->i_data.truncate_count, 0); atomic_set(&inode->i_data.truncate_count, 0);
INIT_LIST_HEAD(&inode->i_data.private_list); INIT_LIST_HEAD(&inode->i_data.private_list);
spin_lock_init(&inode->i_data.private_lock); spin_lock_init(&inode->i_data.private_lock);
......
...@@ -331,7 +331,7 @@ struct address_space { ...@@ -331,7 +331,7 @@ struct address_space {
struct address_space_operations *a_ops; /* methods */ struct address_space_operations *a_ops; /* methods */
struct list_head i_mmap; /* list of private mappings */ struct list_head i_mmap; /* list of private mappings */
struct list_head i_mmap_shared; /* list of shared mappings */ struct list_head i_mmap_shared; /* list of shared mappings */
struct semaphore i_shared_sem; /* protect both above lists */ spinlock_t i_mmap_lock; /* protect both above lists */
atomic_t truncate_count; /* Cover race condition with truncate */ atomic_t truncate_count; /* Cover race condition with truncate */
unsigned long flags; /* error bits/gfp mask */ unsigned long flags; /* error bits/gfp mask */
struct backing_dev_info *backing_dev_info; /* device readahead, etc */ struct backing_dev_info *backing_dev_info; /* device readahead, etc */
......
...@@ -477,6 +477,7 @@ struct zap_details { ...@@ -477,6 +477,7 @@ struct zap_details {
struct address_space *check_mapping; /* Check page->mapping if set */ struct address_space *check_mapping; /* Check page->mapping if set */
pgoff_t first_index; /* Lowest page->index to unmap */ pgoff_t first_index; /* Lowest page->index to unmap */
pgoff_t last_index; /* Highest page->index to unmap */ pgoff_t last_index; /* Highest page->index to unmap */
int atomic; /* May not schedule() */
}; };
void zap_page_range(struct vm_area_struct *vma, unsigned long address, void zap_page_range(struct vm_area_struct *vma, unsigned long address,
......
...@@ -69,15 +69,9 @@ static inline int mremap_moved_anon_rmap(struct page *page, unsigned long addr) ...@@ -69,15 +69,9 @@ static inline int mremap_moved_anon_rmap(struct page *page, unsigned long addr)
static inline int make_page_exclusive(struct vm_area_struct *vma, static inline int make_page_exclusive(struct vm_area_struct *vma,
unsigned long addr) unsigned long addr)
{ {
switch (handle_mm_fault(vma->vm_mm, vma, addr, 1)) { if (handle_mm_fault(vma->vm_mm, vma, addr, 1) != VM_FAULT_OOM)
case VM_FAULT_MINOR:
case VM_FAULT_MAJOR:
return 0; return 0;
case VM_FAULT_OOM:
return -ENOMEM; return -ENOMEM;
default:
return -EFAULT;
}
} }
/* /*
......
...@@ -324,9 +324,9 @@ static inline int dup_mmap(struct mm_struct * mm, struct mm_struct * oldmm) ...@@ -324,9 +324,9 @@ static inline int dup_mmap(struct mm_struct * mm, struct mm_struct * oldmm)
atomic_dec(&inode->i_writecount); atomic_dec(&inode->i_writecount);
/* insert tmp into the share list, just after mpnt */ /* insert tmp into the share list, just after mpnt */
down(&file->f_mapping->i_shared_sem); spin_lock(&file->f_mapping->i_mmap_lock);
list_add(&tmp->shared, &mpnt->shared); list_add(&tmp->shared, &mpnt->shared);
up(&file->f_mapping->i_shared_sem); spin_unlock(&file->f_mapping->i_mmap_lock);
} }
/* /*
......
...@@ -55,17 +55,17 @@ ...@@ -55,17 +55,17 @@
/* /*
* Lock ordering: * Lock ordering:
* *
* ->i_shared_sem (vmtruncate) * ->i_mmap_lock (vmtruncate)
* ->private_lock (__free_pte->__set_page_dirty_buffers) * ->private_lock (__free_pte->__set_page_dirty_buffers)
* ->swap_list_lock * ->swap_list_lock
* ->swap_device_lock (exclusive_swap_page, others) * ->swap_device_lock (exclusive_swap_page, others)
* ->mapping->tree_lock * ->mapping->tree_lock
* *
* ->i_sem * ->i_sem
* ->i_shared_sem (truncate->unmap_mapping_range) * ->i_mmap_lock (truncate->unmap_mapping_range)
* *
* ->mmap_sem * ->mmap_sem
* ->i_shared_sem (various places) * ->i_mmap_lock (various places)
* *
* ->mmap_sem * ->mmap_sem
* ->lock_page (access_process_vm) * ->lock_page (access_process_vm)
......
...@@ -92,16 +92,14 @@ static long madvise_willneed(struct vm_area_struct * vma, ...@@ -92,16 +92,14 @@ static long madvise_willneed(struct vm_area_struct * vma,
static long madvise_dontneed(struct vm_area_struct * vma, static long madvise_dontneed(struct vm_area_struct * vma,
unsigned long start, unsigned long end) unsigned long start, unsigned long end)
{ {
struct zap_details details;
if (vma->vm_flags & VM_LOCKED) if (vma->vm_flags & VM_LOCKED)
return -EINVAL; return -EINVAL;
if (unlikely(vma->vm_flags & VM_NONLINEAR)) { if (unlikely(vma->vm_flags & VM_NONLINEAR)) {
details.check_mapping = NULL; struct zap_details details = {
details.nonlinear_vma = vma; .nonlinear_vma = vma,
details.first_index = 0; .last_index = ULONG_MAX,
details.last_index = ULONG_MAX; };
zap_page_range(vma, start, end - start, &details); zap_page_range(vma, start, end - start, &details);
} else } else
zap_page_range(vma, start, end - start, NULL); zap_page_range(vma, start, end - start, NULL);
......
...@@ -365,6 +365,8 @@ static void zap_pte_range(struct mmu_gather *tlb, ...@@ -365,6 +365,8 @@ static void zap_pte_range(struct mmu_gather *tlb,
if (offset + size > PMD_SIZE) if (offset + size > PMD_SIZE)
size = PMD_SIZE - offset; size = PMD_SIZE - offset;
size &= PAGE_MASK; size &= PAGE_MASK;
if (details && !details->check_mapping && !details->nonlinear_vma)
details = NULL;
for (offset=0; offset < size; ptep++, offset += PAGE_SIZE) { for (offset=0; offset < size; ptep++, offset += PAGE_SIZE) {
pte_t pte = *ptep; pte_t pte = *ptep;
if (pte_none(pte)) if (pte_none(pte))
...@@ -518,6 +520,7 @@ int unmap_vmas(struct mmu_gather **tlbp, struct mm_struct *mm, ...@@ -518,6 +520,7 @@ int unmap_vmas(struct mmu_gather **tlbp, struct mm_struct *mm,
unsigned long tlb_start = 0; /* For tlb_finish_mmu */ unsigned long tlb_start = 0; /* For tlb_finish_mmu */
int tlb_start_valid = 0; int tlb_start_valid = 0;
int ret = 0; int ret = 0;
int atomic = details && details->atomic;
for ( ; vma && vma->vm_start < end_addr; vma = vma->vm_next) { for ( ; vma && vma->vm_start < end_addr; vma = vma->vm_next) {
unsigned long start; unsigned long start;
...@@ -555,7 +558,7 @@ int unmap_vmas(struct mmu_gather **tlbp, struct mm_struct *mm, ...@@ -555,7 +558,7 @@ int unmap_vmas(struct mmu_gather **tlbp, struct mm_struct *mm,
zap_bytes -= block; zap_bytes -= block;
if ((long)zap_bytes > 0) if ((long)zap_bytes > 0)
continue; continue;
if (need_resched()) { if (!atomic && need_resched()) {
int fullmm = tlb_is_full_mm(*tlbp); int fullmm = tlb_is_full_mm(*tlbp);
tlb_finish_mmu(*tlbp, tlb_start, start); tlb_finish_mmu(*tlbp, tlb_start, start);
cond_resched_lock(&mm->page_table_lock); cond_resched_lock(&mm->page_table_lock);
...@@ -583,8 +586,6 @@ void zap_page_range(struct vm_area_struct *vma, unsigned long address, ...@@ -583,8 +586,6 @@ void zap_page_range(struct vm_area_struct *vma, unsigned long address,
unsigned long end = address + size; unsigned long end = address + size;
unsigned long nr_accounted = 0; unsigned long nr_accounted = 0;
might_sleep();
if (is_vm_hugetlb_page(vma)) { if (is_vm_hugetlb_page(vma)) {
zap_hugepage_range(vma, address, size); zap_hugepage_range(vma, address, size);
return; return;
...@@ -1133,8 +1134,7 @@ static void unmap_mapping_range_list(struct list_head *head, ...@@ -1133,8 +1134,7 @@ static void unmap_mapping_range_list(struct list_head *head,
zea = vea; zea = vea;
zap_page_range(vma, zap_page_range(vma,
((zba - vba) << PAGE_SHIFT) + vma->vm_start, ((zba - vba) << PAGE_SHIFT) + vma->vm_start,
(zea - zba + 1) << PAGE_SHIFT, (zea - zba + 1) << PAGE_SHIFT, details);
details->check_mapping? details: NULL);
} }
} }
...@@ -1173,10 +1173,11 @@ void unmap_mapping_range(struct address_space *mapping, ...@@ -1173,10 +1173,11 @@ void unmap_mapping_range(struct address_space *mapping,
details.nonlinear_vma = NULL; details.nonlinear_vma = NULL;
details.first_index = hba; details.first_index = hba;
details.last_index = hba + hlen - 1; details.last_index = hba + hlen - 1;
details.atomic = 1; /* A spinlock is held */
if (details.last_index < details.first_index) if (details.last_index < details.first_index)
details.last_index = ULONG_MAX; details.last_index = ULONG_MAX;
down(&mapping->i_shared_sem); spin_lock(&mapping->i_mmap_lock);
/* Protect against page fault */ /* Protect against page fault */
atomic_inc(&mapping->truncate_count); atomic_inc(&mapping->truncate_count);
if (unlikely(!list_empty(&mapping->i_mmap))) if (unlikely(!list_empty(&mapping->i_mmap)))
...@@ -1187,7 +1188,7 @@ void unmap_mapping_range(struct address_space *mapping, ...@@ -1187,7 +1188,7 @@ void unmap_mapping_range(struct address_space *mapping,
if (unlikely(!list_empty(&mapping->i_mmap_shared))) if (unlikely(!list_empty(&mapping->i_mmap_shared)))
unmap_mapping_range_list(&mapping->i_mmap_shared, &details); unmap_mapping_range_list(&mapping->i_mmap_shared, &details);
up(&mapping->i_shared_sem); spin_unlock(&mapping->i_mmap_lock);
} }
EXPORT_SYMBOL(unmap_mapping_range); EXPORT_SYMBOL(unmap_mapping_range);
......
...@@ -63,7 +63,7 @@ EXPORT_SYMBOL(sysctl_max_map_count); ...@@ -63,7 +63,7 @@ EXPORT_SYMBOL(sysctl_max_map_count);
EXPORT_SYMBOL(vm_committed_space); EXPORT_SYMBOL(vm_committed_space);
/* /*
* Requires inode->i_mapping->i_shared_sem * Requires inode->i_mapping->i_mmap_lock
*/ */
static inline void static inline void
__remove_shared_vm_struct(struct vm_area_struct *vma, struct inode *inode) __remove_shared_vm_struct(struct vm_area_struct *vma, struct inode *inode)
...@@ -84,9 +84,9 @@ static void remove_shared_vm_struct(struct vm_area_struct *vma) ...@@ -84,9 +84,9 @@ static void remove_shared_vm_struct(struct vm_area_struct *vma)
if (file) { if (file) {
struct address_space *mapping = file->f_mapping; struct address_space *mapping = file->f_mapping;
down(&mapping->i_shared_sem); spin_lock(&mapping->i_mmap_lock);
__remove_shared_vm_struct(vma, file->f_dentry->d_inode); __remove_shared_vm_struct(vma, file->f_dentry->d_inode);
up(&mapping->i_shared_sem); spin_unlock(&mapping->i_mmap_lock);
} }
} }
...@@ -286,12 +286,12 @@ static void vma_link(struct mm_struct *mm, struct vm_area_struct *vma, ...@@ -286,12 +286,12 @@ static void vma_link(struct mm_struct *mm, struct vm_area_struct *vma,
mapping = vma->vm_file->f_mapping; mapping = vma->vm_file->f_mapping;
if (mapping) if (mapping)
down(&mapping->i_shared_sem); spin_lock(&mapping->i_mmap_lock);
spin_lock(&mm->page_table_lock); spin_lock(&mm->page_table_lock);
__vma_link(mm, vma, prev, rb_link, rb_parent); __vma_link(mm, vma, prev, rb_link, rb_parent);
spin_unlock(&mm->page_table_lock); spin_unlock(&mm->page_table_lock);
if (mapping) if (mapping)
up(&mapping->i_shared_sem); spin_unlock(&mapping->i_mmap_lock);
mark_mm_hugetlb(mm, vma); mark_mm_hugetlb(mm, vma);
mm->map_count++; mm->map_count++;
...@@ -301,7 +301,7 @@ static void vma_link(struct mm_struct *mm, struct vm_area_struct *vma, ...@@ -301,7 +301,7 @@ static void vma_link(struct mm_struct *mm, struct vm_area_struct *vma,
/* /*
* Insert vm structure into process list sorted by address and into the inode's * Insert vm structure into process list sorted by address and into the inode's
* i_mmap ring. The caller should hold mm->page_table_lock and * i_mmap ring. The caller should hold mm->page_table_lock and
* ->f_mappping->i_shared_sem if vm_file is non-NULL. * ->f_mappping->i_mmap_lock if vm_file is non-NULL.
*/ */
static void static void
__insert_vm_struct(struct mm_struct * mm, struct vm_area_struct * vma) __insert_vm_struct(struct mm_struct * mm, struct vm_area_struct * vma)
...@@ -391,7 +391,7 @@ static struct vm_area_struct *vma_merge(struct mm_struct *mm, ...@@ -391,7 +391,7 @@ static struct vm_area_struct *vma_merge(struct mm_struct *mm,
{ {
spinlock_t *lock = &mm->page_table_lock; spinlock_t *lock = &mm->page_table_lock;
struct inode *inode = file ? file->f_dentry->d_inode : NULL; struct inode *inode = file ? file->f_dentry->d_inode : NULL;
struct semaphore *i_shared_sem; spinlock_t *i_mmap_lock;
/* /*
* We later require that vma->vm_flags == vm_flags, so this tests * We later require that vma->vm_flags == vm_flags, so this tests
...@@ -400,7 +400,7 @@ static struct vm_area_struct *vma_merge(struct mm_struct *mm, ...@@ -400,7 +400,7 @@ static struct vm_area_struct *vma_merge(struct mm_struct *mm,
if (vm_flags & VM_SPECIAL) if (vm_flags & VM_SPECIAL)
return NULL; return NULL;
i_shared_sem = file ? &file->f_mapping->i_shared_sem : NULL; i_mmap_lock = file ? &file->f_mapping->i_mmap_lock : NULL;
if (!prev) { if (!prev) {
prev = rb_entry(rb_parent, struct vm_area_struct, vm_rb); prev = rb_entry(rb_parent, struct vm_area_struct, vm_rb);
...@@ -417,7 +417,7 @@ static struct vm_area_struct *vma_merge(struct mm_struct *mm, ...@@ -417,7 +417,7 @@ static struct vm_area_struct *vma_merge(struct mm_struct *mm,
if (unlikely(file && prev->vm_next && if (unlikely(file && prev->vm_next &&
prev->vm_next->vm_file == file)) { prev->vm_next->vm_file == file)) {
down(i_shared_sem); spin_lock(i_mmap_lock);
need_up = 1; need_up = 1;
} }
spin_lock(lock); spin_lock(lock);
...@@ -435,7 +435,7 @@ static struct vm_area_struct *vma_merge(struct mm_struct *mm, ...@@ -435,7 +435,7 @@ static struct vm_area_struct *vma_merge(struct mm_struct *mm,
__remove_shared_vm_struct(next, inode); __remove_shared_vm_struct(next, inode);
spin_unlock(lock); spin_unlock(lock);
if (need_up) if (need_up)
up(i_shared_sem); spin_unlock(i_mmap_lock);
if (file) if (file)
fput(file); fput(file);
...@@ -445,7 +445,7 @@ static struct vm_area_struct *vma_merge(struct mm_struct *mm, ...@@ -445,7 +445,7 @@ static struct vm_area_struct *vma_merge(struct mm_struct *mm,
} }
spin_unlock(lock); spin_unlock(lock);
if (need_up) if (need_up)
up(i_shared_sem); spin_unlock(i_mmap_lock);
return prev; return prev;
} }
...@@ -460,13 +460,13 @@ static struct vm_area_struct *vma_merge(struct mm_struct *mm, ...@@ -460,13 +460,13 @@ static struct vm_area_struct *vma_merge(struct mm_struct *mm,
return NULL; return NULL;
if (end == prev->vm_start) { if (end == prev->vm_start) {
if (file) if (file)
down(i_shared_sem); spin_lock(i_mmap_lock);
spin_lock(lock); spin_lock(lock);
prev->vm_start = addr; prev->vm_start = addr;
prev->vm_pgoff -= (end - addr) >> PAGE_SHIFT; prev->vm_pgoff -= (end - addr) >> PAGE_SHIFT;
spin_unlock(lock); spin_unlock(lock);
if (file) if (file)
up(i_shared_sem); spin_unlock(i_mmap_lock);
return prev; return prev;
} }
} }
...@@ -1232,7 +1232,7 @@ int split_vma(struct mm_struct * mm, struct vm_area_struct * vma, ...@@ -1232,7 +1232,7 @@ int split_vma(struct mm_struct * mm, struct vm_area_struct * vma,
mapping = vma->vm_file->f_mapping; mapping = vma->vm_file->f_mapping;
if (mapping) if (mapping)
down(&mapping->i_shared_sem); spin_lock(&mapping->i_mmap_lock);
spin_lock(&mm->page_table_lock); spin_lock(&mm->page_table_lock);
if (new_below) { if (new_below) {
...@@ -1245,7 +1245,7 @@ int split_vma(struct mm_struct * mm, struct vm_area_struct * vma, ...@@ -1245,7 +1245,7 @@ int split_vma(struct mm_struct * mm, struct vm_area_struct * vma,
spin_unlock(&mm->page_table_lock); spin_unlock(&mm->page_table_lock);
if (mapping) if (mapping)
up(&mapping->i_shared_sem); spin_unlock(&mapping->i_mmap_lock);
return 0; return 0;
} }
...@@ -1479,7 +1479,7 @@ void exit_mmap(struct mm_struct *mm) ...@@ -1479,7 +1479,7 @@ void exit_mmap(struct mm_struct *mm)
/* Insert vm structure into process list sorted by address /* Insert vm structure into process list sorted by address
* and into the inode's i_mmap ring. If vm_file is non-NULL * and into the inode's i_mmap ring. If vm_file is non-NULL
* then i_shared_sem is taken here. * then i_mmap_lock is taken here.
*/ */
void insert_vm_struct(struct mm_struct * mm, struct vm_area_struct * vma) void insert_vm_struct(struct mm_struct * mm, struct vm_area_struct * vma)
{ {
......
...@@ -180,7 +180,6 @@ static unsigned long move_vma(struct vm_area_struct *vma, ...@@ -180,7 +180,6 @@ static unsigned long move_vma(struct vm_area_struct *vma,
unsigned long new_len, unsigned long new_addr) unsigned long new_len, unsigned long new_addr)
{ {
struct mm_struct *mm = vma->vm_mm; struct mm_struct *mm = vma->vm_mm;
struct address_space *mapping = NULL;
struct vm_area_struct *new_vma; struct vm_area_struct *new_vma;
unsigned long vm_flags = vma->vm_flags; unsigned long vm_flags = vma->vm_flags;
unsigned long new_pgoff; unsigned long new_pgoff;
...@@ -201,16 +200,6 @@ static unsigned long move_vma(struct vm_area_struct *vma, ...@@ -201,16 +200,6 @@ static unsigned long move_vma(struct vm_area_struct *vma,
if (!new_vma) if (!new_vma)
return -ENOMEM; return -ENOMEM;
if (vma->vm_file) {
/*
* Subtle point from Rajesh Venkatasubramanian: before
* moving file-based ptes, we must lock vmtruncate out,
* since it might clean the dst vma before the src vma,
* and we propagate stale pages into the dst afterward.
*/
mapping = vma->vm_file->f_mapping;
down(&mapping->i_shared_sem);
}
moved_len = move_page_tables(vma, new_addr, old_addr, old_len, &cows); moved_len = move_page_tables(vma, new_addr, old_addr, old_len, &cows);
if (moved_len < old_len) { if (moved_len < old_len) {
/* /*
...@@ -227,8 +216,6 @@ static unsigned long move_vma(struct vm_area_struct *vma, ...@@ -227,8 +216,6 @@ static unsigned long move_vma(struct vm_area_struct *vma,
if (cows) /* Downgrade or remove this message later */ if (cows) /* Downgrade or remove this message later */
printk(KERN_WARNING "%s: mremap moved %d cows\n", printk(KERN_WARNING "%s: mremap moved %d cows\n",
current->comm, cows); current->comm, cows);
if (mapping)
up(&mapping->i_shared_sem);
/* Conceal VM_ACCOUNT so old reservation is not undone */ /* Conceal VM_ACCOUNT so old reservation is not undone */
if (vm_flags & VM_ACCOUNT) { if (vm_flags & VM_ACCOUNT) {
......
...@@ -300,7 +300,7 @@ static inline int page_referenced_anon(struct page *page) ...@@ -300,7 +300,7 @@ static inline int page_referenced_anon(struct page *page)
* *
* This function is only called from page_referenced for object-based pages. * This function is only called from page_referenced for object-based pages.
* *
* The semaphore address_space->i_shared_sem is tried. If it can't be gotten, * The semaphore address_space->i_mmap_lock is tried. If it can't be gotten,
* assume a reference count of 0, so try_to_unmap will then have a go. * assume a reference count of 0, so try_to_unmap will then have a go.
*/ */
static inline int page_referenced_file(struct page *page) static inline int page_referenced_file(struct page *page)
...@@ -313,7 +313,7 @@ static inline int page_referenced_file(struct page *page) ...@@ -313,7 +313,7 @@ static inline int page_referenced_file(struct page *page)
int referenced = 0; int referenced = 0;
int failed = 0; int failed = 0;
if (down_trylock(&mapping->i_shared_sem)) if (!spin_trylock(&mapping->i_mmap_lock))
return 0; return 0;
list_for_each_entry(vma, &mapping->i_mmap, shared) { list_for_each_entry(vma, &mapping->i_mmap, shared) {
...@@ -355,7 +355,7 @@ static inline int page_referenced_file(struct page *page) ...@@ -355,7 +355,7 @@ static inline int page_referenced_file(struct page *page)
WARN_ON(!failed); WARN_ON(!failed);
out: out:
up(&mapping->i_shared_sem); spin_unlock(&mapping->i_mmap_lock);
return referenced; return referenced;
} }
...@@ -725,7 +725,7 @@ static inline int try_to_unmap_anon(struct page *page) ...@@ -725,7 +725,7 @@ static inline int try_to_unmap_anon(struct page *page)
* *
* This function is only called from try_to_unmap for object-based pages. * This function is only called from try_to_unmap for object-based pages.
* *
* The semaphore address_space->i_shared_sem is tried. If it can't be gotten, * The semaphore address_space->i_mmap_lock is tried. If it can't be gotten,
* return a temporary error. * return a temporary error.
*/ */
static inline int try_to_unmap_file(struct page *page) static inline int try_to_unmap_file(struct page *page)
...@@ -740,7 +740,7 @@ static inline int try_to_unmap_file(struct page *page) ...@@ -740,7 +740,7 @@ static inline int try_to_unmap_file(struct page *page)
unsigned long max_nl_cursor = 0; unsigned long max_nl_cursor = 0;
unsigned long max_nl_size = 0; unsigned long max_nl_size = 0;
if (down_trylock(&mapping->i_shared_sem)) if (!spin_trylock(&mapping->i_mmap_lock))
return ret; return ret;
list_for_each_entry(vma, &mapping->i_mmap, shared) { list_for_each_entry(vma, &mapping->i_mmap, shared) {
...@@ -839,7 +839,7 @@ static inline int try_to_unmap_file(struct page *page) ...@@ -839,7 +839,7 @@ static inline int try_to_unmap_file(struct page *page)
relock: relock:
page_map_lock(page); page_map_lock(page);
out: out:
up(&mapping->i_shared_sem); spin_unlock(&mapping->i_mmap_lock);
return ret; return ret;
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment