Commit 64a8920f authored by Linus Torvalds

Merge branch 'anonvma'

* anonvma:
  anonvma: when setting up page->mapping, we need to pick the _oldest_ anonvma
  anon_vma: clone the anon_vma chain in the right order
  vma_adjust: fix the copying of anon_vma chains
  Simplify and comment on anon_vma re-use for anon_vma_prepare()
parents 50b88c46 ea90002b
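
All four fixes revolve around the per-vma list of anon_vma_chain entries that links a vma to every anon_vma its pages may live under. For reference, a sketch of that structure as it stands in this series (per include/linux/rmap.h of the same era; the ordering comments are paraphrased, not verbatim):

    struct anon_vma_chain {
            struct vm_area_struct *vma;     /* the vma this entry belongs to */
            struct anon_vma *anon_vma;      /* one anon_vma the vma is linked to */
            struct list_head same_vma;      /* entry in vma->anon_vma_chain:
                                             * newest anon_vma at the head,
                                             * oldest ancestor at the tail */
            struct list_head same_anon_vma; /* entry in anon_vma->head: all
                                             * vmas sharing this anon_vma */
    };

The head-to-tail ordering of the same_vma list is exactly what the "oldest anonvma" and "clone ... in the right order" fixes below rely on.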
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -507,11 +507,12 @@ int vma_adjust(struct vm_area_struct *vma, unsigned long start,
         struct address_space *mapping = NULL;
         struct prio_tree_root *root = NULL;
         struct file *file = vma->vm_file;
-        struct anon_vma *anon_vma = NULL;
         long adjust_next = 0;
         int remove_next = 0;
 
         if (next && !insert) {
+                struct vm_area_struct *exporter = NULL;
+
                 if (end >= next->vm_end) {
                         /*
                          * vma expands, overlapping all the next, and
@@ -519,7 +520,7 @@ int vma_adjust(struct vm_area_struct *vma, unsigned long start,
                          */
 again:                  remove_next = 1 + (end > next->vm_end);
                         end = next->vm_end;
-                        anon_vma = next->anon_vma;
+                        exporter = next;
                         importer = vma;
                 } else if (end > next->vm_start) {
                         /*
@@ -527,7 +528,7 @@ again:          remove_next = 1 + (end > next->vm_end);
                          * mprotect case 5 shifting the boundary up.
                          */
                         adjust_next = (end - next->vm_start) >> PAGE_SHIFT;
-                        anon_vma = next->anon_vma;
+                        exporter = next;
                         importer = vma;
                 } else if (end < vma->vm_end) {
                         /*
@@ -536,28 +537,19 @@ again:          remove_next = 1 + (end > next->vm_end);
                          * mprotect case 4 shifting the boundary down.
                          */
                         adjust_next = - ((vma->vm_end - end) >> PAGE_SHIFT);
-                        anon_vma = next->anon_vma;
+                        exporter = vma;
                         importer = next;
                 }
-        }
 
-        /*
-         * When changing only vma->vm_end, we don't really need anon_vma lock.
-         */
-        if (vma->anon_vma && (insert || importer || start != vma->vm_start))
-                anon_vma = vma->anon_vma;
-        if (anon_vma) {
                 /*
                  * Easily overlooked: when mprotect shifts the boundary,
                  * make sure the expanding vma has anon_vma set if the
                  * shrinking vma had, to cover any anon pages imported.
                  */
-                if (importer && !importer->anon_vma) {
-                        /* Block reverse map lookups until things are set up. */
-                        if (anon_vma_clone(importer, vma)) {
+                if (exporter && exporter->anon_vma && !importer->anon_vma) {
+                        if (anon_vma_clone(importer, exporter))
                                 return -ENOMEM;
-                        }
-                        importer->anon_vma = anon_vma;
+                        importer->anon_vma = exporter->anon_vma;
                 }
         }
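
The exporter/importer rework above is exercised by mprotect-driven re-merges. A hypothetical but representative userspace sequence (a sketch assuming 4 KiB pages, with error checking elided) that makes an expanding vma with no anon_vma of its own import anon pages from its neighbour:

    #include <sys/mman.h>

    #define PAGE 4096                   /* assumption: 4 KiB pages */

    int main(void)
    {
            /* Two adjacent anonymous pages in one vma. */
            char *p = mmap(NULL, 2 * PAGE, PROT_READ | PROT_WRITE,
                           MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

            /* Replace the first page with a fresh, never-faulted read-only
             * vma: it has no anon_vma. */
            mmap(p, PAGE, PROT_READ,
                 MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0);

            p[PAGE] = 1;        /* fault the second vma: it gets an anon_vma */

            /* Make the protections match again: vma_adjust() merges the two
             * vmas, and the never-faulted one (the importer) must pick up the
             * anon_vma of the vma being removed (the exporter). */
            mprotect(p, PAGE, PROT_READ | PROT_WRITE);
            return 0;
    }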
@@ -824,6 +816,61 @@ struct vm_area_struct *vma_merge(struct mm_struct *mm,
         return NULL;
 }
 
+/*
+ * Rough compatibility check to quickly see if it's even worth looking
+ * at sharing an anon_vma.
+ *
+ * They need to have the same vm_file, and the flags can only differ
+ * in things that mprotect may change.
+ *
+ * NOTE! The fact that we share an anon_vma doesn't _have_ to mean that
+ * we can merge the two vma's. For example, we refuse to merge a vma if
+ * there is a vm_ops->close() function, because that indicates that the
+ * driver is doing some kind of reference counting. But that doesn't
+ * really matter for the anon_vma sharing case.
+ */
+static int anon_vma_compatible(struct vm_area_struct *a, struct vm_area_struct *b)
+{
+        return a->vm_end == b->vm_start &&
+                mpol_equal(vma_policy(a), vma_policy(b)) &&
+                a->vm_file == b->vm_file &&
+                !((a->vm_flags ^ b->vm_flags) & ~(VM_READ|VM_WRITE|VM_EXEC)) &&
+                b->vm_pgoff == a->vm_pgoff + ((b->vm_start - a->vm_start) >> PAGE_SHIFT);
+}
+
+/*
+ * Do some basic sanity checking to see if we can re-use the anon_vma
+ * from 'old'. The 'a'/'b' vma's are in VM order - one of them will be
+ * the same as 'old', the other will be the new one that is trying
+ * to share the anon_vma.
+ *
+ * NOTE! This runs with mm_sem held for reading, so it is possible that
+ * the anon_vma of 'old' is concurrently in the process of being set up
+ * by another page fault trying to merge _that_. But that's ok: if it
+ * is being set up, that automatically means that it will be a singleton
+ * acceptable for merging, so we can do all of this optimistically. But
+ * we do that ACCESS_ONCE() to make sure that we never re-load the pointer.
+ *
+ * IOW: that the "list_is_singular()" test on the anon_vma_chain only
+ * matters for the 'stable anon_vma' case (ie the thing we want to avoid
+ * is to return an anon_vma that is "complex" due to having gone through
+ * a fork).
+ *
+ * We also make sure that the two vma's are compatible (adjacent,
+ * and with the same memory policies). That's all stable, even with just
+ * a read lock on the mm_sem.
+ */
+static struct anon_vma *reusable_anon_vma(struct vm_area_struct *old, struct vm_area_struct *a, struct vm_area_struct *b)
+{
+        if (anon_vma_compatible(a, b)) {
+                struct anon_vma *anon_vma = ACCESS_ONCE(old->anon_vma);
+
+                if (anon_vma && list_is_singular(&old->anon_vma_chain))
+                        return anon_vma;
+        }
+        return NULL;
+}
+
 /*
  * find_mergeable_anon_vma is used by anon_vma_prepare, to check
  * neighbouring vmas for a suitable anon_vma, before it goes off
@@ -834,28 +881,16 @@ struct vm_area_struct *vma_merge(struct mm_struct *mm,
  */
 struct anon_vma *find_mergeable_anon_vma(struct vm_area_struct *vma)
 {
+        struct anon_vma *anon_vma;
         struct vm_area_struct *near;
-        unsigned long vm_flags;
 
         near = vma->vm_next;
         if (!near)
                 goto try_prev;
 
-        /*
-         * Since only mprotect tries to remerge vmas, match flags
-         * which might be mprotected into each other later on.
-         * Neither mlock nor madvise tries to remerge at present,
-         * so leave their flags as obstructing a merge.
-         */
-        vm_flags = vma->vm_flags & ~(VM_READ|VM_WRITE|VM_EXEC);
-        vm_flags |= near->vm_flags & (VM_READ|VM_WRITE|VM_EXEC);
-
-        if (near->anon_vma && vma->vm_end == near->vm_start &&
-                        mpol_equal(vma_policy(vma), vma_policy(near)) &&
-                        can_vma_merge_before(near, vm_flags,
-                                NULL, vma->vm_file, vma->vm_pgoff +
-                                ((vma->vm_end - vma->vm_start) >> PAGE_SHIFT)))
-                return near->anon_vma;
+        anon_vma = reusable_anon_vma(near, vma, near);
+        if (anon_vma)
+                return anon_vma;
 try_prev:
         /*
          * It is potentially slow to have to call find_vma_prev here.
@@ -868,14 +903,9 @@ struct anon_vma *find_mergeable_anon_vma(struct vm_area_struct *vma)
         if (!near)
                 goto none;
 
-        vm_flags = vma->vm_flags & ~(VM_READ|VM_WRITE|VM_EXEC);
-        vm_flags |= near->vm_flags & (VM_READ|VM_WRITE|VM_EXEC);
-
-        if (near->anon_vma && near->vm_end == vma->vm_start &&
-                        mpol_equal(vma_policy(near), vma_policy(vma)) &&
-                        can_vma_merge_after(near, vm_flags,
-                                NULL, vma->vm_file, vma->vm_pgoff))
-                return near->anon_vma;
+        anon_vma = reusable_anon_vma(near, near, vma);
+        if (anon_vma)
+                return anon_vma;
 none:
         /*
          * There's no absolute need to look only at touching neighbours:
...
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -182,7 +182,7 @@ int anon_vma_clone(struct vm_area_struct *dst, struct vm_area_struct *src)
 {
         struct anon_vma_chain *avc, *pavc;
 
-        list_for_each_entry(pavc, &src->anon_vma_chain, same_vma) {
+        list_for_each_entry_reverse(pavc, &src->anon_vma_chain, same_vma) {
                 avc = anon_vma_chain_alloc();
                 if (!avc)
                         goto enomem_failure;
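
The reverse walk matters because the chain-link helper prepends each new entry to dst's same_vma list: visiting src back-to-front therefore reproduces src's order in dst, keeping the oldest anon_vma at the tail. For context, that helper looks roughly like this in this series (a sketch of mm/rmap.c, not part of this diff):

    static void anon_vma_chain_link(struct vm_area_struct *vma,
                                    struct anon_vma_chain *avc,
                                    struct anon_vma *anon_vma)
    {
            avc->vma = vma;
            avc->anon_vma = anon_vma;
            /* list_add() prepends to vma->anon_vma_chain, so the last entry
             * linked ends up at the head, hence the reverse walk above. */
            list_add(&avc->same_vma, &vma->anon_vma_chain);

            spin_lock(&anon_vma->lock);
            list_add_tail(&avc->same_anon_vma, &anon_vma->head);
            spin_unlock(&anon_vma->lock);
    }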
@@ -734,9 +734,20 @@ void page_move_anon_rmap(struct page *page,
 static void __page_set_anon_rmap(struct page *page,
         struct vm_area_struct *vma, unsigned long address)
 {
-        struct anon_vma *anon_vma = vma->anon_vma;
+        struct anon_vma_chain *avc;
+        struct anon_vma *anon_vma;
+
+        BUG_ON(!vma->anon_vma);
+
+        /*
+         * We must use the _oldest_ possible anon_vma for the page mapping!
+         *
+         * So take the last AVC chain entry in the vma, which is the deepest
+         * ancestor, and use the anon_vma from that.
+         */
+        avc = list_entry(vma->anon_vma_chain.prev, struct anon_vma_chain, same_vma);
+        anon_vma = avc->anon_vma;
 
-        BUG_ON(!anon_vma);
         anon_vma = (void *) anon_vma + PAGE_MAPPING_ANON;
         page->mapping = (struct address_space *) anon_vma;
         page->index = linear_page_index(vma, address);
...
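
The PAGE_MAPPING_ANON arithmetic above is the encode half of a tagged-pointer scheme; the decode direction, sketched below as a hypothetical helper (kernel context, ignoring the KSM bit), shows how rmap recovers the anon_vma from page->mapping:

    static struct anon_vma *page_anon_vma_sketch(struct page *page)
    {
            unsigned long mapping = (unsigned long)page->mapping;

            /* PAGE_MAPPING_ANON is bit 0; file-backed pages store a plain
             * struct address_space pointer in page->mapping instead. */
            if (!(mapping & PAGE_MAPPING_ANON))
                    return NULL;
            return (struct anon_vma *)(mapping - PAGE_MAPPING_ANON);
    }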