Commit cacded5e authored by Lorenzo Stoakes, committed by Andrew Morton

mm: avoid using vma_merge() for new VMAs

Abstract vma_merge_new_vma() to use vma_merge_struct and rename the
resultant function vma_merge_new_range() to be clear what the purpose of
this function is - a new VMA is desired in the specified range, and we
wish to see if it is possible to 'merge' surrounding VMAs into this range
rather than having to allocate a new VMA.

Note that this function uses vma_expand() exclusively, so adopts its
requirement that the iterator point at or before the gap.  We add an
assert to this effect.

This is as opposed to vma_merge_existing_range(), which will be introduced
in a subsequent commit, and provide the same functionality for cases in
which we are modifying an existing VMA.

In mmap_region() and do_brk_flags() we open code scenarios where we prefer
to use vma_expand() rather than invoke a full vma_merge() operation.

Abstract this logic and eliminate all of the open-coding, and also use the
same logic for all cases where we add new VMAs so that, rather than
ultimately using vma_merge(), we use vma_expand().

Doing so removes duplication and simplifies VMA merging in all such cases,
laying the ground for us to eliminate the merging of new VMAs in
vma_merge() altogether.

Also add the ability for the vmg to track state and to report errors,
allowing us to differentiate a failed merge from an inability to allocate
memory in callers.

This makes it far easier to understand what is happening in these cases
avoiding confusion, bugs and allowing for future optimisation.

Also introduce vma_iter_next_rewind() to allow for retrieval of the next,
and (optionally) the prev VMA, rewinding to the start of the previous gap.

Introduce are_anon_vmas_compatible() to abstract individual VMA anon_vma
comparison for the case of merging on both sides where the anon_vma of the
VMA being merged may be compatible with prev and next, but prev and next's
anon_vmas may not be compatible with each other.

Finally also introduce can_vma_merge_left() / can_vma_merge_right() to
check adjacent VMA compatibility and that they are indeed adjacent.

Link: https://lkml.kernel.org/r/49d37c0769b6b9dc03b27fe4d059173832556392.1725040657.git.lorenzo.stoakes@oracle.com
Signed-off-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Tested-by: Mark Brown <broonie@kernel.org>
Cc: Liam R. Howlett <Liam.Howlett@oracle.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Bert Karwatzki <spasswolf@web.de>
Cc: Jeff Xu <jeffxu@chromium.org>
Cc: Jiri Olsa <olsajiri@gmail.com>
Cc: Kees Cook <kees@kernel.org>
Cc: Lorenzo Stoakes <lstoakes@gmail.com>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: "Paul E. McKenney" <paulmck@kernel.org>
Cc: Paul Moore <paul@paul-moore.com>
Cc: Sidhartha Kumar <sidhartha.kumar@oracle.com>
Cc: Suren Baghdasaryan <surenb@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
parent fc21959f
...@@ -1364,8 +1364,8 @@ unsigned long mmap_region(struct file *file, unsigned long addr, ...@@ -1364,8 +1364,8 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
{ {
struct mm_struct *mm = current->mm; struct mm_struct *mm = current->mm;
struct vm_area_struct *vma = NULL; struct vm_area_struct *vma = NULL;
struct vm_area_struct *next, *prev, *merge;
pgoff_t pglen = PHYS_PFN(len); pgoff_t pglen = PHYS_PFN(len);
struct vm_area_struct *merge;
unsigned long charged = 0; unsigned long charged = 0;
struct vma_munmap_struct vms; struct vma_munmap_struct vms;
struct ma_state mas_detach; struct ma_state mas_detach;
...@@ -1389,14 +1389,11 @@ unsigned long mmap_region(struct file *file, unsigned long addr, ...@@ -1389,14 +1389,11 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
if (error) if (error)
goto gather_failed; goto gather_failed;
next = vmg.next = vms.next; vmg.next = vms.next;
prev = vmg.prev = vms.prev; vmg.prev = vms.prev;
vma = NULL; vma = NULL;
} else { } else {
next = vmg.next = vma_next(&vmi); vmg.next = vma_iter_next_rewind(&vmi, &vmg.prev);
prev = vmg.prev = vma_prev(&vmi);
if (prev)
vma_iter_next_range(&vmi);
} }
/* Check against address space limit. */ /* Check against address space limit. */
...@@ -1417,46 +1414,9 @@ unsigned long mmap_region(struct file *file, unsigned long addr, ...@@ -1417,46 +1414,9 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
vmg.flags = vm_flags; vmg.flags = vm_flags;
} }
if (vm_flags & VM_SPECIAL) vma = vma_merge_new_range(&vmg);
goto cannot_expand; if (vma)
goto expanded;
/* Attempt to expand an old mapping */
/* Check next */
if (next && next->vm_start == end && can_vma_merge_before(&vmg)) {
vmg.end = next->vm_end;
vma = vmg.vma = next;
vmg.pgoff = next->vm_pgoff - pglen;
/*
* We set this here so if we will merge with the previous VMA in
* the code below, can_vma_merge_after() ensures anon_vma
* compatibility between prev and next.
*/
vmg.anon_vma = vma->anon_vma;
vmg.uffd_ctx = vma->vm_userfaultfd_ctx;
}
/* Check prev */
if (prev && prev->vm_end == addr && can_vma_merge_after(&vmg)) {
vmg.start = prev->vm_start;
vma = vmg.vma = prev;
vmg.pgoff = prev->vm_pgoff;
vma_prev(&vmi); /* Equivalent to going to the previous range */
}
if (vma) {
/* Actually expand, if possible */
if (!vma_expand(&vmg)) {
khugepaged_enter_vma(vma, vm_flags);
goto expanded;
}
/* If the expand fails, then reposition the vma iterator */
if (unlikely(vma == prev))
vma_iter_set(&vmi, addr);
}
cannot_expand:
/* /*
* Determine the object being mapped and call the appropriate * Determine the object being mapped and call the appropriate
* specific mapper. the address has already been validated, but * specific mapper. the address has already been validated, but
...@@ -1503,10 +1463,11 @@ unsigned long mmap_region(struct file *file, unsigned long addr, ...@@ -1503,10 +1463,11 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
* If vm_flags changed after call_mmap(), we should try merge * If vm_flags changed after call_mmap(), we should try merge
* vma again as we may succeed this time. * vma again as we may succeed this time.
*/ */
if (unlikely(vm_flags != vma->vm_flags && prev)) { if (unlikely(vm_flags != vma->vm_flags && vmg.prev)) {
merge = vma_merge_new_vma(&vmi, prev, vma, vmg.flags = vma->vm_flags;
vma->vm_start, vma->vm_end, /* If this fails, state is reset ready for a reattempt. */
vma->vm_pgoff); merge = vma_merge_new_range(&vmg);
if (merge) { if (merge) {
/* /*
* ->mmap() can change vma->vm_file and fput * ->mmap() can change vma->vm_file and fput
...@@ -1522,6 +1483,7 @@ unsigned long mmap_region(struct file *file, unsigned long addr, ...@@ -1522,6 +1483,7 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
vm_flags = vma->vm_flags; vm_flags = vma->vm_flags;
goto unmap_writable; goto unmap_writable;
} }
vma_iter_config(&vmi, addr, end);
} }
vm_flags = vma->vm_flags; vm_flags = vma->vm_flags;
...@@ -1554,7 +1516,7 @@ unsigned long mmap_region(struct file *file, unsigned long addr, ...@@ -1554,7 +1516,7 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
vma_link_file(vma); vma_link_file(vma);
/* /*
* vma_merge() calls khugepaged_enter_vma() either, the below * vma_merge_new_range() calls khugepaged_enter_vma() too, the below
* call covers the non-merge case. * call covers the non-merge case.
*/ */
khugepaged_enter_vma(vma, vma->vm_flags); khugepaged_enter_vma(vma, vma->vm_flags);
...@@ -1609,7 +1571,7 @@ unsigned long mmap_region(struct file *file, unsigned long addr, ...@@ -1609,7 +1571,7 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
vma_iter_set(&vmi, vma->vm_end); vma_iter_set(&vmi, vma->vm_end);
/* Undo any partial mapping done by a device driver. */ /* Undo any partial mapping done by a device driver. */
unmap_region(&vmi.mas, vma, prev, next); unmap_region(&vmi.mas, vma, vmg.prev, vmg.next);
} }
if (writable_file_mapping) if (writable_file_mapping)
mapping_unmap_writable(file->f_mapping); mapping_unmap_writable(file->f_mapping);
...@@ -1756,7 +1718,6 @@ static int do_brk_flags(struct vma_iterator *vmi, struct vm_area_struct *vma, ...@@ -1756,7 +1718,6 @@ static int do_brk_flags(struct vma_iterator *vmi, struct vm_area_struct *vma,
unsigned long addr, unsigned long len, unsigned long flags) unsigned long addr, unsigned long len, unsigned long flags)
{ {
struct mm_struct *mm = current->mm; struct mm_struct *mm = current->mm;
struct vma_prepare vp;
/* /*
* Check against address space limits by the changed size * Check against address space limits by the changed size
...@@ -1780,25 +1741,12 @@ static int do_brk_flags(struct vma_iterator *vmi, struct vm_area_struct *vma, ...@@ -1780,25 +1741,12 @@ static int do_brk_flags(struct vma_iterator *vmi, struct vm_area_struct *vma,
VMG_STATE(vmg, mm, vmi, addr, addr + len, flags, PHYS_PFN(addr)); VMG_STATE(vmg, mm, vmi, addr, addr + len, flags, PHYS_PFN(addr));
vmg.prev = vma; vmg.prev = vma;
if (can_vma_merge_after(&vmg)) { vma_iter_next_range(vmi);
vma_iter_config(vmi, vma->vm_start, addr + len);
if (vma_iter_prealloc(vmi, vma)) if (vma_merge_new_range(&vmg))
goto unacct_fail;
vma_start_write(vma);
init_vma_prep(&vp, vma);
vma_prepare(&vp);
vma_adjust_trans_huge(vma, vma->vm_start, addr + len, 0);
vma->vm_end = addr + len;
vm_flags_set(vma, VM_SOFTDIRTY);
vma_iter_store(vmi, vma);
vma_complete(&vp, vmi, mm);
validate_mm(mm);
khugepaged_enter_vma(vma, flags);
goto out; goto out;
} else if (vmg_nomem(&vmg))
goto unacct_fail;
} }
if (vma) if (vma)
......
...@@ -55,6 +55,13 @@ static inline bool is_mergeable_anon_vma(struct anon_vma *anon_vma1, ...@@ -55,6 +55,13 @@ static inline bool is_mergeable_anon_vma(struct anon_vma *anon_vma1,
return anon_vma1 == anon_vma2; return anon_vma1 == anon_vma2;
} }
/*
 * Report whether the anon_vma attached to @vma1 and the anon_vma attached to
 * @vma2 are mergeable with one another, as judged by is_mergeable_anon_vma()
 * (called with a NULL third argument).
 */
static inline bool are_anon_vmas_compatible(struct vm_area_struct *vma1,
					    struct vm_area_struct *vma2)
{
	struct anon_vma *first = vma1->anon_vma;
	struct anon_vma *second = vma2->anon_vma;

	return is_mergeable_anon_vma(first, second, NULL);
}
/* /*
* init_multi_vma_prep() - Initializer for struct vma_prepare * init_multi_vma_prep() - Initializer for struct vma_prepare
* @vp: The vma_prepare struct * @vp: The vma_prepare struct
...@@ -130,6 +137,44 @@ bool can_vma_merge_after(struct vma_merge_struct *vmg) ...@@ -130,6 +137,44 @@ bool can_vma_merge_after(struct vma_merge_struct *vmg)
return false; return false;
} }
/*
* Can the proposed VMA be merged with the left (previous) VMA taking into
* account the start position of the proposed range.
*/
static bool can_vma_merge_left(struct vma_merge_struct *vmg)
{
return vmg->prev && vmg->prev->vm_end == vmg->start &&
can_vma_merge_after(vmg);
}
/*
 * Decide whether the proposed range can be merged into the VMA on its right.
 *
 * This requires that a next VMA exists, that it starts exactly where the
 * proposed range ends, and that can_vma_merge_before() accepts the merge.
 *
 * @can_merge_left tells us whether a merge with the previous VMA is also
 * going to happen; if so, prev and next must additionally have compatible
 * anon_vma's.
 */
static bool can_vma_merge_right(struct vma_merge_struct *vmg,
				bool can_merge_left)
{
	/* No right neighbour at all. */
	if (!vmg->next)
		return false;

	/* The right neighbour is not immediately adjacent. */
	if (vmg->next->vm_start != vmg->end)
		return false;

	if (!can_vma_merge_before(vmg))
		return false;

	/*
	 * Being mergeable with prev (left) AND with next (right) only tells
	 * us each neighbour's anon_vma is compatible with the proposed
	 * anon_vma - it says nothing about prev and next being compatible
	 * with EACH OTHER. When both sides merge, check that explicitly.
	 */
	if (can_merge_left)
		return are_anon_vmas_compatible(vmg->prev, vmg->next);

	return true;
}
/* /*
* Close a vm structure and free it. * Close a vm structure and free it.
*/ */
...@@ -464,6 +509,111 @@ void validate_mm(struct mm_struct *mm) ...@@ -464,6 +509,111 @@ void validate_mm(struct mm_struct *mm)
} }
#endif /* CONFIG_DEBUG_VM_MAPLE_TREE */ #endif /* CONFIG_DEBUG_VM_MAPLE_TREE */
/*
 * vma_merge_new_range - Attempt to merge a new VMA into address space
 *
 * @vmg: Describes the VMA we are adding, in the range @vmg->start to @vmg->end
 *       (exclusive), which we try to merge with any adjacent VMAs if possible.
 *
 * We are about to add a VMA to the address space starting at @vmg->start and
 * ending at @vmg->end. There are three different possible scenarios:
 *
 * 1. There is a VMA with identical properties immediately adjacent to the
 *    proposed new VMA [@vmg->start, @vmg->end) either before or after it -
 *    EXPAND that VMA:
 *
 *    Proposed:       |-----|  or  |-----|
 *    Existing:  |----|                  |----|
 *
 * 2. There are VMAs with identical properties immediately adjacent to the
 *    proposed new VMA [@vmg->start, @vmg->end) both before AND after it -
 *    EXPAND the former and REMOVE the latter:
 *
 *    Proposed:       |-----|
 *    Existing:  |----|     |----|
 *
 * 3. There are no VMAs immediately adjacent to the proposed new VMA or those
 *    VMAs do not have identical attributes - NO MERGE POSSIBLE.
 *
 * In instances where we can merge, this function returns the expanded VMA which
 * will have its range adjusted accordingly and the underlying maple tree also
 * adjusted.
 *
 * Returns: In instances where no merge was possible, NULL. Otherwise, a pointer
 *          to the VMA we expanded.
 *
 * On success, @vmg->vma is the expanded VMA, [@vmg->start, @vmg->end) and
 * @vmg->pgoff span the expanded range, and @vmg->state is VMA_MERGE_SUCCESS.
 *
 * On failure, @vmg->vma is NULL and @vmg->start, @vmg->end and @vmg->pgoff are
 * restored to the values passed in so the merge can be reattempted.
 * @vmg->state is set to VMA_MERGE_NOMERGE on entry; vma_expand() overwrites it
 * with VMA_MERGE_ERROR_NOMEM on its out-of-memory path (see vmg_nomem()).
 *
 * ASSUMPTIONS:
 * - The caller must hold a WRITE lock on the mm_struct->mmap_lock.
 * - The caller must have determined that [@vmg->start, @vmg->end) is empty,
 *   other than VMAs that will be unmapped should the operation succeed.
 * - The caller must have specified the previous vma in @vmg->prev.
 * - The caller must have specified the next vma in @vmg->next.
 * - The caller must have positioned the vmi at or before the gap.
 */
struct vm_area_struct *vma_merge_new_range(struct vma_merge_struct *vmg)
{
	struct vm_area_struct *prev = vmg->prev;
	struct vm_area_struct *next = vmg->next;
	unsigned long start = vmg->start;
	unsigned long end = vmg->end;
	pgoff_t pgoff = vmg->pgoff;
	pgoff_t pglen = PHYS_PFN(end - start);
	bool can_merge_left, can_merge_right;

	mmap_assert_write_locked(vmg->mm);
	VM_WARN_ON(vmg->vma);
	/* vmi must point at or before the gap. */
	VM_WARN_ON(vma_iter_addr(vmg->vmi) > end);

	vmg->state = VMA_MERGE_NOMERGE;

	/* Special VMAs are unmergeable, also if no prev/next. */
	if ((vmg->flags & VM_SPECIAL) || (!prev && !next))
		return NULL;

	can_merge_left = can_vma_merge_left(vmg);
	can_merge_right = can_vma_merge_right(vmg, can_merge_left);

	/* If we can merge with the next VMA, adjust vmg accordingly. */
	if (can_merge_right) {
		vmg->end = next->vm_end;
		vmg->vma = next;
		vmg->pgoff = next->vm_pgoff - pglen;
	}

	/* If we can merge with the previous VMA, adjust vmg accordingly. */
	if (can_merge_left) {
		vmg->start = prev->vm_start;
		vmg->vma = prev;
		vmg->pgoff = prev->vm_pgoff;

		vma_prev(vmg->vmi); /* Equivalent to going to the previous range */
	}

	/*
	 * Now try to expand adjacent VMA(s). This takes care of removing the
	 * following VMA if we have VMAs on both sides.
	 */
	if (vmg->vma && !vma_expand(vmg)) {
		khugepaged_enter_vma(vmg->vma, vmg->flags);
		vmg->state = VMA_MERGE_SUCCESS;
		return vmg->vma;
	}

	/* If expansion failed, reset state. Allows us to retry merge later. */
	vmg->vma = NULL;
	vmg->start = start;
	vmg->end = end;
	vmg->pgoff = pgoff;

	/*
	 * The iterator was rewound to prev only when merging left, so that is
	 * the only case in which it needs repositioning. This must be keyed on
	 * can_merge_left: vmg->vma has just been cleared, so comparing it with
	 * prev would only ever match when prev is NULL.
	 */
	if (can_merge_left)
		vma_iter_set(vmg->vmi, start);

	return NULL;
}
/* /*
* vma_expand - Expand an existing VMA * vma_expand - Expand an existing VMA
* *
...@@ -474,7 +624,11 @@ void validate_mm(struct mm_struct *mm) ...@@ -474,7 +624,11 @@ void validate_mm(struct mm_struct *mm)
* vmg->next->vm_end. Checking if the vmg->vma can expand and merge with * vmg->next->vm_end. Checking if the vmg->vma can expand and merge with
* vmg->next needs to be handled by the caller. * vmg->next needs to be handled by the caller.
* *
* Returns: 0 on success * Returns: 0 on success.
*
* ASSUMPTIONS:
* - The caller must hold a WRITE lock on vmg->vma->mm->mmap_lock.
* - The caller must have set @vmg->vma and @vmg->next.
*/ */
int vma_expand(struct vma_merge_struct *vmg) int vma_expand(struct vma_merge_struct *vmg)
{ {
...@@ -484,6 +638,8 @@ int vma_expand(struct vma_merge_struct *vmg) ...@@ -484,6 +638,8 @@ int vma_expand(struct vma_merge_struct *vmg)
struct vm_area_struct *next = vmg->next; struct vm_area_struct *next = vmg->next;
struct vma_prepare vp; struct vma_prepare vp;
mmap_assert_write_locked(vmg->mm);
vma_start_write(vma); vma_start_write(vma);
if (next && (vma != next) && (vmg->end == next->vm_end)) { if (next && (vma != next) && (vmg->end == next->vm_end)) {
int ret; int ret;
...@@ -516,6 +672,7 @@ int vma_expand(struct vma_merge_struct *vmg) ...@@ -516,6 +672,7 @@ int vma_expand(struct vma_merge_struct *vmg)
return 0; return 0;
nomem: nomem:
vmg->state = VMA_MERGE_ERROR_NOMEM;
if (anon_dup) if (anon_dup)
unlink_anon_vmas(anon_dup); unlink_anon_vmas(anon_dup);
return -ENOMEM; return -ENOMEM;
...@@ -1029,6 +1186,8 @@ static struct vm_area_struct *vma_merge(struct vma_merge_struct *vmg) ...@@ -1029,6 +1186,8 @@ static struct vm_area_struct *vma_merge(struct vma_merge_struct *vmg)
pgoff_t pglen = PHYS_PFN(end - addr); pgoff_t pglen = PHYS_PFN(end - addr);
long adj_start = 0; long adj_start = 0;
vmg->state = VMA_MERGE_NOMERGE;
/* /*
* We later require that vma->vm_flags == vm_flags, * We later require that vma->vm_flags == vm_flags,
* so this tests vma->vm_flags & VM_SPECIAL, too. * so this tests vma->vm_flags & VM_SPECIAL, too.
...@@ -1180,13 +1339,19 @@ static struct vm_area_struct *vma_merge(struct vma_merge_struct *vmg) ...@@ -1180,13 +1339,19 @@ static struct vm_area_struct *vma_merge(struct vma_merge_struct *vmg)
vma_complete(&vp, vmg->vmi, mm); vma_complete(&vp, vmg->vmi, mm);
validate_mm(mm); validate_mm(mm);
khugepaged_enter_vma(res, vmg->flags); khugepaged_enter_vma(res, vmg->flags);
vmg->state = VMA_MERGE_SUCCESS;
return res; return res;
prealloc_fail: prealloc_fail:
vmg->state = VMA_MERGE_ERROR_NOMEM;
if (anon_dup) if (anon_dup)
unlink_anon_vmas(anon_dup); unlink_anon_vmas(anon_dup);
anon_vma_fail: anon_vma_fail:
if (err == -ENOMEM)
vmg->state = VMA_MERGE_ERROR_NOMEM;
vma_iter_set(vmg->vmi, addr); vma_iter_set(vmg->vmi, addr);
vma_iter_load(vmg->vmi); vma_iter_load(vmg->vmi);
return NULL; return NULL;
...@@ -1293,22 +1458,6 @@ struct vm_area_struct ...@@ -1293,22 +1458,6 @@ struct vm_area_struct
return vma_modify(&vmg); return vma_modify(&vmg);
} }
/*
 * Attempt to merge a newly mapped VMA with those adjacent to it. The caller
 * must ensure that [start, end) does not overlap any existing VMA.
 *
 * Builds a vma_merge_struct from @vmi, @prev, @vma, @start and @end via
 * VMG_VMA_STATE(), overrides its pgoff with @pgoff, and returns whatever
 * vma_merge() returns for it.
 */
struct vm_area_struct
*vma_merge_new_vma(struct vma_iterator *vmi, struct vm_area_struct *prev,
struct vm_area_struct *vma, unsigned long start,
unsigned long end, pgoff_t pgoff)
{
VMG_VMA_STATE(vmg, vmi, prev, vma, start, end);
/* The page offset of the new range is supplied explicitly by the caller. */
vmg.pgoff = pgoff;
return vma_merge(&vmg);
}
/* /*
* Expand vma by delta bytes, potentially merging with an immediately adjacent * Expand vma by delta bytes, potentially merging with an immediately adjacent
* VMA with identical properties. * VMA with identical properties.
...@@ -1319,8 +1468,10 @@ struct vm_area_struct *vma_merge_extend(struct vma_iterator *vmi, ...@@ -1319,8 +1468,10 @@ struct vm_area_struct *vma_merge_extend(struct vma_iterator *vmi,
{ {
VMG_VMA_STATE(vmg, vmi, vma, vma, vma->vm_end, vma->vm_end + delta); VMG_VMA_STATE(vmg, vmi, vma, vma, vma->vm_end, vma->vm_end + delta);
/* vma is specified as prev, so case 1 or 2 will apply. */ vmg.next = vma_iter_next_rewind(vmi, NULL);
return vma_merge(&vmg); vmg.vma = NULL; /* We use the VMA to populate VMG fields only. */
return vma_merge_new_range(&vmg);
} }
void unlink_file_vma_batch_init(struct unlink_vma_file_batch *vb) void unlink_file_vma_batch_init(struct unlink_vma_file_batch *vb)
...@@ -1421,9 +1572,10 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap, ...@@ -1421,9 +1572,10 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
struct vm_area_struct *vma = *vmap; struct vm_area_struct *vma = *vmap;
unsigned long vma_start = vma->vm_start; unsigned long vma_start = vma->vm_start;
struct mm_struct *mm = vma->vm_mm; struct mm_struct *mm = vma->vm_mm;
struct vm_area_struct *new_vma, *prev; struct vm_area_struct *new_vma;
bool faulted_in_anon_vma = true; bool faulted_in_anon_vma = true;
VMA_ITERATOR(vmi, mm, addr); VMA_ITERATOR(vmi, mm, addr);
VMG_VMA_STATE(vmg, &vmi, NULL, vma, addr, addr + len);
/* /*
* If anonymous vma has not yet been faulted, update new pgoff * If anonymous vma has not yet been faulted, update new pgoff
...@@ -1434,11 +1586,15 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap, ...@@ -1434,11 +1586,15 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
faulted_in_anon_vma = false; faulted_in_anon_vma = false;
} }
new_vma = find_vma_prev(mm, addr, &prev); new_vma = find_vma_prev(mm, addr, &vmg.prev);
if (new_vma && new_vma->vm_start < addr + len) if (new_vma && new_vma->vm_start < addr + len)
return NULL; /* should never get here */ return NULL; /* should never get here */
new_vma = vma_merge_new_vma(&vmi, prev, vma, addr, addr + len, pgoff); vmg.vma = NULL; /* New VMA range. */
vmg.pgoff = pgoff;
vmg.next = vma_iter_next_rewind(&vmi, NULL);
new_vma = vma_merge_new_range(&vmg);
if (new_vma) { if (new_vma) {
/* /*
* Source vma may have been merged into new_vma * Source vma may have been merged into new_vma
......
...@@ -52,6 +52,13 @@ struct vma_munmap_struct { ...@@ -52,6 +52,13 @@ struct vma_munmap_struct {
unsigned long data_vm; unsigned long data_vm;
}; };
/* Outcome of a merge attempt, recorded in vma_merge_struct->state. */
enum vma_merge_state {
/* Initial value assigned by the VMG_STATE() / VMG_VMA_STATE() initialisers. */
VMA_MERGE_START,
/* A merge was attempted but hit -ENOMEM; this is what vmg_nomem() tests. */
VMA_MERGE_ERROR_NOMEM,
/* Set on entry to the merge functions; remains if no merge took place. */
VMA_MERGE_NOMERGE,
/* The merge succeeded and the merged VMA was returned. */
VMA_MERGE_SUCCESS,
};
/* Represents a VMA merge operation. */ /* Represents a VMA merge operation. */
struct vma_merge_struct { struct vma_merge_struct {
struct mm_struct *mm; struct mm_struct *mm;
...@@ -68,8 +75,14 @@ struct vma_merge_struct { ...@@ -68,8 +75,14 @@ struct vma_merge_struct {
struct mempolicy *policy; struct mempolicy *policy;
struct vm_userfaultfd_ctx uffd_ctx; struct vm_userfaultfd_ctx uffd_ctx;
struct anon_vma_name *anon_name; struct anon_vma_name *anon_name;
enum vma_merge_state state;
}; };
/* Did the merge operation tracked by @vmg record an out-of-memory error? */
static inline bool vmg_nomem(struct vma_merge_struct *vmg)
{
	if (vmg->state != VMA_MERGE_ERROR_NOMEM)
		return false;

	return true;
}
/* Assumes addr >= vma->vm_start. */ /* Assumes addr >= vma->vm_start. */
static inline pgoff_t vma_pgoff_offset(struct vm_area_struct *vma, static inline pgoff_t vma_pgoff_offset(struct vm_area_struct *vma,
unsigned long addr) unsigned long addr)
...@@ -85,6 +98,7 @@ static inline pgoff_t vma_pgoff_offset(struct vm_area_struct *vma, ...@@ -85,6 +98,7 @@ static inline pgoff_t vma_pgoff_offset(struct vm_area_struct *vma,
.end = end_, \ .end = end_, \
.flags = flags_, \ .flags = flags_, \
.pgoff = pgoff_, \ .pgoff = pgoff_, \
.state = VMA_MERGE_START, \
} }
#define VMG_VMA_STATE(name, vmi_, prev_, vma_, start_, end_) \ #define VMG_VMA_STATE(name, vmi_, prev_, vma_, start_, end_) \
...@@ -103,6 +117,7 @@ static inline pgoff_t vma_pgoff_offset(struct vm_area_struct *vma, ...@@ -103,6 +117,7 @@ static inline pgoff_t vma_pgoff_offset(struct vm_area_struct *vma,
.policy = vma_policy(vma_), \ .policy = vma_policy(vma_), \
.uffd_ctx = vma_->vm_userfaultfd_ctx, \ .uffd_ctx = vma_->vm_userfaultfd_ctx, \
.anon_name = anon_vma_name(vma_), \ .anon_name = anon_vma_name(vma_), \
.state = VMA_MERGE_START, \
} }
#ifdef CONFIG_DEBUG_VM_MAPLE_TREE #ifdef CONFIG_DEBUG_VM_MAPLE_TREE
...@@ -309,10 +324,7 @@ struct vm_area_struct ...@@ -309,10 +324,7 @@ struct vm_area_struct
unsigned long new_flags, unsigned long new_flags,
struct vm_userfaultfd_ctx new_ctx); struct vm_userfaultfd_ctx new_ctx);
struct vm_area_struct struct vm_area_struct *vma_merge_new_range(struct vma_merge_struct *vmg);
*vma_merge_new_vma(struct vma_iterator *vmi, struct vm_area_struct *prev,
struct vm_area_struct *vma, unsigned long start,
unsigned long end, pgoff_t pgoff);
struct vm_area_struct *vma_merge_extend(struct vma_iterator *vmi, struct vm_area_struct *vma_merge_extend(struct vma_iterator *vmi,
struct vm_area_struct *vma, struct vm_area_struct *vma,
...@@ -505,6 +517,34 @@ struct vm_area_struct *vma_iter_prev_range(struct vma_iterator *vmi) ...@@ -505,6 +517,34 @@ struct vm_area_struct *vma_iter_prev_range(struct vma_iterator *vmi)
return mas_prev_range(&vmi->mas, 0); return mas_prev_range(&vmi->mas, 0);
} }
/*
 * Fetch the next VMA from @vmi, then step back to fetch the previous one, and
 * leave the iterator rewound to the end of the previous VMA (or, if there is
 * no previous VMA, to index 0).
 *
 * @pprev is optional; when non-NULL it receives the previous VMA (which may
 * itself be NULL).
 *
 * Returns: the next VMA as reported by vma_next().
 */
static inline
struct vm_area_struct *vma_iter_next_rewind(struct vma_iterator *vmi,
					    struct vm_area_struct **pprev)
{
	struct vm_area_struct *following;
	struct vm_area_struct *preceding;

	/* Order matters: look ahead first, then walk back. */
	following = vma_next(vmi);
	preceding = vma_prev(vmi);

	/*
	 * With no previous VMA, we moved forward to the next VMA (skipping
	 * any gap) and then rewound to the start of the range. Advancing a
	 * range unconditionally from there would land us on the next VMA
	 * again, so only advance when there is a previous VMA to step over.
	 */
	if (preceding)
		vma_iter_next_range(vmi);

	if (pprev)
		*pprev = preceding;

	return following;
}
#ifdef CONFIG_64BIT #ifdef CONFIG_64BIT
static inline bool vma_is_sealed(struct vm_area_struct *vma) static inline bool vma_is_sealed(struct vm_area_struct *vma)
......
...@@ -101,9 +101,9 @@ static struct vm_area_struct *merge_new(struct vma_merge_struct *vmg) ...@@ -101,9 +101,9 @@ static struct vm_area_struct *merge_new(struct vma_merge_struct *vmg)
*/ */
vmg->next = vma_next(vmg->vmi); vmg->next = vma_next(vmg->vmi);
vmg->prev = vma_prev(vmg->vmi); vmg->prev = vma_prev(vmg->vmi);
vma_iter_next_range(vmg->vmi);
vma_iter_set(vmg->vmi, vmg->start); return vma_merge_new_range(vmg);
return vma_merge(vmg);
} }
/* /*
...@@ -162,10 +162,14 @@ static struct vm_area_struct *try_merge_new_vma(struct mm_struct *mm, ...@@ -162,10 +162,14 @@ static struct vm_area_struct *try_merge_new_vma(struct mm_struct *mm,
merged = merge_new(vmg); merged = merge_new(vmg);
if (merged) { if (merged) {
*was_merged = true; *was_merged = true;
ASSERT_EQ(vmg->state, VMA_MERGE_SUCCESS);
return merged; return merged;
} }
*was_merged = false; *was_merged = false;
ASSERT_EQ(vmg->state, VMA_MERGE_NOMERGE);
return alloc_and_link_vma(mm, start, end, pgoff, flags); return alloc_and_link_vma(mm, start, end, pgoff, flags);
} }
...@@ -595,6 +599,7 @@ static bool test_vma_merge_special_flags(void) ...@@ -595,6 +599,7 @@ static bool test_vma_merge_special_flags(void)
vmg.flags = flags | special_flag; vmg.flags = flags | special_flag;
vma = merge_new(&vmg); vma = merge_new(&vmg);
ASSERT_EQ(vma, NULL); ASSERT_EQ(vma, NULL);
ASSERT_EQ(vmg.state, VMA_MERGE_NOMERGE);
} }
/* 2. Modify VMA with special flag that would otherwise merge. */ /* 2. Modify VMA with special flag that would otherwise merge. */
...@@ -616,6 +621,7 @@ static bool test_vma_merge_special_flags(void) ...@@ -616,6 +621,7 @@ static bool test_vma_merge_special_flags(void)
vmg.flags = flags | special_flag; vmg.flags = flags | special_flag;
vma = merge_existing(&vmg); vma = merge_existing(&vmg);
ASSERT_EQ(vma, NULL); ASSERT_EQ(vma, NULL);
ASSERT_EQ(vmg.state, VMA_MERGE_NOMERGE);
} }
cleanup_mm(&mm, &vmi); cleanup_mm(&mm, &vmi);
...@@ -708,6 +714,7 @@ static bool test_vma_merge_with_close(void) ...@@ -708,6 +714,7 @@ static bool test_vma_merge_with_close(void)
/* The next VMA having a close() operator should cause the merge to fail.*/ /* The next VMA having a close() operator should cause the merge to fail.*/
ASSERT_EQ(merge_new(&vmg), NULL); ASSERT_EQ(merge_new(&vmg), NULL);
ASSERT_EQ(vmg.state, VMA_MERGE_NOMERGE);
/* Now create the VMA so we can merge via modified flags */ /* Now create the VMA so we can merge via modified flags */
vmg_set_range(&vmg, 0x1000, 0x2000, 1, flags); vmg_set_range(&vmg, 0x1000, 0x2000, 1, flags);
...@@ -719,6 +726,7 @@ static bool test_vma_merge_with_close(void) ...@@ -719,6 +726,7 @@ static bool test_vma_merge_with_close(void)
* also fail. * also fail.
*/ */
ASSERT_EQ(merge_existing(&vmg), NULL); ASSERT_EQ(merge_existing(&vmg), NULL);
ASSERT_EQ(vmg.state, VMA_MERGE_NOMERGE);
/* SCENARIO B /* SCENARIO B
* *
...@@ -744,6 +752,7 @@ static bool test_vma_merge_with_close(void) ...@@ -744,6 +752,7 @@ static bool test_vma_merge_with_close(void)
vmg.vma = vma; vmg.vma = vma;
/* Make sure merge does not occur. */ /* Make sure merge does not occur. */
ASSERT_EQ(merge_existing(&vmg), NULL); ASSERT_EQ(merge_existing(&vmg), NULL);
ASSERT_EQ(vmg.state, VMA_MERGE_NOMERGE);
cleanup_mm(&mm, &vmi); cleanup_mm(&mm, &vmi);
return true; return true;
...@@ -792,6 +801,7 @@ static bool test_vma_merge_new_with_close(void) ...@@ -792,6 +801,7 @@ static bool test_vma_merge_new_with_close(void)
vmg_set_range(&vmg, 0x2000, 0x5000, 2, flags); vmg_set_range(&vmg, 0x2000, 0x5000, 2, flags);
vma = merge_new(&vmg); vma = merge_new(&vmg);
ASSERT_NE(vma, NULL); ASSERT_NE(vma, NULL);
ASSERT_EQ(vmg.state, VMA_MERGE_SUCCESS);
ASSERT_EQ(vma->vm_start, 0); ASSERT_EQ(vma->vm_start, 0);
ASSERT_EQ(vma->vm_end, 0x5000); ASSERT_EQ(vma->vm_end, 0x5000);
ASSERT_EQ(vma->vm_pgoff, 0); ASSERT_EQ(vma->vm_pgoff, 0);
...@@ -831,6 +841,7 @@ static bool test_merge_existing(void) ...@@ -831,6 +841,7 @@ static bool test_merge_existing(void)
vmg.prev = vma; vmg.prev = vma;
vma->anon_vma = &dummy_anon_vma; vma->anon_vma = &dummy_anon_vma;
ASSERT_EQ(merge_existing(&vmg), vma_next); ASSERT_EQ(merge_existing(&vmg), vma_next);
ASSERT_EQ(vmg.state, VMA_MERGE_SUCCESS);
ASSERT_EQ(vma_next->vm_start, 0x3000); ASSERT_EQ(vma_next->vm_start, 0x3000);
ASSERT_EQ(vma_next->vm_end, 0x9000); ASSERT_EQ(vma_next->vm_end, 0x9000);
ASSERT_EQ(vma_next->vm_pgoff, 3); ASSERT_EQ(vma_next->vm_pgoff, 3);
...@@ -861,6 +872,7 @@ static bool test_merge_existing(void) ...@@ -861,6 +872,7 @@ static bool test_merge_existing(void)
vmg.vma = vma; vmg.vma = vma;
vma->anon_vma = &dummy_anon_vma; vma->anon_vma = &dummy_anon_vma;
ASSERT_EQ(merge_existing(&vmg), vma_next); ASSERT_EQ(merge_existing(&vmg), vma_next);
ASSERT_EQ(vmg.state, VMA_MERGE_SUCCESS);
ASSERT_EQ(vma_next->vm_start, 0x2000); ASSERT_EQ(vma_next->vm_start, 0x2000);
ASSERT_EQ(vma_next->vm_end, 0x9000); ASSERT_EQ(vma_next->vm_end, 0x9000);
ASSERT_EQ(vma_next->vm_pgoff, 2); ASSERT_EQ(vma_next->vm_pgoff, 2);
...@@ -889,6 +901,7 @@ static bool test_merge_existing(void) ...@@ -889,6 +901,7 @@ static bool test_merge_existing(void)
vma->anon_vma = &dummy_anon_vma; vma->anon_vma = &dummy_anon_vma;
ASSERT_EQ(merge_existing(&vmg), vma_prev); ASSERT_EQ(merge_existing(&vmg), vma_prev);
ASSERT_EQ(vmg.state, VMA_MERGE_SUCCESS);
ASSERT_EQ(vma_prev->vm_start, 0); ASSERT_EQ(vma_prev->vm_start, 0);
ASSERT_EQ(vma_prev->vm_end, 0x6000); ASSERT_EQ(vma_prev->vm_end, 0x6000);
ASSERT_EQ(vma_prev->vm_pgoff, 0); ASSERT_EQ(vma_prev->vm_pgoff, 0);
...@@ -920,6 +933,7 @@ static bool test_merge_existing(void) ...@@ -920,6 +933,7 @@ static bool test_merge_existing(void)
vmg.vma = vma; vmg.vma = vma;
vma->anon_vma = &dummy_anon_vma; vma->anon_vma = &dummy_anon_vma;
ASSERT_EQ(merge_existing(&vmg), vma_prev); ASSERT_EQ(merge_existing(&vmg), vma_prev);
ASSERT_EQ(vmg.state, VMA_MERGE_SUCCESS);
ASSERT_EQ(vma_prev->vm_start, 0); ASSERT_EQ(vma_prev->vm_start, 0);
ASSERT_EQ(vma_prev->vm_end, 0x7000); ASSERT_EQ(vma_prev->vm_end, 0x7000);
ASSERT_EQ(vma_prev->vm_pgoff, 0); ASSERT_EQ(vma_prev->vm_pgoff, 0);
...@@ -948,6 +962,7 @@ static bool test_merge_existing(void) ...@@ -948,6 +962,7 @@ static bool test_merge_existing(void)
vmg.vma = vma; vmg.vma = vma;
vma->anon_vma = &dummy_anon_vma; vma->anon_vma = &dummy_anon_vma;
ASSERT_EQ(merge_existing(&vmg), vma_prev); ASSERT_EQ(merge_existing(&vmg), vma_prev);
ASSERT_EQ(vmg.state, VMA_MERGE_SUCCESS);
ASSERT_EQ(vma_prev->vm_start, 0); ASSERT_EQ(vma_prev->vm_start, 0);
ASSERT_EQ(vma_prev->vm_end, 0x9000); ASSERT_EQ(vma_prev->vm_end, 0x9000);
ASSERT_EQ(vma_prev->vm_pgoff, 0); ASSERT_EQ(vma_prev->vm_pgoff, 0);
...@@ -981,31 +996,37 @@ static bool test_merge_existing(void) ...@@ -981,31 +996,37 @@ static bool test_merge_existing(void)
vmg.prev = vma; vmg.prev = vma;
vmg.vma = vma; vmg.vma = vma;
ASSERT_EQ(merge_existing(&vmg), NULL); ASSERT_EQ(merge_existing(&vmg), NULL);
ASSERT_EQ(vmg.state, VMA_MERGE_NOMERGE);
vmg_set_range(&vmg, 0x5000, 0x6000, 5, flags); vmg_set_range(&vmg, 0x5000, 0x6000, 5, flags);
vmg.prev = vma; vmg.prev = vma;
vmg.vma = vma; vmg.vma = vma;
ASSERT_EQ(merge_existing(&vmg), NULL); ASSERT_EQ(merge_existing(&vmg), NULL);
ASSERT_EQ(vmg.state, VMA_MERGE_NOMERGE);
vmg_set_range(&vmg, 0x6000, 0x7000, 6, flags); vmg_set_range(&vmg, 0x6000, 0x7000, 6, flags);
vmg.prev = vma; vmg.prev = vma;
vmg.vma = vma; vmg.vma = vma;
ASSERT_EQ(merge_existing(&vmg), NULL); ASSERT_EQ(merge_existing(&vmg), NULL);
ASSERT_EQ(vmg.state, VMA_MERGE_NOMERGE);
vmg_set_range(&vmg, 0x4000, 0x7000, 4, flags); vmg_set_range(&vmg, 0x4000, 0x7000, 4, flags);
vmg.prev = vma; vmg.prev = vma;
vmg.vma = vma; vmg.vma = vma;
ASSERT_EQ(merge_existing(&vmg), NULL); ASSERT_EQ(merge_existing(&vmg), NULL);
ASSERT_EQ(vmg.state, VMA_MERGE_NOMERGE);
vmg_set_range(&vmg, 0x4000, 0x6000, 4, flags); vmg_set_range(&vmg, 0x4000, 0x6000, 4, flags);
vmg.prev = vma; vmg.prev = vma;
vmg.vma = vma; vmg.vma = vma;
ASSERT_EQ(merge_existing(&vmg), NULL); ASSERT_EQ(merge_existing(&vmg), NULL);
ASSERT_EQ(vmg.state, VMA_MERGE_NOMERGE);
vmg_set_range(&vmg, 0x5000, 0x6000, 5, flags); vmg_set_range(&vmg, 0x5000, 0x6000, 5, flags);
vmg.prev = vma; vmg.prev = vma;
vmg.vma = vma; vmg.vma = vma;
ASSERT_EQ(merge_existing(&vmg), NULL); ASSERT_EQ(merge_existing(&vmg), NULL);
ASSERT_EQ(vmg.state, VMA_MERGE_NOMERGE);
ASSERT_EQ(cleanup_mm(&mm, &vmi), 3); ASSERT_EQ(cleanup_mm(&mm, &vmi), 3);
...@@ -1071,6 +1092,7 @@ static bool test_anon_vma_non_mergeable(void) ...@@ -1071,6 +1092,7 @@ static bool test_anon_vma_non_mergeable(void)
vmg.vma = vma; vmg.vma = vma;
ASSERT_EQ(merge_existing(&vmg), vma_prev); ASSERT_EQ(merge_existing(&vmg), vma_prev);
ASSERT_EQ(vmg.state, VMA_MERGE_SUCCESS);
ASSERT_EQ(vma_prev->vm_start, 0); ASSERT_EQ(vma_prev->vm_start, 0);
ASSERT_EQ(vma_prev->vm_end, 0x7000); ASSERT_EQ(vma_prev->vm_end, 0x7000);
ASSERT_EQ(vma_prev->vm_pgoff, 0); ASSERT_EQ(vma_prev->vm_pgoff, 0);
...@@ -1106,6 +1128,7 @@ static bool test_anon_vma_non_mergeable(void) ...@@ -1106,6 +1128,7 @@ static bool test_anon_vma_non_mergeable(void)
vmg.prev = vma_prev; vmg.prev = vma_prev;
ASSERT_EQ(merge_new(&vmg), vma_prev); ASSERT_EQ(merge_new(&vmg), vma_prev);
ASSERT_EQ(vmg.state, VMA_MERGE_SUCCESS);
ASSERT_EQ(vma_prev->vm_start, 0); ASSERT_EQ(vma_prev->vm_start, 0);
ASSERT_EQ(vma_prev->vm_end, 0x7000); ASSERT_EQ(vma_prev->vm_end, 0x7000);
ASSERT_EQ(vma_prev->vm_pgoff, 0); ASSERT_EQ(vma_prev->vm_pgoff, 0);
...@@ -1181,6 +1204,7 @@ static bool test_dup_anon_vma(void) ...@@ -1181,6 +1204,7 @@ static bool test_dup_anon_vma(void)
vmg.vma = vma; vmg.vma = vma;
ASSERT_EQ(merge_existing(&vmg), vma_prev); ASSERT_EQ(merge_existing(&vmg), vma_prev);
ASSERT_EQ(vmg.state, VMA_MERGE_SUCCESS);
ASSERT_EQ(vma_prev->vm_start, 0); ASSERT_EQ(vma_prev->vm_start, 0);
ASSERT_EQ(vma_prev->vm_end, 0x8000); ASSERT_EQ(vma_prev->vm_end, 0x8000);
...@@ -1209,6 +1233,7 @@ static bool test_dup_anon_vma(void) ...@@ -1209,6 +1233,7 @@ static bool test_dup_anon_vma(void)
vmg.vma = vma; vmg.vma = vma;
ASSERT_EQ(merge_existing(&vmg), vma_prev); ASSERT_EQ(merge_existing(&vmg), vma_prev);
ASSERT_EQ(vmg.state, VMA_MERGE_SUCCESS);
ASSERT_EQ(vma_prev->vm_start, 0); ASSERT_EQ(vma_prev->vm_start, 0);
ASSERT_EQ(vma_prev->vm_end, 0x8000); ASSERT_EQ(vma_prev->vm_end, 0x8000);
...@@ -1236,6 +1261,7 @@ static bool test_dup_anon_vma(void) ...@@ -1236,6 +1261,7 @@ static bool test_dup_anon_vma(void)
vmg.vma = vma; vmg.vma = vma;
ASSERT_EQ(merge_existing(&vmg), vma_prev); ASSERT_EQ(merge_existing(&vmg), vma_prev);
ASSERT_EQ(vmg.state, VMA_MERGE_SUCCESS);
ASSERT_EQ(vma_prev->vm_start, 0); ASSERT_EQ(vma_prev->vm_start, 0);
ASSERT_EQ(vma_prev->vm_end, 0x5000); ASSERT_EQ(vma_prev->vm_end, 0x5000);
...@@ -1263,6 +1289,7 @@ static bool test_dup_anon_vma(void) ...@@ -1263,6 +1289,7 @@ static bool test_dup_anon_vma(void)
vmg.vma = vma; vmg.vma = vma;
ASSERT_EQ(merge_existing(&vmg), vma_next); ASSERT_EQ(merge_existing(&vmg), vma_next);
ASSERT_EQ(vmg.state, VMA_MERGE_SUCCESS);
ASSERT_EQ(vma_next->vm_start, 0x3000); ASSERT_EQ(vma_next->vm_start, 0x3000);
ASSERT_EQ(vma_next->vm_end, 0x8000); ASSERT_EQ(vma_next->vm_end, 0x8000);
...@@ -1303,6 +1330,7 @@ static bool test_vmi_prealloc_fail(void) ...@@ -1303,6 +1330,7 @@ static bool test_vmi_prealloc_fail(void)
/* This will cause the merge to fail. */ /* This will cause the merge to fail. */
ASSERT_EQ(merge_existing(&vmg), NULL); ASSERT_EQ(merge_existing(&vmg), NULL);
ASSERT_EQ(vmg.state, VMA_MERGE_ERROR_NOMEM);
/* We will already have assigned the anon_vma. */ /* We will already have assigned the anon_vma. */
ASSERT_EQ(vma_prev->anon_vma, &dummy_anon_vma); ASSERT_EQ(vma_prev->anon_vma, &dummy_anon_vma);
/* And it was both cloned and unlinked. */ /* And it was both cloned and unlinked. */
...@@ -1327,6 +1355,7 @@ static bool test_vmi_prealloc_fail(void) ...@@ -1327,6 +1355,7 @@ static bool test_vmi_prealloc_fail(void)
fail_prealloc = true; fail_prealloc = true;
ASSERT_EQ(expand_existing(&vmg), -ENOMEM); ASSERT_EQ(expand_existing(&vmg), -ENOMEM);
ASSERT_EQ(vmg.state, VMA_MERGE_ERROR_NOMEM);
ASSERT_EQ(vma_prev->anon_vma, &dummy_anon_vma); ASSERT_EQ(vma_prev->anon_vma, &dummy_anon_vma);
ASSERT_TRUE(dummy_anon_vma.was_cloned); ASSERT_TRUE(dummy_anon_vma.was_cloned);
......
...@@ -740,6 +740,12 @@ static inline void vma_iter_free(struct vma_iterator *vmi) ...@@ -740,6 +740,12 @@ static inline void vma_iter_free(struct vma_iterator *vmi)
mas_destroy(&vmi->mas); mas_destroy(&vmi->mas);
} }
/*
 * vma_iter_next_range() - step a VMA iterator via mas_next_range().
 * @vmi: iterator whose embedded 'mas' state is passed on.
 *
 * Thin wrapper: hands &vmi->mas to mas_next_range() with ULONG_MAX as the
 * limit argument (i.e. no tighter upper bound is imposed here) and returns
 * the result as a VMA pointer.
 *
 * NOTE(review): presumably this yields NULL when the next range is empty or
 * the end is reached - confirm against the mas_next_range() contract.
 */
static inline struct vm_area_struct *
vma_iter_next_range(struct vma_iterator *vmi)
{
	void *entry = mas_next_range(&vmi->mas, ULONG_MAX);

	return entry;
}
static inline void vm_acct_memory(long pages) static inline void vm_acct_memory(long pages)
{ {
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment