Commit dba14840 authored by Liam R. Howlett, committed by Andrew Morton

mm/vma: introduce vma_munmap_struct for use in munmap operations

Use a structure to pass along all the necessary information and counters
involved in removing vmas from the mm_struct.

Update vmi_ function names to vms_ to indicate the first argument type
change.

Link: https://lkml.kernel.org/r/20240830040101.822209-6-Liam.Howlett@oracle.com
Signed-off-by: Liam R. Howlett <Liam.Howlett@oracle.com>
Reviewed-by: Suren Baghdasaryan <surenb@google.com>
Reviewed-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Cc: Bert Karwatzki <spasswolf@web.de>
Cc: Jeff Xu <jeffxu@chromium.org>
Cc: Jiri Olsa <olsajiri@gmail.com>
Cc: Kees Cook <kees@kernel.org>
Cc: Lorenzo Stoakes <lstoakes@gmail.com>
Cc: Mark Brown <broonie@kernel.org>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: "Paul E. McKenney" <paulmck@kernel.org>
Cc: Paul Moore <paul@paul-moore.com>
Cc: Sidhartha Kumar <sidhartha.kumar@oracle.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
parent 6898c903
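The diff below is a textbook parameter-object refactor: state that was previously threaded through every munmap helper as six-plus separate arguments (plus output pointers for the counters) is filled into one vma_munmap_struct and passed as a single pointer. As a reading aid, here is a minimal standalone sketch of that calling-convention change (userspace C, compilable as-is; munmap_desc, gather_old and gather_new are hypothetical stand-ins for illustration, not kernel APIs):

#include <stdbool.h>
#include <stdio.h>

/* Plays the role of vma_munmap_struct: inputs, flags and counters in one place. */
struct munmap_desc {
	unsigned long start;      /* aligned start of the range */
	unsigned long end;        /* aligned end of the range */
	int vma_count;            /* counter filled in by the gather phase */
	unsigned long locked_vm;  /* counter filled in by the gather phase */
	bool unlock;              /* behaviour flag consumed by the complete phase */
};

/* Before: every piece of state is a separate parameter or output pointer. */
static int gather_old(unsigned long start, unsigned long end,
		      int *vma_count, unsigned long *locked_vm)
{
	*vma_count += 1;                     /* pretend one vma was detached */
	*locked_vm += (end - start) >> 12;   /* illustrative: assumes 4 KiB pages */
	return 0;
}

/* After: one pointer carries inputs, outputs and flags. */
static int gather_new(struct munmap_desc *md)
{
	md->vma_count += 1;
	md->locked_vm += (md->end - md->start) >> 12;
	return 0;
}

int main(void)
{
	struct munmap_desc md = { .start = 0x1000, .end = 0x5000, .unlock = true };
	int vma_count = 0;
	unsigned long locked_vm = 0;

	gather_old(0x1000, 0x5000, &vma_count, &locked_vm);  /* old shape */
	gather_new(&md);                                     /* new shape */
	printf("old: %d vmas, %lu pages; new: %d vmas, %lu pages\n",
	       vma_count, locked_vm, md.vma_count, md.locked_vm);
	return 0;
}

The payoff is visible throughout the diff: counters such as vma_count and locked_vm become plain struct members rather than locals and output parameters, and adding a new piece of munmap state later only touches the struct and its initializer.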
mm/vma.c

@@ -80,6 +80,32 @@ static void init_multi_vma_prep(struct vma_prepare *vp,
 }
 
+/*
+ * init_vma_munmap() - Initializer wrapper for vma_munmap_struct
+ * @vms: The vma munmap struct
+ * @vmi: The vma iterator
+ * @vma: The first vm_area_struct to munmap
+ * @start: The aligned start address to munmap
+ * @end: The aligned end address to munmap
+ * @uf: The userfaultfd list_head
+ * @unlock: Unlock after the operation.  Only unlocked on success
+ */
+static inline void init_vma_munmap(struct vma_munmap_struct *vms,
+		struct vma_iterator *vmi, struct vm_area_struct *vma,
+		unsigned long start, unsigned long end, struct list_head *uf,
+		bool unlock)
+{
+	vms->vmi = vmi;
+	vms->vma = vma;
+	vms->mm = vma->vm_mm;
+	vms->start = start;
+	vms->end = end;
+	vms->unlock = unlock;
+	vms->uf = uf;
+	vms->vma_count = 0;
+	vms->nr_pages = vms->locked_vm = 0;
+}
+
 /*
  * Return true if we can merge this (vm_flags,anon_vma,file,vm_pgoff)
  * in front of (at a lower virtual address and file offset than) the vma.
@@ -685,81 +711,62 @@ static inline void abort_munmap_vmas(struct ma_state *mas_detach)
 }
 
 /*
- * vmi_complete_munmap_vmas() - Finish the munmap() operation
- * @vmi: The vma iterator
- * @vma: The first vma to be munmapped
- * @mm: The mm struct
- * @start: The start address
- * @end: The end address
- * @unlock: Unlock the mm or not
- * @mas_detach: them maple state of the detached vma maple tree
- * @locked_vm: The locked_vm count in the detached vmas
+ * vms_complete_munmap_vmas() - Finish the munmap() operation
+ * @vms: The vma munmap struct
+ * @mas_detach: The maple state of the detached vmas
  *
- * This function updates the mm_struct, unmaps the region, frees the resources
+ * This updates the mm_struct, unmaps the region, frees the resources
  * used for the munmap() and may downgrade the lock - if requested.  Everything
  * needed to be done once the vma maple tree is updated.
  */
-static void
-vmi_complete_munmap_vmas(struct vma_iterator *vmi, struct vm_area_struct *vma,
-		struct mm_struct *mm, unsigned long start, unsigned long end,
-		bool unlock, struct ma_state *mas_detach,
-		unsigned long locked_vm)
+static void vms_complete_munmap_vmas(struct vma_munmap_struct *vms,
+		struct ma_state *mas_detach)
 {
 	struct vm_area_struct *prev, *next;
-	int count;
+	struct mm_struct *mm;
 
-	count = mas_detach->index + 1;
-	mm->map_count -= count;
-	mm->locked_vm -= locked_vm;
-	if (unlock)
+	mm = vms->mm;
+	mm->map_count -= vms->vma_count;
+	mm->locked_vm -= vms->locked_vm;
+	if (vms->unlock)
 		mmap_write_downgrade(mm);
 
-	prev = vma_iter_prev_range(vmi);
-	next = vma_next(vmi);
+	prev = vma_iter_prev_range(vms->vmi);
+	next = vma_next(vms->vmi);
 	if (next)
-		vma_iter_prev_range(vmi);
+		vma_iter_prev_range(vms->vmi);
 
 	/*
 	 * We can free page tables without write-locking mmap_lock because VMAs
 	 * were isolated before we downgraded mmap_lock.
 	 */
 	mas_set(mas_detach, 1);
-	unmap_region(mm, mas_detach, vma, prev, next, start, end, count,
-		     !unlock);
+	unmap_region(mm, mas_detach, vms->vma, prev, next, vms->start, vms->end,
+		     vms->vma_count, !vms->unlock);
 	/* Statistics and freeing VMAs */
 	mas_set(mas_detach, 0);
 	remove_mt(mm, mas_detach);
 	validate_mm(mm);
-	if (unlock)
+	if (vms->unlock)
 		mmap_read_unlock(mm);
 
 	__mt_destroy(mas_detach->tree);
 }
 
 /*
- * vmi_gather_munmap_vmas() - Put all VMAs within a range into a maple tree
+ * vms_gather_munmap_vmas() - Put all VMAs within a range into a maple tree
  * for removal at a later date.  Handles splitting first and last if necessary
  * and marking the vmas as isolated.
  *
- * @vmi: The vma iterator
- * @vma: The starting vm_area_struct
- * @mm: The mm_struct
- * @start: The aligned start address to munmap.
- * @end: The aligned end address to munmap.
- * @uf: The userfaultfd list_head
+ * @vms: The vma munmap struct
  * @mas_detach: The maple state tracking the detached tree
- * @locked_vm: a pointer to store the VM_LOCKED pages count.
  *
  * Return: 0 on success, -EPERM on mseal vmas, -ENOMEM otherwise
  */
-static int
-vmi_gather_munmap_vmas(struct vma_iterator *vmi, struct vm_area_struct *vma,
-		struct mm_struct *mm, unsigned long start,
-		unsigned long end, struct list_head *uf,
-		struct ma_state *mas_detach, unsigned long *locked_vm)
+static int vms_gather_munmap_vmas(struct vma_munmap_struct *vms,
+		struct ma_state *mas_detach)
 {
 	struct vm_area_struct *next = NULL;
-	int count = 0;
 	int error = -ENOMEM;
 
 	/*
@@ -771,23 +778,24 @@ vmi_gather_munmap_vmas(struct vma_iterator *vmi, struct vm_area_struct *vma,
 	 */
 
 	/* Does it split the first one? */
-	if (start > vma->vm_start) {
+	if (vms->start > vms->vma->vm_start) {
 
 		/*
 		 * Make sure that map_count on return from munmap() will
 		 * not exceed its limit; but let map_count go just above
 		 * its limit temporarily, to help free resources as expected.
 		 */
-		if (end < vma->vm_end && mm->map_count >= sysctl_max_map_count)
+		if (vms->end < vms->vma->vm_end &&
+		    vms->mm->map_count >= sysctl_max_map_count)
 			goto map_count_exceeded;
 
 		/* Don't bother splitting the VMA if we can't unmap it anyway */
-		if (!can_modify_vma(vma)) {
+		if (!can_modify_vma(vms->vma)) {
 			error = -EPERM;
 			goto start_split_failed;
 		}
 
-		if (__split_vma(vmi, vma, start, 1))
+		if (__split_vma(vms->vmi, vms->vma, vms->start, 1))
 			goto start_split_failed;
 	}
@@ -795,7 +803,7 @@ vmi_gather_munmap_vmas(struct vma_iterator *vmi, struct vm_area_struct *vma,
 	 * Detach a range of VMAs from the mm. Using next as a temp variable as
 	 * it is always overwritten.
 	 */
-	next = vma;
+	next = vms->vma;
 	do {
 		if (!can_modify_vma(next)) {
 			error = -EPERM;
@@ -803,20 +811,20 @@ vmi_gather_munmap_vmas(struct vma_iterator *vmi, struct vm_area_struct *vma,
 		}
 
 		/* Does it split the end? */
-		if (next->vm_end > end) {
-			if (__split_vma(vmi, next, end, 0))
+		if (next->vm_end > vms->end) {
+			if (__split_vma(vms->vmi, next, vms->end, 0))
 				goto end_split_failed;
 		}
 		vma_start_write(next);
-		mas_set(mas_detach, count++);
+		mas_set(mas_detach, vms->vma_count++);
 		if (mas_store_gfp(mas_detach, next, GFP_KERNEL))
 			goto munmap_gather_failed;
 
 		vma_mark_detached(next, true);
 		if (next->vm_flags & VM_LOCKED)
-			*locked_vm += vma_pages(next);
+			vms->locked_vm += vma_pages(next);
 
-		if (unlikely(uf)) {
+		if (unlikely(vms->uf)) {
 			/*
 			 * If userfaultfd_unmap_prep returns an error the vmas
 			 * will remain split, but userland will get a
@@ -826,14 +834,15 @@ vmi_gather_munmap_vmas(struct vma_iterator *vmi, struct vm_area_struct *vma,
 			 * split, despite we could.  This is unlikely enough
 			 * failure that it's not worth optimizing it for.
 			 */
-			if (userfaultfd_unmap_prep(next, start, end, uf))
+			if (userfaultfd_unmap_prep(next, vms->start, vms->end,
+						   vms->uf))
 				goto userfaultfd_error;
 		}
 #ifdef CONFIG_DEBUG_VM_MAPLE_TREE
-		BUG_ON(next->vm_start < start);
-		BUG_ON(next->vm_start > end);
+		BUG_ON(next->vm_start < vms->start);
+		BUG_ON(next->vm_start > vms->end);
 #endif
-	} for_each_vma_range(*vmi, next, end);
+	} for_each_vma_range(*(vms->vmi), next, vms->end);
 
 #if defined(CONFIG_DEBUG_VM_MAPLE_TREE)
 	/* Make sure no VMAs are about to be lost. */
@@ -842,21 +851,21 @@ vmi_gather_munmap_vmas(struct vma_iterator *vmi, struct vm_area_struct *vma,
 		struct vm_area_struct *vma_mas, *vma_test;
 		int test_count = 0;
 
-		vma_iter_set(vmi, start);
+		vma_iter_set(vms->vmi, vms->start);
 		rcu_read_lock();
-		vma_test = mas_find(&test, count - 1);
-		for_each_vma_range(*vmi, vma_mas, end) {
+		vma_test = mas_find(&test, vms->vma_count - 1);
+		for_each_vma_range(*(vms->vmi), vma_mas, vms->end) {
 			BUG_ON(vma_mas != vma_test);
 			test_count++;
-			vma_test = mas_next(&test, count - 1);
+			vma_test = mas_next(&test, vms->vma_count - 1);
 		}
 		rcu_read_unlock();
-		BUG_ON(count != test_count);
+		BUG_ON(vms->vma_count != test_count);
 	}
 #endif
 
-	while (vma_iter_addr(vmi) > start)
-		vma_iter_prev_range(vmi);
+	while (vma_iter_addr(vms->vmi) > vms->start)
+		vma_iter_prev_range(vms->vmi);
 
 	return 0;
@@ -892,11 +901,11 @@ int do_vmi_align_munmap(struct vma_iterator *vmi, struct vm_area_struct *vma,
 	MA_STATE(mas_detach, &mt_detach, 0, 0);
 	mt_init_flags(&mt_detach, vmi->mas.tree->ma_flags & MT_FLAGS_LOCK_MASK);
 	mt_on_stack(mt_detach);
+	struct vma_munmap_struct vms;
 	int error;
-	unsigned long locked_vm = 0;
 
-	error = vmi_gather_munmap_vmas(vmi, vma, mm, start, end, uf,
-				       &mas_detach, &locked_vm);
+	init_vma_munmap(&vms, vmi, vma, start, end, uf, unlock);
+	error = vms_gather_munmap_vmas(&vms, &mas_detach);
 	if (error)
 		goto gather_failed;
@@ -905,8 +914,7 @@ int do_vmi_align_munmap(struct vma_iterator *vmi, struct vm_area_struct *vma,
 		goto clear_tree_failed;
 
 	/* Point of no return */
-	vmi_complete_munmap_vmas(vmi, vma, mm, start, end, unlock, &mas_detach,
-				 locked_vm);
+	vms_complete_munmap_vmas(&vms, &mas_detach);
 	return 0;
 
 clear_tree_failed:
mm/vma.h

@@ -26,6 +26,22 @@ struct unlink_vma_file_batch {
 	struct vm_area_struct *vmas[8];
 };
 
+/*
+ * vma munmap operation
+ */
+struct vma_munmap_struct {
+	struct vma_iterator *vmi;
+	struct mm_struct *mm;
+	struct vm_area_struct *vma;	/* The first vma to munmap */
+	struct list_head *uf;		/* Userfaultfd list_head */
+	unsigned long start;		/* Aligned start addr (inclusive) */
+	unsigned long end;		/* Aligned end addr (exclusive) */
+	int vma_count;			/* Number of vmas that will be removed */
+	unsigned long nr_pages;		/* Number of pages being removed */
+	unsigned long locked_vm;	/* Number of locked pages */
+	bool unlock;			/* Unlock after the munmap */
+};
+
 #ifdef CONFIG_DEBUG_VM_MAPLE_TREE
 void validate_mm(struct mm_struct *mm);
 #else
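Taken together, the helpers give do_vmi_align_munmap() a clean three-phase shape. Condensed from the hunks above (declarations and error paths elided, so this is a reading aid rather than a compilable excerpt):

	init_vma_munmap(&vms, vmi, vma, start, end, uf, unlock);
	error = vms_gather_munmap_vmas(&vms, &mas_detach);
	if (error)
		goto gather_failed;
	...
	/* Point of no return */
	vms_complete_munmap_vmas(&vms, &mas_detach);

Note that vma_count and locked_vm are accumulated by the gather phase and consumed by the complete phase, while nr_pages is only zeroed in init_vma_munmap() here; presumably it is filled in and used by a later patch in this series.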