Commit fd892593 authored by Liam R. Howlett, committed by Andrew Morton

mm: change do_vmi_align_munmap() tracking of VMAs to remove

The majority of calls to munmap a VM range fall within a single VMA.  The
maple tree can store a single entry at index 0, with a size of 1, as a
plain pointer and avoid any node allocations.  Change
do_vmi_align_munmap() to store the VMAs being munmap()'ed into a tree
indexed by the count.  This leverages the ability to store the first
entry without a node allocation.
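
As an illustration only, a simplified sketch of the gathering step (the
helper name is hypothetical; mas_set() and mas_store_gfp() are the real
maple tree calls the patch uses):

#include <linux/maple_tree.h>
#include <linux/mm_types.h>
#include <linux/gfp.h>

/*
 * Sketch: record a VMA that is about to be removed in a detached maple
 * tree keyed by a running count instead of by its address range.  The
 * first store lands at index 0, which the tree holds as a plain pointer,
 * so the common single-VMA munmap() needs no maple node allocation.
 */
static int detach_gather_vma(struct ma_state *mas_detach,
                             struct vm_area_struct *next, int count)
{
        mas_set(mas_detach, count);     /* key = count, not next->vm_start */
        return mas_store_gfp(mas_detach, next, GFP_KERNEL);
}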

Storing the entries in the tree keyed by the count rather than by the VMA
start and end means the functions which iterate over the entries must
change.  Update unmap_vmas() and free_pgtables() to take a maple state
and a tree end to support this.
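
A sketch of the consumer side (hypothetical helper; the real changes are
the unmap_vmas() and free_pgtables() hunks below).  The walk takes the
caller's maple state plus a tree end, so the same loop works whether the
state walks mm->mm_mt keyed by address or a detached tree keyed by count:

#include <linux/maple_tree.h>
#include <linux/mm_types.h>

/*
 * Sketch: visit the VMA the caller already positioned the state on, then
 * the remaining entries up to tree_end - 1.  tree_end is an index into
 * whatever tree 'mas' walks: an end address for mm->mm_mt, the VMA count
 * for the detached munmap tree.
 */
static void for_each_gathered_vma(struct ma_state *mas,
                                  struct vm_area_struct *vma,
                                  unsigned long tree_end)
{
        do {
                /* operate on vma: unmap it, free its page tables, ... */
        } while ((vma = mas_find(mas, tree_end - 1)) != NULL);
}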

Passing the same maple state through to unmap_vmas() and free_pgtables()
means the state needs to be reset between the calls.  This is done in the
static unmap_region() and in exit_mmap().
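
For example, a minimal sketch of that reset pattern (hypothetical
function; mas_set(), mas_for_each() and mas->index are the real maple
tree interfaces, used the same way unmap_region() does around
unmap_vmas() and free_pgtables()):

#include <linux/maple_tree.h>

/*
 * Sketch: two back-to-back walks over the same tree entries share one
 * ma_state.  Remember the starting index and rewind with mas_set()
 * between the walks, mirroring the mt_start/mas_set() dance that
 * unmap_region() now performs.
 */
static unsigned long walk_twice(struct ma_state *mas, unsigned long tree_end)
{
        unsigned long mt_start = mas->index;    /* starting index of the walk */
        unsigned long visits = 0;
        void *entry;

        /* First pass, e.g. unmap_vmas() consuming the entries. */
        mas_for_each(mas, entry, tree_end - 1)
                visits++;

        /* Rewind the shared state before reusing it. */
        mas_set(mas, mt_start);

        /* Second pass, e.g. free_pgtables() over the same entries. */
        mas_for_each(mas, entry, tree_end - 1)
                visits++;

        return visits;
}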

Link: https://lkml.kernel.org/r/20230724183157.3939892-4-Liam.Howlett@oracle.com
Signed-off-by: Liam R. Howlett <Liam.Howlett@oracle.com>
Cc: Peng Zhang <zhangpeng.00@bytedance.com>
Cc: Suren Baghdasaryan <surenb@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
parent 8c314f3b
@@ -2287,9 +2287,9 @@ static inline void zap_vma_pages(struct vm_area_struct *vma)
         zap_page_range_single(vma, vma->vm_start,
                         vma->vm_end - vma->vm_start, NULL);
 }
 
-void unmap_vmas(struct mmu_gather *tlb, struct maple_tree *mt,
+void unmap_vmas(struct mmu_gather *tlb, struct ma_state *mas,
                 struct vm_area_struct *start_vma, unsigned long start,
-                unsigned long end, bool mm_wr_locked);
+                unsigned long end, unsigned long tree_end, bool mm_wr_locked);
 
 struct mmu_notifier_range;
...
@@ -109,7 +109,7 @@ bool __folio_end_writeback(struct folio *folio);
 void deactivate_file_folio(struct folio *folio);
 void folio_activate(struct folio *folio);
 
-void free_pgtables(struct mmu_gather *tlb, struct maple_tree *mt,
+void free_pgtables(struct mmu_gather *tlb, struct ma_state *mas,
                 struct vm_area_struct *start_vma, unsigned long floor,
                 unsigned long ceiling, bool mm_wr_locked);
 void pmd_install(struct mm_struct *mm, pmd_t *pmd, pgtable_t *pte);
...
@@ -361,12 +361,10 @@ void free_pgd_range(struct mmu_gather *tlb,
         } while (pgd++, addr = next, addr != end);
 }
 
-void free_pgtables(struct mmu_gather *tlb, struct maple_tree *mt,
+void free_pgtables(struct mmu_gather *tlb, struct ma_state *mas,
                 struct vm_area_struct *vma, unsigned long floor,
                 unsigned long ceiling, bool mm_wr_locked)
 {
-        MA_STATE(mas, mt, vma->vm_end, vma->vm_end);
-
         do {
                 unsigned long addr = vma->vm_start;
                 struct vm_area_struct *next;
@@ -375,7 +373,7 @@ void free_pgtables(struct mmu_gather *tlb, struct maple_tree *mt,
                  * Note: USER_PGTABLES_CEILING may be passed as ceiling and may
                  * be 0. This will underflow and is okay.
                  */
-                next = mas_find(&mas, ceiling - 1);
+                next = mas_find(mas, ceiling - 1);
 
                 /*
                  * Hide vma from rmap and truncate_pagecache before freeing
@@ -396,7 +394,7 @@ void free_pgtables(struct mmu_gather *tlb, struct maple_tree *mt,
                 while (next && next->vm_start <= vma->vm_end + PMD_SIZE
                        && !is_vm_hugetlb_page(next)) {
                         vma = next;
-                        next = mas_find(&mas, ceiling - 1);
+                        next = mas_find(mas, ceiling - 1);
                         if (mm_wr_locked)
                                 vma_start_write(vma);
                         unlink_anon_vmas(vma);
@@ -1713,9 +1711,10 @@ static void unmap_single_vma(struct mmu_gather *tlb,
  * ensure that any thus-far unmapped pages are flushed before unmap_vmas()
  * drops the lock and schedules.
  */
-void unmap_vmas(struct mmu_gather *tlb, struct maple_tree *mt,
+void unmap_vmas(struct mmu_gather *tlb, struct ma_state *mas,
                 struct vm_area_struct *vma, unsigned long start_addr,
-                unsigned long end_addr, bool mm_wr_locked)
+                unsigned long end_addr, unsigned long tree_end,
+                bool mm_wr_locked)
 {
         struct mmu_notifier_range range;
         struct zap_details details = {
@@ -1723,7 +1722,6 @@ void unmap_vmas(struct mmu_gather *tlb, struct maple_tree *mt,
                 /* Careful - we need to zap private pages too! */
                 .even_cows = true,
         };
-        MA_STATE(mas, mt, vma->vm_end, vma->vm_end);
 
         mmu_notifier_range_init(&range, MMU_NOTIFY_UNMAP, 0, vma->vm_mm,
                                 start_addr, end_addr);
@@ -1731,7 +1729,7 @@ void unmap_vmas(struct mmu_gather *tlb, struct maple_tree *mt,
         do {
                 unmap_single_vma(tlb, vma, start_addr, end_addr, &details,
                                  mm_wr_locked);
-        } while ((vma = mas_find(&mas, end_addr - 1)) != NULL);
+        } while ((vma = mas_find(mas, tree_end - 1)) != NULL);
         mmu_notifier_invalidate_range_end(&range);
 }
 
...
@@ -76,10 +76,10 @@ int mmap_rnd_compat_bits __read_mostly = CONFIG_ARCH_MMAP_RND_COMPAT_BITS;
 static bool ignore_rlimit_data;
 core_param(ignore_rlimit_data, ignore_rlimit_data, bool, 0644);
 
-static void unmap_region(struct mm_struct *mm, struct maple_tree *mt,
+static void unmap_region(struct mm_struct *mm, struct ma_state *mas,
                 struct vm_area_struct *vma, struct vm_area_struct *prev,
                 struct vm_area_struct *next, unsigned long start,
-                unsigned long end, bool mm_wr_locked);
+                unsigned long end, unsigned long tree_end, bool mm_wr_locked);
 
 static pgprot_t vm_pgprot_modify(pgprot_t oldprot, unsigned long vm_flags)
 {
@@ -2293,18 +2293,20 @@ static inline void remove_mt(struct mm_struct *mm, struct ma_state *mas)
  *
  * Called with the mm semaphore held.
  */
-static void unmap_region(struct mm_struct *mm, struct maple_tree *mt,
+static void unmap_region(struct mm_struct *mm, struct ma_state *mas,
                 struct vm_area_struct *vma, struct vm_area_struct *prev,
-                struct vm_area_struct *next,
-                unsigned long start, unsigned long end, bool mm_wr_locked)
+                struct vm_area_struct *next, unsigned long start,
+                unsigned long end, unsigned long tree_end, bool mm_wr_locked)
 {
         struct mmu_gather tlb;
+        unsigned long mt_start = mas->index;
 
         lru_add_drain();
         tlb_gather_mmu(&tlb, mm);
         update_hiwater_rss(mm);
-        unmap_vmas(&tlb, mt, vma, start, end, mm_wr_locked);
-        free_pgtables(&tlb, mt, vma, prev ? prev->vm_end : FIRST_USER_ADDRESS,
+        unmap_vmas(&tlb, mas, vma, start, end, tree_end, mm_wr_locked);
+        mas_set(mas, mt_start);
+        free_pgtables(&tlb, mas, vma, prev ? prev->vm_end : FIRST_USER_ADDRESS,
                       next ? next->vm_start : USER_PGTABLES_CEILING,
                       mm_wr_locked);
         tlb_finish_mmu(&tlb);
@@ -2472,7 +2474,7 @@ do_vmi_align_munmap(struct vma_iterator *vmi, struct vm_area_struct *vma,
                                 goto end_split_failed;
                 }
                 vma_start_write(next);
-                mas_set_range(&mas_detach, next->vm_start, next->vm_end - 1);
+                mas_set(&mas_detach, count);
                 error = mas_store_gfp(&mas_detach, next, GFP_KERNEL);
                 if (error)
                         goto munmap_gather_failed;
@@ -2511,17 +2513,17 @@ do_vmi_align_munmap(struct vma_iterator *vmi, struct vm_area_struct *vma,
 #if defined(CONFIG_DEBUG_VM_MAPLE_TREE)
         /* Make sure no VMAs are about to be lost. */
         {
-                MA_STATE(test, &mt_detach, start, end - 1);
+                MA_STATE(test, &mt_detach, 0, 0);
                 struct vm_area_struct *vma_mas, *vma_test;
                 int test_count = 0;
 
                 vma_iter_set(vmi, start);
                 rcu_read_lock();
-                vma_test = mas_find(&test, end - 1);
+                vma_test = mas_find(&test, count - 1);
                 for_each_vma_range(*vmi, vma_mas, end) {
                         BUG_ON(vma_mas != vma_test);
                         test_count++;
-                        vma_test = mas_next(&test, end - 1);
+                        vma_test = mas_next(&test, count - 1);
                 }
                 rcu_read_unlock();
                 BUG_ON(count != test_count);
@@ -2542,9 +2544,11 @@ do_vmi_align_munmap(struct vma_iterator *vmi, struct vm_area_struct *vma,
          * We can free page tables without write-locking mmap_lock because VMAs
          * were isolated before we downgraded mmap_lock.
          */
-        unmap_region(mm, &mt_detach, vma, prev, next, start, end, !unlock);
+        mas_set(&mas_detach, 1);
+        unmap_region(mm, &mas_detach, vma, prev, next, start, end, count,
+                     !unlock);
         /* Statistics and freeing VMAs */
-        mas_set(&mas_detach, start);
+        mas_set(&mas_detach, 0);
         remove_mt(mm, &mas_detach);
         validate_mm(mm);
         if (unlock)
@@ -2864,9 +2868,10 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
                 fput(vma->vm_file);
                 vma->vm_file = NULL;
 
+                vma_iter_set(&vmi, vma->vm_end);
                 /* Undo any partial mapping done by a device driver. */
-                unmap_region(mm, &mm->mm_mt, vma, prev, next, vma->vm_start,
-                             vma->vm_end, true);
+                unmap_region(mm, &vmi.mas, vma, prev, next, vma->vm_start,
+                             vma->vm_end, vma->vm_end, true);
         }
         if (file && (vm_flags & VM_SHARED))
                 mapping_unmap_writable(file->f_mapping);
@@ -3185,7 +3190,7 @@ void exit_mmap(struct mm_struct *mm)
         tlb_gather_mmu_fullmm(&tlb, mm);
         /* update_hiwater_rss(mm) here? but nobody should be looking */
         /* Use ULONG_MAX here to ensure all VMAs in the mm are unmapped */
-        unmap_vmas(&tlb, &mm->mm_mt, vma, 0, ULONG_MAX, false);
+        unmap_vmas(&tlb, &mas, vma, 0, ULONG_MAX, ULONG_MAX, false);
         mmap_read_unlock(mm);
 
         /*
@@ -3195,7 +3200,8 @@ void exit_mmap(struct mm_struct *mm)
         set_bit(MMF_OOM_SKIP, &mm->flags);
         mmap_write_lock(mm);
         mt_clear_in_rcu(&mm->mm_mt);
-        free_pgtables(&tlb, &mm->mm_mt, vma, FIRST_USER_ADDRESS,
+        mas_set(&mas, vma->vm_end);
+        free_pgtables(&tlb, &mas, vma, FIRST_USER_ADDRESS,
                       USER_PGTABLES_CEILING, true);
         tlb_finish_mmu(&tlb);
 
@@ -3204,6 +3210,7 @@ void exit_mmap(struct mm_struct *mm)
          * enabled, without holding any MM locks besides the unreachable
          * mmap_write_lock.
          */
+        mas_set(&mas, vma->vm_end);
         do {
                 if (vma->vm_flags & VM_ACCOUNT)
                         nr_accounted += vma_pages(vma);
...