Commit 88c91dc5 authored by Hugh Dickins, committed by Andrew Morton

mempolicy: migration attempt to match interleave nodes

Improve alloc_migration_target_by_mpol()'s treatment of MPOL_INTERLEAVE.

Make an effort in do_mbind() to identify the correct interleave index for
the first page to be migrated, so that it and all subsequent pages from
the same vma will be targeted to precisely their intended nodes.  Pages
from following vmas will still be interleaved from the requested nodemask,
but perhaps starting from a different base.

Whether this is worth doing at all, or worth improving further, is
arguable: queue_folio_required() is right not to care about the precise
placement on interleaved nodes; but this little effort seems appropriate.
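
For context, MPOL_INTERLEAVE spreads allocations round-robin across the nodes
of the policy's nodemask, with the interleave index (ilx) choosing which node a
given page lands on.  The sketch below is a rough userspace model of that
mapping, not the kernel's code (pick_interleave_node() and its arguments are
invented for illustration): fixing the base index for the first migrated page
keeps every later page of the same vma on its intended node.

/*
 * Rough userspace model of MPOL_INTERLEAVE's index-to-node mapping; not
 * the kernel's implementation.  The ilx'th allocation goes to the
 * (ilx % nr_nodes)'th node of the interleave nodemask.
 */
#include <stdio.h>

static int pick_interleave_node(const int *nodes, unsigned long nr_nodes,
                                unsigned long ilx)
{
        return nodes[ilx % nr_nodes];
}

int main(void)
{
        int nodes[] = { 0, 2, 3 };      /* example interleave nodemask */
        unsigned long base_ilx = 1;     /* base index found for the first page */

        /* Later pages of the same vma continue the round-robin from base_ilx */
        for (unsigned long i = 0; i < 6; i++)
                printf("page %lu -> node %d\n", i,
                       pick_interleave_node(nodes, 3, base_ilx + i));
        return 0;
}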

[hughd@google.com: do vma_iter search under mmap_write_unlock()]
  Link: https://lkml.kernel.org/r/3311d544-fb05-a7f1-1b74-16aa0f6cd4fe@google.com
Link: https://lkml.kernel.org/r/77954a5-9c9b-1c11-7d5c-3262c01b895f@google.com
Signed-off-by: Hugh Dickins <hughd@google.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: "Huang, Ying" <ying.huang@intel.com>
Cc: Kefeng Wang <wangkefeng.wang@huawei.com>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Mel Gorman <mgorman@techsingularity.net>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Mike Kravetz <mike.kravetz@oracle.com>
Cc: Nhat Pham <nphamcs@gmail.com>
Cc: Sidhartha Kumar <sidhartha.kumar@oracle.com>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Tejun Heo <tj@kernel.org>
Cc: Vishal Moola (Oracle) <vishal.moola@gmail.com>
Cc: Yang Shi <shy828301@gmail.com>
Cc: Yosry Ahmed <yosryahmed@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
parent 72e315f7
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -428,6 +428,11 @@ static bool strictly_unmovable(unsigned long flags)
 		MPOL_MF_STRICT;
 }
 
+struct migration_mpol {		/* for alloc_migration_target_by_mpol() */
+	struct mempolicy *pol;
+	pgoff_t ilx;
+};
+
 struct queue_pages {
 	struct list_head *pagelist;
 	unsigned long flags;
@@ -1156,8 +1161,9 @@ int do_migrate_pages(struct mm_struct *mm, const nodemask_t *from,
 static struct folio *alloc_migration_target_by_mpol(struct folio *src,
 						    unsigned long private)
 {
-	struct mempolicy *pol = (struct mempolicy *)private;
-	pgoff_t ilx = 0;	/* improve on this later */
+	struct migration_mpol *mmpol = (struct migration_mpol *)private;
+	struct mempolicy *pol = mmpol->pol;
+	pgoff_t ilx = mmpol->ilx;
 	struct page *page;
 	unsigned int order;
 	int nid = numa_node_id();
@@ -1212,6 +1218,7 @@ static long do_mbind(unsigned long start, unsigned long len,
 	struct mm_struct *mm = current->mm;
 	struct vm_area_struct *vma, *prev;
 	struct vma_iterator vmi;
+	struct migration_mpol mmpol;
 	struct mempolicy *new;
 	unsigned long end;
 	long err;
@@ -1284,17 +1291,55 @@ static long do_mbind(unsigned long start, unsigned long len,
 		}
 	}
 
-	mmap_write_unlock(mm);
-
 	if (!err && !list_empty(&pagelist)) {
 		/* Convert MPOL_DEFAULT's NULL to task or default policy */
 		if (!new) {
 			new = get_task_policy(current);
 			mpol_get(new);
 		}
+		mmpol.pol = new;
+		mmpol.ilx = 0;
+
+		/*
+		 * In the interleaved case, attempt to allocate on exactly the
+		 * targeted nodes, for the first VMA to be migrated; for later
+		 * VMAs, the nodes will still be interleaved from the targeted
+		 * nodemask, but one by one may be selected differently.
+		 */
+		if (new->mode == MPOL_INTERLEAVE) {
+			struct page *page;
+			unsigned int order;
+			unsigned long addr = -EFAULT;
+
+			list_for_each_entry(page, &pagelist, lru) {
+				if (!PageKsm(page))
+					break;
+			}
+			if (!list_entry_is_head(page, &pagelist, lru)) {
+				vma_iter_init(&vmi, mm, start);
+				for_each_vma_range(vmi, vma, end) {
+					addr = page_address_in_vma(page, vma);
+					if (addr != -EFAULT)
+						break;
+				}
+			}
+			if (addr != -EFAULT) {
+				order = compound_order(page);
+				/* We already know the pol, but not the ilx */
+				mpol_cond_put(get_vma_policy(vma, addr, order,
+							     &mmpol.ilx));
+				/* Set base from which to increment by index */
+				mmpol.ilx -= page->index >> order;
+			}
+		}
+	}
+
+	mmap_write_unlock(mm);
+
+	if (!err && !list_empty(&pagelist)) {
 		nr_failed |= migrate_pages(&pagelist,
 				alloc_migration_target_by_mpol, NULL,
-				(unsigned long)new, MIGRATE_SYNC,
+				(unsigned long)&mmpol, MIGRATE_SYNC,
 				MR_MEMPOLICY_MBIND, NULL);
 	}
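
The key line in the last hunk is "mmpol.ilx -= page->index >> order".  The
allocation side (in context not shown in this hunk) appears to add each
folio's (index >> order) back onto the base, so subtracting the first page's
offset anchors the interleave sequence on exactly the node that
get_vma_policy() reported, with later folios following on from it.  Below is
a hedged userspace sketch of that arithmetic, using invented variable names:

/*
 * Illustrative arithmetic only (userspace model, invented names): why the
 * base is "wanted ilx minus (page->index >> order)".  Unsigned wraparound
 * in the subtraction is harmless because it cancels when the per-folio
 * offset is added back at allocation time.
 */
#include <assert.h>
#include <stdio.h>

int main(void)
{
        unsigned long first_index = 40; /* page->index of first page to migrate */
        unsigned int order = 0;         /* 0 for a base page, 9 for a 2MB THP */
        unsigned long wanted_ilx = 7;   /* ilx computed by get_vma_policy() */

        /* do_mbind():  mmpol.ilx -= page->index >> order; */
        unsigned long base = wanted_ilx - (first_index >> order);

        /* allocation side:  ilx = base + (folio index >> order) */
        unsigned long ilx_first = base + (first_index >> order);
        unsigned long ilx_next  = base + ((first_index + 1) >> order);

        assert(ilx_first == wanted_ilx);        /* first page hits its intended node */
        assert(ilx_next == wanted_ilx + 1);     /* the next page steps to the next node */
        printf("base=%lu first=%lu next=%lu\n", base, ilx_first, ilx_next);
        return 0;
}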