Commit a7f40cfe authored by Yang Shi, committed by Linus Torvalds

mm: mempolicy: make mbind() return -EIO when MPOL_MF_STRICT is specified

When MPOL_MF_STRICT was specified and an existing page was already on a
node that does not follow the policy, mbind() should return -EIO.  But
commit 6f4576e3 ("mempolicy: apply page table walker on
queue_pages_range()") broke the rule.

And commit c8633798 ("mm: mempolicy: mbind and migrate_pages support
thp migration") didn't return the correct value for THP mbind() too.

If MPOL_MF_STRICT is set, ignore vma_migratable() to make sure it
reaches queue_pages_pte_range() or queue_pages_pmd() to check whether
an existing page is already on a node that does not follow the policy.
And if a non-migratable VMA is in use, return -EIO as well when
MPOL_MF_MOVE or MPOL_MF_MOVE_ALL was specified.

Tested with https://github.com/metan-ucw/ltp/blob/master/testcases/kernel/syscalls/mbind/mbind02.c
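
As a rough illustration of the userspace-visible behaviour (a minimal
sketch, not the LTP test above; it assumes a kernel with this fix,
libnuma development headers, and at least two online NUMA nodes):
fault a page in under a node-0 policy, then mbind() the range to node 1
with only MPOL_MF_STRICT set; the second call now fails with EIO
instead of succeeding.

/*
 * Hypothetical example (build: cc mbind_eio.c -lnuma), not part of the
 * commit.  Demonstrates the -EIO return that mbind02.c exercises.
 */
#include <errno.h>
#include <numaif.h>
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
	long psz = sysconf(_SC_PAGESIZE);
	unsigned long node0 = 1UL << 0;	/* nodemask bit for node 0 */
	unsigned long node1 = 1UL << 1;	/* nodemask bit for node 1 */
	char *p;

	p = mmap(NULL, psz, PROT_READ | PROT_WRITE,
		 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (p == MAP_FAILED)
		return 1;

	/* Bind the range to node 0 and fault the page in there. */
	if (mbind(p, psz, MPOL_BIND, &node0, 8 * sizeof(node0), 0))
		return 1;
	memset(p, 1, psz);

	/*
	 * Ask for node 1 with MPOL_MF_STRICT but no move flag.  The
	 * existing page violates the new policy, so the call fails
	 * with EIO on a fixed kernel.
	 */
	if (mbind(p, psz, MPOL_BIND, &node1, 8 * sizeof(node1),
		  MPOL_MF_STRICT) != 0 && errno == EIO)
		printf("mbind() returned EIO as expected\n");

	return 0;
}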

[akpm@linux-foundation.org: tweak code comment]
Link: http://lkml.kernel.org/r/1553020556-38583-1-git-send-email-yang.shi@linux.alibaba.com
Fixes: 6f4576e3 ("mempolicy: apply page table walker on queue_pages_range()")
Signed-off-by: Yang Shi <yang.shi@linux.alibaba.com>
Signed-off-by: Oscar Salvador <osalvador@suse.de>
Reported-by: Cyril Hrubis <chrubis@suse.cz>
Suggested-by: Kirill A. Shutemov <kirill@shutemov.name>
Acked-by: Rafael Aquini <aquini@redhat.com>
Reviewed-by: Oscar Salvador <osalvador@suse.de>
Acked-by: David Rientjes <rientjes@google.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent a953e772
mm/mempolicy.c

@@ -428,6 +428,13 @@ static inline bool queue_pages_required(struct page *page,
 	return node_isset(nid, *qp->nmask) == !(flags & MPOL_MF_INVERT);
 }
 
+/*
+ * queue_pages_pmd() has three possible return values:
+ * 1 - pages are placed on the right node or queued successfully.
+ * 0 - THP was split.
+ * -EIO - is migration entry or MPOL_MF_STRICT was specified and an existing
+ *        page was already on a node that does not follow the policy.
+ */
 static int queue_pages_pmd(pmd_t *pmd, spinlock_t *ptl, unsigned long addr,
 				unsigned long end, struct mm_walk *walk)
 {
@@ -437,7 +444,7 @@ static int queue_pages_pmd(pmd_t *pmd, spinlock_t *ptl, unsigned long addr,
 	unsigned long flags;
 
 	if (unlikely(is_pmd_migration_entry(*pmd))) {
-		ret = 1;
+		ret = -EIO;
 		goto unlock;
 	}
 	page = pmd_page(*pmd);
@@ -454,8 +461,15 @@ static int queue_pages_pmd(pmd_t *pmd, spinlock_t *ptl, unsigned long addr,
 	ret = 1;
 	flags = qp->flags;
 	/* go to thp migration */
-	if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL))
+	if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) {
+		if (!vma_migratable(walk->vma)) {
+			ret = -EIO;
+			goto unlock;
+		}
+
 		migrate_page_add(page, qp->pagelist, flags);
+	} else
+		ret = -EIO;
 unlock:
 	spin_unlock(ptl);
 out:
@@ -480,8 +494,10 @@ static int queue_pages_pte_range(pmd_t *pmd, unsigned long addr,
 	ptl = pmd_trans_huge_lock(pmd, vma);
 	if (ptl) {
 		ret = queue_pages_pmd(pmd, ptl, addr, end, walk);
-		if (ret)
+		if (ret > 0)
 			return 0;
+		else if (ret < 0)
+			return ret;
 	}
 
 	if (pmd_trans_unstable(pmd))
@@ -502,11 +518,16 @@ static int queue_pages_pte_range(pmd_t *pmd, unsigned long addr,
 			continue;
 		if (!queue_pages_required(page, qp))
 			continue;
-		migrate_page_add(page, qp->pagelist, flags);
+		if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) {
+			if (!vma_migratable(vma))
+				break;
+			migrate_page_add(page, qp->pagelist, flags);
+		} else
+			break;
 	}
 	pte_unmap_unlock(pte - 1, ptl);
 	cond_resched();
-	return 0;
+	return addr != end ? -EIO : 0;
 }
 
 static int queue_pages_hugetlb(pte_t *pte, unsigned long hmask,
@@ -576,7 +597,12 @@ static int queue_pages_test_walk(unsigned long start, unsigned long end,
 	unsigned long endvma = vma->vm_end;
 	unsigned long flags = qp->flags;
 
-	if (!vma_migratable(vma))
+	/*
+	 * Need check MPOL_MF_STRICT to return -EIO if possible
+	 * regardless of vma_migratable
+	 */
+	if (!vma_migratable(vma) &&
+	    !(flags & MPOL_MF_STRICT))
 		return 1;
 
 	if (endvma > end)
@@ -603,7 +629,7 @@ static int queue_pages_test_walk(unsigned long start, unsigned long end,
 	}
 
 	/* queue pages from current vma */
-	if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL))
+	if (flags & MPOL_MF_VALID)
 		return 0;
 	return 1;
 }