Commit 4b10e7d5 authored by Mel Gorman

mm: mempolicy: Implement change_prot_numa() in terms of change_protection()

This patch converts change_prot_numa() to use change_protection(). As
pte_numa and friends check the PTE bits directly it is necessary for
change_protection() to use pmd_mknuma(). Hence the required
modifications to change_protection() are a little clumsy but the
end result is that most of the numa page table helpers are just one or
two instructions.
Signed-off-by: Mel Gorman <mgorman@suse.de>
parent b24f53a0
...@@ -27,7 +27,8 @@ extern int move_huge_pmd(struct vm_area_struct *vma, ...@@ -27,7 +27,8 @@ extern int move_huge_pmd(struct vm_area_struct *vma,
unsigned long new_addr, unsigned long old_end, unsigned long new_addr, unsigned long old_end,
pmd_t *old_pmd, pmd_t *new_pmd); pmd_t *old_pmd, pmd_t *new_pmd);
extern int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd, extern int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
unsigned long addr, pgprot_t newprot); unsigned long addr, pgprot_t newprot,
int prot_numa);
enum transparent_hugepage_flag { enum transparent_hugepage_flag {
TRANSPARENT_HUGEPAGE_FLAG, TRANSPARENT_HUGEPAGE_FLAG,
......
...@@ -1080,7 +1080,7 @@ extern unsigned long do_mremap(unsigned long addr, ...@@ -1080,7 +1080,7 @@ extern unsigned long do_mremap(unsigned long addr,
unsigned long flags, unsigned long new_addr); unsigned long flags, unsigned long new_addr);
extern unsigned long change_protection(struct vm_area_struct *vma, unsigned long start, extern unsigned long change_protection(struct vm_area_struct *vma, unsigned long start,
unsigned long end, pgprot_t newprot, unsigned long end, pgprot_t newprot,
int dirty_accountable); int dirty_accountable, int prot_numa);
extern int mprotect_fixup(struct vm_area_struct *vma, extern int mprotect_fixup(struct vm_area_struct *vma,
struct vm_area_struct **pprev, unsigned long start, struct vm_area_struct **pprev, unsigned long start,
unsigned long end, unsigned long newflags); unsigned long end, unsigned long newflags);
...@@ -1552,7 +1552,7 @@ static inline pgprot_t vm_get_page_prot(unsigned long vm_flags) ...@@ -1552,7 +1552,7 @@ static inline pgprot_t vm_get_page_prot(unsigned long vm_flags)
#endif #endif
#ifdef CONFIG_ARCH_USES_NUMA_PROT_NONE #ifdef CONFIG_ARCH_USES_NUMA_PROT_NONE
void change_prot_numa(struct vm_area_struct *vma, unsigned long change_prot_numa(struct vm_area_struct *vma,
unsigned long start, unsigned long end); unsigned long start, unsigned long end);
#endif #endif
......
...@@ -1147,7 +1147,7 @@ int move_huge_pmd(struct vm_area_struct *vma, struct vm_area_struct *new_vma, ...@@ -1147,7 +1147,7 @@ int move_huge_pmd(struct vm_area_struct *vma, struct vm_area_struct *new_vma,
} }
int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd, int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
unsigned long addr, pgprot_t newprot) unsigned long addr, pgprot_t newprot, int prot_numa)
{ {
struct mm_struct *mm = vma->vm_mm; struct mm_struct *mm = vma->vm_mm;
int ret = 0; int ret = 0;
...@@ -1155,7 +1155,17 @@ int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd, ...@@ -1155,7 +1155,17 @@ int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
if (__pmd_trans_huge_lock(pmd, vma) == 1) { if (__pmd_trans_huge_lock(pmd, vma) == 1) {
pmd_t entry; pmd_t entry;
entry = pmdp_get_and_clear(mm, addr, pmd); entry = pmdp_get_and_clear(mm, addr, pmd);
entry = pmd_modify(entry, newprot); if (!prot_numa)
entry = pmd_modify(entry, newprot);
else {
struct page *page = pmd_page(*pmd);
/* only check non-shared pages */
if (page_mapcount(page) == 1 &&
!pmd_numa(*pmd)) {
entry = pmd_mknuma(entry);
}
}
set_pmd_at(mm, addr, pmd, entry); set_pmd_at(mm, addr, pmd, entry);
spin_unlock(&vma->vm_mm->page_table_lock); spin_unlock(&vma->vm_mm->page_table_lock);
ret = 1; ret = 1;
......
...@@ -568,134 +568,23 @@ static inline int check_pgd_range(struct vm_area_struct *vma, ...@@ -568,134 +568,23 @@ static inline int check_pgd_range(struct vm_area_struct *vma,
#ifdef CONFIG_ARCH_USES_NUMA_PROT_NONE #ifdef CONFIG_ARCH_USES_NUMA_PROT_NONE
/* /*
* Here we search for not shared page mappings (mapcount == 1) and we * This is used to mark a range of virtual addresses to be inaccessible.
* set up the pmd/pte_numa on those mappings so the very next access * These are later cleared by a NUMA hinting fault. Depending on these
* will fire a NUMA hinting page fault. * faults, pages may be migrated for better NUMA placement.
*
* This is assuming that NUMA faults are handled using PROT_NONE. If
* an architecture makes a different choice, it will need further
* changes to the core.
*/ */
static int unsigned long change_prot_numa(struct vm_area_struct *vma,
change_prot_numa_range(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long addr, unsigned long end)
unsigned long address)
{
pgd_t *pgd;
pud_t *pud;
pmd_t *pmd;
pte_t *pte, *_pte;
struct page *page;
unsigned long _address, end;
spinlock_t *ptl;
int ret = 0;
VM_BUG_ON(address & ~PAGE_MASK);
pgd = pgd_offset(mm, address);
if (!pgd_present(*pgd))
goto out;
pud = pud_offset(pgd, address);
if (!pud_present(*pud))
goto out;
pmd = pmd_offset(pud, address);
if (pmd_none(*pmd))
goto out;
if (pmd_trans_huge_lock(pmd, vma) == 1) {
int page_nid;
ret = HPAGE_PMD_NR;
VM_BUG_ON(address & ~HPAGE_PMD_MASK);
if (pmd_numa(*pmd)) {
spin_unlock(&mm->page_table_lock);
goto out;
}
page = pmd_page(*pmd);
/* only check non-shared pages */
if (page_mapcount(page) != 1) {
spin_unlock(&mm->page_table_lock);
goto out;
}
page_nid = page_to_nid(page);
if (pmd_numa(*pmd)) {
spin_unlock(&mm->page_table_lock);
goto out;
}
set_pmd_at(mm, address, pmd, pmd_mknuma(*pmd));
ret += HPAGE_PMD_NR;
/* defer TLB flush to lower the overhead */
spin_unlock(&mm->page_table_lock);
goto out;
}
if (pmd_trans_unstable(pmd))
goto out;
VM_BUG_ON(!pmd_present(*pmd));
end = min(vma->vm_end, (address + PMD_SIZE) & PMD_MASK);
pte = pte_offset_map_lock(mm, pmd, address, &ptl);
for (_address = address, _pte = pte; _address < end;
_pte++, _address += PAGE_SIZE) {
pte_t pteval = *_pte;
if (!pte_present(pteval))
continue;
if (pte_numa(pteval))
continue;
page = vm_normal_page(vma, _address, pteval);
if (unlikely(!page))
continue;
/* only check non-shared pages */
if (page_mapcount(page) != 1)
continue;
set_pte_at(mm, _address, _pte, pte_mknuma(pteval));
/* defer TLB flush to lower the overhead */
ret++;
}
pte_unmap_unlock(pte, ptl);
if (ret && !pmd_numa(*pmd)) {
spin_lock(&mm->page_table_lock);
set_pmd_at(mm, address, pmd, pmd_mknuma(*pmd));
spin_unlock(&mm->page_table_lock);
/* defer TLB flush to lower the overhead */
}
out:
return ret;
}
/* Assumes mmap_sem is held */
void
change_prot_numa(struct vm_area_struct *vma,
unsigned long address, unsigned long end)
{ {
struct mm_struct *mm = vma->vm_mm; int nr_updated;
int progress = 0; BUILD_BUG_ON(_PAGE_NUMA != _PAGE_PROTNONE);
while (address < end) {
VM_BUG_ON(address < vma->vm_start ||
address + PAGE_SIZE > vma->vm_end);
progress += change_prot_numa_range(mm, vma, address); nr_updated = change_protection(vma, addr, end, vma->vm_page_prot, 0, 1);
address = (address + PMD_SIZE) & PMD_MASK;
}
/* return nr_updated;
* Flush the TLB for the mm to start the NUMA hinting
* page faults after we finish scanning this vma part
* if there were any PTE updates
*/
if (progress) {
mmu_notifier_invalidate_range_start(vma->vm_mm, address, end);
flush_tlb_range(vma, address, end);
mmu_notifier_invalidate_range_end(vma->vm_mm, address, end);
}
} }
#else #else
static unsigned long change_prot_numa(struct vm_area_struct *vma, static unsigned long change_prot_numa(struct vm_area_struct *vma,
......
...@@ -35,10 +35,11 @@ static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot) ...@@ -35,10 +35,11 @@ static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot)
} }
#endif #endif
static unsigned long change_pte_range(struct mm_struct *mm, pmd_t *pmd, static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
unsigned long addr, unsigned long end, pgprot_t newprot, unsigned long addr, unsigned long end, pgprot_t newprot,
int dirty_accountable) int dirty_accountable, int prot_numa)
{ {
struct mm_struct *mm = vma->vm_mm;
pte_t *pte, oldpte; pte_t *pte, oldpte;
spinlock_t *ptl; spinlock_t *ptl;
unsigned long pages = 0; unsigned long pages = 0;
...@@ -49,19 +50,39 @@ static unsigned long change_pte_range(struct mm_struct *mm, pmd_t *pmd, ...@@ -49,19 +50,39 @@ static unsigned long change_pte_range(struct mm_struct *mm, pmd_t *pmd,
oldpte = *pte; oldpte = *pte;
if (pte_present(oldpte)) { if (pte_present(oldpte)) {
pte_t ptent; pte_t ptent;
bool updated = false;
ptent = ptep_modify_prot_start(mm, addr, pte); ptent = ptep_modify_prot_start(mm, addr, pte);
ptent = pte_modify(ptent, newprot); if (!prot_numa) {
ptent = pte_modify(ptent, newprot);
updated = true;
} else {
struct page *page;
page = vm_normal_page(vma, addr, oldpte);
if (page) {
/* only check non-shared pages */
if (!pte_numa(oldpte) &&
page_mapcount(page) == 1) {
ptent = pte_mknuma(ptent);
updated = true;
}
}
}
/* /*
* Avoid taking write faults for pages we know to be * Avoid taking write faults for pages we know to be
* dirty. * dirty.
*/ */
if (dirty_accountable && pte_dirty(ptent)) if (dirty_accountable && pte_dirty(ptent)) {
ptent = pte_mkwrite(ptent); ptent = pte_mkwrite(ptent);
updated = true;
}
if (updated)
pages++;
ptep_modify_prot_commit(mm, addr, pte, ptent); ptep_modify_prot_commit(mm, addr, pte, ptent);
pages++;
} else if (IS_ENABLED(CONFIG_MIGRATION) && !pte_file(oldpte)) { } else if (IS_ENABLED(CONFIG_MIGRATION) && !pte_file(oldpte)) {
swp_entry_t entry = pte_to_swp_entry(oldpte); swp_entry_t entry = pte_to_swp_entry(oldpte);
...@@ -83,9 +104,25 @@ static unsigned long change_pte_range(struct mm_struct *mm, pmd_t *pmd, ...@@ -83,9 +104,25 @@ static unsigned long change_pte_range(struct mm_struct *mm, pmd_t *pmd,
return pages; return pages;
} }
#ifdef CONFIG_NUMA_BALANCING
static inline void change_pmd_protnuma(struct mm_struct *mm, unsigned long addr,
pmd_t *pmd)
{
spin_lock(&mm->page_table_lock);
set_pmd_at(mm, addr & PMD_MASK, pmd, pmd_mknuma(*pmd));
spin_unlock(&mm->page_table_lock);
}
#else
static inline void change_pmd_protnuma(struct mm_struct *mm, unsigned long addr,
pmd_t *pmd)
{
BUG();
}
#endif /* CONFIG_NUMA_BALANCING */
static inline unsigned long change_pmd_range(struct vm_area_struct *vma, pud_t *pud, static inline unsigned long change_pmd_range(struct vm_area_struct *vma, pud_t *pud,
unsigned long addr, unsigned long end, pgprot_t newprot, unsigned long addr, unsigned long end, pgprot_t newprot,
int dirty_accountable) int dirty_accountable, int prot_numa)
{ {
pmd_t *pmd; pmd_t *pmd;
unsigned long next; unsigned long next;
...@@ -97,7 +134,7 @@ static inline unsigned long change_pmd_range(struct vm_area_struct *vma, pud_t * ...@@ -97,7 +134,7 @@ static inline unsigned long change_pmd_range(struct vm_area_struct *vma, pud_t *
if (pmd_trans_huge(*pmd)) { if (pmd_trans_huge(*pmd)) {
if (next - addr != HPAGE_PMD_SIZE) if (next - addr != HPAGE_PMD_SIZE)
split_huge_page_pmd(vma->vm_mm, pmd); split_huge_page_pmd(vma->vm_mm, pmd);
else if (change_huge_pmd(vma, pmd, addr, newprot)) { else if (change_huge_pmd(vma, pmd, addr, newprot, prot_numa)) {
pages += HPAGE_PMD_NR; pages += HPAGE_PMD_NR;
continue; continue;
} }
...@@ -105,8 +142,11 @@ static inline unsigned long change_pmd_range(struct vm_area_struct *vma, pud_t * ...@@ -105,8 +142,11 @@ static inline unsigned long change_pmd_range(struct vm_area_struct *vma, pud_t *
} }
if (pmd_none_or_clear_bad(pmd)) if (pmd_none_or_clear_bad(pmd))
continue; continue;
pages += change_pte_range(vma->vm_mm, pmd, addr, next, newprot, pages += change_pte_range(vma, pmd, addr, next, newprot,
dirty_accountable); dirty_accountable, prot_numa);
if (prot_numa)
change_pmd_protnuma(vma->vm_mm, addr, pmd);
} while (pmd++, addr = next, addr != end); } while (pmd++, addr = next, addr != end);
return pages; return pages;
...@@ -114,7 +154,7 @@ static inline unsigned long change_pmd_range(struct vm_area_struct *vma, pud_t * ...@@ -114,7 +154,7 @@ static inline unsigned long change_pmd_range(struct vm_area_struct *vma, pud_t *
static inline unsigned long change_pud_range(struct vm_area_struct *vma, pgd_t *pgd, static inline unsigned long change_pud_range(struct vm_area_struct *vma, pgd_t *pgd,
unsigned long addr, unsigned long end, pgprot_t newprot, unsigned long addr, unsigned long end, pgprot_t newprot,
int dirty_accountable) int dirty_accountable, int prot_numa)
{ {
pud_t *pud; pud_t *pud;
unsigned long next; unsigned long next;
...@@ -126,7 +166,7 @@ static inline unsigned long change_pud_range(struct vm_area_struct *vma, pgd_t * ...@@ -126,7 +166,7 @@ static inline unsigned long change_pud_range(struct vm_area_struct *vma, pgd_t *
if (pud_none_or_clear_bad(pud)) if (pud_none_or_clear_bad(pud))
continue; continue;
pages += change_pmd_range(vma, pud, addr, next, newprot, pages += change_pmd_range(vma, pud, addr, next, newprot,
dirty_accountable); dirty_accountable, prot_numa);
} while (pud++, addr = next, addr != end); } while (pud++, addr = next, addr != end);
return pages; return pages;
...@@ -134,7 +174,7 @@ static inline unsigned long change_pud_range(struct vm_area_struct *vma, pgd_t * ...@@ -134,7 +174,7 @@ static inline unsigned long change_pud_range(struct vm_area_struct *vma, pgd_t *
static unsigned long change_protection_range(struct vm_area_struct *vma, static unsigned long change_protection_range(struct vm_area_struct *vma,
unsigned long addr, unsigned long end, pgprot_t newprot, unsigned long addr, unsigned long end, pgprot_t newprot,
int dirty_accountable) int dirty_accountable, int prot_numa)
{ {
struct mm_struct *mm = vma->vm_mm; struct mm_struct *mm = vma->vm_mm;
pgd_t *pgd; pgd_t *pgd;
...@@ -150,7 +190,7 @@ static unsigned long change_protection_range(struct vm_area_struct *vma, ...@@ -150,7 +190,7 @@ static unsigned long change_protection_range(struct vm_area_struct *vma,
if (pgd_none_or_clear_bad(pgd)) if (pgd_none_or_clear_bad(pgd))
continue; continue;
pages += change_pud_range(vma, pgd, addr, next, newprot, pages += change_pud_range(vma, pgd, addr, next, newprot,
dirty_accountable); dirty_accountable, prot_numa);
} while (pgd++, addr = next, addr != end); } while (pgd++, addr = next, addr != end);
/* Only flush the TLB if we actually modified any entries: */ /* Only flush the TLB if we actually modified any entries: */
...@@ -162,7 +202,7 @@ static unsigned long change_protection_range(struct vm_area_struct *vma, ...@@ -162,7 +202,7 @@ static unsigned long change_protection_range(struct vm_area_struct *vma,
unsigned long change_protection(struct vm_area_struct *vma, unsigned long start, unsigned long change_protection(struct vm_area_struct *vma, unsigned long start,
unsigned long end, pgprot_t newprot, unsigned long end, pgprot_t newprot,
int dirty_accountable) int dirty_accountable, int prot_numa)
{ {
struct mm_struct *mm = vma->vm_mm; struct mm_struct *mm = vma->vm_mm;
unsigned long pages; unsigned long pages;
...@@ -171,7 +211,7 @@ unsigned long change_protection(struct vm_area_struct *vma, unsigned long start, ...@@ -171,7 +211,7 @@ unsigned long change_protection(struct vm_area_struct *vma, unsigned long start,
if (is_vm_hugetlb_page(vma)) if (is_vm_hugetlb_page(vma))
pages = hugetlb_change_protection(vma, start, end, newprot); pages = hugetlb_change_protection(vma, start, end, newprot);
else else
pages = change_protection_range(vma, start, end, newprot, dirty_accountable); pages = change_protection_range(vma, start, end, newprot, dirty_accountable, prot_numa);
mmu_notifier_invalidate_range_end(mm, start, end); mmu_notifier_invalidate_range_end(mm, start, end);
return pages; return pages;
...@@ -249,7 +289,7 @@ mprotect_fixup(struct vm_area_struct *vma, struct vm_area_struct **pprev, ...@@ -249,7 +289,7 @@ mprotect_fixup(struct vm_area_struct *vma, struct vm_area_struct **pprev,
dirty_accountable = 1; dirty_accountable = 1;
} }
change_protection(vma, start, end, vma->vm_page_prot, dirty_accountable); change_protection(vma, start, end, vma->vm_page_prot, dirty_accountable, 0);
vm_stat_account(mm, oldflags, vma->vm_file, -nrpages); vm_stat_account(mm, oldflags, vma->vm_file, -nrpages);
vm_stat_account(mm, newflags, vma->vm_file, nrpages); vm_stat_account(mm, newflags, vma->vm_file, nrpages);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment