Commit aac2fea9 authored by Kirill A. Shutemov, committed by Linus Torvalds

rmap: do not call mmu_notifier_invalidate_page() under ptl

MMU notifiers can sleep, but in page_mkclean_one() we call
mmu_notifier_invalidate_page() under page table lock.

Let's instead use mmu_notifier_invalidate_range() outside
page_vma_mapped_walk() loop.

[jglisse@redhat.com: try_to_unmap_one() do not call mmu_notifier under ptl]
  Link: http://lkml.kernel.org/r/20170809204333.27485-1-jglisse@redhat.com
Link: http://lkml.kernel.org/r/20170804134928.l4klfcnqatni7vsc@black.fi.intel.com
Fixes: c7ab0d2f ("mm: convert try_to_unmap_one() to use page_vma_mapped_walk()")
Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Signed-off-by: Jérôme Glisse <jglisse@redhat.com>
Reported-by: axie <axie@amd.com>
Cc: Alex Deucher <alexander.deucher@amd.com>
Cc: "Writer, Tim" <Tim.Writer@amd.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent d041353d
...@@ -888,10 +888,10 @@ static bool page_mkclean_one(struct page *page, struct vm_area_struct *vma, ...@@ -888,10 +888,10 @@ static bool page_mkclean_one(struct page *page, struct vm_area_struct *vma,
.flags = PVMW_SYNC, .flags = PVMW_SYNC,
}; };
int *cleaned = arg; int *cleaned = arg;
bool invalidation_needed = false;
while (page_vma_mapped_walk(&pvmw)) { while (page_vma_mapped_walk(&pvmw)) {
int ret = 0; int ret = 0;
address = pvmw.address;
if (pvmw.pte) { if (pvmw.pte) {
pte_t entry; pte_t entry;
pte_t *pte = pvmw.pte; pte_t *pte = pvmw.pte;
...@@ -899,11 +899,11 @@ static bool page_mkclean_one(struct page *page, struct vm_area_struct *vma, ...@@ -899,11 +899,11 @@ static bool page_mkclean_one(struct page *page, struct vm_area_struct *vma,
if (!pte_dirty(*pte) && !pte_write(*pte)) if (!pte_dirty(*pte) && !pte_write(*pte))
continue; continue;
flush_cache_page(vma, address, pte_pfn(*pte)); flush_cache_page(vma, pvmw.address, pte_pfn(*pte));
entry = ptep_clear_flush(vma, address, pte); entry = ptep_clear_flush(vma, pvmw.address, pte);
entry = pte_wrprotect(entry); entry = pte_wrprotect(entry);
entry = pte_mkclean(entry); entry = pte_mkclean(entry);
set_pte_at(vma->vm_mm, address, pte, entry); set_pte_at(vma->vm_mm, pvmw.address, pte, entry);
ret = 1; ret = 1;
} else { } else {
#ifdef CONFIG_TRANSPARENT_HUGE_PAGECACHE #ifdef CONFIG_TRANSPARENT_HUGE_PAGECACHE
...@@ -913,11 +913,11 @@ static bool page_mkclean_one(struct page *page, struct vm_area_struct *vma, ...@@ -913,11 +913,11 @@ static bool page_mkclean_one(struct page *page, struct vm_area_struct *vma,
if (!pmd_dirty(*pmd) && !pmd_write(*pmd)) if (!pmd_dirty(*pmd) && !pmd_write(*pmd))
continue; continue;
flush_cache_page(vma, address, page_to_pfn(page)); flush_cache_page(vma, pvmw.address, page_to_pfn(page));
entry = pmdp_huge_clear_flush(vma, address, pmd); entry = pmdp_huge_clear_flush(vma, pvmw.address, pmd);
entry = pmd_wrprotect(entry); entry = pmd_wrprotect(entry);
entry = pmd_mkclean(entry); entry = pmd_mkclean(entry);
set_pmd_at(vma->vm_mm, address, pmd, entry); set_pmd_at(vma->vm_mm, pvmw.address, pmd, entry);
ret = 1; ret = 1;
#else #else
/* unexpected pmd-mapped page? */ /* unexpected pmd-mapped page? */
...@@ -926,9 +926,14 @@ static bool page_mkclean_one(struct page *page, struct vm_area_struct *vma, ...@@ -926,9 +926,14 @@ static bool page_mkclean_one(struct page *page, struct vm_area_struct *vma,
} }
if (ret) { if (ret) {
mmu_notifier_invalidate_page(vma->vm_mm, address);
(*cleaned)++; (*cleaned)++;
invalidation_needed = true;
}
} }
if (invalidation_needed) {
mmu_notifier_invalidate_range(vma->vm_mm, address,
address + (1UL << compound_order(page)));
} }
return true; return true;
...@@ -1323,7 +1328,7 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma, ...@@ -1323,7 +1328,7 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
}; };
pte_t pteval; pte_t pteval;
struct page *subpage; struct page *subpage;
bool ret = true; bool ret = true, invalidation_needed = false;
enum ttu_flags flags = (enum ttu_flags)arg; enum ttu_flags flags = (enum ttu_flags)arg;
/* munlock has nothing to gain from examining un-locked vmas */ /* munlock has nothing to gain from examining un-locked vmas */
...@@ -1363,11 +1368,9 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma, ...@@ -1363,11 +1368,9 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
VM_BUG_ON_PAGE(!pvmw.pte, page); VM_BUG_ON_PAGE(!pvmw.pte, page);
subpage = page - page_to_pfn(page) + pte_pfn(*pvmw.pte); subpage = page - page_to_pfn(page) + pte_pfn(*pvmw.pte);
address = pvmw.address;
if (!(flags & TTU_IGNORE_ACCESS)) { if (!(flags & TTU_IGNORE_ACCESS)) {
if (ptep_clear_flush_young_notify(vma, address, if (ptep_clear_flush_young_notify(vma, pvmw.address,
pvmw.pte)) { pvmw.pte)) {
ret = false; ret = false;
page_vma_mapped_walk_done(&pvmw); page_vma_mapped_walk_done(&pvmw);
...@@ -1376,7 +1379,7 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma, ...@@ -1376,7 +1379,7 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
} }
/* Nuke the page table entry. */ /* Nuke the page table entry. */
flush_cache_page(vma, address, pte_pfn(*pvmw.pte)); flush_cache_page(vma, pvmw.address, pte_pfn(*pvmw.pte));
if (should_defer_flush(mm, flags)) { if (should_defer_flush(mm, flags)) {
/* /*
* We clear the PTE but do not flush so potentially * We clear the PTE but do not flush so potentially
...@@ -1386,11 +1389,12 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma, ...@@ -1386,11 +1389,12 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
* transition on a cached TLB entry is written through * transition on a cached TLB entry is written through
* and traps if the PTE is unmapped. * and traps if the PTE is unmapped.
*/ */
pteval = ptep_get_and_clear(mm, address, pvmw.pte); pteval = ptep_get_and_clear(mm, pvmw.address,
pvmw.pte);
set_tlb_ubc_flush_pending(mm, pte_dirty(pteval)); set_tlb_ubc_flush_pending(mm, pte_dirty(pteval));
} else { } else {
pteval = ptep_clear_flush(vma, address, pvmw.pte); pteval = ptep_clear_flush(vma, pvmw.address, pvmw.pte);
} }
/* Move the dirty bit to the page. Now the pte is gone. */ /* Move the dirty bit to the page. Now the pte is gone. */
...@@ -1405,12 +1409,12 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma, ...@@ -1405,12 +1409,12 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
if (PageHuge(page)) { if (PageHuge(page)) {
int nr = 1 << compound_order(page); int nr = 1 << compound_order(page);
hugetlb_count_sub(nr, mm); hugetlb_count_sub(nr, mm);
set_huge_swap_pte_at(mm, address, set_huge_swap_pte_at(mm, pvmw.address,
pvmw.pte, pteval, pvmw.pte, pteval,
vma_mmu_pagesize(vma)); vma_mmu_pagesize(vma));
} else { } else {
dec_mm_counter(mm, mm_counter(page)); dec_mm_counter(mm, mm_counter(page));
set_pte_at(mm, address, pvmw.pte, pteval); set_pte_at(mm, pvmw.address, pvmw.pte, pteval);
} }
} else if (pte_unused(pteval)) { } else if (pte_unused(pteval)) {
...@@ -1434,7 +1438,7 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma, ...@@ -1434,7 +1438,7 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
swp_pte = swp_entry_to_pte(entry); swp_pte = swp_entry_to_pte(entry);
if (pte_soft_dirty(pteval)) if (pte_soft_dirty(pteval))
swp_pte = pte_swp_mksoft_dirty(swp_pte); swp_pte = pte_swp_mksoft_dirty(swp_pte);
set_pte_at(mm, address, pvmw.pte, swp_pte); set_pte_at(mm, pvmw.address, pvmw.pte, swp_pte);
} else if (PageAnon(page)) { } else if (PageAnon(page)) {
swp_entry_t entry = { .val = page_private(subpage) }; swp_entry_t entry = { .val = page_private(subpage) };
pte_t swp_pte; pte_t swp_pte;
...@@ -1460,7 +1464,7 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma, ...@@ -1460,7 +1464,7 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
* If the page was redirtied, it cannot be * If the page was redirtied, it cannot be
* discarded. Remap the page to page table. * discarded. Remap the page to page table.
*/ */
set_pte_at(mm, address, pvmw.pte, pteval); set_pte_at(mm, pvmw.address, pvmw.pte, pteval);
SetPageSwapBacked(page); SetPageSwapBacked(page);
ret = false; ret = false;
page_vma_mapped_walk_done(&pvmw); page_vma_mapped_walk_done(&pvmw);
...@@ -1468,7 +1472,7 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma, ...@@ -1468,7 +1472,7 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
} }
if (swap_duplicate(entry) < 0) { if (swap_duplicate(entry) < 0) {
set_pte_at(mm, address, pvmw.pte, pteval); set_pte_at(mm, pvmw.address, pvmw.pte, pteval);
ret = false; ret = false;
page_vma_mapped_walk_done(&pvmw); page_vma_mapped_walk_done(&pvmw);
break; break;
...@@ -1484,14 +1488,18 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma, ...@@ -1484,14 +1488,18 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
swp_pte = swp_entry_to_pte(entry); swp_pte = swp_entry_to_pte(entry);
if (pte_soft_dirty(pteval)) if (pte_soft_dirty(pteval))
swp_pte = pte_swp_mksoft_dirty(swp_pte); swp_pte = pte_swp_mksoft_dirty(swp_pte);
set_pte_at(mm, address, pvmw.pte, swp_pte); set_pte_at(mm, pvmw.address, pvmw.pte, swp_pte);
} else } else
dec_mm_counter(mm, mm_counter_file(page)); dec_mm_counter(mm, mm_counter_file(page));
discard: discard:
page_remove_rmap(subpage, PageHuge(page)); page_remove_rmap(subpage, PageHuge(page));
put_page(page); put_page(page);
mmu_notifier_invalidate_page(mm, address); invalidation_needed = true;
} }
if (invalidation_needed)
mmu_notifier_invalidate_range(mm, address,
address + (1UL << compound_order(page)));
return ret; return ret;
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment