Commit 5338a937 authored by Hugh Dickins, committed by Linus Torvalds

mm: thp: fix DEBUG_PAGEALLOC oops in copy_page_rep()

Trinity has for over a year been reporting a CONFIG_DEBUG_PAGEALLOC oops
in copy_page_rep() called from copy_user_huge_page() called from
do_huge_pmd_wp_page().

I believe this is a DEBUG_PAGEALLOC false positive, due to the source
page being split, and a tail page freed, while copy is in progress; and
not a problem without DEBUG_PAGEALLOC, since the pmd_same() check will
prevent a miscopy from being made visible.
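
To see why the pmd_same() recheck makes the race harmless without
DEBUG_PAGEALLOC, here is a minimal single-threaded user-space model
(all names are illustrative, not kernel API): the copy may read stale
subpage contents, but a generation recheck, standing in for
pmd_same() under the pmd lock, discards the result, and the fault is
simply retried.

	#include <stdbool.h>
	#include <stdio.h>
	#include <string.h>

	#define NPAGES 4		/* stand-in for HPAGE_PMD_NR */
	#define PAGESZ 16		/* tiny "pages" for the model */

	static char src[NPAGES][PAGESZ];
	static char dst[NPAGES][PAGESZ];
	static int generation;		/* bumped when the "pmd" changes */

	static bool copy_huge(int gen_seen)
	{
		/* copy done outside the lock, may observe a racing split */
		memcpy(dst, src, sizeof(src));
		/* recheck under the "lock", like pmd_same(*pmd, orig_pmd) */
		if (gen_seen != generation)
			return false;	/* raced: discard copy, retry fault */
		return true;		/* copy valid: install it */
	}

	int main(void)
	{
		memset(src, 'a', sizeof(src));
		int gen = generation;
		generation++;		/* simulate a concurrent split */
		printf("copy %s\n", copy_huge(gen) ? "installed" : "discarded");
		return 0;
	}

The model prints "copy discarded": a miscopy can happen, but it is
never made visible. The oops only appears because DEBUG_PAGEALLOC
unmaps freed pages, so merely reading the freed tail faults before
the recheck can throw the copy away.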

Fix by adding get_user_huge_page() and put_user_huge_page(): reducing to
the usual get_page() and put_page() on head page in the usual config;
but get and put references to all of the tail pages when
DEBUG_PAGEALLOC.
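
A condensed user-space sketch of the idea behind the helpers
(illustrative names only; the real get_user_huge_page() in the diff
below does one atomic_add(HPAGE_PMD_NR, &page->_count) on the head
plus get_huge_page_tail() on each tail): with every subpage pinned, a
racing split cannot drop a tail's last reference, so DEBUG_PAGEALLOC
never unmaps it mid-copy.

	#include <stdatomic.h>
	#include <stdbool.h>
	#include <stdio.h>

	#define NPAGES 4		/* stand-in for HPAGE_PMD_NR */

	static atomic_int refcount[NPAGES];	/* per-subpage _count, model only */

	static void get_user_huge_page_model(void)
	{
		for (int i = 0; i < NPAGES; i++)
			atomic_fetch_add(&refcount[i], 1);
	}

	static void put_user_huge_page_model(void)
	{
		for (int i = 0; i < NPAGES; i++)
			atomic_fetch_sub(&refcount[i], 1);
	}

	/* what a racing split-then-free would need: a zero refcount */
	static bool can_free_page(int i)
	{
		return atomic_load(&refcount[i]) == 0;
	}

	int main(void)
	{
		get_user_huge_page_model();
		printf("tail 1 freeable during copy: %s\n",
		       can_free_page(1) ? "yes" : "no");	/* no */
		put_user_huge_page_model();
		printf("tail 1 freeable after copy: %s\n",
		       can_free_page(1) ? "yes" : "no");	/* yes */
		return 0;
	}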

[akpm@linux-foundation.org: coding-style fixes]
Signed-off-by: Hugh Dickins <hughd@google.com>
Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Reported-by: Sasha Levin <sasha.levin@oracle.com>
Tested-by: Sasha Levin <sasha.levin@oracle.com>
Cc: Dave Jones <davej@redhat.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: David Rientjes <rientjes@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent ed235875
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -941,6 +941,37 @@ void huge_pmd_set_accessed(struct mm_struct *mm,
 	spin_unlock(ptl);
 }
 
+/*
+ * Save CONFIG_DEBUG_PAGEALLOC from faulting falsely on tail pages
+ * during copy_user_huge_page()'s copy_page_rep(): in the case when
+ * the source page gets split and a tail freed before copy completes.
+ * Called under pmd_lock of checked pmd, so safe from splitting itself.
+ */
+static void get_user_huge_page(struct page *page)
+{
+	if (IS_ENABLED(CONFIG_DEBUG_PAGEALLOC)) {
+		struct page *endpage = page + HPAGE_PMD_NR;
+
+		atomic_add(HPAGE_PMD_NR, &page->_count);
+		while (++page < endpage)
+			get_huge_page_tail(page);
+	} else {
+		get_page(page);
+	}
+}
+
+static void put_user_huge_page(struct page *page)
+{
+	if (IS_ENABLED(CONFIG_DEBUG_PAGEALLOC)) {
+		struct page *endpage = page + HPAGE_PMD_NR;
+
+		while (page < endpage)
+			put_page(page++);
+	} else {
+		put_page(page);
+	}
+}
+
 static int do_huge_pmd_wp_page_fallback(struct mm_struct *mm,
 					struct vm_area_struct *vma,
 					unsigned long address,
@@ -1074,7 +1105,7 @@ int do_huge_pmd_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
 		ret |= VM_FAULT_WRITE;
 		goto out_unlock;
 	}
-	get_page(page);
+	get_user_huge_page(page);
 	spin_unlock(ptl);
 alloc:
 	if (transparent_hugepage_enabled(vma) &&
@@ -1095,7 +1126,7 @@ int do_huge_pmd_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
 			split_huge_page(page);
 			ret |= VM_FAULT_FALLBACK;
 		}
-		put_page(page);
+		put_user_huge_page(page);
 	}
 	count_vm_event(THP_FAULT_FALLBACK);
 	goto out;
@@ -1105,7 +1136,7 @@ int do_huge_pmd_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
 		put_page(new_page);
 		if (page) {
 			split_huge_page(page);
-			put_page(page);
+			put_user_huge_page(page);
 		} else
 			split_huge_page_pmd(vma, address, pmd);
 		ret |= VM_FAULT_FALLBACK;
@@ -1127,7 +1158,7 @@ int do_huge_pmd_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
 
 	spin_lock(ptl);
 	if (page)
-		put_page(page);
+		put_user_huge_page(page);
 	if (unlikely(!pmd_same(*pmd, orig_pmd))) {
 		spin_unlock(ptl);
 		mem_cgroup_uncharge_page(new_page);
...