Commit b0b9b3df authored by Hugh Dickins, committed by Linus Torvalds

mm: stop leaking PageTables

4.10-rc loadtest (even on x86, and even without THPCache) fails with
"fork: Cannot allocate memory" or some such; and /proc/meminfo shows
PageTables growing.

Commit 953c66c2 ("mm: THP page cache support for ppc64") that got
merged in rc1 removed the freeing of an unused preallocated pagetable
after do_fault_around() has called map_pages().

This is usually a good optimization, so that the followup doesn't have
to reallocate one; but it's not sufficient to shift the freeing into
alloc_set_pte(), since there are failure cases (most commonly
VM_FAULT_RETRY) which never reach finish_fault().
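
To see where the leak comes from, here is a condensed sketch of the
4.10-rc read-fault path (simplified for illustration, not the literal
code; guards and error handling trimmed):

	static int do_read_fault(struct vm_fault *vmf)
	{
		int ret = 0;

		if (vmf->vma->vm_ops->map_pages) {
			/*
			 * do_fault_around() preallocates a pagetable if
			 * pmd_none(), stores it in vmf->prealloc_pte,
			 * then calls ->map_pages().
			 */
			ret = do_fault_around(vmf);
			if (ret)
				/*
				 * e.g. VM_FAULT_RETRY: we return without
				 * ever reaching finish_fault(), so nothing
				 * frees vmf->prealloc_pte here.
				 */
				return ret;
		}
		...
	}

With the freeing deferred into alloc_set_pte(), every such early return
leaves the preallocated pagetable unfreed, and PageTables climbs.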

Check and free it at the outer level in do_fault(), then we don't need
to worry in alloc_set_pte(), and can restore that to how it was (I
cannot find any reason to pte_free() under lock as it was doing).

And fix a separate pagetable leak, or crash, introduced by the same
change, that could only show up on some ppc64: why does do_set_pmd()'s
failure case attempt to withdraw a pagetable when it never deposited
one, at the same time overwriting (so leaking) the vmf->prealloc_pte?
Residue of an earlier implementation, perhaps? Delete it.
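
For reference, deposit and withdraw are meant to be used as a matched
pair under the pmd lock, roughly (illustrative only; see the arch
implementations, e.g. ppc64, for the exact semantics):

	/* stash a preallocated pagetable under this pmd */
	pgtable_trans_huge_deposit(mm, pmd, pgtable);
	...
	/* later, retrieve only what was deposited under that same pmd */
	pgtable = pgtable_trans_huge_withdraw(mm, pmd);

Withdrawing without a prior deposit pulls back a pagetable that was
never stashed there, and assigning the result to vmf->prealloc_pte
overwrites (so leaks) the one that really was preallocated.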

Fixes: 953c66c2 ("mm: THP page cache support for ppc64")
Cc: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Cc: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Michael Neuling <mikey@neuling.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Balbir Singh <bsingharora@gmail.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Hugh Dickins <hughd@google.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent 87bc6107
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3008,13 +3008,6 @@ static int do_set_pmd(struct vm_fault *vmf, struct page *page)
 	ret = 0;
 	count_vm_event(THP_FILE_MAPPED);
 out:
-	/*
-	 * If we are going to fallback to pte mapping, do a
-	 * withdraw with pmd lock held.
-	 */
-	if (arch_needs_pgtable_deposit() && ret == VM_FAULT_FALLBACK)
-		vmf->prealloc_pte = pgtable_trans_huge_withdraw(vma->vm_mm,
-							       vmf->pmd);
 	spin_unlock(vmf->ptl);
 	return ret;
 }
@@ -3055,20 +3048,18 @@ int alloc_set_pte(struct vm_fault *vmf, struct mem_cgroup *memcg,
 		ret = do_set_pmd(vmf, page);
 		if (ret != VM_FAULT_FALLBACK)
-			goto fault_handled;
+			return ret;
 	}
 
 	if (!vmf->pte) {
 		ret = pte_alloc_one_map(vmf);
 		if (ret)
-			goto fault_handled;
+			return ret;
 	}
 
 	/* Re-check under ptl */
-	if (unlikely(!pte_none(*vmf->pte))) {
-		ret = VM_FAULT_NOPAGE;
-		goto fault_handled;
-	}
+	if (unlikely(!pte_none(*vmf->pte)))
+		return VM_FAULT_NOPAGE;
 
 	flush_icache_page(vma, page);
 	entry = mk_pte(page, vma->vm_page_prot);
@@ -3088,15 +3079,8 @@ int alloc_set_pte(struct vm_fault *vmf, struct mem_cgroup *memcg,
 	/* no need to invalidate: a not-present page won't be cached */
 	update_mmu_cache(vma, vmf->address, vmf->pte);
-	ret = 0;
 
-fault_handled:
-	/* preallocated pagetable is unused: free it */
-	if (vmf->prealloc_pte) {
-		pte_free(vmf->vma->vm_mm, vmf->prealloc_pte);
-		vmf->prealloc_pte = 0;
-	}
-	return ret;
+	return 0;
 }
@@ -3360,15 +3344,24 @@ static int do_shared_fault(struct vm_fault *vmf)
 static int do_fault(struct vm_fault *vmf)
 {
 	struct vm_area_struct *vma = vmf->vma;
+	int ret;
 
 	/* The VMA was not fully populated on mmap() or missing VM_DONTEXPAND */
 	if (!vma->vm_ops->fault)
-		return VM_FAULT_SIGBUS;
-	if (!(vmf->flags & FAULT_FLAG_WRITE))
-		return do_read_fault(vmf);
-	if (!(vma->vm_flags & VM_SHARED))
-		return do_cow_fault(vmf);
-	return do_shared_fault(vmf);
+		ret = VM_FAULT_SIGBUS;
+	else if (!(vmf->flags & FAULT_FLAG_WRITE))
+		ret = do_read_fault(vmf);
+	else if (!(vma->vm_flags & VM_SHARED))
+		ret = do_cow_fault(vmf);
+	else
+		ret = do_shared_fault(vmf);
+
+	/* preallocated pagetable is unused: free it */
+	if (vmf->prealloc_pte) {
+		pte_free(vma->vm_mm, vmf->prealloc_pte);
+		vmf->prealloc_pte = 0;
+	}
+	return ret;
 }
 
 static int numa_migrate_prep(struct page *page, struct vm_area_struct *vma,