Commit 6b251fc9 authored by Andrea Arcangeli's avatar Andrea Arcangeli Committed by Linus Torvalds

userfaultfd: call handle_userfault() for userfaultfd_missing() faults

This is where the page faults must be modified to call
handle_userfault() if userfaultfd_missing() is true (so if the
vma->vm_flags had VM_UFFD_MISSING set).

handle_userfault() then takes care of blocking the page fault and
delivering it to userland.

The fault flags must also be passed as parameter so the "read|write"
kind of fault can be passed to userland.
Signed-off-by: default avatarAndrea Arcangeli <aarcange@redhat.com>
Acked-by: default avatarPavel Emelyanov <xemul@parallels.com>
Cc: Sanidhya Kashyap <sanidhya.gatech@gmail.com>
Cc: zhang.zhanghailiang@huawei.com
Cc: "Kirill A. Shutemov" <kirill@shutemov.name>
Cc: Andres Lagar-Cavilla <andreslc@google.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Hugh Dickins <hughd@google.com>
Cc: Peter Feiner <pfeiner@google.com>
Cc: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: "Huangpeng (Peter)" <peter.huangpeng@huawei.com>
Signed-off-by: default avatarAndrew Morton <akpm@linux-foundation.org>
Signed-off-by: default avatarLinus Torvalds <torvalds@linux-foundation.org>
parent 16ba6f81
...@@ -23,6 +23,7 @@ ...@@ -23,6 +23,7 @@
#include <linux/pagemap.h> #include <linux/pagemap.h>
#include <linux/migrate.h> #include <linux/migrate.h>
#include <linux/hashtable.h> #include <linux/hashtable.h>
#include <linux/userfaultfd_k.h>
#include <asm/tlb.h> #include <asm/tlb.h>
#include <asm/pgalloc.h> #include <asm/pgalloc.h>
...@@ -717,7 +718,8 @@ static inline pmd_t mk_huge_pmd(struct page *page, pgprot_t prot) ...@@ -717,7 +718,8 @@ static inline pmd_t mk_huge_pmd(struct page *page, pgprot_t prot)
static int __do_huge_pmd_anonymous_page(struct mm_struct *mm, static int __do_huge_pmd_anonymous_page(struct mm_struct *mm,
struct vm_area_struct *vma, struct vm_area_struct *vma,
unsigned long haddr, pmd_t *pmd, unsigned long haddr, pmd_t *pmd,
struct page *page, gfp_t gfp) struct page *page, gfp_t gfp,
unsigned int flags)
{ {
struct mem_cgroup *memcg; struct mem_cgroup *memcg;
pgtable_t pgtable; pgtable_t pgtable;
...@@ -725,12 +727,16 @@ static int __do_huge_pmd_anonymous_page(struct mm_struct *mm, ...@@ -725,12 +727,16 @@ static int __do_huge_pmd_anonymous_page(struct mm_struct *mm,
VM_BUG_ON_PAGE(!PageCompound(page), page); VM_BUG_ON_PAGE(!PageCompound(page), page);
if (mem_cgroup_try_charge(page, mm, gfp, &memcg)) if (mem_cgroup_try_charge(page, mm, gfp, &memcg)) {
return VM_FAULT_OOM; put_page(page);
count_vm_event(THP_FAULT_FALLBACK);
return VM_FAULT_FALLBACK;
}
pgtable = pte_alloc_one(mm, haddr); pgtable = pte_alloc_one(mm, haddr);
if (unlikely(!pgtable)) { if (unlikely(!pgtable)) {
mem_cgroup_cancel_charge(page, memcg); mem_cgroup_cancel_charge(page, memcg);
put_page(page);
return VM_FAULT_OOM; return VM_FAULT_OOM;
} }
...@@ -750,6 +756,21 @@ static int __do_huge_pmd_anonymous_page(struct mm_struct *mm, ...@@ -750,6 +756,21 @@ static int __do_huge_pmd_anonymous_page(struct mm_struct *mm,
pte_free(mm, pgtable); pte_free(mm, pgtable);
} else { } else {
pmd_t entry; pmd_t entry;
/* Deliver the page fault to userland */
if (userfaultfd_missing(vma)) {
int ret;
spin_unlock(ptl);
mem_cgroup_cancel_charge(page, memcg);
put_page(page);
pte_free(mm, pgtable);
ret = handle_userfault(vma, haddr, flags,
VM_UFFD_MISSING);
VM_BUG_ON(ret & VM_FAULT_FALLBACK);
return ret;
}
entry = mk_huge_pmd(page, vma->vm_page_prot); entry = mk_huge_pmd(page, vma->vm_page_prot);
entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma); entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma);
page_add_new_anon_rmap(page, vma, haddr); page_add_new_anon_rmap(page, vma, haddr);
...@@ -760,6 +781,7 @@ static int __do_huge_pmd_anonymous_page(struct mm_struct *mm, ...@@ -760,6 +781,7 @@ static int __do_huge_pmd_anonymous_page(struct mm_struct *mm,
add_mm_counter(mm, MM_ANONPAGES, HPAGE_PMD_NR); add_mm_counter(mm, MM_ANONPAGES, HPAGE_PMD_NR);
atomic_long_inc(&mm->nr_ptes); atomic_long_inc(&mm->nr_ptes);
spin_unlock(ptl); spin_unlock(ptl);
count_vm_event(THP_FAULT_ALLOC);
} }
return 0; return 0;
...@@ -771,19 +793,16 @@ static inline gfp_t alloc_hugepage_gfpmask(int defrag, gfp_t extra_gfp) ...@@ -771,19 +793,16 @@ static inline gfp_t alloc_hugepage_gfpmask(int defrag, gfp_t extra_gfp)
} }
/* Caller must hold page table lock. */ /* Caller must hold page table lock. */
static bool set_huge_zero_page(pgtable_t pgtable, struct mm_struct *mm, static void set_huge_zero_page(pgtable_t pgtable, struct mm_struct *mm,
struct vm_area_struct *vma, unsigned long haddr, pmd_t *pmd, struct vm_area_struct *vma, unsigned long haddr, pmd_t *pmd,
struct page *zero_page) struct page *zero_page)
{ {
pmd_t entry; pmd_t entry;
if (!pmd_none(*pmd))
return false;
entry = mk_pmd(zero_page, vma->vm_page_prot); entry = mk_pmd(zero_page, vma->vm_page_prot);
entry = pmd_mkhuge(entry); entry = pmd_mkhuge(entry);
pgtable_trans_huge_deposit(mm, pmd, pgtable); pgtable_trans_huge_deposit(mm, pmd, pgtable);
set_pmd_at(mm, haddr, pmd, entry); set_pmd_at(mm, haddr, pmd, entry);
atomic_long_inc(&mm->nr_ptes); atomic_long_inc(&mm->nr_ptes);
return true;
} }
int do_huge_pmd_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma, int do_huge_pmd_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
...@@ -806,6 +825,7 @@ int do_huge_pmd_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma, ...@@ -806,6 +825,7 @@ int do_huge_pmd_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
pgtable_t pgtable; pgtable_t pgtable;
struct page *zero_page; struct page *zero_page;
bool set; bool set;
int ret;
pgtable = pte_alloc_one(mm, haddr); pgtable = pte_alloc_one(mm, haddr);
if (unlikely(!pgtable)) if (unlikely(!pgtable))
return VM_FAULT_OOM; return VM_FAULT_OOM;
...@@ -816,14 +836,28 @@ int do_huge_pmd_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma, ...@@ -816,14 +836,28 @@ int do_huge_pmd_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
return VM_FAULT_FALLBACK; return VM_FAULT_FALLBACK;
} }
ptl = pmd_lock(mm, pmd); ptl = pmd_lock(mm, pmd);
set = set_huge_zero_page(pgtable, mm, vma, haddr, pmd, ret = 0;
zero_page); set = false;
spin_unlock(ptl); if (pmd_none(*pmd)) {
if (userfaultfd_missing(vma)) {
spin_unlock(ptl);
ret = handle_userfault(vma, haddr, flags,
VM_UFFD_MISSING);
VM_BUG_ON(ret & VM_FAULT_FALLBACK);
} else {
set_huge_zero_page(pgtable, mm, vma,
haddr, pmd,
zero_page);
spin_unlock(ptl);
set = true;
}
} else
spin_unlock(ptl);
if (!set) { if (!set) {
pte_free(mm, pgtable); pte_free(mm, pgtable);
put_huge_zero_page(); put_huge_zero_page();
} }
return 0; return ret;
} }
gfp = alloc_hugepage_gfpmask(transparent_hugepage_defrag(vma), 0); gfp = alloc_hugepage_gfpmask(transparent_hugepage_defrag(vma), 0);
page = alloc_hugepage_vma(gfp, vma, haddr, HPAGE_PMD_ORDER); page = alloc_hugepage_vma(gfp, vma, haddr, HPAGE_PMD_ORDER);
...@@ -831,14 +865,7 @@ int do_huge_pmd_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma, ...@@ -831,14 +865,7 @@ int do_huge_pmd_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
count_vm_event(THP_FAULT_FALLBACK); count_vm_event(THP_FAULT_FALLBACK);
return VM_FAULT_FALLBACK; return VM_FAULT_FALLBACK;
} }
if (unlikely(__do_huge_pmd_anonymous_page(mm, vma, haddr, pmd, page, gfp))) { return __do_huge_pmd_anonymous_page(mm, vma, haddr, pmd, page, gfp, flags);
put_page(page);
count_vm_event(THP_FAULT_FALLBACK);
return VM_FAULT_FALLBACK;
}
count_vm_event(THP_FAULT_ALLOC);
return 0;
} }
int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm, int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm,
...@@ -873,16 +900,14 @@ int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm, ...@@ -873,16 +900,14 @@ int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm,
*/ */
if (is_huge_zero_pmd(pmd)) { if (is_huge_zero_pmd(pmd)) {
struct page *zero_page; struct page *zero_page;
bool set;
/* /*
* get_huge_zero_page() will never allocate a new page here, * get_huge_zero_page() will never allocate a new page here,
* since we already have a zero page to copy. It just takes a * since we already have a zero page to copy. It just takes a
* reference. * reference.
*/ */
zero_page = get_huge_zero_page(); zero_page = get_huge_zero_page();
set = set_huge_zero_page(pgtable, dst_mm, vma, addr, dst_pmd, set_huge_zero_page(pgtable, dst_mm, vma, addr, dst_pmd,
zero_page); zero_page);
BUG_ON(!set); /* unexpected !pmd_none(dst_pmd) */
ret = 0; ret = 0;
goto out_unlock; goto out_unlock;
} }
......
...@@ -61,6 +61,7 @@ ...@@ -61,6 +61,7 @@
#include <linux/string.h> #include <linux/string.h>
#include <linux/dma-debug.h> #include <linux/dma-debug.h>
#include <linux/debugfs.h> #include <linux/debugfs.h>
#include <linux/userfaultfd_k.h>
#include <asm/io.h> #include <asm/io.h>
#include <asm/pgalloc.h> #include <asm/pgalloc.h>
...@@ -2685,6 +2686,12 @@ static int do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma, ...@@ -2685,6 +2686,12 @@ static int do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
page_table = pte_offset_map_lock(mm, pmd, address, &ptl); page_table = pte_offset_map_lock(mm, pmd, address, &ptl);
if (!pte_none(*page_table)) if (!pte_none(*page_table))
goto unlock; goto unlock;
/* Deliver the page fault to userland, check inside PT lock */
if (userfaultfd_missing(vma)) {
pte_unmap_unlock(page_table, ptl);
return handle_userfault(vma, address, flags,
VM_UFFD_MISSING);
}
goto setpte; goto setpte;
} }
...@@ -2713,6 +2720,15 @@ static int do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma, ...@@ -2713,6 +2720,15 @@ static int do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
if (!pte_none(*page_table)) if (!pte_none(*page_table))
goto release; goto release;
/* Deliver the page fault to userland, check inside PT lock */
if (userfaultfd_missing(vma)) {
pte_unmap_unlock(page_table, ptl);
mem_cgroup_cancel_charge(page, memcg);
page_cache_release(page);
return handle_userfault(vma, address, flags,
VM_UFFD_MISSING);
}
inc_mm_counter_fast(mm, MM_ANONPAGES); inc_mm_counter_fast(mm, MM_ANONPAGES);
page_add_new_anon_rmap(page, vma, address); page_add_new_anon_rmap(page, vma, address);
mem_cgroup_commit_charge(page, memcg, false); mem_cgroup_commit_charge(page, memcg, false);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment