Commit 6041c691 authored by Peter Xu's avatar Peter Xu Committed by Andrew Morton

mm/hugetlb: take care of UFFDIO_COPY_MODE_WP

Pass the wp_copy variable into hugetlb_mcopy_atomic_pte() thoughout the
stack.  Apply the UFFD_WP bit if UFFDIO_COPY_MODE_WP is with UFFDIO_COPY.

Hugetlb pages are only managed by hugetlbfs, so we're safe even without
setting dirty bit in the huge pte if the page is installed as read-only. 
However we'd better still keep the dirty bit set for a read-only
UFFDIO_COPY pte (when UFFDIO_COPY_MODE_WP bit is set), not only to match
what we do with shmem, but also because the page does contain dirty data
that the kernel just copied from the userspace.

Link: https://lkml.kernel.org/r/20220405014904.14643-1-peterx@redhat.comSigned-off-by: default avatarPeter Xu <peterx@redhat.com>
Cc: Alistair Popple <apopple@nvidia.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Axel Rasmussen <axelrasmussen@google.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Jerome Glisse <jglisse@redhat.com>
Cc: "Kirill A . Shutemov" <kirill@shutemov.name>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Mike Kravetz <mike.kravetz@oracle.com>
Cc: Mike Rapoport <rppt@linux.vnet.ibm.com>
Cc: Nadav Amit <nadav.amit@gmail.com>
Signed-off-by: default avatarAndrew Morton <akpm@linux-foundation.org>
parent 166f3ecc
...@@ -160,7 +160,8 @@ int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm, pte_t *dst_pte, ...@@ -160,7 +160,8 @@ int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm, pte_t *dst_pte,
unsigned long dst_addr, unsigned long dst_addr,
unsigned long src_addr, unsigned long src_addr,
enum mcopy_atomic_mode mode, enum mcopy_atomic_mode mode,
struct page **pagep); struct page **pagep,
bool wp_copy);
#endif /* CONFIG_USERFAULTFD */ #endif /* CONFIG_USERFAULTFD */
bool hugetlb_reserve_pages(struct inode *inode, long from, long to, bool hugetlb_reserve_pages(struct inode *inode, long from, long to,
struct vm_area_struct *vma, struct vm_area_struct *vma,
...@@ -356,7 +357,8 @@ static inline int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm, ...@@ -356,7 +357,8 @@ static inline int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm,
unsigned long dst_addr, unsigned long dst_addr,
unsigned long src_addr, unsigned long src_addr,
enum mcopy_atomic_mode mode, enum mcopy_atomic_mode mode,
struct page **pagep) struct page **pagep,
bool wp_copy)
{ {
BUG(); BUG();
return 0; return 0;
......
...@@ -5821,7 +5821,8 @@ int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm, ...@@ -5821,7 +5821,8 @@ int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm,
unsigned long dst_addr, unsigned long dst_addr,
unsigned long src_addr, unsigned long src_addr,
enum mcopy_atomic_mode mode, enum mcopy_atomic_mode mode,
struct page **pagep) struct page **pagep,
bool wp_copy)
{ {
bool is_continue = (mode == MCOPY_ATOMIC_CONTINUE); bool is_continue = (mode == MCOPY_ATOMIC_CONTINUE);
struct hstate *h = hstate_vma(dst_vma); struct hstate *h = hstate_vma(dst_vma);
...@@ -5951,7 +5952,12 @@ int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm, ...@@ -5951,7 +5952,12 @@ int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm,
goto out_release_unlock; goto out_release_unlock;
ret = -EEXIST; ret = -EEXIST;
if (!huge_pte_none(huge_ptep_get(dst_pte))) /*
* We allow to overwrite a pte marker: consider when both MISSING|WP
* registered, we firstly wr-protect a none pte which has no page cache
* page backing it, then access the page.
*/
if (!huge_pte_none_mostly(huge_ptep_get(dst_pte)))
goto out_release_unlock; goto out_release_unlock;
if (vm_shared) { if (vm_shared) {
...@@ -5961,17 +5967,28 @@ int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm, ...@@ -5961,17 +5967,28 @@ int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm,
hugepage_add_new_anon_rmap(page, dst_vma, dst_addr); hugepage_add_new_anon_rmap(page, dst_vma, dst_addr);
} }
/* For CONTINUE on a non-shared VMA, don't set VM_WRITE for CoW. */ /*
if (is_continue && !vm_shared) * For either: (1) CONTINUE on a non-shared VMA, or (2) UFFDIO_COPY
* with wp flag set, don't set pte write bit.
*/
if (wp_copy || (is_continue && !vm_shared))
writable = 0; writable = 0;
else else
writable = dst_vma->vm_flags & VM_WRITE; writable = dst_vma->vm_flags & VM_WRITE;
_dst_pte = make_huge_pte(dst_vma, page, writable); _dst_pte = make_huge_pte(dst_vma, page, writable);
if (writable) /*
_dst_pte = huge_pte_mkdirty(_dst_pte); * Always mark UFFDIO_COPY page dirty; note that this may not be
* extremely important for hugetlbfs for now since swapping is not
* supported, but we should still be clear in that this page cannot be
* thrown away at will, even if write bit not set.
*/
_dst_pte = huge_pte_mkdirty(_dst_pte);
_dst_pte = pte_mkyoung(_dst_pte); _dst_pte = pte_mkyoung(_dst_pte);
if (wp_copy)
_dst_pte = huge_pte_mkuffd_wp(_dst_pte);
set_huge_pte_at(dst_mm, dst_addr, dst_pte, _dst_pte); set_huge_pte_at(dst_mm, dst_addr, dst_pte, _dst_pte);
(void)huge_ptep_set_access_flags(dst_vma, dst_addr, dst_pte, _dst_pte, (void)huge_ptep_set_access_flags(dst_vma, dst_addr, dst_pte, _dst_pte,
......
...@@ -305,7 +305,8 @@ static __always_inline ssize_t __mcopy_atomic_hugetlb(struct mm_struct *dst_mm, ...@@ -305,7 +305,8 @@ static __always_inline ssize_t __mcopy_atomic_hugetlb(struct mm_struct *dst_mm,
unsigned long dst_start, unsigned long dst_start,
unsigned long src_start, unsigned long src_start,
unsigned long len, unsigned long len,
enum mcopy_atomic_mode mode) enum mcopy_atomic_mode mode,
bool wp_copy)
{ {
int vm_shared = dst_vma->vm_flags & VM_SHARED; int vm_shared = dst_vma->vm_flags & VM_SHARED;
ssize_t err; ssize_t err;
...@@ -393,7 +394,7 @@ static __always_inline ssize_t __mcopy_atomic_hugetlb(struct mm_struct *dst_mm, ...@@ -393,7 +394,7 @@ static __always_inline ssize_t __mcopy_atomic_hugetlb(struct mm_struct *dst_mm,
} }
if (mode != MCOPY_ATOMIC_CONTINUE && if (mode != MCOPY_ATOMIC_CONTINUE &&
!huge_pte_none(huge_ptep_get(dst_pte))) { !huge_pte_none_mostly(huge_ptep_get(dst_pte))) {
err = -EEXIST; err = -EEXIST;
mutex_unlock(&hugetlb_fault_mutex_table[hash]); mutex_unlock(&hugetlb_fault_mutex_table[hash]);
i_mmap_unlock_read(mapping); i_mmap_unlock_read(mapping);
...@@ -401,7 +402,8 @@ static __always_inline ssize_t __mcopy_atomic_hugetlb(struct mm_struct *dst_mm, ...@@ -401,7 +402,8 @@ static __always_inline ssize_t __mcopy_atomic_hugetlb(struct mm_struct *dst_mm,
} }
err = hugetlb_mcopy_atomic_pte(dst_mm, dst_pte, dst_vma, err = hugetlb_mcopy_atomic_pte(dst_mm, dst_pte, dst_vma,
dst_addr, src_addr, mode, &page); dst_addr, src_addr, mode, &page,
wp_copy);
mutex_unlock(&hugetlb_fault_mutex_table[hash]); mutex_unlock(&hugetlb_fault_mutex_table[hash]);
i_mmap_unlock_read(mapping); i_mmap_unlock_read(mapping);
...@@ -456,7 +458,8 @@ extern ssize_t __mcopy_atomic_hugetlb(struct mm_struct *dst_mm, ...@@ -456,7 +458,8 @@ extern ssize_t __mcopy_atomic_hugetlb(struct mm_struct *dst_mm,
unsigned long dst_start, unsigned long dst_start,
unsigned long src_start, unsigned long src_start,
unsigned long len, unsigned long len,
enum mcopy_atomic_mode mode); enum mcopy_atomic_mode mode,
bool wp_copy);
#endif /* CONFIG_HUGETLB_PAGE */ #endif /* CONFIG_HUGETLB_PAGE */
static __always_inline ssize_t mfill_atomic_pte(struct mm_struct *dst_mm, static __always_inline ssize_t mfill_atomic_pte(struct mm_struct *dst_mm,
...@@ -576,7 +579,8 @@ static __always_inline ssize_t __mcopy_atomic(struct mm_struct *dst_mm, ...@@ -576,7 +579,8 @@ static __always_inline ssize_t __mcopy_atomic(struct mm_struct *dst_mm,
*/ */
if (is_vm_hugetlb_page(dst_vma)) if (is_vm_hugetlb_page(dst_vma))
return __mcopy_atomic_hugetlb(dst_mm, dst_vma, dst_start, return __mcopy_atomic_hugetlb(dst_mm, dst_vma, dst_start,
src_start, len, mcopy_mode); src_start, len, mcopy_mode,
wp_copy);
if (!vma_is_anonymous(dst_vma) && !vma_is_shmem(dst_vma)) if (!vma_is_anonymous(dst_vma) && !vma_is_shmem(dst_vma))
goto out_unlock; goto out_unlock;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment