Commit 52526ca7 authored by Muhammad Usama Anjum's avatar Muhammad Usama Anjum Committed by Andrew Morton

fs/proc/task_mmu: implement IOCTL to get and optionally clear info about PTEs

The PAGEMAP_SCAN IOCTL on the pagemap file can be used to get or optionally
clear the info about page table entries. The following operations are
supported in this IOCTL:
- Scan the address range and get the memory ranges matching the provided
  criteria. This is performed when the output buffer is specified.
- Write-protect the pages. The PM_SCAN_WP_MATCHING is used to write-protect
  the pages of interest. The PM_SCAN_CHECK_WPASYNC aborts the operation if
  non-Async Write Protected pages are found. The ``PM_SCAN_WP_MATCHING``
  can be used with or without PM_SCAN_CHECK_WPASYNC.
- Both of those operations can be combined into one atomic operation where
  we can get and write protect the pages as well.

Following flags about pages are currently supported:
- PAGE_IS_WPALLOWED - Page has async-write-protection enabled
- PAGE_IS_WRITTEN - Page has been written to from the time it was write protected
- PAGE_IS_FILE - Page is file backed
- PAGE_IS_PRESENT - Page is present in the memory
- PAGE_IS_SWAPPED - Page is in swapped
- PAGE_IS_PFNZERO - Page has zero PFN
- PAGE_IS_HUGE - Page is THP or Hugetlb backed

This IOCTL can be extended to get information about more PTE bits. The
entire address range passed by user [start, end) is scanned until either
the user provided buffer is full or max_pages have been found.

[akpm@linux-foundation.org: update it for "mm: hugetlb: add huge page size param to set_huge_pte_at()"]
[akpm@linux-foundation.org: fix CONFIG_HUGETLB_PAGE=n warning]
[arnd@arndb.de: hide unused pagemap_scan_backout_range() function]
  Link: https://lkml.kernel.org/r/20230927060257.2975412-1-arnd@kernel.org
[sfr@canb.auug.org.au: fix "fs/proc/task_mmu: hide unused pagemap_scan_backout_range() function"]
  Link: https://lkml.kernel.org/r/20230928092223.0625c6bf@canb.auug.org.au
Link: https://lkml.kernel.org/r/20230821141518.870589-3-usama.anjum@collabora.comSigned-off-by: default avatarMuhammad Usama Anjum <usama.anjum@collabora.com>
Signed-off-by: default avatarMichał Mirosław <mirq-linux@rere.qmqm.pl>
Signed-off-by: default avatarArnd Bergmann <arnd@arndb.de>
Signed-off-by: default avatarStephen Rothwell <sfr@canb.auug.org.au>
Reviewed-by: default avatarAndrei Vagin <avagin@gmail.com>
Reviewed-by: default avatarMichał Mirosław <mirq-linux@rere.qmqm.pl>
Cc: Alex Sierra <alex.sierra@amd.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Axel Rasmussen <axelrasmussen@google.com>
Cc: Christian Brauner <brauner@kernel.org>
Cc: Cyrill Gorcunov <gorcunov@gmail.com>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Gustavo A. R. Silva <gustavoars@kernel.org>
Cc: "Liam R. Howlett" <Liam.Howlett@oracle.com>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Michal Miroslaw <emmir@google.com>
Cc: Mike Rapoport (IBM) <rppt@kernel.org>
Cc: Nadav Amit <namit@vmware.com>
Cc: Pasha Tatashin <pasha.tatashin@soleen.com>
Cc: Paul Gofman <pgofman@codeweavers.com>
Cc: Peter Xu <peterx@redhat.com>
Cc: Shuah Khan <shuah@kernel.org>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Yang Shi <shy828301@gmail.com>
Cc: Yun Zhou <yun.zhou@windriver.com>
Signed-off-by: default avatarAndrew Morton <akpm@linux-foundation.org>
parent d61ea1cb
......@@ -20,6 +20,8 @@
#include <linux/shmem_fs.h>
#include <linux/uaccess.h>
#include <linux/pkeys.h>
#include <linux/minmax.h>
#include <linux/overflow.h>
#include <asm/elf.h>
#include <asm/tlb.h>
......@@ -1761,11 +1763,701 @@ static int pagemap_release(struct inode *inode, struct file *file)
return 0;
}
#define PM_SCAN_CATEGORIES (PAGE_IS_WPALLOWED | PAGE_IS_WRITTEN | \
PAGE_IS_FILE | PAGE_IS_PRESENT | \
PAGE_IS_SWAPPED | PAGE_IS_PFNZERO | \
PAGE_IS_HUGE)
#define PM_SCAN_FLAGS (PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC)
struct pagemap_scan_private {
struct pm_scan_arg arg;
unsigned long masks_of_interest, cur_vma_category;
struct page_region *vec_buf;
unsigned long vec_buf_len, vec_buf_index, found_pages;
struct page_region __user *vec_out;
};
static unsigned long pagemap_page_category(struct pagemap_scan_private *p,
struct vm_area_struct *vma,
unsigned long addr, pte_t pte)
{
unsigned long categories = 0;
if (pte_present(pte)) {
struct page *page;
categories |= PAGE_IS_PRESENT;
if (!pte_uffd_wp(pte))
categories |= PAGE_IS_WRITTEN;
if (p->masks_of_interest & PAGE_IS_FILE) {
page = vm_normal_page(vma, addr, pte);
if (page && !PageAnon(page))
categories |= PAGE_IS_FILE;
}
if (is_zero_pfn(pte_pfn(pte)))
categories |= PAGE_IS_PFNZERO;
} else if (is_swap_pte(pte)) {
swp_entry_t swp;
categories |= PAGE_IS_SWAPPED;
if (!pte_swp_uffd_wp_any(pte))
categories |= PAGE_IS_WRITTEN;
if (p->masks_of_interest & PAGE_IS_FILE) {
swp = pte_to_swp_entry(pte);
if (is_pfn_swap_entry(swp) &&
!PageAnon(pfn_swap_entry_to_page(swp)))
categories |= PAGE_IS_FILE;
}
}
return categories;
}
static void make_uffd_wp_pte(struct vm_area_struct *vma,
unsigned long addr, pte_t *pte)
{
pte_t ptent = ptep_get(pte);
if (pte_present(ptent)) {
pte_t old_pte;
old_pte = ptep_modify_prot_start(vma, addr, pte);
ptent = pte_mkuffd_wp(ptent);
ptep_modify_prot_commit(vma, addr, pte, old_pte, ptent);
} else if (is_swap_pte(ptent)) {
ptent = pte_swp_mkuffd_wp(ptent);
set_pte_at(vma->vm_mm, addr, pte, ptent);
} else {
set_pte_at(vma->vm_mm, addr, pte,
make_pte_marker(PTE_MARKER_UFFD_WP));
}
}
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
static unsigned long pagemap_thp_category(struct pagemap_scan_private *p,
struct vm_area_struct *vma,
unsigned long addr, pmd_t pmd)
{
unsigned long categories = PAGE_IS_HUGE;
if (pmd_present(pmd)) {
struct page *page;
categories |= PAGE_IS_PRESENT;
if (!pmd_uffd_wp(pmd))
categories |= PAGE_IS_WRITTEN;
if (p->masks_of_interest & PAGE_IS_FILE) {
page = vm_normal_page_pmd(vma, addr, pmd);
if (page && !PageAnon(page))
categories |= PAGE_IS_FILE;
}
if (is_zero_pfn(pmd_pfn(pmd)))
categories |= PAGE_IS_PFNZERO;
} else if (is_swap_pmd(pmd)) {
swp_entry_t swp;
categories |= PAGE_IS_SWAPPED;
if (!pmd_swp_uffd_wp(pmd))
categories |= PAGE_IS_WRITTEN;
if (p->masks_of_interest & PAGE_IS_FILE) {
swp = pmd_to_swp_entry(pmd);
if (is_pfn_swap_entry(swp) &&
!PageAnon(pfn_swap_entry_to_page(swp)))
categories |= PAGE_IS_FILE;
}
}
return categories;
}
static void make_uffd_wp_pmd(struct vm_area_struct *vma,
unsigned long addr, pmd_t *pmdp)
{
pmd_t old, pmd = *pmdp;
if (pmd_present(pmd)) {
old = pmdp_invalidate_ad(vma, addr, pmdp);
pmd = pmd_mkuffd_wp(old);
set_pmd_at(vma->vm_mm, addr, pmdp, pmd);
} else if (is_migration_entry(pmd_to_swp_entry(pmd))) {
pmd = pmd_swp_mkuffd_wp(pmd);
set_pmd_at(vma->vm_mm, addr, pmdp, pmd);
}
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
#ifdef CONFIG_HUGETLB_PAGE
static unsigned long pagemap_hugetlb_category(pte_t pte)
{
unsigned long categories = PAGE_IS_HUGE;
/*
* According to pagemap_hugetlb_range(), file-backed HugeTLB
* page cannot be swapped. So PAGE_IS_FILE is not checked for
* swapped pages.
*/
if (pte_present(pte)) {
categories |= PAGE_IS_PRESENT;
if (!huge_pte_uffd_wp(pte))
categories |= PAGE_IS_WRITTEN;
if (!PageAnon(pte_page(pte)))
categories |= PAGE_IS_FILE;
if (is_zero_pfn(pte_pfn(pte)))
categories |= PAGE_IS_PFNZERO;
} else if (is_swap_pte(pte)) {
categories |= PAGE_IS_SWAPPED;
if (!pte_swp_uffd_wp_any(pte))
categories |= PAGE_IS_WRITTEN;
}
return categories;
}
static void make_uffd_wp_huge_pte(struct vm_area_struct *vma,
unsigned long addr, pte_t *ptep,
pte_t ptent)
{
unsigned long psize;
if (is_hugetlb_entry_hwpoisoned(ptent) || is_pte_marker(ptent))
return;
psize = huge_page_size(hstate_vma(vma));
if (is_hugetlb_entry_migration(ptent))
set_huge_pte_at(vma->vm_mm, addr, ptep,
pte_swp_mkuffd_wp(ptent), psize);
else if (!huge_pte_none(ptent))
huge_ptep_modify_prot_commit(vma, addr, ptep, ptent,
huge_pte_mkuffd_wp(ptent));
else
set_huge_pte_at(vma->vm_mm, addr, ptep,
make_pte_marker(PTE_MARKER_UFFD_WP), psize);
}
#endif /* CONFIG_HUGETLB_PAGE */
#if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_HUGETLB_PAGE)
static void pagemap_scan_backout_range(struct pagemap_scan_private *p,
unsigned long addr, unsigned long end)
{
struct page_region *cur_buf = &p->vec_buf[p->vec_buf_index];
if (cur_buf->start != addr)
cur_buf->end = addr;
else
cur_buf->start = cur_buf->end = 0;
p->found_pages -= (end - addr) / PAGE_SIZE;
}
#endif
static bool pagemap_scan_is_interesting_page(unsigned long categories,
const struct pagemap_scan_private *p)
{
categories ^= p->arg.category_inverted;
if ((categories & p->arg.category_mask) != p->arg.category_mask)
return false;
if (p->arg.category_anyof_mask && !(categories & p->arg.category_anyof_mask))
return false;
return true;
}
static bool pagemap_scan_is_interesting_vma(unsigned long categories,
const struct pagemap_scan_private *p)
{
unsigned long required = p->arg.category_mask & PAGE_IS_WPALLOWED;
categories ^= p->arg.category_inverted;
if ((categories & required) != required)
return false;
return true;
}
static int pagemap_scan_test_walk(unsigned long start, unsigned long end,
struct mm_walk *walk)
{
struct pagemap_scan_private *p = walk->private;
struct vm_area_struct *vma = walk->vma;
unsigned long vma_category = 0;
if (userfaultfd_wp_async(vma) && userfaultfd_wp_use_markers(vma))
vma_category |= PAGE_IS_WPALLOWED;
else if (p->arg.flags & PM_SCAN_CHECK_WPASYNC)
return -EPERM;
if (vma->vm_flags & VM_PFNMAP)
return 1;
if (!pagemap_scan_is_interesting_vma(vma_category, p))
return 1;
p->cur_vma_category = vma_category;
return 0;
}
static bool pagemap_scan_push_range(unsigned long categories,
struct pagemap_scan_private *p,
unsigned long addr, unsigned long end)
{
struct page_region *cur_buf = &p->vec_buf[p->vec_buf_index];
/*
* When there is no output buffer provided at all, the sentinel values
* won't match here. There is no other way for `cur_buf->end` to be
* non-zero other than it being non-empty.
*/
if (addr == cur_buf->end && categories == cur_buf->categories) {
cur_buf->end = end;
return true;
}
if (cur_buf->end) {
if (p->vec_buf_index >= p->vec_buf_len - 1)
return false;
cur_buf = &p->vec_buf[++p->vec_buf_index];
}
cur_buf->start = addr;
cur_buf->end = end;
cur_buf->categories = categories;
return true;
}
static int pagemap_scan_output(unsigned long categories,
struct pagemap_scan_private *p,
unsigned long addr, unsigned long *end)
{
unsigned long n_pages, total_pages;
int ret = 0;
if (!p->vec_buf)
return 0;
categories &= p->arg.return_mask;
n_pages = (*end - addr) / PAGE_SIZE;
if (check_add_overflow(p->found_pages, n_pages, &total_pages) ||
total_pages > p->arg.max_pages) {
size_t n_too_much = total_pages - p->arg.max_pages;
*end -= n_too_much * PAGE_SIZE;
n_pages -= n_too_much;
ret = -ENOSPC;
}
if (!pagemap_scan_push_range(categories, p, addr, *end)) {
*end = addr;
n_pages = 0;
ret = -ENOSPC;
}
p->found_pages += n_pages;
if (ret)
p->arg.walk_end = *end;
return ret;
}
static int pagemap_scan_thp_entry(pmd_t *pmd, unsigned long start,
unsigned long end, struct mm_walk *walk)
{
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
struct pagemap_scan_private *p = walk->private;
struct vm_area_struct *vma = walk->vma;
unsigned long categories;
spinlock_t *ptl;
int ret = 0;
ptl = pmd_trans_huge_lock(pmd, vma);
if (!ptl)
return -ENOENT;
categories = p->cur_vma_category |
pagemap_thp_category(p, vma, start, *pmd);
if (!pagemap_scan_is_interesting_page(categories, p))
goto out_unlock;
ret = pagemap_scan_output(categories, p, start, &end);
if (start == end)
goto out_unlock;
if (~p->arg.flags & PM_SCAN_WP_MATCHING)
goto out_unlock;
if (~categories & PAGE_IS_WRITTEN)
goto out_unlock;
/*
* Break huge page into small pages if the WP operation
* needs to be performed on a portion of the huge page.
*/
if (end != start + HPAGE_SIZE) {
spin_unlock(ptl);
split_huge_pmd(vma, pmd, start);
pagemap_scan_backout_range(p, start, end);
/* Report as if there was no THP */
return -ENOENT;
}
make_uffd_wp_pmd(vma, start, pmd);
flush_tlb_range(vma, start, end);
out_unlock:
spin_unlock(ptl);
return ret;
#else /* !CONFIG_TRANSPARENT_HUGEPAGE */
return -ENOENT;
#endif
}
static int pagemap_scan_pmd_entry(pmd_t *pmd, unsigned long start,
unsigned long end, struct mm_walk *walk)
{
struct pagemap_scan_private *p = walk->private;
struct vm_area_struct *vma = walk->vma;
unsigned long addr, flush_end = 0;
pte_t *pte, *start_pte;
spinlock_t *ptl;
int ret;
arch_enter_lazy_mmu_mode();
ret = pagemap_scan_thp_entry(pmd, start, end, walk);
if (ret != -ENOENT) {
arch_leave_lazy_mmu_mode();
return ret;
}
ret = 0;
start_pte = pte = pte_offset_map_lock(vma->vm_mm, pmd, start, &ptl);
if (!pte) {
arch_leave_lazy_mmu_mode();
walk->action = ACTION_AGAIN;
return 0;
}
for (addr = start; addr != end; pte++, addr += PAGE_SIZE) {
unsigned long categories = p->cur_vma_category |
pagemap_page_category(p, vma, addr, ptep_get(pte));
unsigned long next = addr + PAGE_SIZE;
if (!pagemap_scan_is_interesting_page(categories, p))
continue;
ret = pagemap_scan_output(categories, p, addr, &next);
if (next == addr)
break;
if (~p->arg.flags & PM_SCAN_WP_MATCHING)
continue;
if (~categories & PAGE_IS_WRITTEN)
continue;
make_uffd_wp_pte(vma, addr, pte);
if (!flush_end)
start = addr;
flush_end = next;
}
if (flush_end)
flush_tlb_range(vma, start, addr);
pte_unmap_unlock(start_pte, ptl);
arch_leave_lazy_mmu_mode();
cond_resched();
return ret;
}
#ifdef CONFIG_HUGETLB_PAGE
static int pagemap_scan_hugetlb_entry(pte_t *ptep, unsigned long hmask,
unsigned long start, unsigned long end,
struct mm_walk *walk)
{
struct pagemap_scan_private *p = walk->private;
struct vm_area_struct *vma = walk->vma;
unsigned long categories;
spinlock_t *ptl;
int ret = 0;
pte_t pte;
if (~p->arg.flags & PM_SCAN_WP_MATCHING) {
/* Go the short route when not write-protecting pages. */
pte = huge_ptep_get(ptep);
categories = p->cur_vma_category | pagemap_hugetlb_category(pte);
if (!pagemap_scan_is_interesting_page(categories, p))
return 0;
return pagemap_scan_output(categories, p, start, &end);
}
i_mmap_lock_write(vma->vm_file->f_mapping);
ptl = huge_pte_lock(hstate_vma(vma), vma->vm_mm, ptep);
pte = huge_ptep_get(ptep);
categories = p->cur_vma_category | pagemap_hugetlb_category(pte);
if (!pagemap_scan_is_interesting_page(categories, p))
goto out_unlock;
ret = pagemap_scan_output(categories, p, start, &end);
if (start == end)
goto out_unlock;
if (~categories & PAGE_IS_WRITTEN)
goto out_unlock;
if (end != start + HPAGE_SIZE) {
/* Partial HugeTLB page WP isn't possible. */
pagemap_scan_backout_range(p, start, end);
p->arg.walk_end = start;
ret = 0;
goto out_unlock;
}
make_uffd_wp_huge_pte(vma, start, ptep, pte);
flush_hugetlb_tlb_range(vma, start, end);
out_unlock:
spin_unlock(ptl);
i_mmap_unlock_write(vma->vm_file->f_mapping);
return ret;
}
#else
#define pagemap_scan_hugetlb_entry NULL
#endif
static int pagemap_scan_pte_hole(unsigned long addr, unsigned long end,
int depth, struct mm_walk *walk)
{
struct pagemap_scan_private *p = walk->private;
struct vm_area_struct *vma = walk->vma;
int ret, err;
if (!vma || !pagemap_scan_is_interesting_page(p->cur_vma_category, p))
return 0;
ret = pagemap_scan_output(p->cur_vma_category, p, addr, &end);
if (addr == end)
return ret;
if (~p->arg.flags & PM_SCAN_WP_MATCHING)
return ret;
err = uffd_wp_range(vma, addr, end - addr, true);
if (err < 0)
ret = err;
return ret;
}
static const struct mm_walk_ops pagemap_scan_ops = {
.test_walk = pagemap_scan_test_walk,
.pmd_entry = pagemap_scan_pmd_entry,
.pte_hole = pagemap_scan_pte_hole,
.hugetlb_entry = pagemap_scan_hugetlb_entry,
};
static int pagemap_scan_get_args(struct pm_scan_arg *arg,
unsigned long uarg)
{
if (copy_from_user(arg, (void __user *)uarg, sizeof(*arg)))
return -EFAULT;
if (arg->size != sizeof(struct pm_scan_arg))
return -EINVAL;
/* Validate requested features */
if (arg->flags & ~PM_SCAN_FLAGS)
return -EINVAL;
if ((arg->category_inverted | arg->category_mask |
arg->category_anyof_mask | arg->return_mask) & ~PM_SCAN_CATEGORIES)
return -EINVAL;
arg->start = untagged_addr((unsigned long)arg->start);
arg->end = untagged_addr((unsigned long)arg->end);
arg->vec = untagged_addr((unsigned long)arg->vec);
/* Validate memory pointers */
if (!IS_ALIGNED(arg->start, PAGE_SIZE))
return -EINVAL;
if (!access_ok((void __user *)(long)arg->start, arg->end - arg->start))
return -EFAULT;
if (!arg->vec && arg->vec_len)
return -EINVAL;
if (arg->vec && !access_ok((void __user *)(long)arg->vec,
arg->vec_len * sizeof(struct page_region)))
return -EFAULT;
/* Fixup default values */
arg->end = ALIGN(arg->end, PAGE_SIZE);
arg->walk_end = 0;
if (!arg->max_pages)
arg->max_pages = ULONG_MAX;
return 0;
}
static int pagemap_scan_writeback_args(struct pm_scan_arg *arg,
unsigned long uargl)
{
struct pm_scan_arg __user *uarg = (void __user *)uargl;
if (copy_to_user(&uarg->walk_end, &arg->walk_end, sizeof(arg->walk_end)))
return -EFAULT;
return 0;
}
static int pagemap_scan_init_bounce_buffer(struct pagemap_scan_private *p)
{
if (!p->arg.vec_len)
return 0;
p->vec_buf_len = min_t(size_t, PAGEMAP_WALK_SIZE >> PAGE_SHIFT,
p->arg.vec_len);
p->vec_buf = kmalloc_array(p->vec_buf_len, sizeof(*p->vec_buf),
GFP_KERNEL);
if (!p->vec_buf)
return -ENOMEM;
p->vec_buf->start = p->vec_buf->end = 0;
p->vec_out = (struct page_region __user *)(long)p->arg.vec;
return 0;
}
static long pagemap_scan_flush_buffer(struct pagemap_scan_private *p)
{
const struct page_region *buf = p->vec_buf;
long n = p->vec_buf_index;
if (!p->vec_buf)
return 0;
if (buf[n].end != buf[n].start)
n++;
if (!n)
return 0;
if (copy_to_user(p->vec_out, buf, n * sizeof(*buf)))
return -EFAULT;
p->arg.vec_len -= n;
p->vec_out += n;
p->vec_buf_index = 0;
p->vec_buf_len = min_t(size_t, p->vec_buf_len, p->arg.vec_len);
p->vec_buf->start = p->vec_buf->end = 0;
return n;
}
static long do_pagemap_scan(struct mm_struct *mm, unsigned long uarg)
{
struct mmu_notifier_range range;
struct pagemap_scan_private p = {0};
unsigned long walk_start;
size_t n_ranges_out = 0;
int ret;
ret = pagemap_scan_get_args(&p.arg, uarg);
if (ret)
return ret;
p.masks_of_interest = p.arg.category_mask | p.arg.category_anyof_mask |
p.arg.return_mask;
ret = pagemap_scan_init_bounce_buffer(&p);
if (ret)
return ret;
/* Protection change for the range is going to happen. */
if (p.arg.flags & PM_SCAN_WP_MATCHING) {
mmu_notifier_range_init(&range, MMU_NOTIFY_PROTECTION_VMA, 0,
mm, p.arg.start, p.arg.end);
mmu_notifier_invalidate_range_start(&range);
}
for (walk_start = p.arg.start; walk_start < p.arg.end;
walk_start = p.arg.walk_end) {
long n_out;
if (fatal_signal_pending(current)) {
ret = -EINTR;
break;
}
ret = mmap_read_lock_killable(mm);
if (ret)
break;
ret = walk_page_range(mm, walk_start, p.arg.end,
&pagemap_scan_ops, &p);
mmap_read_unlock(mm);
n_out = pagemap_scan_flush_buffer(&p);
if (n_out < 0)
ret = n_out;
else
n_ranges_out += n_out;
if (ret != -ENOSPC)
break;
if (p.arg.vec_len == 0 || p.found_pages == p.arg.max_pages)
break;
}
/* ENOSPC signifies early stop (buffer full) from the walk. */
if (!ret || ret == -ENOSPC)
ret = n_ranges_out;
/* The walk_end isn't set when ret is zero */
if (!p.arg.walk_end)
p.arg.walk_end = p.arg.end;
if (pagemap_scan_writeback_args(&p.arg, uarg))
ret = -EFAULT;
if (p.arg.flags & PM_SCAN_WP_MATCHING)
mmu_notifier_invalidate_range_end(&range);
kfree(p.vec_buf);
return ret;
}
static long do_pagemap_cmd(struct file *file, unsigned int cmd,
unsigned long arg)
{
struct mm_struct *mm = file->private_data;
switch (cmd) {
case PAGEMAP_SCAN:
return do_pagemap_scan(mm, arg);
default:
return -EINVAL;
}
}
const struct file_operations proc_pagemap_operations = {
.llseek = mem_lseek, /* borrow this */
.read = pagemap_read,
.open = pagemap_open,
.release = pagemap_release,
.unlocked_ioctl = do_pagemap_cmd,
.compat_ioctl = do_pagemap_cmd,
};
#endif /* CONFIG_PROC_PAGE_MONITOR */
......
......@@ -280,6 +280,7 @@ long hugetlb_change_protection(struct vm_area_struct *vma,
unsigned long cp_flags);
bool is_hugetlb_entry_migration(pte_t pte);
bool is_hugetlb_entry_hwpoisoned(pte_t pte);
void hugetlb_unshare_all_pmds(struct vm_area_struct *vma);
#else /* !CONFIG_HUGETLB_PAGE */
......
......@@ -221,6 +221,13 @@ static inline vm_fault_t handle_userfault(struct vm_fault *vmf,
return VM_FAULT_SIGBUS;
}
static inline long uffd_wp_range(struct vm_area_struct *vma,
unsigned long start, unsigned long len,
bool enable_wp)
{
return false;
}
static inline bool is_mergeable_vm_userfaultfd_ctx(struct vm_area_struct *vma,
struct vm_userfaultfd_ctx vm_ctx)
{
......
......@@ -305,4 +305,63 @@ typedef int __bitwise __kernel_rwf_t;
#define RWF_SUPPORTED (RWF_HIPRI | RWF_DSYNC | RWF_SYNC | RWF_NOWAIT |\
RWF_APPEND)
/* Pagemap ioctl */
#define PAGEMAP_SCAN _IOWR('f', 16, struct pm_scan_arg)
/* Bitmasks provided in pm_scan_args masks and reported in page_region.categories. */
#define PAGE_IS_WPALLOWED (1 << 0)
#define PAGE_IS_WRITTEN (1 << 1)
#define PAGE_IS_FILE (1 << 2)
#define PAGE_IS_PRESENT (1 << 3)
#define PAGE_IS_SWAPPED (1 << 4)
#define PAGE_IS_PFNZERO (1 << 5)
#define PAGE_IS_HUGE (1 << 6)
/*
* struct page_region - Page region with flags
* @start: Start of the region
* @end: End of the region (exclusive)
* @categories: PAGE_IS_* category bitmask for the region
*/
struct page_region {
__u64 start;
__u64 end;
__u64 categories;
};
/* Flags for PAGEMAP_SCAN ioctl */
#define PM_SCAN_WP_MATCHING (1 << 0) /* Write protect the pages matched. */
#define PM_SCAN_CHECK_WPASYNC (1 << 1) /* Abort the scan when a non-WP-enabled page is found. */
/*
* struct pm_scan_arg - Pagemap ioctl argument
* @size: Size of the structure
* @flags: Flags for the IOCTL
* @start: Starting address of the region
* @end: Ending address of the region
* @walk_end Address where the scan stopped (written by kernel).
* walk_end == end (address tags cleared) informs that the scan completed on entire range.
* @vec: Address of page_region struct array for output
* @vec_len: Length of the page_region struct array
* @max_pages: Optional limit for number of returned pages (0 = disabled)
* @category_inverted: PAGE_IS_* categories which values match if 0 instead of 1
* @category_mask: Skip pages for which any category doesn't match
* @category_anyof_mask: Skip pages for which no category matches
* @return_mask: PAGE_IS_* categories that are to be reported in `page_region`s returned
*/
struct pm_scan_arg {
__u64 size;
__u64 flags;
__u64 start;
__u64 end;
__u64 walk_end;
__u64 vec;
__u64 vec_len;
__u64 max_pages;
__u64 category_inverted;
__u64 category_mask;
__u64 category_anyof_mask;
__u64 return_mask;
};
#endif /* _UAPI_LINUX_FS_H */
......@@ -5044,7 +5044,7 @@ bool is_hugetlb_entry_migration(pte_t pte)
return false;
}
static bool is_hugetlb_entry_hwpoisoned(pte_t pte)
bool is_hugetlb_entry_hwpoisoned(pte_t pte)
{
swp_entry_t swp;
......@@ -6266,7 +6266,8 @@ vm_fault_t hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
}
entry = huge_pte_clear_uffd_wp(entry);
set_huge_pte_at(mm, haddr, ptep, entry);
set_huge_pte_at(mm, haddr, ptep, entry,
huge_page_size(hstate_vma(vma)));
/* Fallthrough to CoW */
}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment