Commit 76aefad6 authored by Peter Xu's avatar Peter Xu Committed by akpm

mm/mprotect: fix soft-dirty check in can_change_pte_writable()

Patch series "mm/mprotect: Fix soft-dirty checks", v4.


This patch (of 3):

The check wanted to make sure when soft-dirty tracking is enabled we won't
grant write bit by accident, as a page fault is needed for dirty tracking.
The intention is correct but we didn't check it right because
VM_SOFTDIRTY set actually means soft-dirty tracking disabled.  Fix it.

There's another thing tricky about soft-dirty is that, we can't check the
vma flag !(vma_flags & VM_SOFTDIRTY) directly but only check it after we
checked CONFIG_MEM_SOFT_DIRTY because otherwise VM_SOFTDIRTY will be
defined as zero, and !(vma_flags & VM_SOFTDIRTY) will constantly return
true.  To avoid misuse, introduce a helper for checking whether vma has
soft-dirty tracking enabled.

We can easily verify this with any exclusive anonymous page, like program
below:

=======8<======
  #include <stdio.h>
  #include <unistd.h>
  #include <stdlib.h>
  #include <assert.h>
  #include <inttypes.h>
  #include <stdint.h>
  #include <sys/types.h>
  #include <sys/mman.h>
  #include <sys/types.h>
  #include <sys/stat.h>
  #include <unistd.h>
  #include <fcntl.h>
  #include <stdbool.h>

  #define BIT_ULL(nr)                   (1ULL << (nr))
  #define PM_SOFT_DIRTY                 BIT_ULL(55)

  unsigned int psize;
  char *page;

  uint64_t pagemap_read_vaddr(int fd, void *vaddr)
  {
      uint64_t value;
      int ret;

      ret = pread(fd, &value, sizeof(uint64_t),
                  ((uint64_t)vaddr >> 12) * sizeof(uint64_t));
      assert(ret == sizeof(uint64_t));

      return value;
  }

  void clear_refs_write(void)
  {
      int fd = open("/proc/self/clear_refs", O_RDWR);

      assert(fd >= 0);
      write(fd, "4", 2);
      close(fd);
  }

  #define  check_soft_dirty(str, expect)  do {                            \
          bool dirty = pagemap_read_vaddr(fd, page) & PM_SOFT_DIRTY;      \
          if (dirty != expect) {                                          \
              printf("ERROR: %s, soft-dirty=%d (expect: %d)
", str, dirty, expect); \
              exit(-1);                                                   \
          }                                                               \
  } while (0)

  int main(void)
  {
      int fd = open("/proc/self/pagemap", O_RDONLY);

      assert(fd >= 0);
      psize = getpagesize();
      page = mmap(NULL, psize, PROT_READ|PROT_WRITE,
                  MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
      assert(page != MAP_FAILED);

      *page = 1;
      check_soft_dirty("Just faulted in page", 1);
      clear_refs_write();
      check_soft_dirty("Clear_refs written", 0);
      mprotect(page, psize, PROT_READ);
      check_soft_dirty("Marked RO", 0);
      mprotect(page, psize, PROT_READ|PROT_WRITE);
      check_soft_dirty("Marked RW", 0);
      *page = 2;
      check_soft_dirty("Wrote page again", 1);

      munmap(page, psize);
      close(fd);
      printf("Test passed.
");

      return 0;
  }
=======8<======

Here we attach a Fixes to commit 64fe24a3 only for easy tracking, as
this patch won't apply to a tree before that point.  However the commit
wasn't the source of problem, but instead 64e45507.  It's just that
after 64fe24a3 anonymous memory will also suffer from this problem
with mprotect().

Link: https://lkml.kernel.org/r/20220725142048.30450-1-peterx@redhat.com
Link: https://lkml.kernel.org/r/20220725142048.30450-2-peterx@redhat.com
Fixes: 64e45507 ("mm: softdirty: enable write notifications on VMAs after VM_SOFTDIRTY cleared")
Fixes: 64fe24a3 ("mm/mprotect: try avoiding write faults for exclusive anonymous pages when changing protection")
Signed-off-by: default avatarPeter Xu <peterx@redhat.com>
Reviewed-by: default avatarDavid Hildenbrand <david@redhat.com>
Cc: Nadav Amit <nadav.amit@gmail.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Signed-off-by: default avatarAndrew Morton <akpm@linux-foundation.org>
parent 68aaee14
......@@ -862,4 +862,22 @@ struct folio *try_grab_folio(struct page *page, int refs, unsigned int flags);
DECLARE_PER_CPU(struct per_cpu_nodestat, boot_nodestats);
static inline bool vma_soft_dirty_enabled(struct vm_area_struct *vma)
{
/*
* NOTE: we must check this before VM_SOFTDIRTY on soft-dirty
* enablements, because when without soft-dirty being compiled in,
* VM_SOFTDIRTY is defined as 0x0, then !(vm_flags & VM_SOFTDIRTY)
* will be constantly true.
*/
if (!IS_ENABLED(CONFIG_MEM_SOFT_DIRTY))
return false;
/*
* Soft-dirty is kind of special: its tracking is enabled when the
* vma flags not set.
*/
return !(vma->vm_flags & VM_SOFTDIRTY);
}
#endif /* __MM_INTERNAL_H */
......@@ -1647,7 +1647,7 @@ int vma_wants_writenotify(struct vm_area_struct *vma, pgprot_t vm_page_prot)
return 0;
/* Do we need to track softdirty? */
if (IS_ENABLED(CONFIG_MEM_SOFT_DIRTY) && !(vm_flags & VM_SOFTDIRTY))
if (vma_soft_dirty_enabled(vma))
return 1;
/* Specialty mapping? */
......
......@@ -49,7 +49,7 @@ static inline bool can_change_pte_writable(struct vm_area_struct *vma,
return false;
/* Do we need write faults for softdirty tracking? */
if ((vma->vm_flags & VM_SOFTDIRTY) && !pte_soft_dirty(pte))
if (vma_soft_dirty_enabled(vma) && !pte_soft_dirty(pte))
return false;
/* Do we need write faults for uffd-wp tracking? */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment