Commit c9fe6656 authored by Nadav Amit's avatar Nadav Amit Committed by Andrew Morton

mm/mprotect: do not flush when not required architecturally

Currently, using mprotect() to unprotect a memory region or uffd to
unprotect a memory region causes a TLB flush.  However, in such cases the
PTE is often not modified (i.e., remain RO) and therefore not TLB flush is
needed.

Add an arch-specific pte_needs_flush() which tells whether a TLB flush is
needed based on the old PTE and the new one.  Implement an x86
pte_needs_flush().

Always flush the TLB when it is architecturally needed even when skipping
a TLB flush might only result in a spurious page-faults by skipping the
flush.

Even with such conservative manner, we can in the future further refine
the checks to test whether a PTE is present by only considering the
architectural _PAGE_PRESENT flag instead of {pte|pmd}_preesnt().  For not
be careful and use the latter.

Link: https://lkml.kernel.org/r/20220401180821.1986781-3-namit@vmware.comSigned-off-by: default avatarNadav Amit <namit@vmware.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Will Deacon <will@kernel.org>
Cc: Yu Zhao <yuzhao@google.com>
Cc: Nick Piggin <npiggin@gmail.com>
Cc: Andrew Cooper <andrew.cooper3@citrix.com>
Cc: Peter Xu <peterx@redhat.com>
Signed-off-by: default avatarAndrew Morton <akpm@linux-foundation.org>
parent 4a18419f
...@@ -110,9 +110,11 @@ ...@@ -110,9 +110,11 @@
#if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE) #if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE)
#define _PAGE_NX (_AT(pteval_t, 1) << _PAGE_BIT_NX) #define _PAGE_NX (_AT(pteval_t, 1) << _PAGE_BIT_NX)
#define _PAGE_DEVMAP (_AT(u64, 1) << _PAGE_BIT_DEVMAP) #define _PAGE_DEVMAP (_AT(u64, 1) << _PAGE_BIT_DEVMAP)
#define _PAGE_SOFTW4 (_AT(pteval_t, 1) << _PAGE_BIT_SOFTW4)
#else #else
#define _PAGE_NX (_AT(pteval_t, 0)) #define _PAGE_NX (_AT(pteval_t, 0))
#define _PAGE_DEVMAP (_AT(pteval_t, 0)) #define _PAGE_DEVMAP (_AT(pteval_t, 0))
#define _PAGE_SOFTW4 (_AT(pteval_t, 0))
#endif #endif
#define _PAGE_PROTNONE (_AT(pteval_t, 1) << _PAGE_BIT_PROTNONE) #define _PAGE_PROTNONE (_AT(pteval_t, 1) << _PAGE_BIT_PROTNONE)
......
...@@ -259,6 +259,103 @@ static inline void arch_tlbbatch_add_mm(struct arch_tlbflush_unmap_batch *batch, ...@@ -259,6 +259,103 @@ static inline void arch_tlbbatch_add_mm(struct arch_tlbflush_unmap_batch *batch,
extern void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch); extern void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch);
static inline bool pte_flags_need_flush(unsigned long oldflags,
unsigned long newflags,
bool ignore_access)
{
/*
* Flags that require a flush when cleared but not when they are set.
* Only include flags that would not trigger spurious page-faults.
* Non-present entries are not cached. Hardware would set the
* dirty/access bit if needed without a fault.
*/
const pteval_t flush_on_clear = _PAGE_DIRTY | _PAGE_PRESENT |
_PAGE_ACCESSED;
const pteval_t software_flags = _PAGE_SOFTW1 | _PAGE_SOFTW2 |
_PAGE_SOFTW3 | _PAGE_SOFTW4;
const pteval_t flush_on_change = _PAGE_RW | _PAGE_USER | _PAGE_PWT |
_PAGE_PCD | _PAGE_PSE | _PAGE_GLOBAL | _PAGE_PAT |
_PAGE_PAT_LARGE | _PAGE_PKEY_BIT0 | _PAGE_PKEY_BIT1 |
_PAGE_PKEY_BIT2 | _PAGE_PKEY_BIT3 | _PAGE_NX;
unsigned long diff = oldflags ^ newflags;
BUILD_BUG_ON(flush_on_clear & software_flags);
BUILD_BUG_ON(flush_on_clear & flush_on_change);
BUILD_BUG_ON(flush_on_change & software_flags);
/* Ignore software flags */
diff &= ~software_flags;
if (ignore_access)
diff &= ~_PAGE_ACCESSED;
/*
* Did any of the 'flush_on_clear' flags was clleared set from between
* 'oldflags' and 'newflags'?
*/
if (diff & oldflags & flush_on_clear)
return true;
/* Flush on modified flags. */
if (diff & flush_on_change)
return true;
/* Ensure there are no flags that were left behind */
if (IS_ENABLED(CONFIG_DEBUG_VM) &&
(diff & ~(flush_on_clear | software_flags | flush_on_change))) {
VM_WARN_ON_ONCE(1);
return true;
}
return false;
}
/*
* pte_needs_flush() checks whether permissions were demoted and require a
* flush. It should only be used for userspace PTEs.
*/
static inline bool pte_needs_flush(pte_t oldpte, pte_t newpte)
{
/* !PRESENT -> * ; no need for flush */
if (!(pte_flags(oldpte) & _PAGE_PRESENT))
return false;
/* PFN changed ; needs flush */
if (pte_pfn(oldpte) != pte_pfn(newpte))
return true;
/*
* check PTE flags; ignore access-bit; see comment in
* ptep_clear_flush_young().
*/
return pte_flags_need_flush(pte_flags(oldpte), pte_flags(newpte),
true);
}
#define pte_needs_flush pte_needs_flush
/*
* huge_pmd_needs_flush() checks whether permissions were demoted and require a
* flush. It should only be used for userspace huge PMDs.
*/
static inline bool huge_pmd_needs_flush(pmd_t oldpmd, pmd_t newpmd)
{
/* !PRESENT -> * ; no need for flush */
if (!(pmd_flags(oldpmd) & _PAGE_PRESENT))
return false;
/* PFN changed ; needs flush */
if (pmd_pfn(oldpmd) != pmd_pfn(newpmd))
return true;
/*
* check PMD flags; do not ignore access-bit; see
* pmdp_clear_flush_young().
*/
return pte_flags_need_flush(pmd_flags(oldpmd), pmd_flags(newpmd),
false);
}
#define huge_pmd_needs_flush huge_pmd_needs_flush
#endif /* !MODULE */ #endif /* !MODULE */
static inline void __native_tlb_flush_global(unsigned long cr4) static inline void __native_tlb_flush_global(unsigned long cr4)
......
...@@ -658,6 +658,20 @@ static inline void tlb_flush_p4d_range(struct mmu_gather *tlb, ...@@ -658,6 +658,20 @@ static inline void tlb_flush_p4d_range(struct mmu_gather *tlb,
} while (0) } while (0)
#endif #endif
#ifndef pte_needs_flush
static inline bool pte_needs_flush(pte_t oldpte, pte_t newpte)
{
return true;
}
#endif
#ifndef huge_pmd_needs_flush
static inline bool huge_pmd_needs_flush(pmd_t oldpmd, pmd_t newpmd)
{
return true;
}
#endif
#endif /* CONFIG_MMU */ #endif /* CONFIG_MMU */
#endif /* _ASM_GENERIC__TLB_H */ #endif /* _ASM_GENERIC__TLB_H */
...@@ -1715,7 +1715,7 @@ int change_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma, ...@@ -1715,7 +1715,7 @@ int change_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
{ {
struct mm_struct *mm = vma->vm_mm; struct mm_struct *mm = vma->vm_mm;
spinlock_t *ptl; spinlock_t *ptl;
pmd_t entry; pmd_t oldpmd, entry;
bool preserve_write; bool preserve_write;
int ret; int ret;
bool prot_numa = cp_flags & MM_CP_PROT_NUMA; bool prot_numa = cp_flags & MM_CP_PROT_NUMA;
...@@ -1804,9 +1804,9 @@ int change_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma, ...@@ -1804,9 +1804,9 @@ int change_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
* pmdp_invalidate() is required to make sure we don't miss * pmdp_invalidate() is required to make sure we don't miss
* dirty/young flags set by hardware. * dirty/young flags set by hardware.
*/ */
entry = pmdp_invalidate(vma, addr, pmd); oldpmd = pmdp_invalidate(vma, addr, pmd);
entry = pmd_modify(entry, newprot); entry = pmd_modify(oldpmd, newprot);
if (preserve_write) if (preserve_write)
entry = pmd_mk_savedwrite(entry); entry = pmd_mk_savedwrite(entry);
if (uffd_wp) { if (uffd_wp) {
...@@ -1823,7 +1823,8 @@ int change_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma, ...@@ -1823,7 +1823,8 @@ int change_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
ret = HPAGE_PMD_NR; ret = HPAGE_PMD_NR;
set_pmd_at(mm, addr, pmd, entry); set_pmd_at(mm, addr, pmd, entry);
tlb_flush_pmd_range(tlb, addr, HPAGE_PMD_SIZE); if (huge_pmd_needs_flush(oldpmd, entry))
tlb_flush_pmd_range(tlb, addr, HPAGE_PMD_SIZE);
BUG_ON(vma_is_anonymous(vma) && !preserve_write && pmd_write(entry)); BUG_ON(vma_is_anonymous(vma) && !preserve_write && pmd_write(entry));
unlock: unlock:
......
...@@ -152,7 +152,8 @@ static unsigned long change_pte_range(struct mmu_gather *tlb, ...@@ -152,7 +152,8 @@ static unsigned long change_pte_range(struct mmu_gather *tlb,
ptent = pte_mkwrite(ptent); ptent = pte_mkwrite(ptent);
} }
ptep_modify_prot_commit(vma, addr, pte, oldpte, ptent); ptep_modify_prot_commit(vma, addr, pte, oldpte, ptent);
tlb_flush_pte_range(tlb, addr, PAGE_SIZE); if (pte_needs_flush(oldpte, ptent))
tlb_flush_pte_range(tlb, addr, PAGE_SIZE);
pages++; pages++;
} else if (is_swap_pte(oldpte)) { } else if (is_swap_pte(oldpte)) {
swp_entry_t entry = pte_to_swp_entry(oldpte); swp_entry_t entry = pte_to_swp_entry(oldpte);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment