Commit 1e9fdf21 authored by Peter Zijlstra, committed by Linus Torvalds

mmu_gather: Remove per arch tlb_{start,end}_vma()

Scattered across the archs are 3 basic forms of tlb_{start,end}_vma().
Provide two new MMU_GATHER_ knobs to enumerate them and remove the per
arch tlb_{start,end}_vma() implementations.

 - MMU_GATHER_NO_FLUSH_CACHE indicates the arch has flush_cache_range()
   but does *NOT* want to call it for each VMA.

 - MMU_GATHER_MERGE_VMAS indicates the arch wants to merge the
   invalidate across multiple VMAs if possible.

With these it is possible to capture the three forms:

  1) empty stubs;
     select MMU_GATHER_NO_FLUSH_CACHE and MMU_GATHER_MERGE_VMAS

  2) start: flush_cache_range(), end: empty;
     select MMU_GATHER_MERGE_VMAS

  3) start: flush_cache_range(), end: flush_tlb_range();
     default

Obviously, if the architecture does not have flush_cache_range() then
it also doesn't need to select MMU_GATHER_NO_FLUSH_CACHE.
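
For illustration, here is a condensed sketch of what the generic helpers in
include/asm-generic/tlb.h reduce to once the two knobs are wired up
(simplified from the asm-generic hunk below; tlb_update_vma_flags() and
tlb_flush_mmu_tlbonly() are the existing generic helpers, and details such
as the VM_HUGETLB flag handling are omitted):

static inline void tlb_start_vma(struct mmu_gather *tlb, struct vm_area_struct *vma)
{
	if (tlb->fullmm)
		return;

	tlb_update_vma_flags(tlb, vma);
#ifndef CONFIG_MMU_GATHER_NO_FLUSH_CACHE
	/* Forms (2) and (3): write back caches before the unmap. */
	flush_cache_range(vma, vma->vm_start, vma->vm_end);
#endif
}

static inline void tlb_end_vma(struct mmu_gather *tlb, struct vm_area_struct *vma)
{
	/* Forms (1) and (2): skip the per-VMA flush and merge the range. */
	if (tlb->fullmm || IS_ENABLED(CONFIG_MMU_GATHER_MERGE_VMAS))
		return;

	/* Form (3): flush the gathered range at the VMA boundary. */
	tlb_flush_mmu_tlbonly(tlb);
}

An architecture that previously provided empty stubs (form 1) now simply
selects both knobs in its Kconfig and drops its tlb_{start,end}_vma()
macros, as the per-arch hunks below do.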
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Will Deacon <will@kernel.org>
Cc: David Miller <davem@davemloft.net>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent 23a67619
@@ -438,6 +438,13 @@ config MMU_GATHER_PAGE_SIZE
 config MMU_GATHER_NO_RANGE
 	bool
+	select MMU_GATHER_MERGE_VMAS
+
+config MMU_GATHER_NO_FLUSH_CACHE
+	bool
+
+config MMU_GATHER_MERGE_VMAS
+	bool
 
 config MMU_GATHER_NO_GATHER
 	bool
...
@@ -4,19 +4,6 @@
 #define __ASM_CSKY_TLB_H
 
 #include <asm/cacheflush.h>
 
-#define tlb_start_vma(tlb, vma) \
-	do { \
-		if (!(tlb)->fullmm) \
-			flush_cache_range(vma, (vma)->vm_start, (vma)->vm_end); \
-	} while (0)
-
-#define tlb_end_vma(tlb, vma) \
-	do { \
-		if (!(tlb)->fullmm) \
-			flush_tlb_range(vma, (vma)->vm_start, (vma)->vm_end); \
-	} while (0)
-
 #define tlb_flush(tlb) flush_tlb_mm((tlb)->mm)
 
 #include <asm-generic/tlb.h>
...
@@ -108,6 +108,7 @@ config LOONGARCH
 	select TRACE_IRQFLAGS_SUPPORT
 	select USE_PERCPU_NUMA_NODE_ID
 	select ZONE_DMA32
+	select MMU_GATHER_MERGE_VMAS if MMU
 
 config 32BIT
 	bool
...
@@ -137,16 +137,6 @@ static inline void invtlb_all(u32 op, u32 info, u64 addr)
 	);
 }
 
-/*
- * LoongArch doesn't need any special per-pte or per-vma handling, except
- * we need to flush cache for area to be unmapped.
- */
-#define tlb_start_vma(tlb, vma) \
-	do { \
-		if (!(tlb)->fullmm) \
-			flush_cache_range(vma, vma->vm_start, vma->vm_end); \
-	} while (0)
-#define tlb_end_vma(tlb, vma) do { } while (0)
 
 #define __tlb_remove_tlb_entry(tlb, ptep, address) do { } while (0)
 
 static void tlb_flush(struct mmu_gather *tlb);
...
@@ -256,6 +256,7 @@ config PPC
 	select IRQ_FORCED_THREADING
 	select MMU_GATHER_PAGE_SIZE
 	select MMU_GATHER_RCU_TABLE_FREE
+	select MMU_GATHER_MERGE_VMAS
 	select MODULES_USE_ELF_RELA
 	select NEED_DMA_MAP_STATE if PPC64 || NOT_COHERENT_CACHE
 	select NEED_PER_CPU_EMBED_FIRST_CHUNK if PPC64
...
@@ -19,8 +19,6 @@
 #include <linux/pagemap.h>
 
-#define tlb_start_vma(tlb, vma) do { } while (0)
-#define tlb_end_vma(tlb, vma) do { } while (0)
 #define __tlb_remove_tlb_entry __tlb_remove_tlb_entry
 #define tlb_flush tlb_flush
...
@@ -204,6 +204,7 @@ config S390
 	select IOMMU_SUPPORT if PCI
 	select MMU_GATHER_NO_GATHER
 	select MMU_GATHER_RCU_TABLE_FREE
+	select MMU_GATHER_MERGE_VMAS
 	select MODULES_USE_ELF_RELA
 	select NEED_DMA_MAP_STATE if PCI
 	select NEED_SG_DMA_LENGTH if PCI
...
@@ -27,9 +27,6 @@ static inline void tlb_flush(struct mmu_gather *tlb);
 static inline bool __tlb_remove_page_size(struct mmu_gather *tlb,
 		struct page *page, int page_size);
 
-#define tlb_start_vma(tlb, vma) do { } while (0)
-#define tlb_end_vma(tlb, vma) do { } while (0)
 #define tlb_flush tlb_flush
 #define pte_free_tlb pte_free_tlb
 #define pmd_free_tlb pmd_free_tlb
...
@@ -67,6 +67,8 @@ config SPARC64
 	select HAVE_KRETPROBES
 	select HAVE_KPROBES
 	select MMU_GATHER_RCU_TABLE_FREE if SMP
+	select MMU_GATHER_MERGE_VMAS
+	select MMU_GATHER_NO_FLUSH_CACHE
 	select HAVE_ARCH_TRANSPARENT_HUGEPAGE
 	select HAVE_DYNAMIC_FTRACE
 	select HAVE_FTRACE_MCOUNT_RECORD
...
@@ -22,8 +22,6 @@ void smp_flush_tlb_mm(struct mm_struct *mm);
 void __flush_tlb_pending(unsigned long, unsigned long, unsigned long *);
 void flush_tlb_pending(void);
 
-#define tlb_start_vma(tlb, vma) do { } while (0)
-#define tlb_end_vma(tlb, vma) do { } while (0)
 #define tlb_flush(tlb) flush_tlb_pending()
 
 /*
...
@@ -245,6 +245,7 @@ config X86
 	select HAVE_PERF_REGS
 	select HAVE_PERF_USER_STACK_DUMP
 	select MMU_GATHER_RCU_TABLE_FREE if PARAVIRT
+	select MMU_GATHER_MERGE_VMAS
 	select HAVE_POSIX_CPU_TIMERS_TASK_WORK
 	select HAVE_REGS_AND_STACK_ACCESS_API
 	select HAVE_RELIABLE_STACKTRACE if UNWINDER_ORC || STACK_VALIDATION
...
@@ -2,9 +2,6 @@
 #ifndef _ASM_X86_TLB_H
 #define _ASM_X86_TLB_H
 
-#define tlb_start_vma(tlb, vma) do { } while (0)
-#define tlb_end_vma(tlb, vma) do { } while (0)
 #define tlb_flush tlb_flush
 static inline void tlb_flush(struct mmu_gather *tlb);
...
@@ -158,9 +158,24 @@
  * Useful if your architecture doesn't use IPIs for remote TLB invalidates
  * and therefore doesn't naturally serialize with software page-table walkers.
  *
+ * MMU_GATHER_NO_FLUSH_CACHE
+ *
+ *  Indicates the architecture has flush_cache_range() but it needs *NOT* be called
+ *  before unmapping a VMA.
+ *
+ *  NOTE: strictly speaking we shouldn't have this knob and instead rely on
+ *        flush_cache_range() being a NOP, except Sparc64 seems to be
+ *        different here.
+ *
+ * MMU_GATHER_MERGE_VMAS
+ *
+ *  Indicates the architecture wants to merge ranges over VMAs; typical when
+ *  multiple range invalidates are more expensive than a full invalidate.
+ *
  * MMU_GATHER_NO_RANGE
  *
- * Use this if your architecture lacks an efficient flush_tlb_range().
+ * Use this if your architecture lacks an efficient flush_tlb_range(). This
+ * option implies MMU_GATHER_MERGE_VMAS above.
  *
  * MMU_GATHER_NO_GATHER
  *
@@ -493,14 +508,16 @@ static inline void tlb_start_vma(struct mmu_gather *tlb, struct vm_area_struct *
 		return;
 
 	tlb_update_vma_flags(tlb, vma);
+#ifndef CONFIG_MMU_GATHER_NO_FLUSH_CACHE
 	flush_cache_range(vma, vma->vm_start, vma->vm_end);
+#endif
 }
 #endif
 
 #ifndef tlb_end_vma
 static inline void tlb_end_vma(struct mmu_gather *tlb, struct vm_area_struct *vma)
 {
-	if (tlb->fullmm)
+	if (tlb->fullmm || IS_ENABLED(CONFIG_MMU_GATHER_MERGE_VMAS))
 		return;
 
 	/*
...