Commit 635de956 authored by Linus Torvalds

Merge tag 'x86-mm-2021-04-29' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 tlb updates from Ingo Molnar:
 "The x86 MM changes in this cycle were:

   - Implement concurrent TLB flushes, which overlaps the local TLB
     flush with the remote TLB flush.

     In testing this improved sysbench performance measurably by a
     couple of percentage points, especially if TLB-heavy security
     mitigations are active.

   - Further micro-optimizations to improve the performance of TLB
     flushes"

* tag 'x86-mm-2021-04-29' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  smp: Micro-optimize smp_call_function_many_cond()
  smp: Inline on_each_cpu_cond() and on_each_cpu()
  x86/mm/tlb: Remove unnecessary uses of the inline keyword
  cpumask: Mark functions as pure
  x86/mm/tlb: Do not make is_lazy dirty for no reason
  x86/mm/tlb: Privatize cpu_tlbstate
  x86/mm/tlb: Flush remote and local TLBs concurrently
  x86/mm/tlb: Open-code on_each_cpu_cond_mask() for tlb_is_not_lazy()
  x86/mm/tlb: Unify flush_tlb_func_local() and flush_tlb_func_remote()
  smp: Run functions concurrently in smp_call_function_many_cond()
parents d0cc7eca a500fc91
...@@ -52,7 +52,7 @@ static inline int fill_gva_list(u64 gva_list[], int offset, ...@@ -52,7 +52,7 @@ static inline int fill_gva_list(u64 gva_list[], int offset,
return gva_n - offset; return gva_n - offset;
} }
static void hyperv_flush_tlb_others(const struct cpumask *cpus, static void hyperv_flush_tlb_multi(const struct cpumask *cpus,
const struct flush_tlb_info *info) const struct flush_tlb_info *info)
{ {
int cpu, vcpu, gva_n, max_gvas; int cpu, vcpu, gva_n, max_gvas;
...@@ -61,7 +61,7 @@ static void hyperv_flush_tlb_others(const struct cpumask *cpus, ...@@ -61,7 +61,7 @@ static void hyperv_flush_tlb_others(const struct cpumask *cpus,
u64 status; u64 status;
unsigned long flags; unsigned long flags;
trace_hyperv_mmu_flush_tlb_others(cpus, info); trace_hyperv_mmu_flush_tlb_multi(cpus, info);
if (!hv_hypercall_pg) if (!hv_hypercall_pg)
goto do_native; goto do_native;
...@@ -164,7 +164,7 @@ static void hyperv_flush_tlb_others(const struct cpumask *cpus, ...@@ -164,7 +164,7 @@ static void hyperv_flush_tlb_others(const struct cpumask *cpus,
if (hv_result_success(status)) if (hv_result_success(status))
return; return;
do_native: do_native:
native_flush_tlb_others(cpus, info); native_flush_tlb_multi(cpus, info);
} }
static u64 hyperv_flush_tlb_others_ex(const struct cpumask *cpus, static u64 hyperv_flush_tlb_others_ex(const struct cpumask *cpus,
...@@ -239,6 +239,6 @@ void hyperv_setup_mmu_ops(void) ...@@ -239,6 +239,6 @@ void hyperv_setup_mmu_ops(void)
return; return;
pr_info("Using hypercall for remote TLB flush\n"); pr_info("Using hypercall for remote TLB flush\n");
pv_ops.mmu.flush_tlb_others = hyperv_flush_tlb_others; pv_ops.mmu.flush_tlb_multi = hyperv_flush_tlb_multi;
pv_ops.mmu.tlb_remove_table = tlb_remove_table; pv_ops.mmu.tlb_remove_table = tlb_remove_table;
} }
...@@ -63,7 +63,7 @@ static inline void slow_down_io(void) ...@@ -63,7 +63,7 @@ static inline void slow_down_io(void)
void native_flush_tlb_local(void); void native_flush_tlb_local(void);
void native_flush_tlb_global(void); void native_flush_tlb_global(void);
void native_flush_tlb_one_user(unsigned long addr); void native_flush_tlb_one_user(unsigned long addr);
void native_flush_tlb_others(const struct cpumask *cpumask, void native_flush_tlb_multi(const struct cpumask *cpumask,
const struct flush_tlb_info *info); const struct flush_tlb_info *info);
static inline void __flush_tlb_local(void) static inline void __flush_tlb_local(void)
...@@ -81,10 +81,10 @@ static inline void __flush_tlb_one_user(unsigned long addr) ...@@ -81,10 +81,10 @@ static inline void __flush_tlb_one_user(unsigned long addr)
PVOP_VCALL1(mmu.flush_tlb_one_user, addr); PVOP_VCALL1(mmu.flush_tlb_one_user, addr);
} }
static inline void __flush_tlb_others(const struct cpumask *cpumask, static inline void __flush_tlb_multi(const struct cpumask *cpumask,
const struct flush_tlb_info *info) const struct flush_tlb_info *info)
{ {
PVOP_VCALL2(mmu.flush_tlb_others, cpumask, info); PVOP_VCALL2(mmu.flush_tlb_multi, cpumask, info);
} }
static inline void paravirt_tlb_remove_table(struct mmu_gather *tlb, void *table) static inline void paravirt_tlb_remove_table(struct mmu_gather *tlb, void *table)
......
...@@ -161,7 +161,7 @@ struct pv_mmu_ops { ...@@ -161,7 +161,7 @@ struct pv_mmu_ops {
void (*flush_tlb_user)(void); void (*flush_tlb_user)(void);
void (*flush_tlb_kernel)(void); void (*flush_tlb_kernel)(void);
void (*flush_tlb_one_user)(unsigned long addr); void (*flush_tlb_one_user)(unsigned long addr);
void (*flush_tlb_others)(const struct cpumask *cpus, void (*flush_tlb_multi)(const struct cpumask *cpus,
const struct flush_tlb_info *info); const struct flush_tlb_info *info);
void (*tlb_remove_table)(struct mmu_gather *tlb, void *table); void (*tlb_remove_table)(struct mmu_gather *tlb, void *table);
......
...@@ -89,23 +89,6 @@ struct tlb_state { ...@@ -89,23 +89,6 @@ struct tlb_state {
u16 loaded_mm_asid; u16 loaded_mm_asid;
u16 next_asid; u16 next_asid;
/*
* We can be in one of several states:
*
* - Actively using an mm. Our CPU's bit will be set in
* mm_cpumask(loaded_mm) and is_lazy == false;
*
* - Not using a real mm. loaded_mm == &init_mm. Our CPU's bit
* will not be set in mm_cpumask(&init_mm) and is_lazy == false.
*
* - Lazily using a real mm. loaded_mm != &init_mm, our bit
* is set in mm_cpumask(loaded_mm), but is_lazy == true.
* We're heuristically guessing that the CR3 load we
* skipped more than makes up for the overhead added by
* lazy mode.
*/
bool is_lazy;
/* /*
* If set we changed the page tables in such a way that we * If set we changed the page tables in such a way that we
* needed an invalidation of all contexts (aka. PCIDs / ASIDs). * needed an invalidation of all contexts (aka. PCIDs / ASIDs).
...@@ -151,7 +134,27 @@ struct tlb_state { ...@@ -151,7 +134,27 @@ struct tlb_state {
*/ */
struct tlb_context ctxs[TLB_NR_DYN_ASIDS]; struct tlb_context ctxs[TLB_NR_DYN_ASIDS];
}; };
DECLARE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate); DECLARE_PER_CPU_ALIGNED(struct tlb_state, cpu_tlbstate);
struct tlb_state_shared {
/*
* We can be in one of several states:
*
* - Actively using an mm. Our CPU's bit will be set in
* mm_cpumask(loaded_mm) and is_lazy == false;
*
* - Not using a real mm. loaded_mm == &init_mm. Our CPU's bit
* will not be set in mm_cpumask(&init_mm) and is_lazy == false.
*
* - Lazily using a real mm. loaded_mm != &init_mm, our bit
* is set in mm_cpumask(loaded_mm), but is_lazy == true.
* We're heuristically guessing that the CR3 load we
* skipped more than makes up for the overhead added by
* lazy mode.
*/
bool is_lazy;
};
DECLARE_PER_CPU_SHARED_ALIGNED(struct tlb_state_shared, cpu_tlbstate_shared);
bool nmi_uaccess_okay(void); bool nmi_uaccess_okay(void);
#define nmi_uaccess_okay nmi_uaccess_okay #define nmi_uaccess_okay nmi_uaccess_okay
...@@ -175,7 +178,7 @@ extern void initialize_tlbstate_and_flush(void); ...@@ -175,7 +178,7 @@ extern void initialize_tlbstate_and_flush(void);
* - flush_tlb_page(vma, vmaddr) flushes one page * - flush_tlb_page(vma, vmaddr) flushes one page
* - flush_tlb_range(vma, start, end) flushes a range of pages * - flush_tlb_range(vma, start, end) flushes a range of pages
* - flush_tlb_kernel_range(start, end) flushes a range of kernel pages * - flush_tlb_kernel_range(start, end) flushes a range of kernel pages
* - flush_tlb_others(cpumask, info) flushes TLBs on other cpus * - flush_tlb_multi(cpumask, info) flushes TLBs on multiple cpus
* *
* ..but the i386 has somewhat limited tlb flushing capabilities, * ..but the i386 has somewhat limited tlb flushing capabilities,
* and page-granular flushes are available only on i486 and up. * and page-granular flushes are available only on i486 and up.
...@@ -201,14 +204,15 @@ struct flush_tlb_info { ...@@ -201,14 +204,15 @@ struct flush_tlb_info {
unsigned long start; unsigned long start;
unsigned long end; unsigned long end;
u64 new_tlb_gen; u64 new_tlb_gen;
unsigned int stride_shift; unsigned int initiating_cpu;
bool freed_tables; u8 stride_shift;
u8 freed_tables;
}; };
void flush_tlb_local(void); void flush_tlb_local(void);
void flush_tlb_one_user(unsigned long addr); void flush_tlb_one_user(unsigned long addr);
void flush_tlb_one_kernel(unsigned long addr); void flush_tlb_one_kernel(unsigned long addr);
void flush_tlb_others(const struct cpumask *cpumask, void flush_tlb_multi(const struct cpumask *cpumask,
const struct flush_tlb_info *info); const struct flush_tlb_info *info);
#ifdef CONFIG_PARAVIRT #ifdef CONFIG_PARAVIRT
......
...@@ -8,7 +8,7 @@ ...@@ -8,7 +8,7 @@
#if IS_ENABLED(CONFIG_HYPERV) #if IS_ENABLED(CONFIG_HYPERV)
TRACE_EVENT(hyperv_mmu_flush_tlb_others, TRACE_EVENT(hyperv_mmu_flush_tlb_multi,
TP_PROTO(const struct cpumask *cpus, TP_PROTO(const struct cpumask *cpus,
const struct flush_tlb_info *info), const struct flush_tlb_info *info),
TP_ARGS(cpus, info), TP_ARGS(cpus, info),
......
...@@ -706,7 +706,7 @@ static inline temp_mm_state_t use_temporary_mm(struct mm_struct *mm) ...@@ -706,7 +706,7 @@ static inline temp_mm_state_t use_temporary_mm(struct mm_struct *mm)
* with a stale address space WITHOUT being in lazy mode after * with a stale address space WITHOUT being in lazy mode after
* restoring the previous mm. * restoring the previous mm.
*/ */
if (this_cpu_read(cpu_tlbstate.is_lazy)) if (this_cpu_read(cpu_tlbstate_shared.is_lazy))
leave_mm(smp_processor_id()); leave_mm(smp_processor_id());
temp_state.mm = this_cpu_read(cpu_tlbstate.loaded_mm); temp_state.mm = this_cpu_read(cpu_tlbstate.loaded_mm);
......
...@@ -613,7 +613,7 @@ static int kvm_cpu_down_prepare(unsigned int cpu) ...@@ -613,7 +613,7 @@ static int kvm_cpu_down_prepare(unsigned int cpu)
} }
#endif #endif
static void kvm_flush_tlb_others(const struct cpumask *cpumask, static void kvm_flush_tlb_multi(const struct cpumask *cpumask,
const struct flush_tlb_info *info) const struct flush_tlb_info *info)
{ {
u8 state; u8 state;
...@@ -627,6 +627,11 @@ static void kvm_flush_tlb_others(const struct cpumask *cpumask, ...@@ -627,6 +627,11 @@ static void kvm_flush_tlb_others(const struct cpumask *cpumask,
* queue flush_on_enter for pre-empted vCPUs * queue flush_on_enter for pre-empted vCPUs
*/ */
for_each_cpu(cpu, flushmask) { for_each_cpu(cpu, flushmask) {
/*
* The local vCPU is never preempted, so we do not explicitly
* skip check for local vCPU - it will never be cleared from
* flushmask.
*/
src = &per_cpu(steal_time, cpu); src = &per_cpu(steal_time, cpu);
state = READ_ONCE(src->preempted); state = READ_ONCE(src->preempted);
if ((state & KVM_VCPU_PREEMPTED)) { if ((state & KVM_VCPU_PREEMPTED)) {
...@@ -636,7 +641,7 @@ static void kvm_flush_tlb_others(const struct cpumask *cpumask, ...@@ -636,7 +641,7 @@ static void kvm_flush_tlb_others(const struct cpumask *cpumask,
} }
} }
native_flush_tlb_others(flushmask, info); native_flush_tlb_multi(flushmask, info);
} }
static void __init kvm_guest_init(void) static void __init kvm_guest_init(void)
...@@ -654,7 +659,7 @@ static void __init kvm_guest_init(void) ...@@ -654,7 +659,7 @@ static void __init kvm_guest_init(void)
} }
if (pv_tlb_flush_supported()) { if (pv_tlb_flush_supported()) {
pv_ops.mmu.flush_tlb_others = kvm_flush_tlb_others; pv_ops.mmu.flush_tlb_multi = kvm_flush_tlb_multi;
pv_ops.mmu.tlb_remove_table = tlb_remove_table; pv_ops.mmu.tlb_remove_table = tlb_remove_table;
pr_info("KVM setup pv remote TLB flush\n"); pr_info("KVM setup pv remote TLB flush\n");
} }
......
...@@ -291,7 +291,7 @@ struct paravirt_patch_template pv_ops = { ...@@ -291,7 +291,7 @@ struct paravirt_patch_template pv_ops = {
.mmu.flush_tlb_user = native_flush_tlb_local, .mmu.flush_tlb_user = native_flush_tlb_local,
.mmu.flush_tlb_kernel = native_flush_tlb_global, .mmu.flush_tlb_kernel = native_flush_tlb_global,
.mmu.flush_tlb_one_user = native_flush_tlb_one_user, .mmu.flush_tlb_one_user = native_flush_tlb_one_user,
.mmu.flush_tlb_others = native_flush_tlb_others, .mmu.flush_tlb_multi = native_flush_tlb_multi,
.mmu.tlb_remove_table = .mmu.tlb_remove_table =
(void (*)(struct mmu_gather *, void *))tlb_remove_page, (void (*)(struct mmu_gather *, void *))tlb_remove_page,
......
...@@ -1017,7 +1017,7 @@ void __init zone_sizes_init(void) ...@@ -1017,7 +1017,7 @@ void __init zone_sizes_init(void)
free_area_init(max_zone_pfns); free_area_init(max_zone_pfns);
} }
__visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate) = { __visible DEFINE_PER_CPU_ALIGNED(struct tlb_state, cpu_tlbstate) = {
.loaded_mm = &init_mm, .loaded_mm = &init_mm,
.next_asid = 1, .next_asid = 1,
.cr4 = ~0UL, /* fail hard if we screw up cr4 shadow initialization */ .cr4 = ~0UL, /* fail hard if we screw up cr4 shadow initialization */
......
This diff is collapsed.
...@@ -1247,7 +1247,7 @@ static void xen_flush_tlb_one_user(unsigned long addr) ...@@ -1247,7 +1247,7 @@ static void xen_flush_tlb_one_user(unsigned long addr)
preempt_enable(); preempt_enable();
} }
static void xen_flush_tlb_others(const struct cpumask *cpus, static void xen_flush_tlb_multi(const struct cpumask *cpus,
const struct flush_tlb_info *info) const struct flush_tlb_info *info)
{ {
struct { struct {
...@@ -1258,7 +1258,7 @@ static void xen_flush_tlb_others(const struct cpumask *cpus, ...@@ -1258,7 +1258,7 @@ static void xen_flush_tlb_others(const struct cpumask *cpus,
const size_t mc_entry_size = sizeof(args->op) + const size_t mc_entry_size = sizeof(args->op) +
sizeof(args->mask[0]) * BITS_TO_LONGS(num_possible_cpus()); sizeof(args->mask[0]) * BITS_TO_LONGS(num_possible_cpus());
trace_xen_mmu_flush_tlb_others(cpus, info->mm, info->start, info->end); trace_xen_mmu_flush_tlb_multi(cpus, info->mm, info->start, info->end);
if (cpumask_empty(cpus)) if (cpumask_empty(cpus))
return; /* nothing to do */ return; /* nothing to do */
...@@ -1267,9 +1267,8 @@ static void xen_flush_tlb_others(const struct cpumask *cpus, ...@@ -1267,9 +1267,8 @@ static void xen_flush_tlb_others(const struct cpumask *cpus,
args = mcs.args; args = mcs.args;
args->op.arg2.vcpumask = to_cpumask(args->mask); args->op.arg2.vcpumask = to_cpumask(args->mask);
/* Remove us, and any offline CPUS. */ /* Remove any offline CPUs */
cpumask_and(to_cpumask(args->mask), cpus, cpu_online_mask); cpumask_and(to_cpumask(args->mask), cpus, cpu_online_mask);
cpumask_clear_cpu(smp_processor_id(), to_cpumask(args->mask));
args->op.cmd = MMUEXT_TLB_FLUSH_MULTI; args->op.cmd = MMUEXT_TLB_FLUSH_MULTI;
if (info->end != TLB_FLUSH_ALL && if (info->end != TLB_FLUSH_ALL &&
...@@ -2086,7 +2085,7 @@ static const struct pv_mmu_ops xen_mmu_ops __initconst = { ...@@ -2086,7 +2085,7 @@ static const struct pv_mmu_ops xen_mmu_ops __initconst = {
.flush_tlb_user = xen_flush_tlb, .flush_tlb_user = xen_flush_tlb,
.flush_tlb_kernel = xen_flush_tlb, .flush_tlb_kernel = xen_flush_tlb,
.flush_tlb_one_user = xen_flush_tlb_one_user, .flush_tlb_one_user = xen_flush_tlb_one_user,
.flush_tlb_others = xen_flush_tlb_others, .flush_tlb_multi = xen_flush_tlb_multi,
.tlb_remove_table = tlb_remove_table, .tlb_remove_table = tlb_remove_table,
.pgd_alloc = xen_pgd_alloc, .pgd_alloc = xen_pgd_alloc,
......
...@@ -206,7 +206,7 @@ static inline unsigned int cpumask_last(const struct cpumask *srcp) ...@@ -206,7 +206,7 @@ static inline unsigned int cpumask_last(const struct cpumask *srcp)
return find_last_bit(cpumask_bits(srcp), nr_cpumask_bits); return find_last_bit(cpumask_bits(srcp), nr_cpumask_bits);
} }
unsigned int cpumask_next(int n, const struct cpumask *srcp); unsigned int __pure cpumask_next(int n, const struct cpumask *srcp);
/** /**
* cpumask_next_zero - get the next unset cpu in a cpumask * cpumask_next_zero - get the next unset cpu in a cpumask
...@@ -223,8 +223,8 @@ static inline unsigned int cpumask_next_zero(int n, const struct cpumask *srcp) ...@@ -223,8 +223,8 @@ static inline unsigned int cpumask_next_zero(int n, const struct cpumask *srcp)
return find_next_zero_bit(cpumask_bits(srcp), nr_cpumask_bits, n+1); return find_next_zero_bit(cpumask_bits(srcp), nr_cpumask_bits, n+1);
} }
int cpumask_next_and(int n, const struct cpumask *, const struct cpumask *); int __pure cpumask_next_and(int n, const struct cpumask *, const struct cpumask *);
int cpumask_any_but(const struct cpumask *mask, unsigned int cpu); int __pure cpumask_any_but(const struct cpumask *mask, unsigned int cpu);
unsigned int cpumask_local_spread(unsigned int i, int node); unsigned int cpumask_local_spread(unsigned int i, int node);
int cpumask_any_and_distribute(const struct cpumask *src1p, int cpumask_any_and_distribute(const struct cpumask *src1p,
const struct cpumask *src2p); const struct cpumask *src2p);
......
...@@ -50,30 +50,52 @@ extern unsigned int total_cpus; ...@@ -50,30 +50,52 @@ extern unsigned int total_cpus;
int smp_call_function_single(int cpuid, smp_call_func_t func, void *info, int smp_call_function_single(int cpuid, smp_call_func_t func, void *info,
int wait); int wait);
void on_each_cpu_cond_mask(smp_cond_func_t cond_func, smp_call_func_t func,
void *info, bool wait, const struct cpumask *mask);
int smp_call_function_single_async(int cpu, call_single_data_t *csd);
/* /*
* Call a function on all processors * Call a function on all processors
*/ */
void on_each_cpu(smp_call_func_t func, void *info, int wait); static inline void on_each_cpu(smp_call_func_t func, void *info, int wait)
{
on_each_cpu_cond_mask(NULL, func, info, wait, cpu_online_mask);
}
/* /**
* Call a function on processors specified by mask, which might include * on_each_cpu_mask(): Run a function on processors specified by
* the local one. * cpumask, which may include the local processor.
* @mask: The set of cpus to run on (only runs on online subset).
* @func: The function to run. This must be fast and non-blocking.
* @info: An arbitrary pointer to pass to the function.
* @wait: If true, wait (atomically) until function has completed
* on other CPUs.
*
* If @wait is true, then returns once @func has returned.
*
* You must not call this function with disabled interrupts or from a
* hardware interrupt handler or from a bottom half handler. The
* exception is that it may be used during early boot while
* early_boot_irqs_disabled is set.
*/ */
void on_each_cpu_mask(const struct cpumask *mask, smp_call_func_t func, static inline void on_each_cpu_mask(const struct cpumask *mask,
void *info, bool wait); smp_call_func_t func, void *info, bool wait)
{
on_each_cpu_cond_mask(NULL, func, info, wait, mask);
}
/* /*
* Call a function on each processor for which the supplied function * Call a function on each processor for which the supplied function
* cond_func returns a positive value. This may include the local * cond_func returns a positive value. This may include the local
* processor. * processor. May be used during early boot while early_boot_irqs_disabled is
* set. Use local_irq_save/restore() instead of local_irq_disable/enable().
*/ */
void on_each_cpu_cond(smp_cond_func_t cond_func, smp_call_func_t func, static inline void on_each_cpu_cond(smp_cond_func_t cond_func,
void *info, bool wait); smp_call_func_t func, void *info, bool wait)
{
void on_each_cpu_cond_mask(smp_cond_func_t cond_func, smp_call_func_t func, on_each_cpu_cond_mask(cond_func, func, info, wait, cpu_online_mask);
void *info, bool wait, const struct cpumask *mask); }
int smp_call_function_single_async(int cpu, call_single_data_t *csd);
#ifdef CONFIG_SMP #ifdef CONFIG_SMP
......
...@@ -346,7 +346,7 @@ TRACE_EVENT(xen_mmu_flush_tlb_one_user, ...@@ -346,7 +346,7 @@ TRACE_EVENT(xen_mmu_flush_tlb_one_user,
TP_printk("addr %lx", __entry->addr) TP_printk("addr %lx", __entry->addr)
); );
TRACE_EVENT(xen_mmu_flush_tlb_others, TRACE_EVENT(xen_mmu_flush_tlb_multi,
TP_PROTO(const struct cpumask *cpus, struct mm_struct *mm, TP_PROTO(const struct cpumask *cpus, struct mm_struct *mm,
unsigned long addr, unsigned long end), unsigned long addr, unsigned long end),
TP_ARGS(cpus, mm, addr, end), TP_ARGS(cpus, mm, addr, end),
......
...@@ -850,12 +850,28 @@ int smp_call_function_any(const struct cpumask *mask, ...@@ -850,12 +850,28 @@ int smp_call_function_any(const struct cpumask *mask,
} }
EXPORT_SYMBOL_GPL(smp_call_function_any); EXPORT_SYMBOL_GPL(smp_call_function_any);
/*
* Flags to be used as scf_flags argument of smp_call_function_many_cond().
*
* %SCF_WAIT: Wait until function execution is completed
* %SCF_RUN_LOCAL: Run also locally if local cpu is set in cpumask
*/
#define SCF_WAIT (1U << 0)
#define SCF_RUN_LOCAL (1U << 1)
static void smp_call_function_many_cond(const struct cpumask *mask, static void smp_call_function_many_cond(const struct cpumask *mask,
smp_call_func_t func, void *info, smp_call_func_t func, void *info,
bool wait, smp_cond_func_t cond_func) unsigned int scf_flags,
smp_cond_func_t cond_func)
{ {
int cpu, last_cpu, this_cpu = smp_processor_id();
struct call_function_data *cfd; struct call_function_data *cfd;
int cpu, next_cpu, this_cpu = smp_processor_id(); bool wait = scf_flags & SCF_WAIT;
bool run_remote = false;
bool run_local = false;
int nr_cpus = 0;
lockdep_assert_preemption_disabled();
/* /*
* Can deadlock when called with interrupts disabled. * Can deadlock when called with interrupts disabled.
...@@ -863,8 +879,9 @@ static void smp_call_function_many_cond(const struct cpumask *mask, ...@@ -863,8 +879,9 @@ static void smp_call_function_many_cond(const struct cpumask *mask,
* send smp call function interrupt to this cpu and as such deadlocks * send smp call function interrupt to this cpu and as such deadlocks
* can't happen. * can't happen.
*/ */
WARN_ON_ONCE(cpu_online(this_cpu) && irqs_disabled() if (cpu_online(this_cpu) && !oops_in_progress &&
&& !oops_in_progress && !early_boot_irqs_disabled); !early_boot_irqs_disabled)
lockdep_assert_irqs_enabled();
/* /*
* When @wait we can deadlock when we interrupt between llist_add() and * When @wait we can deadlock when we interrupt between llist_add() and
...@@ -874,36 +891,22 @@ static void smp_call_function_many_cond(const struct cpumask *mask, ...@@ -874,36 +891,22 @@ static void smp_call_function_many_cond(const struct cpumask *mask,
*/ */
WARN_ON_ONCE(!in_task()); WARN_ON_ONCE(!in_task());
/* Try to fastpath. So, what's a CPU they want? Ignoring this one. */ /* Check if we need local execution. */
if ((scf_flags & SCF_RUN_LOCAL) && cpumask_test_cpu(this_cpu, mask))
run_local = true;
/* Check if we need remote execution, i.e., any CPU excluding this one. */
cpu = cpumask_first_and(mask, cpu_online_mask); cpu = cpumask_first_and(mask, cpu_online_mask);
if (cpu == this_cpu) if (cpu == this_cpu)
cpu = cpumask_next_and(cpu, mask, cpu_online_mask); cpu = cpumask_next_and(cpu, mask, cpu_online_mask);
if (cpu < nr_cpu_ids)
run_remote = true;
/* No online cpus? We're done. */ if (run_remote) {
if (cpu >= nr_cpu_ids)
return;
/* Do we have another CPU which isn't us? */
next_cpu = cpumask_next_and(cpu, mask, cpu_online_mask);
if (next_cpu == this_cpu)
next_cpu = cpumask_next_and(next_cpu, mask, cpu_online_mask);
/* Fastpath: do that cpu by itself. */
if (next_cpu >= nr_cpu_ids) {
if (!cond_func || cond_func(cpu, info))
smp_call_function_single(cpu, func, info, wait);
return;
}
cfd = this_cpu_ptr(&cfd_data); cfd = this_cpu_ptr(&cfd_data);
cpumask_and(cfd->cpumask, mask, cpu_online_mask); cpumask_and(cfd->cpumask, mask, cpu_online_mask);
__cpumask_clear_cpu(this_cpu, cfd->cpumask); __cpumask_clear_cpu(this_cpu, cfd->cpumask);
/* Some callers race with other cpus changing the passed mask */
if (unlikely(!cpumask_weight(cfd->cpumask)))
return;
cpumask_clear(cfd->cpumask_ipi); cpumask_clear(cfd->cpumask_ipi);
for_each_cpu(cpu, cfd->cpumask) { for_each_cpu(cpu, cfd->cpumask) {
struct cfd_percpu *pcpu = per_cpu_ptr(cfd->pcpu, cpu); struct cfd_percpu *pcpu = per_cpu_ptr(cfd->pcpu, cpu);
...@@ -924,20 +927,39 @@ static void smp_call_function_many_cond(const struct cpumask *mask, ...@@ -924,20 +927,39 @@ static void smp_call_function_many_cond(const struct cpumask *mask,
cfd_seq_store(pcpu->seq_queue, this_cpu, cpu, CFD_SEQ_QUEUE); cfd_seq_store(pcpu->seq_queue, this_cpu, cpu, CFD_SEQ_QUEUE);
if (llist_add(&csd->node.llist, &per_cpu(call_single_queue, cpu))) { if (llist_add(&csd->node.llist, &per_cpu(call_single_queue, cpu))) {
__cpumask_set_cpu(cpu, cfd->cpumask_ipi); __cpumask_set_cpu(cpu, cfd->cpumask_ipi);
nr_cpus++;
last_cpu = cpu;
cfd_seq_store(pcpu->seq_ipi, this_cpu, cpu, CFD_SEQ_IPI); cfd_seq_store(pcpu->seq_ipi, this_cpu, cpu, CFD_SEQ_IPI);
} else { } else {
cfd_seq_store(pcpu->seq_noipi, this_cpu, cpu, CFD_SEQ_NOIPI); cfd_seq_store(pcpu->seq_noipi, this_cpu, cpu, CFD_SEQ_NOIPI);
} }
} }
/* Send a message to all CPUs in the map */ cfd_seq_store(this_cpu_ptr(&cfd_seq_local)->ping, this_cpu, CFD_SEQ_NOCPU, CFD_SEQ_PING);
cfd_seq_store(this_cpu_ptr(&cfd_seq_local)->ping, this_cpu,
CFD_SEQ_NOCPU, CFD_SEQ_PING); /*
* Choose the most efficient way to send an IPI. Note that the
* number of CPUs might be zero due to concurrent changes to the
* provided mask.
*/
if (nr_cpus == 1)
send_call_function_single_ipi(last_cpu);
else if (likely(nr_cpus > 1))
arch_send_call_function_ipi_mask(cfd->cpumask_ipi); arch_send_call_function_ipi_mask(cfd->cpumask_ipi);
cfd_seq_store(this_cpu_ptr(&cfd_seq_local)->pinged, this_cpu,
CFD_SEQ_NOCPU, CFD_SEQ_PINGED);
if (wait) { cfd_seq_store(this_cpu_ptr(&cfd_seq_local)->pinged, this_cpu, CFD_SEQ_NOCPU, CFD_SEQ_PINGED);
}
if (run_local && (!cond_func || cond_func(this_cpu, info))) {
unsigned long flags;
local_irq_save(flags);
func(info);
local_irq_restore(flags);
}
if (run_remote && wait) {
for_each_cpu(cpu, cfd->cpumask) { for_each_cpu(cpu, cfd->cpumask) {
call_single_data_t *csd; call_single_data_t *csd;
...@@ -948,12 +970,14 @@ static void smp_call_function_many_cond(const struct cpumask *mask, ...@@ -948,12 +970,14 @@ static void smp_call_function_many_cond(const struct cpumask *mask,
} }
/** /**
* smp_call_function_many(): Run a function on a set of other CPUs. * smp_call_function_many(): Run a function on a set of CPUs.
* @mask: The set of cpus to run on (only runs on online subset). * @mask: The set of cpus to run on (only runs on online subset).
* @func: The function to run. This must be fast and non-blocking. * @func: The function to run. This must be fast and non-blocking.
* @info: An arbitrary pointer to pass to the function. * @info: An arbitrary pointer to pass to the function.
* @wait: If true, wait (atomically) until function has completed * @flags: Bitmask that controls the operation. If %SCF_WAIT is set, wait
* on other CPUs. * (atomically) until function has completed on other CPUs. If
* %SCF_RUN_LOCAL is set, the function will also be run locally
* if the local CPU is set in the @cpumask.
* *
* If @wait is true, then returns once @func has returned. * If @wait is true, then returns once @func has returned.
* *
...@@ -964,7 +988,7 @@ static void smp_call_function_many_cond(const struct cpumask *mask, ...@@ -964,7 +988,7 @@ static void smp_call_function_many_cond(const struct cpumask *mask,
void smp_call_function_many(const struct cpumask *mask, void smp_call_function_many(const struct cpumask *mask,
smp_call_func_t func, void *info, bool wait) smp_call_func_t func, void *info, bool wait)
{ {
smp_call_function_many_cond(mask, func, info, wait, NULL); smp_call_function_many_cond(mask, func, info, wait * SCF_WAIT, NULL);
} }
EXPORT_SYMBOL(smp_call_function_many); EXPORT_SYMBOL(smp_call_function_many);
...@@ -1075,56 +1099,6 @@ void __init smp_init(void) ...@@ -1075,56 +1099,6 @@ void __init smp_init(void)
smp_cpus_done(setup_max_cpus); smp_cpus_done(setup_max_cpus);
} }
/*
* Call a function on all processors. May be used during early boot while
* early_boot_irqs_disabled is set. Use local_irq_save/restore() instead
* of local_irq_disable/enable().
*/
void on_each_cpu(smp_call_func_t func, void *info, int wait)
{
unsigned long flags;
preempt_disable();
smp_call_function(func, info, wait);
local_irq_save(flags);
func(info);
local_irq_restore(flags);
preempt_enable();
}
EXPORT_SYMBOL(on_each_cpu);
/**
* on_each_cpu_mask(): Run a function on processors specified by
* cpumask, which may include the local processor.
* @mask: The set of cpus to run on (only runs on online subset).
* @func: The function to run. This must be fast and non-blocking.
* @info: An arbitrary pointer to pass to the function.
* @wait: If true, wait (atomically) until function has completed
* on other CPUs.
*
* If @wait is true, then returns once @func has returned.
*
* You must not call this function with disabled interrupts or from a
* hardware interrupt handler or from a bottom half handler. The
* exception is that it may be used during early boot while
* early_boot_irqs_disabled is set.
*/
void on_each_cpu_mask(const struct cpumask *mask, smp_call_func_t func,
void *info, bool wait)
{
int cpu = get_cpu();
smp_call_function_many(mask, func, info, wait);
if (cpumask_test_cpu(cpu, mask)) {
unsigned long flags;
local_irq_save(flags);
func(info);
local_irq_restore(flags);
}
put_cpu();
}
EXPORT_SYMBOL(on_each_cpu_mask);
/* /*
* on_each_cpu_cond(): Call a function on each processor for which * on_each_cpu_cond(): Call a function on each processor for which
* the supplied function cond_func returns true, optionally waiting * the supplied function cond_func returns true, optionally waiting
...@@ -1150,27 +1124,17 @@ EXPORT_SYMBOL(on_each_cpu_mask); ...@@ -1150,27 +1124,17 @@ EXPORT_SYMBOL(on_each_cpu_mask);
void on_each_cpu_cond_mask(smp_cond_func_t cond_func, smp_call_func_t func, void on_each_cpu_cond_mask(smp_cond_func_t cond_func, smp_call_func_t func,
void *info, bool wait, const struct cpumask *mask) void *info, bool wait, const struct cpumask *mask)
{ {
int cpu = get_cpu(); unsigned int scf_flags = SCF_RUN_LOCAL;
smp_call_function_many_cond(mask, func, info, wait, cond_func); if (wait)
if (cpumask_test_cpu(cpu, mask) && cond_func(cpu, info)) { scf_flags |= SCF_WAIT;
unsigned long flags;
local_irq_save(flags); preempt_disable();
func(info); smp_call_function_many_cond(mask, func, info, scf_flags, cond_func);
local_irq_restore(flags); preempt_enable();
}
put_cpu();
} }
EXPORT_SYMBOL(on_each_cpu_cond_mask); EXPORT_SYMBOL(on_each_cpu_cond_mask);
void on_each_cpu_cond(smp_cond_func_t cond_func, smp_call_func_t func,
void *info, bool wait)
{
on_each_cpu_cond_mask(cond_func, func, info, wait, cpu_online_mask);
}
EXPORT_SYMBOL(on_each_cpu_cond);
static void do_nothing(void *unused) static void do_nothing(void *unused)
{ {
} }
......
...@@ -36,35 +36,6 @@ int smp_call_function_single_async(int cpu, call_single_data_t *csd) ...@@ -36,35 +36,6 @@ int smp_call_function_single_async(int cpu, call_single_data_t *csd)
} }
EXPORT_SYMBOL(smp_call_function_single_async); EXPORT_SYMBOL(smp_call_function_single_async);
void on_each_cpu(smp_call_func_t func, void *info, int wait)
{
unsigned long flags;
local_irq_save(flags);
func(info);
local_irq_restore(flags);
}
EXPORT_SYMBOL(on_each_cpu);
/*
* Note we still need to test the mask even for UP
* because we actually can get an empty mask from
* code that on SMP might call us without the local
* CPU in the mask.
*/
void on_each_cpu_mask(const struct cpumask *mask,
smp_call_func_t func, void *info, bool wait)
{
unsigned long flags;
if (cpumask_test_cpu(0, mask)) {
local_irq_save(flags);
func(info);
local_irq_restore(flags);
}
}
EXPORT_SYMBOL(on_each_cpu_mask);
/* /*
* Preemption is disabled here to make sure the cond_func is called under the * Preemption is disabled here to make sure the cond_func is called under the
* same conditions in UP and SMP. * same conditions in UP and SMP.
...@@ -75,7 +46,7 @@ void on_each_cpu_cond_mask(smp_cond_func_t cond_func, smp_call_func_t func, ...@@ -75,7 +46,7 @@ void on_each_cpu_cond_mask(smp_cond_func_t cond_func, smp_call_func_t func,
unsigned long flags; unsigned long flags;
preempt_disable(); preempt_disable();
if (cond_func(0, info)) { if ((!cond_func || cond_func(0, info)) && cpumask_test_cpu(0, mask)) {
local_irq_save(flags); local_irq_save(flags);
func(info); func(info);
local_irq_restore(flags); local_irq_restore(flags);
...@@ -84,13 +55,6 @@ void on_each_cpu_cond_mask(smp_cond_func_t cond_func, smp_call_func_t func, ...@@ -84,13 +55,6 @@ void on_each_cpu_cond_mask(smp_cond_func_t cond_func, smp_call_func_t func,
} }
EXPORT_SYMBOL(on_each_cpu_cond_mask); EXPORT_SYMBOL(on_each_cpu_cond_mask);
void on_each_cpu_cond(smp_cond_func_t cond_func, smp_call_func_t func,
void *info, bool wait)
{
on_each_cpu_cond_mask(cond_func, func, info, wait, NULL);
}
EXPORT_SYMBOL(on_each_cpu_cond);
int smp_call_on_cpu(unsigned int cpu, int (*func)(void *), void *par, bool phys) int smp_call_on_cpu(unsigned int cpu, int (*func)(void *), void *par, bool phys)
{ {
int ret; int ret;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment