Commit 75013c6c authored by Linus Torvalds

Merge tag 'perf_urgent_for_v5.12-rc3' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull perf fixes from Borislav Petkov:

 - Make sure PMU internal buffers are flushed for per-CPU events too and
   properly handle PID/TID for large PEBS.

 - Handle the case properly when there's no PMU and therefore return an
   empty list of perf MSRs for VMX to switch instead of reading random
   garbage from the stack.

* tag 'perf_urgent_for_v5.12-rc3' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/perf: Use RET0 as default for guest_get_msrs to handle "no PMU" case
  perf/x86/intel: Set PERF_ATTACH_SCHED_CB for large PEBS and LBR
  perf/core: Flush PMU internal buffers for per-CPU events
parents 836d7f05 c8e2fe13
...@@ -81,7 +81,11 @@ DEFINE_STATIC_CALL_NULL(x86_pmu_swap_task_ctx, *x86_pmu.swap_task_ctx); ...@@ -81,7 +81,11 @@ DEFINE_STATIC_CALL_NULL(x86_pmu_swap_task_ctx, *x86_pmu.swap_task_ctx);
DEFINE_STATIC_CALL_NULL(x86_pmu_drain_pebs, *x86_pmu.drain_pebs); DEFINE_STATIC_CALL_NULL(x86_pmu_drain_pebs, *x86_pmu.drain_pebs);
DEFINE_STATIC_CALL_NULL(x86_pmu_pebs_aliases, *x86_pmu.pebs_aliases); DEFINE_STATIC_CALL_NULL(x86_pmu_pebs_aliases, *x86_pmu.pebs_aliases);
DEFINE_STATIC_CALL_NULL(x86_pmu_guest_get_msrs, *x86_pmu.guest_get_msrs); /*
* This one is magic, it will get called even when PMU init fails (because
* there is no PMU), in which case it should simply return NULL.
*/
DEFINE_STATIC_CALL_RET0(x86_pmu_guest_get_msrs, *x86_pmu.guest_get_msrs);
u64 __read_mostly hw_cache_event_ids u64 __read_mostly hw_cache_event_ids
[PERF_COUNT_HW_CACHE_MAX] [PERF_COUNT_HW_CACHE_MAX]
...@@ -1944,13 +1948,6 @@ static void _x86_pmu_read(struct perf_event *event) ...@@ -1944,13 +1948,6 @@ static void _x86_pmu_read(struct perf_event *event)
x86_perf_event_update(event); x86_perf_event_update(event);
} }
/*
 * No-op guest_get_msrs implementation, installed when x86_pmu provides no
 * guest_get_msrs callback of its own.
 *
 * @nr: out parameter, set to 0 (no MSRs to switch).
 *
 * Returns NULL, i.e. there is no MSR array.
 */
static inline struct perf_guest_switch_msr *
perf_guest_get_msrs_nop(int *nr)
{
	struct perf_guest_switch_msr *no_msrs = NULL;

	*nr = 0;

	return no_msrs;
}
static int __init init_hw_perf_events(void) static int __init init_hw_perf_events(void)
{ {
struct x86_pmu_quirk *quirk; struct x86_pmu_quirk *quirk;
...@@ -2025,7 +2022,7 @@ static int __init init_hw_perf_events(void) ...@@ -2025,7 +2022,7 @@ static int __init init_hw_perf_events(void)
x86_pmu.read = _x86_pmu_read; x86_pmu.read = _x86_pmu_read;
if (!x86_pmu.guest_get_msrs) if (!x86_pmu.guest_get_msrs)
x86_pmu.guest_get_msrs = perf_guest_get_msrs_nop; x86_pmu.guest_get_msrs = (void *)&__static_call_return0;
x86_pmu_static_call_update(); x86_pmu_static_call_update();
......
...@@ -3662,8 +3662,10 @@ static int intel_pmu_hw_config(struct perf_event *event) ...@@ -3662,8 +3662,10 @@ static int intel_pmu_hw_config(struct perf_event *event)
if (!(event->attr.freq || (event->attr.wakeup_events && !event->attr.watermark))) { if (!(event->attr.freq || (event->attr.wakeup_events && !event->attr.watermark))) {
event->hw.flags |= PERF_X86_EVENT_AUTO_RELOAD; event->hw.flags |= PERF_X86_EVENT_AUTO_RELOAD;
if (!(event->attr.sample_type & if (!(event->attr.sample_type &
~intel_pmu_large_pebs_flags(event))) ~intel_pmu_large_pebs_flags(event))) {
event->hw.flags |= PERF_X86_EVENT_LARGE_PEBS; event->hw.flags |= PERF_X86_EVENT_LARGE_PEBS;
event->attach_state |= PERF_ATTACH_SCHED_CB;
}
} }
if (x86_pmu.pebs_aliases) if (x86_pmu.pebs_aliases)
x86_pmu.pebs_aliases(event); x86_pmu.pebs_aliases(event);
...@@ -3676,6 +3678,7 @@ static int intel_pmu_hw_config(struct perf_event *event) ...@@ -3676,6 +3678,7 @@ static int intel_pmu_hw_config(struct perf_event *event)
ret = intel_pmu_setup_lbr_filter(event); ret = intel_pmu_setup_lbr_filter(event);
if (ret) if (ret)
return ret; return ret;
event->attach_state |= PERF_ATTACH_SCHED_CB;
/* /*
* BTS is set up earlier in this path, so don't account twice * BTS is set up earlier in this path, so don't account twice
......
...@@ -6580,8 +6580,8 @@ static void atomic_switch_perf_msrs(struct vcpu_vmx *vmx) ...@@ -6580,8 +6580,8 @@ static void atomic_switch_perf_msrs(struct vcpu_vmx *vmx)
int i, nr_msrs; int i, nr_msrs;
struct perf_guest_switch_msr *msrs; struct perf_guest_switch_msr *msrs;
/* Note, nr_msrs may be garbage if perf_guest_get_msrs() returns NULL. */
msrs = perf_guest_get_msrs(&nr_msrs); msrs = perf_guest_get_msrs(&nr_msrs);
if (!msrs) if (!msrs)
return; return;
......
...@@ -606,6 +606,7 @@ struct swevent_hlist { ...@@ -606,6 +606,7 @@ struct swevent_hlist {
#define PERF_ATTACH_TASK 0x04 #define PERF_ATTACH_TASK 0x04
#define PERF_ATTACH_TASK_DATA 0x08 #define PERF_ATTACH_TASK_DATA 0x08
#define PERF_ATTACH_ITRACE 0x10 #define PERF_ATTACH_ITRACE 0x10
#define PERF_ATTACH_SCHED_CB 0x20
struct perf_cgroup; struct perf_cgroup;
struct perf_buffer; struct perf_buffer;
...@@ -872,6 +873,7 @@ struct perf_cpu_context { ...@@ -872,6 +873,7 @@ struct perf_cpu_context {
struct list_head cgrp_cpuctx_entry; struct list_head cgrp_cpuctx_entry;
#endif #endif
struct list_head sched_cb_entry;
int sched_cb_usage; int sched_cb_usage;
int online; int online;
......
...@@ -386,6 +386,7 @@ static DEFINE_MUTEX(perf_sched_mutex); ...@@ -386,6 +386,7 @@ static DEFINE_MUTEX(perf_sched_mutex);
static atomic_t perf_sched_count; static atomic_t perf_sched_count;
static DEFINE_PER_CPU(atomic_t, perf_cgroup_events); static DEFINE_PER_CPU(atomic_t, perf_cgroup_events);
static DEFINE_PER_CPU(int, perf_sched_cb_usages);
static DEFINE_PER_CPU(struct pmu_event_list, pmu_sb_events); static DEFINE_PER_CPU(struct pmu_event_list, pmu_sb_events);
static atomic_t nr_mmap_events __read_mostly; static atomic_t nr_mmap_events __read_mostly;
...@@ -3461,11 +3462,16 @@ static void perf_event_context_sched_out(struct task_struct *task, int ctxn, ...@@ -3461,11 +3462,16 @@ static void perf_event_context_sched_out(struct task_struct *task, int ctxn,
} }
} }
static DEFINE_PER_CPU(struct list_head, sched_cb_list);
void perf_sched_cb_dec(struct pmu *pmu) void perf_sched_cb_dec(struct pmu *pmu)
{ {
struct perf_cpu_context *cpuctx = this_cpu_ptr(pmu->pmu_cpu_context); struct perf_cpu_context *cpuctx = this_cpu_ptr(pmu->pmu_cpu_context);
--cpuctx->sched_cb_usage; this_cpu_dec(perf_sched_cb_usages);
if (!--cpuctx->sched_cb_usage)
list_del(&cpuctx->sched_cb_entry);
} }
...@@ -3473,7 +3479,10 @@ void perf_sched_cb_inc(struct pmu *pmu) ...@@ -3473,7 +3479,10 @@ void perf_sched_cb_inc(struct pmu *pmu)
{ {
struct perf_cpu_context *cpuctx = this_cpu_ptr(pmu->pmu_cpu_context); struct perf_cpu_context *cpuctx = this_cpu_ptr(pmu->pmu_cpu_context);
cpuctx->sched_cb_usage++; if (!cpuctx->sched_cb_usage++)
list_add(&cpuctx->sched_cb_entry, this_cpu_ptr(&sched_cb_list));
this_cpu_inc(perf_sched_cb_usages);
} }
/* /*
...@@ -3502,6 +3511,24 @@ static void __perf_pmu_sched_task(struct perf_cpu_context *cpuctx, bool sched_in ...@@ -3502,6 +3511,24 @@ static void __perf_pmu_sched_task(struct perf_cpu_context *cpuctx, bool sched_in
perf_ctx_unlock(cpuctx, cpuctx->task_ctx); perf_ctx_unlock(cpuctx, cpuctx->task_ctx);
} }
/*
 * Invoke __perf_pmu_sched_task() for the CPU contexts linked on this CPU's
 * sched_cb_list.
 *
 * @prev:     task being switched away from
 * @next:     task being switched to
 * @sched_in: passed through unchanged to __perf_pmu_sched_task()
 *
 * Does nothing when @prev and @next are the same task.
 */
static void perf_pmu_sched_task(struct task_struct *prev,
				struct task_struct *next,
				bool sched_in)
{
	struct perf_cpu_context *cur;

	if (prev != next) {
		list_for_each_entry(cur, this_cpu_ptr(&sched_cb_list), sched_cb_entry) {
			/*
			 * A context that has a task_ctx is skipped here; it
			 * will be handled in perf_event_context_sched_in/out.
			 */
			if (!cur->task_ctx)
				__perf_pmu_sched_task(cur, sched_in);
		}
	}
}
static void perf_event_switch(struct task_struct *task, static void perf_event_switch(struct task_struct *task,
struct task_struct *next_prev, bool sched_in); struct task_struct *next_prev, bool sched_in);
...@@ -3524,6 +3551,9 @@ void __perf_event_task_sched_out(struct task_struct *task, ...@@ -3524,6 +3551,9 @@ void __perf_event_task_sched_out(struct task_struct *task,
{ {
int ctxn; int ctxn;
if (__this_cpu_read(perf_sched_cb_usages))
perf_pmu_sched_task(task, next, false);
if (atomic_read(&nr_switch_events)) if (atomic_read(&nr_switch_events))
perf_event_switch(task, next, false); perf_event_switch(task, next, false);
...@@ -3832,6 +3862,9 @@ void __perf_event_task_sched_in(struct task_struct *prev, ...@@ -3832,6 +3862,9 @@ void __perf_event_task_sched_in(struct task_struct *prev,
if (atomic_read(&nr_switch_events)) if (atomic_read(&nr_switch_events))
perf_event_switch(task, prev, true); perf_event_switch(task, prev, true);
if (__this_cpu_read(perf_sched_cb_usages))
perf_pmu_sched_task(prev, task, true);
} }
static u64 perf_calculate_period(struct perf_event *event, u64 nsec, u64 count) static u64 perf_calculate_period(struct perf_event *event, u64 nsec, u64 count)
...@@ -4656,7 +4689,7 @@ static void unaccount_event(struct perf_event *event) ...@@ -4656,7 +4689,7 @@ static void unaccount_event(struct perf_event *event)
if (event->parent) if (event->parent)
return; return;
if (event->attach_state & PERF_ATTACH_TASK) if (event->attach_state & (PERF_ATTACH_TASK | PERF_ATTACH_SCHED_CB))
dec = true; dec = true;
if (event->attr.mmap || event->attr.mmap_data) if (event->attr.mmap || event->attr.mmap_data)
atomic_dec(&nr_mmap_events); atomic_dec(&nr_mmap_events);
...@@ -11175,7 +11208,7 @@ static void account_event(struct perf_event *event) ...@@ -11175,7 +11208,7 @@ static void account_event(struct perf_event *event)
if (event->parent) if (event->parent)
return; return;
if (event->attach_state & PERF_ATTACH_TASK) if (event->attach_state & (PERF_ATTACH_TASK | PERF_ATTACH_SCHED_CB))
inc = true; inc = true;
if (event->attr.mmap || event->attr.mmap_data) if (event->attr.mmap || event->attr.mmap_data)
atomic_inc(&nr_mmap_events); atomic_inc(&nr_mmap_events);
...@@ -12972,6 +13005,7 @@ static void __init perf_event_init_all_cpus(void) ...@@ -12972,6 +13005,7 @@ static void __init perf_event_init_all_cpus(void)
#ifdef CONFIG_CGROUP_PERF #ifdef CONFIG_CGROUP_PERF
INIT_LIST_HEAD(&per_cpu(cgrp_cpuctx_list, cpu)); INIT_LIST_HEAD(&per_cpu(cgrp_cpuctx_list, cpu));
#endif #endif
INIT_LIST_HEAD(&per_cpu(sched_cb_list, cpu));
} }
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment