Commit add76959 authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'perf-core-2022-12-12' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull perf events updates from Ingo Molnar:

 - Thoroughly rewrite the data structures that implement perf task
   context handling, with the goal of fixing various quirks and
   unfeatures both in already merged, and in upcoming proposed code.

   The old data structure is the per task and per cpu
   perf_event_contexts:

         task_struct::perf_events_ctxp[] <-> perf_event_context <-> perf_cpu_context
              ^                                 |    ^     |           ^
              `---------------------------------'    |     `--> pmu ---'
                                                     v           ^
                                                perf_event ------'

   In this new design this is replaced with a single task context and a
   single CPU context, plus intermediate data-structures:

         task_struct::perf_event_ctxp -> perf_event_context <- perf_cpu_context
              ^                           |   ^ ^
              `---------------------------'   | |
                                              | |    perf_cpu_pmu_context <--.
                                              | `----.    ^                  |
                                              |      |    |                  |
                                              |      v    v                  |
                                              | ,--> perf_event_pmu_context  |
                                              | |                            |
                                              | |                            |
                                              v v                            |
                                         perf_event ---> pmu ----------------'

   [ See commit bd275681 for more details. ]

   This rewrite was developed by Peter Zijlstra and Ravi Bangoria.

 - Optimize perf_tp_event()

 - Update the Intel uncore PMU driver, extending it with UPI topology
   discovery on various hardware models.

 - Misc fixes & cleanups

* tag 'perf-core-2022-12-12' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (25 commits)
  perf/x86/intel/uncore: Fix reference count leak in __uncore_imc_init_box()
  perf/x86/intel/uncore: Fix reference count leak in snr_uncore_mmio_map()
  perf/x86/intel/uncore: Fix reference count leak in hswep_has_limit_sbox()
  perf/x86/intel/uncore: Fix reference count leak in sad_cfg_iio_topology()
  perf/x86/intel/uncore: Make set_mapping() procedure void
  perf/x86/intel/uncore: Update sysfs-devices-mapping file
  perf/x86/intel/uncore: Enable UPI topology discovery for Sapphire Rapids
  perf/x86/intel/uncore: Enable UPI topology discovery for Icelake Server
  perf/x86/intel/uncore: Get UPI NodeID and GroupID
  perf/x86/intel/uncore: Enable UPI topology discovery for Skylake Server
  perf/x86/intel/uncore: Generalize get_topology() for SKX PMUs
  perf/x86/intel/uncore: Disable I/O stacks to PMU mapping on ICX-D
  perf/x86/intel/uncore: Clear attr_update properly
  perf/x86/intel/uncore: Introduce UPI topology type
  perf/x86/intel/uncore: Generalize IIO topology support
  perf/core: Don't allow grouping events from different hw pmus
  perf/amd/ibs: Make IBS a core pmu
  perf: Fix function pointer case
  perf/x86/amd: Remove the repeated declaration
  perf: Fix possible memleak in pmu_dev_alloc()
  ...
parents 617fe4fa 17b8d847
What: /sys/devices/uncore_iio_x/dieX
Date: February 2020
Contact: Roman Sudarikov <roman.sudarikov@linux.intel.com>
Contact: Alexander Antonov <alexander.antonov@linux.intel.com>
Description:
Each IIO stack (PCIe root port) has its own IIO PMON block, so
each dieX file (where X is die number) holds "Segment:Root Bus"
......@@ -32,3 +32,31 @@ Description:
IIO PMU 0 on die 1 belongs to PCI RP on bus 0x40, domain 0x0000
IIO PMU 0 on die 2 belongs to PCI RP on bus 0x80, domain 0x0000
IIO PMU 0 on die 3 belongs to PCI RP on bus 0xc0, domain 0x0000
What: /sys/devices/uncore_upi_x/dieX
Date: March 2022
Contact: Alexander Antonov <alexander.antonov@linux.intel.com>
Description:
Each /sys/devices/uncore_upi_X/dieY file holds "upi_Z,die_W"
value that means UPI link number X on die Y is connected to UPI
link Z on die W and this link between sockets can be monitored
by UPI PMON block.
For example, 4-die Sapphire Rapids platform has the following
UPI 0 topology::
# tail /sys/devices/uncore_upi_0/die*
==> /sys/devices/uncore_upi_0/die0 <==
upi_1,die_1
==> /sys/devices/uncore_upi_0/die1 <==
upi_0,die_3
==> /sys/devices/uncore_upi_0/die2 <==
upi_1,die_3
==> /sys/devices/uncore_upi_0/die3 <==
upi_0,die_1
Which means::
UPI link 0 on die 0 is connected to UPI link 1 on die 1
UPI link 0 on die 1 is connected to UPI link 0 on die 3
UPI link 0 on die 2 is connected to UPI link 1 on die 3
UPI link 0 on die 3 is connected to UPI link 0 on die 1
\ No newline at end of file
......@@ -806,10 +806,14 @@ static void armv8pmu_disable_event(struct perf_event *event)
static void armv8pmu_start(struct arm_pmu *cpu_pmu)
{
struct perf_event_context *task_ctx =
this_cpu_ptr(cpu_pmu->pmu.pmu_cpu_context)->task_ctx;
struct perf_event_context *ctx;
int nr_user = 0;
if (sysctl_perf_user_access && task_ctx && task_ctx->nr_user)
ctx = perf_cpu_task_ctx();
if (ctx)
nr_user = ctx->nr_user;
if (sysctl_perf_user_access && nr_user)
armv8pmu_enable_user_access(cpu_pmu);
else
armv8pmu_disable_user_access();
......@@ -1019,10 +1023,10 @@ static int armv8pmu_set_event_filter(struct hw_perf_event *event,
return 0;
}
static int armv8pmu_filter_match(struct perf_event *event)
static bool armv8pmu_filter(struct pmu *pmu, int cpu)
{
unsigned long evtype = event->hw.config_base & ARMV8_PMU_EVTYPE_EVENT;
return evtype != ARMV8_PMUV3_PERFCTR_CHAIN;
struct arm_pmu *armpmu = to_arm_pmu(pmu);
return !cpumask_test_cpu(smp_processor_id(), &armpmu->supported_cpus);
}
static void armv8pmu_reset(void *info)
......@@ -1254,7 +1258,7 @@ static int armv8_pmu_init(struct arm_pmu *cpu_pmu, char *name,
cpu_pmu->stop = armv8pmu_stop;
cpu_pmu->reset = armv8pmu_reset;
cpu_pmu->set_event_filter = armv8pmu_set_event_filter;
cpu_pmu->filter_match = armv8pmu_filter_match;
cpu_pmu->filter = armv8pmu_filter;
cpu_pmu->pmu.event_idx = armv8pmu_user_event_idx;
......
......@@ -132,7 +132,7 @@ static unsigned long ebb_switch_in(bool ebb, struct cpu_hw_events *cpuhw)
static inline void power_pmu_bhrb_enable(struct perf_event *event) {}
static inline void power_pmu_bhrb_disable(struct perf_event *event) {}
static void power_pmu_sched_task(struct perf_event_context *ctx, bool sched_in) {}
static void power_pmu_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched_in) {}
static inline void power_pmu_bhrb_read(struct perf_event *event, struct cpu_hw_events *cpuhw) {}
static void pmao_restore_workaround(bool ebb) { }
#endif /* CONFIG_PPC32 */
......@@ -424,7 +424,7 @@ static void power_pmu_bhrb_enable(struct perf_event *event)
cpuhw->bhrb_context = event->ctx;
}
cpuhw->bhrb_users++;
perf_sched_cb_inc(event->ctx->pmu);
perf_sched_cb_inc(event->pmu);
}
static void power_pmu_bhrb_disable(struct perf_event *event)
......@@ -436,7 +436,7 @@ static void power_pmu_bhrb_disable(struct perf_event *event)
WARN_ON_ONCE(!cpuhw->bhrb_users);
cpuhw->bhrb_users--;
perf_sched_cb_dec(event->ctx->pmu);
perf_sched_cb_dec(event->pmu);
if (!cpuhw->disabled && !cpuhw->bhrb_users) {
/* BHRB cannot be turned off when other
......@@ -451,7 +451,7 @@ static void power_pmu_bhrb_disable(struct perf_event *event)
/* Called from ctxsw to prevent one process's branch entries to
* mingle with the other process's entries during context switch.
*/
static void power_pmu_sched_task(struct perf_event_context *ctx, bool sched_in)
static void power_pmu_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched_in)
{
if (!ppmu->bhrb_nr)
return;
......
......@@ -377,7 +377,7 @@ static int paicrypt_push_sample(void)
/* Called on schedule-in and schedule-out. No access to event structure,
* but for sampling only event CRYPTO_ALL is allowed.
*/
static void paicrypt_sched_task(struct perf_event_context *ctx, bool sched_in)
static void paicrypt_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched_in)
{
/* We started with a clean page on event installation. So read out
* results on schedule_out and if page was dirty, clear values.
......
......@@ -466,7 +466,7 @@ static int paiext_push_sample(void)
/* Called on schedule-in and schedule-out. No access to event structure,
* but for sampling only event NNPA_ALL is allowed.
*/
static void paiext_sched_task(struct perf_event_context *ctx, bool sched_in)
static void paiext_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched_in)
{
/* We started with a clean page on event installation. So read out
* results on schedule_out and if page was dirty, clear values.
......
......@@ -384,7 +384,7 @@ static void amd_brs_poison_buffer(void)
* On ctxswin, sched_in = true, called after the PMU has started
* On ctxswout, sched_in = false, called before the PMU is stopped
*/
void amd_pmu_brs_sched_task(struct perf_event_context *ctx, bool sched_in)
void amd_pmu_brs_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched_in)
{
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
......
......@@ -631,7 +631,7 @@ static const struct attribute_group *op_attr_update[] = {
static struct perf_ibs perf_ibs_fetch = {
.pmu = {
.task_ctx_nr = perf_invalid_context,
.task_ctx_nr = perf_hw_context,
.event_init = perf_ibs_init,
.add = perf_ibs_add,
......@@ -655,7 +655,7 @@ static struct perf_ibs perf_ibs_fetch = {
static struct perf_ibs perf_ibs_op = {
.pmu = {
.task_ctx_nr = perf_invalid_context,
.task_ctx_nr = perf_hw_context,
.event_init = perf_ibs_init,
.add = perf_ibs_add,
......
......@@ -352,7 +352,7 @@ void amd_pmu_lbr_add(struct perf_event *event)
cpuc->br_sel = reg->reg;
}
perf_sched_cb_inc(event->ctx->pmu);
perf_sched_cb_inc(event->pmu);
if (!cpuc->lbr_users++ && !event->total_time_running)
amd_pmu_lbr_reset();
......@@ -370,10 +370,10 @@ void amd_pmu_lbr_del(struct perf_event *event)
cpuc->lbr_users--;
WARN_ON_ONCE(cpuc->lbr_users < 0);
perf_sched_cb_dec(event->ctx->pmu);
perf_sched_cb_dec(event->pmu);
}
void amd_pmu_lbr_sched_task(struct perf_event_context *ctx, bool sched_in)
void amd_pmu_lbr_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched_in)
{
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
......
......@@ -90,6 +90,8 @@ DEFINE_STATIC_CALL_NULL(x86_pmu_swap_task_ctx, *x86_pmu.swap_task_ctx);
DEFINE_STATIC_CALL_NULL(x86_pmu_drain_pebs, *x86_pmu.drain_pebs);
DEFINE_STATIC_CALL_NULL(x86_pmu_pebs_aliases, *x86_pmu.pebs_aliases);
DEFINE_STATIC_CALL_NULL(x86_pmu_filter, *x86_pmu.filter);
/*
* This one is magic, it will get called even when PMU init fails (because
* there is no PMU), in which case it should simply return NULL.
......@@ -2031,6 +2033,7 @@ static void x86_pmu_static_call_update(void)
static_call_update(x86_pmu_pebs_aliases, x86_pmu.pebs_aliases);
static_call_update(x86_pmu_guest_get_msrs, x86_pmu.guest_get_msrs);
static_call_update(x86_pmu_filter, x86_pmu.filter);
}
static void _x86_pmu_read(struct perf_event *event)
......@@ -2052,23 +2055,6 @@ void x86_pmu_show_pmu_cap(int num_counters, int num_counters_fixed,
pr_info("... event mask: %016Lx\n", intel_ctrl);
}
/*
* The generic code is not hybrid friendly. The hybrid_pmu->pmu
* of the first registered PMU is unconditionally assigned to
* each possible cpuctx->ctx.pmu.
* Update the correct hybrid PMU to the cpuctx->ctx.pmu.
*/
void x86_pmu_update_cpu_context(struct pmu *pmu, int cpu)
{
struct perf_cpu_context *cpuctx;
if (!pmu->pmu_cpu_context)
return;
cpuctx = per_cpu_ptr(pmu->pmu_cpu_context, cpu);
cpuctx->ctx.pmu = pmu;
}
static int __init init_hw_perf_events(void)
{
struct x86_pmu_quirk *quirk;
......@@ -2175,13 +2161,9 @@ static int __init init_hw_perf_events(void)
if (err)
goto out2;
} else {
u8 cpu_type = get_this_hybrid_cpu_type();
struct x86_hybrid_pmu *hybrid_pmu;
int i, j;
if (!cpu_type && x86_pmu.get_hybrid_cpu_type)
cpu_type = x86_pmu.get_hybrid_cpu_type();
for (i = 0; i < x86_pmu.num_hybrid_pmus; i++) {
hybrid_pmu = &x86_pmu.hybrid_pmu[i];
......@@ -2195,9 +2177,6 @@ static int __init init_hw_perf_events(void)
(hybrid_pmu->cpu_type == hybrid_big) ? PERF_TYPE_RAW : -1);
if (err)
break;
if (cpu_type == hybrid_pmu->cpu_type)
x86_pmu_update_cpu_context(&hybrid_pmu->pmu, raw_smp_processor_id());
}
if (i < x86_pmu.num_hybrid_pmus) {
......@@ -2646,15 +2625,15 @@ static const struct attribute_group *x86_pmu_attr_groups[] = {
NULL,
};
static void x86_pmu_sched_task(struct perf_event_context *ctx, bool sched_in)
static void x86_pmu_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched_in)
{
static_call_cond(x86_pmu_sched_task)(ctx, sched_in);
static_call_cond(x86_pmu_sched_task)(pmu_ctx, sched_in);
}
static void x86_pmu_swap_task_ctx(struct perf_event_context *prev,
struct perf_event_context *next)
static void x86_pmu_swap_task_ctx(struct perf_event_pmu_context *prev_epc,
struct perf_event_pmu_context *next_epc)
{
static_call_cond(x86_pmu_swap_task_ctx)(prev, next);
static_call_cond(x86_pmu_swap_task_ctx)(prev_epc, next_epc);
}
void perf_check_microcode(void)
......@@ -2689,12 +2668,13 @@ static int x86_pmu_aux_output_match(struct perf_event *event)
return 0;
}
static int x86_pmu_filter_match(struct perf_event *event)
static bool x86_pmu_filter(struct pmu *pmu, int cpu)
{
if (x86_pmu.filter_match)
return x86_pmu.filter_match(event);
bool ret = false;
return 1;
static_call_cond(x86_pmu_filter)(pmu, cpu, &ret);
return ret;
}
static struct pmu pmu = {
......@@ -2725,7 +2705,7 @@ static struct pmu pmu = {
.aux_output_match = x86_pmu_aux_output_match,
.filter_match = x86_pmu_filter_match,
.filter = x86_pmu_filter,
};
void arch_perf_update_userpage(struct perf_event *event,
......
......@@ -4536,8 +4536,6 @@ static bool init_hybrid_pmu(int cpu)
cpumask_set_cpu(cpu, &pmu->supported_cpus);
cpuc->pmu = &pmu->pmu;
x86_pmu_update_cpu_context(&pmu->pmu, cpu);
return true;
}
......@@ -4671,17 +4669,17 @@ static void intel_pmu_cpu_dead(int cpu)
cpumask_clear_cpu(cpu, &hybrid_pmu(cpuc->pmu)->supported_cpus);
}
static void intel_pmu_sched_task(struct perf_event_context *ctx,
static void intel_pmu_sched_task(struct perf_event_pmu_context *pmu_ctx,
bool sched_in)
{
intel_pmu_pebs_sched_task(ctx, sched_in);
intel_pmu_lbr_sched_task(ctx, sched_in);
intel_pmu_pebs_sched_task(pmu_ctx, sched_in);
intel_pmu_lbr_sched_task(pmu_ctx, sched_in);
}
static void intel_pmu_swap_task_ctx(struct perf_event_context *prev,
struct perf_event_context *next)
static void intel_pmu_swap_task_ctx(struct perf_event_pmu_context *prev_epc,
struct perf_event_pmu_context *next_epc)
{
intel_pmu_lbr_swap_task_ctx(prev, next);
intel_pmu_lbr_swap_task_ctx(prev_epc, next_epc);
}
static int intel_pmu_check_period(struct perf_event *event, u64 value)
......@@ -4705,12 +4703,11 @@ static int intel_pmu_aux_output_match(struct perf_event *event)
return is_intel_pt_event(event);
}
static int intel_pmu_filter_match(struct perf_event *event)
static void intel_pmu_filter(struct pmu *pmu, int cpu, bool *ret)
{
struct x86_hybrid_pmu *pmu = hybrid_pmu(event->pmu);
unsigned int cpu = smp_processor_id();
struct x86_hybrid_pmu *hpmu = hybrid_pmu(pmu);
return cpumask_test_cpu(cpu, &pmu->supported_cpus);
*ret = !cpumask_test_cpu(cpu, &hpmu->supported_cpus);
}
PMU_FORMAT_ATTR(offcore_rsp, "config1:0-63");
......@@ -6413,7 +6410,7 @@ __init int intel_pmu_init(void)
static_call_update(intel_pmu_set_topdown_event_period,
&adl_set_topdown_event_period);
x86_pmu.filter_match = intel_pmu_filter_match;
x86_pmu.filter = intel_pmu_filter;
x86_pmu.get_event_constraints = adl_get_event_constraints;
x86_pmu.hw_config = adl_hw_config;
x86_pmu.limit_period = spr_limit_period;
......
......@@ -1069,7 +1069,7 @@ static inline bool pebs_needs_sched_cb(struct cpu_hw_events *cpuc)
return cpuc->n_pebs && (cpuc->n_pebs == cpuc->n_large_pebs);
}
void intel_pmu_pebs_sched_task(struct perf_event_context *ctx, bool sched_in)
void intel_pmu_pebs_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched_in)
{
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
......@@ -1177,7 +1177,7 @@ static void
pebs_update_state(bool needed_cb, struct cpu_hw_events *cpuc,
struct perf_event *event, bool add)
{
struct pmu *pmu = event->ctx->pmu;
struct pmu *pmu = event->pmu;
/*
* Make sure we get updated with the first PEBS
* event. It will trigger also during removal, but
......
......@@ -515,21 +515,21 @@ static void __intel_pmu_lbr_save(void *ctx)
cpuc->last_log_id = ++task_context_opt(ctx)->log_id;
}
void intel_pmu_lbr_swap_task_ctx(struct perf_event_context *prev,
struct perf_event_context *next)
void intel_pmu_lbr_swap_task_ctx(struct perf_event_pmu_context *prev_epc,
struct perf_event_pmu_context *next_epc)
{
void *prev_ctx_data, *next_ctx_data;
swap(prev->task_ctx_data, next->task_ctx_data);
swap(prev_epc->task_ctx_data, next_epc->task_ctx_data);
/*
* Architecture specific synchronization makes sense in
* case both prev->task_ctx_data and next->task_ctx_data
* Architecture specific synchronization makes sense in case
* both prev_epc->task_ctx_data and next_epc->task_ctx_data
* pointers are allocated.
*/
prev_ctx_data = next->task_ctx_data;
next_ctx_data = prev->task_ctx_data;
prev_ctx_data = next_epc->task_ctx_data;
next_ctx_data = prev_epc->task_ctx_data;
if (!prev_ctx_data || !next_ctx_data)
return;
......@@ -538,7 +538,7 @@ void intel_pmu_lbr_swap_task_ctx(struct perf_event_context *prev,
task_context_opt(next_ctx_data)->lbr_callstack_users);
}
void intel_pmu_lbr_sched_task(struct perf_event_context *ctx, bool sched_in)
void intel_pmu_lbr_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched_in)
{
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
void *task_ctx;
......@@ -551,7 +551,7 @@ void intel_pmu_lbr_sched_task(struct perf_event_context *ctx, bool sched_in)
* the task was scheduled out, restore the stack. Otherwise flush
* the LBR stack.
*/
task_ctx = ctx ? ctx->task_ctx_data : NULL;
task_ctx = pmu_ctx ? pmu_ctx->task_ctx_data : NULL;
if (task_ctx) {
if (sched_in)
__intel_pmu_lbr_restore(task_ctx);
......@@ -587,8 +587,8 @@ void intel_pmu_lbr_add(struct perf_event *event)
cpuc->br_sel = event->hw.branch_reg.reg;
if (branch_user_callstack(cpuc->br_sel) && event->ctx->task_ctx_data)
task_context_opt(event->ctx->task_ctx_data)->lbr_callstack_users++;
if (branch_user_callstack(cpuc->br_sel) && event->pmu_ctx->task_ctx_data)
task_context_opt(event->pmu_ctx->task_ctx_data)->lbr_callstack_users++;
/*
* Request pmu::sched_task() callback, which will fire inside the
......@@ -611,7 +611,7 @@ void intel_pmu_lbr_add(struct perf_event *event)
*/
if (x86_pmu.intel_cap.pebs_baseline && event->attr.precise_ip > 0)
cpuc->lbr_pebs_users++;
perf_sched_cb_inc(event->ctx->pmu);
perf_sched_cb_inc(event->pmu);
if (!cpuc->lbr_users++ && !event->total_time_running)
intel_pmu_lbr_reset();
}
......@@ -664,8 +664,8 @@ void intel_pmu_lbr_del(struct perf_event *event)
return;
if (branch_user_callstack(cpuc->br_sel) &&
event->ctx->task_ctx_data)
task_context_opt(event->ctx->task_ctx_data)->lbr_callstack_users--;
event->pmu_ctx->task_ctx_data)
task_context_opt(event->pmu_ctx->task_ctx_data)->lbr_callstack_users--;
if (event->hw.flags & PERF_X86_EVENT_LBR_SELECT)
cpuc->lbr_select = 0;
......@@ -675,7 +675,7 @@ void intel_pmu_lbr_del(struct perf_event *event)
cpuc->lbr_users--;
WARN_ON_ONCE(cpuc->lbr_users < 0);
WARN_ON_ONCE(cpuc->lbr_pebs_users < 0);
perf_sched_cb_dec(event->ctx->pmu);
perf_sched_cb_dec(event->pmu);
}
static inline bool vlbr_exclude_host(void)
......
......@@ -2,6 +2,7 @@
#include <linux/slab.h>
#include <linux/pci.h>
#include <asm/apicdef.h>
#include <asm/intel-family.h>
#include <linux/io-64-nonatomic-lo-hi.h>
#include <linux/perf_event.h>
......@@ -88,12 +89,12 @@ struct intel_uncore_type {
* to identify which platform component each PMON block of that type is
* supposed to monitor.
*/
struct intel_uncore_topology *topology;
struct intel_uncore_topology **topology;
/*
* Optional callbacks for managing mapping of Uncore units to PMONs
*/
int (*get_topology)(struct intel_uncore_type *type);
int (*set_mapping)(struct intel_uncore_type *type);
void (*set_mapping)(struct intel_uncore_type *type);
void (*cleanup_mapping)(struct intel_uncore_type *type);
};
......@@ -178,11 +179,26 @@ struct freerunning_counters {
unsigned *box_offsets;
};
struct intel_uncore_topology {
u64 configuration;
struct uncore_iio_topology {
int pci_bus_no;
int segment;
};
struct uncore_upi_topology {
int die_to;
int pmu_idx_to;
int enabled;
};
struct intel_uncore_topology {
int pmu_idx;
union {
void *untyped;
struct uncore_iio_topology *iio;
struct uncore_upi_topology *upi;
};
};
struct pci2phy_map {
struct list_head list;
int segment;
......
......@@ -1338,6 +1338,7 @@ static void __uncore_imc_init_box(struct intel_uncore_box *box,
/* MCHBAR is disabled */
if (!(mch_bar & BIT(0))) {
pr_warn("perf uncore: MCHBAR is disabled. Failed to map IMC free-running counters.\n");
pci_dev_put(pdev);
return;
}
mch_bar &= ~BIT(0);
......@@ -1352,6 +1353,8 @@ static void __uncore_imc_init_box(struct intel_uncore_box *box,
box->io_addr = ioremap(addr, type->mmio_map_size);
if (!box->io_addr)
pr_warn("perf uncore: Failed to ioremap for %s.\n", type->name);
pci_dev_put(pdev);
}
static void tgl_uncore_imc_freerunning_init_box(struct intel_uncore_box *box)
......
This diff is collapsed.
......@@ -811,7 +811,7 @@ struct x86_pmu {
void (*cpu_dead)(int cpu);
void (*check_microcode)(void);
void (*sched_task)(struct perf_event_context *ctx,
void (*sched_task)(struct perf_event_pmu_context *pmu_ctx,
bool sched_in);
/*
......@@ -894,12 +894,12 @@ struct x86_pmu {
int num_topdown_events;
/*
* perf task context (i.e. struct perf_event_context::task_ctx_data)
* perf task context (i.e. struct perf_event_pmu_context::task_ctx_data)
* switch helper to bridge calls from perf/core to perf/x86.
* See struct pmu::swap_task_ctx() usage for examples;
*/
void (*swap_task_ctx)(struct perf_event_context *prev,
struct perf_event_context *next);
void (*swap_task_ctx)(struct perf_event_pmu_context *prev_epc,
struct perf_event_pmu_context *next_epc);
/*
* AMD bits
......@@ -925,7 +925,7 @@ struct x86_pmu {
int (*aux_output_match) (struct perf_event *event);
int (*filter_match)(struct perf_event *event);
void (*filter)(struct pmu *pmu, int cpu, bool *ret);
/*
* Hybrid support
*
......@@ -1180,8 +1180,6 @@ int x86_pmu_handle_irq(struct pt_regs *regs);
void x86_pmu_show_pmu_cap(int num_counters, int num_counters_fixed,
u64 intel_ctrl);
void x86_pmu_update_cpu_context(struct pmu *pmu, int cpu);
extern struct event_constraint emptyconstraint;
extern struct event_constraint unconstrained;
......@@ -1306,7 +1304,7 @@ void amd_pmu_lbr_reset(void);
void amd_pmu_lbr_read(void);
void amd_pmu_lbr_add(struct perf_event *event);
void amd_pmu_lbr_del(struct perf_event *event);
void amd_pmu_lbr_sched_task(struct perf_event_context *ctx, bool sched_in);
void amd_pmu_lbr_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched_in);
void amd_pmu_lbr_enable_all(void);
void amd_pmu_lbr_disable_all(void);
int amd_pmu_lbr_hw_config(struct perf_event *event);
......@@ -1322,7 +1320,6 @@ void amd_brs_enable_all(void);
void amd_brs_disable_all(void);
void amd_brs_drain(void);
void amd_brs_lopwr_init(void);
void amd_brs_disable_all(void);
int amd_brs_hw_config(struct perf_event *event);
void amd_brs_reset(void);
......@@ -1330,7 +1327,7 @@ static inline void amd_pmu_brs_add(struct perf_event *event)
{
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
perf_sched_cb_inc(event->ctx->pmu);
perf_sched_cb_inc(event->pmu);
cpuc->lbr_users++;
/*
* No need to reset BRS because it is reset
......@@ -1345,10 +1342,10 @@ static inline void amd_pmu_brs_del(struct perf_event *event)
cpuc->lbr_users--;
WARN_ON_ONCE(cpuc->lbr_users < 0);
perf_sched_cb_dec(event->ctx->pmu);
perf_sched_cb_dec(event->pmu);
}
void amd_pmu_brs_sched_task(struct perf_event_context *ctx, bool sched_in);
void amd_pmu_brs_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched_in);
#else
static inline int amd_brs_init(void)
{
......@@ -1373,7 +1370,7 @@ static inline void amd_pmu_brs_del(struct perf_event *event)
{
}
static inline void amd_pmu_brs_sched_task(struct perf_event_context *ctx, bool sched_in)
static inline void amd_pmu_brs_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched_in)
{
}
......@@ -1533,7 +1530,7 @@ void intel_pmu_pebs_enable_all(void);
void intel_pmu_pebs_disable_all(void);
void intel_pmu_pebs_sched_task(struct perf_event_context *ctx, bool sched_in);
void intel_pmu_pebs_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched_in);
void intel_pmu_auto_reload_read(struct perf_event *event);
......@@ -1541,10 +1538,10 @@ void intel_pmu_store_pebs_lbrs(struct lbr_entry *lbr);
void intel_ds_init(void);
void intel_pmu_lbr_swap_task_ctx(struct perf_event_context *prev,
struct perf_event_context *next);
void intel_pmu_lbr_swap_task_ctx(struct perf_event_pmu_context *prev_epc,
struct perf_event_pmu_context *next_epc);
void intel_pmu_lbr_sched_task(struct perf_event_context *ctx, bool sched_in);
void intel_pmu_lbr_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched_in);
u64 lbr_from_signext_quirk_wr(u64 val);
......
......@@ -547,15 +547,14 @@ static void armpmu_disable(struct pmu *pmu)
* microarchitecture, and aren't suitable for another. Thus, only match CPUs of
* the same microarchitecture.
*/
static int armpmu_filter_match(struct perf_event *event)
static bool armpmu_filter(struct pmu *pmu, int cpu)
{
struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
unsigned int cpu = smp_processor_id();
int ret;
struct arm_pmu *armpmu = to_arm_pmu(pmu);
bool ret;
ret = cpumask_test_cpu(cpu, &armpmu->supported_cpus);
if (ret && armpmu->filter_match)
return armpmu->filter_match(event);
if (ret && armpmu->filter)
return armpmu->filter(pmu, cpu);
return ret;
}
......@@ -882,14 +881,13 @@ struct arm_pmu *armpmu_alloc(void)
.start = armpmu_start,
.stop = armpmu_stop,
.read = armpmu_read,
.filter_match = armpmu_filter_match,
.filter = armpmu_filter,
.attr_groups = pmu->attr_groups,
/*
* This is a CPU PMU potentially in a heterogeneous
* configuration (e.g. big.LITTLE). This is not an uncore PMU,
* and we have taken ctx sharing into account (e.g. with our
* pmu::filter_match callback and pmu::event_init group
* validation).
* pmu::filter callback and pmu::event_init group validation).
*/
.capabilities = PERF_PMU_CAP_HETEROGENEOUS_CPUS | PERF_PMU_CAP_EXTENDED_REGS,
};
......
......@@ -100,7 +100,7 @@ struct arm_pmu {
void (*stop)(struct arm_pmu *);
void (*reset)(void *);
int (*map_event)(struct perf_event *event);
int (*filter_match)(struct perf_event *event);
bool (*filter)(struct pmu *pmu, int cpu);
int num_events;
bool secure_access; /* 32-bit ARM only */
#define ARMV8_PMUV3_MAX_COMMON_EVENTS 0x40
......
......@@ -266,6 +266,7 @@ struct hw_perf_event {
};
struct perf_event;
struct perf_event_pmu_context;
/*
* Common implementation detail of pmu::{start,commit,cancel}_txn
......@@ -308,7 +309,7 @@ struct pmu {
int capabilities;
int __percpu *pmu_disable_count;
struct perf_cpu_context __percpu *pmu_cpu_context;
struct perf_cpu_pmu_context __percpu *cpu_pmu_context;
atomic_t exclusive_cnt; /* < 0: cpu; > 0: tsk */
int task_ctx_nr;
int hrtimer_interval_ms;
......@@ -443,7 +444,7 @@ struct pmu {
/*
* context-switches callback
*/
void (*sched_task) (struct perf_event_context *ctx,
void (*sched_task) (struct perf_event_pmu_context *pmu_ctx,
bool sched_in);
/*
......@@ -457,8 +458,8 @@ struct pmu {
* implementation and Perf core context switch handling callbacks for usage
* examples.
*/
void (*swap_task_ctx) (struct perf_event_context *prev,
struct perf_event_context *next);
void (*swap_task_ctx) (struct perf_event_pmu_context *prev_epc,
struct perf_event_pmu_context *next_epc);
/* optional */
/*
......@@ -522,9 +523,10 @@ struct pmu {
/* optional */
/*
* Filter events for PMU-specific reasons.
* Skip programming this PMU on the given CPU. Typically needed for
* big.LITTLE things.
*/
int (*filter_match) (struct perf_event *event); /* optional */
bool (*filter) (struct pmu *pmu, int cpu); /* optional */
/*
* Check period value for PERF_EVENT_IOC_PERIOD ioctl.
......@@ -695,6 +697,11 @@ struct perf_event {
int group_caps;
struct perf_event *group_leader;
/*
* event->pmu will always point to pmu in which this event belongs.
* Whereas event->pmu_ctx->pmu may point to other pmu when group of
* different pmu events is created.
*/
struct pmu *pmu;
void *pmu_private;
......@@ -720,6 +727,12 @@ struct perf_event {
struct hw_perf_event hw;
struct perf_event_context *ctx;
/*
* event->pmu_ctx points to perf_event_pmu_context in which the event
* is added. This pmu_ctx can be of other pmu for sw event when that
* sw event is part of a group which also contains non-sw events.
*/
struct perf_event_pmu_context *pmu_ctx;
atomic_long_t refcount;
/*
......@@ -812,19 +825,69 @@ struct perf_event {
#endif /* CONFIG_PERF_EVENTS */
};
/*
* ,-----------------------[1:n]----------------------.
* V V
* perf_event_context <-[1:n]-> perf_event_pmu_context <--- perf_event
* ^ ^ | |
* `--------[1:n]---------' `-[n:1]-> pmu <-[1:n]-'
*
*
* struct perf_event_pmu_context lifetime is refcount based and RCU freed
* (similar to perf_event_context). Locking is as if it were a member of
* perf_event_context; specifically:
*
* modification, both: ctx->mutex && ctx->lock
* reading, either: ctx->mutex || ctx->lock
*
* There is one exception to this; namely put_pmu_ctx() isn't always called
* with ctx->mutex held; this means that as long as we can guarantee the epc
* has events the above rules hold.
*
* Specificially, sys_perf_event_open()'s group_leader case depends on
* ctx->mutex pinning the configuration. Since we hold a reference on
* group_leader (through the filedesc) it can't go away, therefore it's
* associated pmu_ctx must exist and cannot change due to ctx->mutex.
*/
struct perf_event_pmu_context {
struct pmu *pmu;
struct perf_event_context *ctx;
struct list_head pmu_ctx_entry;
struct list_head pinned_active;
struct list_head flexible_active;
/* Used to avoid freeing per-cpu perf_event_pmu_context */
unsigned int embedded : 1;
unsigned int nr_events;
atomic_t refcount; /* event <-> epc */
struct rcu_head rcu_head;
void *task_ctx_data; /* pmu specific data */
/*
* Set when one or more (plausibly active) event can't be scheduled
* due to pmu overcommit or pmu constraints, except tolerant to
* events not necessary to be active due to scheduling constraints,
* such as cgroups.
*/
int rotate_necessary;
};
struct perf_event_groups {
struct rb_root tree;
u64 index;
};
/**
* struct perf_event_context - event context structure
*
* Used as a container for task events and CPU events as well:
*/
struct perf_event_context {
struct pmu *pmu;
/*
* Protect the states of the events in the list,
* nr_active, and the list:
......@@ -837,27 +900,21 @@ struct perf_event_context {
*/
struct mutex mutex;
struct list_head active_ctx_list;
struct list_head pmu_ctx_list;
struct perf_event_groups pinned_groups;
struct perf_event_groups flexible_groups;
struct list_head event_list;
struct list_head pinned_active;
struct list_head flexible_active;
int nr_events;
int nr_active;
int nr_user;
int is_active;
int nr_task_data;
int nr_stat;
int nr_freq;
int rotate_disable;
/*
* Set when nr_events != nr_active, except tolerant to events not
* necessary to be active due to scheduling constraints, such as cgroups.
*/
int rotate_necessary;
refcount_t refcount;
refcount_t refcount; /* event <-> ctx */
struct task_struct *task;
/*
......@@ -878,7 +935,6 @@ struct perf_event_context {
#ifdef CONFIG_CGROUP_PERF
int nr_cgroups; /* cgroup evts */
#endif
void *task_ctx_data; /* pmu specific data */
struct rcu_head rcu_head;
/*
......@@ -896,12 +952,13 @@ struct perf_event_context {
*/
#define PERF_NR_CONTEXTS 4
/**
* struct perf_cpu_context - per cpu event context structure
*/
struct perf_cpu_context {
struct perf_event_context ctx;
struct perf_event_context *task_ctx;
struct perf_cpu_pmu_context {
struct perf_event_pmu_context epc;
struct perf_event_pmu_context *task_epc;
struct list_head sched_cb_entry;
int sched_cb_usage;
int active_oncpu;
int exclusive;
......@@ -909,16 +966,20 @@ struct perf_cpu_context {
struct hrtimer hrtimer;
ktime_t hrtimer_interval;
unsigned int hrtimer_active;
};
/**
* struct perf_event_cpu_context - per cpu event context structure
*/
struct perf_cpu_context {
struct perf_event_context ctx;
struct perf_event_context *task_ctx;
int online;
#ifdef CONFIG_CGROUP_PERF
struct perf_cgroup *cgrp;
struct list_head cgrp_cpuctx_entry;
#endif
struct list_head sched_cb_entry;
int sched_cb_usage;
int online;
/*
* Per-CPU storage for iterators used in visit_groups_merge. The default
* storage is of size 2 to hold the CPU and any CPU event iterators.
......@@ -982,6 +1043,8 @@ perf_cgroup_from_task(struct task_struct *task, struct perf_event_context *ctx)
#ifdef CONFIG_PERF_EVENTS
extern struct perf_event_context *perf_cpu_task_ctx(void);
extern void *perf_aux_output_begin(struct perf_output_handle *handle,
struct perf_event *event);
extern void perf_aux_output_end(struct perf_output_handle *handle,
......@@ -1187,7 +1250,7 @@ static inline int is_software_event(struct perf_event *event)
*/
static inline int in_software_context(struct perf_event *event)
{
return event->ctx->pmu->task_ctx_nr == perf_sw_context;
return event->pmu_ctx->pmu->task_ctx_nr == perf_sw_context;
}
static inline int is_exclusive_pmu(struct pmu *pmu)
......
......@@ -1243,7 +1243,7 @@ struct task_struct {
unsigned int futex_state;
#endif
#ifdef CONFIG_PERF_EVENTS
struct perf_event_context *perf_event_ctxp[perf_nr_task_contexts];
struct perf_event_context *perf_event_ctxp;
struct mutex perf_event_mutex;
struct list_head perf_event_list;
#endif
......
This diff is collapsed.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment