Commit 576a997c authored by Linus Torvalds

Merge tag 'perf-core-2024-07-16' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull performance events updates from Ingo Molnar:

 - Intel PT support enhancements & fixes

 - Fix leaked SIGTRAP events

 - Improve and fix the Intel uncore driver

 - Add support for Intel HBM and CXL uncore counters

 - Add Intel Lunar Lake and Arrow Lake support

 - AMD uncore driver fixes

 - Make SIGTRAP and __perf_pending_irq() work on RT

 - Micro-optimizations

 - Misc cleanups and fixes

* tag 'perf-core-2024-07-16' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (44 commits)
  perf/x86/intel: Add a distinct name for Granite Rapids
  perf/x86/intel/ds: Fix non 0 retire latency on Raptorlake
  perf/x86/intel: Hide Topdown metrics events if the feature is not enumerated
  perf/x86/intel/uncore: Fix the bits of the CHA extended umask for SPR
  perf: Split __perf_pending_irq() out of perf_pending_irq()
  perf: Don't disable preemption in perf_pending_task().
  perf: Move swevent_htable::recursion into task_struct.
  perf: Shrink the size of the recursion counter.
  perf: Enqueue SIGTRAP always via task_work.
  task_work: Add TWA_NMI_CURRENT as an additional notify mode.
  perf: Move irq_work_queue() where the event is prepared.
  perf: Fix event leak upon exec and file release
  perf: Fix event leak upon exit
  task_work: Introduce task_work_cancel() again
  task_work: s/task_work_cancel()/task_work_cancel_func()/
  perf/x86/amd/uncore: Fix DF and UMC domain identification
  perf/x86/amd/uncore: Avoid PMU registration if counters are unavailable
  perf/x86/intel: Support Perfmon MSRs aliasing
  perf/x86/intel: Support PERFEVTSEL extension
  perf/x86: Add config_mask to represent EVENTSEL bitmask
  ...
parents 4a996d90 fa0c1c9d
...@@ -432,8 +432,10 @@ static void __amd_put_nb_event_constraints(struct cpu_hw_events *cpuc, ...@@ -432,8 +432,10 @@ static void __amd_put_nb_event_constraints(struct cpu_hw_events *cpuc,
* be removed on one CPU at a time AND PMU is disabled * be removed on one CPU at a time AND PMU is disabled
* when we come here * when we come here
*/ */
for (i = 0; i < x86_pmu.num_counters; i++) { for_each_set_bit(i, x86_pmu.cntr_mask, X86_PMC_IDX_MAX) {
if (cmpxchg(nb->owners + i, event, NULL) == event) struct perf_event *tmp = event;
if (try_cmpxchg(nb->owners + i, &tmp, NULL))
break; break;
} }
} }
...@@ -499,7 +501,7 @@ __amd_get_nb_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *ev ...@@ -499,7 +501,7 @@ __amd_get_nb_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *ev
* because of successive calls to x86_schedule_events() from * because of successive calls to x86_schedule_events() from
* hw_perf_group_sched_in() without hw_perf_enable() * hw_perf_group_sched_in() without hw_perf_enable()
*/ */
for_each_set_bit(idx, c->idxmsk, x86_pmu.num_counters) { for_each_set_bit(idx, c->idxmsk, x86_pmu_max_num_counters(NULL)) {
if (new == -1 || hwc->idx == idx) if (new == -1 || hwc->idx == idx)
/* assign free slot, prefer hwc->idx */ /* assign free slot, prefer hwc->idx */
old = cmpxchg(nb->owners + idx, NULL, event); old = cmpxchg(nb->owners + idx, NULL, event);
...@@ -542,7 +544,7 @@ static struct amd_nb *amd_alloc_nb(int cpu) ...@@ -542,7 +544,7 @@ static struct amd_nb *amd_alloc_nb(int cpu)
/* /*
* initialize all possible NB constraints * initialize all possible NB constraints
*/ */
for (i = 0; i < x86_pmu.num_counters; i++) { for_each_set_bit(i, x86_pmu.cntr_mask, X86_PMC_IDX_MAX) {
__set_bit(i, nb->event_constraints[i].idxmsk); __set_bit(i, nb->event_constraints[i].idxmsk);
nb->event_constraints[i].weight = 1; nb->event_constraints[i].weight = 1;
} }
...@@ -735,7 +737,7 @@ static void amd_pmu_check_overflow(void) ...@@ -735,7 +737,7 @@ static void amd_pmu_check_overflow(void)
* counters are always enabled when this function is called and * counters are always enabled when this function is called and
* ARCH_PERFMON_EVENTSEL_INT is always set. * ARCH_PERFMON_EVENTSEL_INT is always set.
*/ */
for (idx = 0; idx < x86_pmu.num_counters; idx++) { for_each_set_bit(idx, x86_pmu.cntr_mask, X86_PMC_IDX_MAX) {
if (!test_bit(idx, cpuc->active_mask)) if (!test_bit(idx, cpuc->active_mask))
continue; continue;
...@@ -755,7 +757,7 @@ static void amd_pmu_enable_all(int added) ...@@ -755,7 +757,7 @@ static void amd_pmu_enable_all(int added)
amd_brs_enable_all(); amd_brs_enable_all();
for (idx = 0; idx < x86_pmu.num_counters; idx++) { for_each_set_bit(idx, x86_pmu.cntr_mask, X86_PMC_IDX_MAX) {
/* only activate events which are marked as active */ /* only activate events which are marked as active */
if (!test_bit(idx, cpuc->active_mask)) if (!test_bit(idx, cpuc->active_mask))
continue; continue;
...@@ -978,7 +980,7 @@ static int amd_pmu_v2_handle_irq(struct pt_regs *regs) ...@@ -978,7 +980,7 @@ static int amd_pmu_v2_handle_irq(struct pt_regs *regs)
/* Clear any reserved bits set by buggy microcode */ /* Clear any reserved bits set by buggy microcode */
status &= amd_pmu_global_cntr_mask; status &= amd_pmu_global_cntr_mask;
for (idx = 0; idx < x86_pmu.num_counters; idx++) { for_each_set_bit(idx, x86_pmu.cntr_mask, X86_PMC_IDX_MAX) {
if (!test_bit(idx, cpuc->active_mask)) if (!test_bit(idx, cpuc->active_mask))
continue; continue;
...@@ -1313,7 +1315,7 @@ static __initconst const struct x86_pmu amd_pmu = { ...@@ -1313,7 +1315,7 @@ static __initconst const struct x86_pmu amd_pmu = {
.addr_offset = amd_pmu_addr_offset, .addr_offset = amd_pmu_addr_offset,
.event_map = amd_pmu_event_map, .event_map = amd_pmu_event_map,
.max_events = ARRAY_SIZE(amd_perfmon_event_map), .max_events = ARRAY_SIZE(amd_perfmon_event_map),
.num_counters = AMD64_NUM_COUNTERS, .cntr_mask64 = GENMASK_ULL(AMD64_NUM_COUNTERS - 1, 0),
.add = amd_pmu_add_event, .add = amd_pmu_add_event,
.del = amd_pmu_del_event, .del = amd_pmu_del_event,
.cntval_bits = 48, .cntval_bits = 48,
...@@ -1412,7 +1414,7 @@ static int __init amd_core_pmu_init(void) ...@@ -1412,7 +1414,7 @@ static int __init amd_core_pmu_init(void)
*/ */
x86_pmu.eventsel = MSR_F15H_PERF_CTL; x86_pmu.eventsel = MSR_F15H_PERF_CTL;
x86_pmu.perfctr = MSR_F15H_PERF_CTR; x86_pmu.perfctr = MSR_F15H_PERF_CTR;
x86_pmu.num_counters = AMD64_NUM_COUNTERS_CORE; x86_pmu.cntr_mask64 = GENMASK_ULL(AMD64_NUM_COUNTERS_CORE - 1, 0);
/* Check for Performance Monitoring v2 support */ /* Check for Performance Monitoring v2 support */
if (boot_cpu_has(X86_FEATURE_PERFMON_V2)) { if (boot_cpu_has(X86_FEATURE_PERFMON_V2)) {
...@@ -1422,9 +1424,9 @@ static int __init amd_core_pmu_init(void) ...@@ -1422,9 +1424,9 @@ static int __init amd_core_pmu_init(void)
x86_pmu.version = 2; x86_pmu.version = 2;
/* Find the number of available Core PMCs */ /* Find the number of available Core PMCs */
x86_pmu.num_counters = ebx.split.num_core_pmc; x86_pmu.cntr_mask64 = GENMASK_ULL(ebx.split.num_core_pmc - 1, 0);
amd_pmu_global_cntr_mask = (1ULL << x86_pmu.num_counters) - 1; amd_pmu_global_cntr_mask = x86_pmu.cntr_mask64;
/* Update PMC handling functions */ /* Update PMC handling functions */
x86_pmu.enable_all = amd_pmu_v2_enable_all; x86_pmu.enable_all = amd_pmu_v2_enable_all;
...@@ -1452,12 +1454,12 @@ static int __init amd_core_pmu_init(void) ...@@ -1452,12 +1454,12 @@ static int __init amd_core_pmu_init(void)
* even numbered counter that has a consecutive adjacent odd * even numbered counter that has a consecutive adjacent odd
* numbered counter following it. * numbered counter following it.
*/ */
for (i = 0; i < x86_pmu.num_counters - 1; i += 2) for (i = 0; i < x86_pmu_max_num_counters(NULL) - 1; i += 2)
even_ctr_mask |= BIT_ULL(i); even_ctr_mask |= BIT_ULL(i);
pair_constraint = (struct event_constraint) pair_constraint = (struct event_constraint)
__EVENT_CONSTRAINT(0, even_ctr_mask, 0, __EVENT_CONSTRAINT(0, even_ctr_mask, 0,
x86_pmu.num_counters / 2, 0, x86_pmu_max_num_counters(NULL) / 2, 0,
PERF_X86_EVENT_PAIR); PERF_X86_EVENT_PAIR);
x86_pmu.get_event_constraints = amd_get_event_constraints_f17h; x86_pmu.get_event_constraints = amd_get_event_constraints_f17h;
......
...@@ -162,7 +162,9 @@ static int amd_uncore_add(struct perf_event *event, int flags) ...@@ -162,7 +162,9 @@ static int amd_uncore_add(struct perf_event *event, int flags)
/* if not, take the first available counter */ /* if not, take the first available counter */
hwc->idx = -1; hwc->idx = -1;
for (i = 0; i < pmu->num_counters; i++) { for (i = 0; i < pmu->num_counters; i++) {
if (cmpxchg(&ctx->events[i], NULL, event) == NULL) { struct perf_event *tmp = NULL;
if (try_cmpxchg(&ctx->events[i], &tmp, event)) {
hwc->idx = i; hwc->idx = i;
break; break;
} }
...@@ -196,7 +198,9 @@ static void amd_uncore_del(struct perf_event *event, int flags) ...@@ -196,7 +198,9 @@ static void amd_uncore_del(struct perf_event *event, int flags)
event->pmu->stop(event, PERF_EF_UPDATE); event->pmu->stop(event, PERF_EF_UPDATE);
for (i = 0; i < pmu->num_counters; i++) { for (i = 0; i < pmu->num_counters; i++) {
if (cmpxchg(&ctx->events[i], event, NULL) == event) struct perf_event *tmp = event;
if (try_cmpxchg(&ctx->events[i], &tmp, NULL))
break; break;
} }
...@@ -639,7 +643,7 @@ void amd_uncore_df_ctx_scan(struct amd_uncore *uncore, unsigned int cpu) ...@@ -639,7 +643,7 @@ void amd_uncore_df_ctx_scan(struct amd_uncore *uncore, unsigned int cpu)
info.split.aux_data = 0; info.split.aux_data = 0;
info.split.num_pmcs = NUM_COUNTERS_NB; info.split.num_pmcs = NUM_COUNTERS_NB;
info.split.gid = 0; info.split.gid = 0;
info.split.cid = topology_die_id(cpu); info.split.cid = topology_logical_package_id(cpu);
if (pmu_version >= 2) { if (pmu_version >= 2) {
ebx.full = cpuid_ebx(EXT_PERFMON_DEBUG_FEATURES); ebx.full = cpuid_ebx(EXT_PERFMON_DEBUG_FEATURES);
...@@ -654,17 +658,20 @@ int amd_uncore_df_ctx_init(struct amd_uncore *uncore, unsigned int cpu) ...@@ -654,17 +658,20 @@ int amd_uncore_df_ctx_init(struct amd_uncore *uncore, unsigned int cpu)
{ {
struct attribute **df_attr = amd_uncore_df_format_attr; struct attribute **df_attr = amd_uncore_df_format_attr;
struct amd_uncore_pmu *pmu; struct amd_uncore_pmu *pmu;
int num_counters;
/* Run just once */ /* Run just once */
if (uncore->init_done) if (uncore->init_done)
return amd_uncore_ctx_init(uncore, cpu); return amd_uncore_ctx_init(uncore, cpu);
num_counters = amd_uncore_ctx_num_pmcs(uncore, cpu);
if (!num_counters)
goto done;
/* No grouping, single instance for a system */ /* No grouping, single instance for a system */
uncore->pmus = kzalloc(sizeof(*uncore->pmus), GFP_KERNEL); uncore->pmus = kzalloc(sizeof(*uncore->pmus), GFP_KERNEL);
if (!uncore->pmus) { if (!uncore->pmus)
uncore->num_pmus = 0;
goto done; goto done;
}
/* /*
* For Family 17h and above, the Northbridge counters are repurposed * For Family 17h and above, the Northbridge counters are repurposed
...@@ -674,7 +681,7 @@ int amd_uncore_df_ctx_init(struct amd_uncore *uncore, unsigned int cpu) ...@@ -674,7 +681,7 @@ int amd_uncore_df_ctx_init(struct amd_uncore *uncore, unsigned int cpu)
pmu = &uncore->pmus[0]; pmu = &uncore->pmus[0];
strscpy(pmu->name, boot_cpu_data.x86 >= 0x17 ? "amd_df" : "amd_nb", strscpy(pmu->name, boot_cpu_data.x86 >= 0x17 ? "amd_df" : "amd_nb",
sizeof(pmu->name)); sizeof(pmu->name));
pmu->num_counters = amd_uncore_ctx_num_pmcs(uncore, cpu); pmu->num_counters = num_counters;
pmu->msr_base = MSR_F15H_NB_PERF_CTL; pmu->msr_base = MSR_F15H_NB_PERF_CTL;
pmu->rdpmc_base = RDPMC_BASE_NB; pmu->rdpmc_base = RDPMC_BASE_NB;
pmu->group = amd_uncore_ctx_gid(uncore, cpu); pmu->group = amd_uncore_ctx_gid(uncore, cpu);
...@@ -785,17 +792,20 @@ int amd_uncore_l3_ctx_init(struct amd_uncore *uncore, unsigned int cpu) ...@@ -785,17 +792,20 @@ int amd_uncore_l3_ctx_init(struct amd_uncore *uncore, unsigned int cpu)
{ {
struct attribute **l3_attr = amd_uncore_l3_format_attr; struct attribute **l3_attr = amd_uncore_l3_format_attr;
struct amd_uncore_pmu *pmu; struct amd_uncore_pmu *pmu;
int num_counters;
/* Run just once */ /* Run just once */
if (uncore->init_done) if (uncore->init_done)
return amd_uncore_ctx_init(uncore, cpu); return amd_uncore_ctx_init(uncore, cpu);
num_counters = amd_uncore_ctx_num_pmcs(uncore, cpu);
if (!num_counters)
goto done;
/* No grouping, single instance for a system */ /* No grouping, single instance for a system */
uncore->pmus = kzalloc(sizeof(*uncore->pmus), GFP_KERNEL); uncore->pmus = kzalloc(sizeof(*uncore->pmus), GFP_KERNEL);
if (!uncore->pmus) { if (!uncore->pmus)
uncore->num_pmus = 0;
goto done; goto done;
}
/* /*
* For Family 17h and above, L3 cache counters are available instead * For Family 17h and above, L3 cache counters are available instead
...@@ -805,7 +815,7 @@ int amd_uncore_l3_ctx_init(struct amd_uncore *uncore, unsigned int cpu) ...@@ -805,7 +815,7 @@ int amd_uncore_l3_ctx_init(struct amd_uncore *uncore, unsigned int cpu)
pmu = &uncore->pmus[0]; pmu = &uncore->pmus[0];
strscpy(pmu->name, boot_cpu_data.x86 >= 0x17 ? "amd_l3" : "amd_l2", strscpy(pmu->name, boot_cpu_data.x86 >= 0x17 ? "amd_l3" : "amd_l2",
sizeof(pmu->name)); sizeof(pmu->name));
pmu->num_counters = amd_uncore_ctx_num_pmcs(uncore, cpu); pmu->num_counters = num_counters;
pmu->msr_base = MSR_F16H_L2I_PERF_CTL; pmu->msr_base = MSR_F16H_L2I_PERF_CTL;
pmu->rdpmc_base = RDPMC_BASE_LLC; pmu->rdpmc_base = RDPMC_BASE_LLC;
pmu->group = amd_uncore_ctx_gid(uncore, cpu); pmu->group = amd_uncore_ctx_gid(uncore, cpu);
...@@ -893,8 +903,8 @@ void amd_uncore_umc_ctx_scan(struct amd_uncore *uncore, unsigned int cpu) ...@@ -893,8 +903,8 @@ void amd_uncore_umc_ctx_scan(struct amd_uncore *uncore, unsigned int cpu)
cpuid(EXT_PERFMON_DEBUG_FEATURES, &eax, &ebx.full, &ecx, &edx); cpuid(EXT_PERFMON_DEBUG_FEATURES, &eax, &ebx.full, &ecx, &edx);
info.split.aux_data = ecx; /* stash active mask */ info.split.aux_data = ecx; /* stash active mask */
info.split.num_pmcs = ebx.split.num_umc_pmc; info.split.num_pmcs = ebx.split.num_umc_pmc;
info.split.gid = topology_die_id(cpu); info.split.gid = topology_logical_package_id(cpu);
info.split.cid = topology_die_id(cpu); info.split.cid = topology_logical_package_id(cpu);
*per_cpu_ptr(uncore->info, cpu) = info; *per_cpu_ptr(uncore->info, cpu) = info;
} }
......
This diff is collapsed.
This diff is collapsed.
...@@ -41,7 +41,7 @@ ...@@ -41,7 +41,7 @@
* MSR_CORE_C1_RES: CORE C1 Residency Counter * MSR_CORE_C1_RES: CORE C1 Residency Counter
* perf code: 0x00 * perf code: 0x00
* Available model: SLM,AMT,GLM,CNL,ICX,TNT,ADL,RPL * Available model: SLM,AMT,GLM,CNL,ICX,TNT,ADL,RPL
* MTL,SRF,GRR * MTL,SRF,GRR,ARL,LNL
* Scope: Core (each processor core has a MSR) * Scope: Core (each processor core has a MSR)
* MSR_CORE_C3_RESIDENCY: CORE C3 Residency Counter * MSR_CORE_C3_RESIDENCY: CORE C3 Residency Counter
* perf code: 0x01 * perf code: 0x01
...@@ -53,50 +53,50 @@ ...@@ -53,50 +53,50 @@
* Available model: SLM,AMT,NHM,WSM,SNB,IVB,HSW,BDW, * Available model: SLM,AMT,NHM,WSM,SNB,IVB,HSW,BDW,
* SKL,KNL,GLM,CNL,KBL,CML,ICL,ICX, * SKL,KNL,GLM,CNL,KBL,CML,ICL,ICX,
* TGL,TNT,RKL,ADL,RPL,SPR,MTL,SRF, * TGL,TNT,RKL,ADL,RPL,SPR,MTL,SRF,
* GRR * GRR,ARL,LNL
* Scope: Core * Scope: Core
* MSR_CORE_C7_RESIDENCY: CORE C7 Residency Counter * MSR_CORE_C7_RESIDENCY: CORE C7 Residency Counter
* perf code: 0x03 * perf code: 0x03
* Available model: SNB,IVB,HSW,BDW,SKL,CNL,KBL,CML, * Available model: SNB,IVB,HSW,BDW,SKL,CNL,KBL,CML,
* ICL,TGL,RKL,ADL,RPL,MTL * ICL,TGL,RKL,ADL,RPL,MTL,ARL,LNL
* Scope: Core * Scope: Core
* MSR_PKG_C2_RESIDENCY: Package C2 Residency Counter. * MSR_PKG_C2_RESIDENCY: Package C2 Residency Counter.
* perf code: 0x00 * perf code: 0x00
* Available model: SNB,IVB,HSW,BDW,SKL,KNL,GLM,CNL, * Available model: SNB,IVB,HSW,BDW,SKL,KNL,GLM,CNL,
* KBL,CML,ICL,ICX,TGL,TNT,RKL,ADL, * KBL,CML,ICL,ICX,TGL,TNT,RKL,ADL,
* RPL,SPR,MTL * RPL,SPR,MTL,ARL,LNL
* Scope: Package (physical package) * Scope: Package (physical package)
* MSR_PKG_C3_RESIDENCY: Package C3 Residency Counter. * MSR_PKG_C3_RESIDENCY: Package C3 Residency Counter.
* perf code: 0x01 * perf code: 0x01
* Available model: NHM,WSM,SNB,IVB,HSW,BDW,SKL,KNL, * Available model: NHM,WSM,SNB,IVB,HSW,BDW,SKL,KNL,
* GLM,CNL,KBL,CML,ICL,TGL,TNT,RKL, * GLM,CNL,KBL,CML,ICL,TGL,TNT,RKL,
* ADL,RPL,MTL * ADL,RPL,MTL,ARL,LNL
* Scope: Package (physical package) * Scope: Package (physical package)
* MSR_PKG_C6_RESIDENCY: Package C6 Residency Counter. * MSR_PKG_C6_RESIDENCY: Package C6 Residency Counter.
* perf code: 0x02 * perf code: 0x02
* Available model: SLM,AMT,NHM,WSM,SNB,IVB,HSW,BDW, * Available model: SLM,AMT,NHM,WSM,SNB,IVB,HSW,BDW,
* SKL,KNL,GLM,CNL,KBL,CML,ICL,ICX, * SKL,KNL,GLM,CNL,KBL,CML,ICL,ICX,
* TGL,TNT,RKL,ADL,RPL,SPR,MTL,SRF * TGL,TNT,RKL,ADL,RPL,SPR,MTL,SRF,
* ARL,LNL
* Scope: Package (physical package) * Scope: Package (physical package)
* MSR_PKG_C7_RESIDENCY: Package C7 Residency Counter. * MSR_PKG_C7_RESIDENCY: Package C7 Residency Counter.
* perf code: 0x03 * perf code: 0x03
* Available model: NHM,WSM,SNB,IVB,HSW,BDW,SKL,CNL, * Available model: NHM,WSM,SNB,IVB,HSW,BDW,SKL,CNL,
* KBL,CML,ICL,TGL,RKL,ADL,RPL,MTL * KBL,CML,ICL,TGL,RKL
* Scope: Package (physical package) * Scope: Package (physical package)
* MSR_PKG_C8_RESIDENCY: Package C8 Residency Counter. * MSR_PKG_C8_RESIDENCY: Package C8 Residency Counter.
* perf code: 0x04 * perf code: 0x04
* Available model: HSW ULT,KBL,CNL,CML,ICL,TGL,RKL, * Available model: HSW ULT,KBL,CNL,CML,ICL,TGL,RKL,
* ADL,RPL,MTL * ADL,RPL,MTL,ARL
* Scope: Package (physical package) * Scope: Package (physical package)
* MSR_PKG_C9_RESIDENCY: Package C9 Residency Counter. * MSR_PKG_C9_RESIDENCY: Package C9 Residency Counter.
* perf code: 0x05 * perf code: 0x05
* Available model: HSW ULT,KBL,CNL,CML,ICL,TGL,RKL, * Available model: HSW ULT,KBL,CNL,CML,ICL,TGL,RKL
* ADL,RPL,MTL
* Scope: Package (physical package) * Scope: Package (physical package)
* MSR_PKG_C10_RESIDENCY: Package C10 Residency Counter. * MSR_PKG_C10_RESIDENCY: Package C10 Residency Counter.
* perf code: 0x06 * perf code: 0x06
* Available model: HSW ULT,KBL,GLM,CNL,CML,ICL,TGL, * Available model: HSW ULT,KBL,GLM,CNL,CML,ICL,TGL,
* TNT,RKL,ADL,RPL,MTL * TNT,RKL,ADL,RPL,MTL,ARL,LNL
* Scope: Package (physical package) * Scope: Package (physical package)
* MSR_MODULE_C6_RES_MS: Module C6 Residency Counter. * MSR_MODULE_C6_RES_MS: Module C6 Residency Counter.
* perf code: 0x00 * perf code: 0x00
...@@ -637,9 +637,18 @@ static const struct cstate_model adl_cstates __initconst = { ...@@ -637,9 +637,18 @@ static const struct cstate_model adl_cstates __initconst = {
.pkg_events = BIT(PERF_CSTATE_PKG_C2_RES) | .pkg_events = BIT(PERF_CSTATE_PKG_C2_RES) |
BIT(PERF_CSTATE_PKG_C3_RES) | BIT(PERF_CSTATE_PKG_C3_RES) |
BIT(PERF_CSTATE_PKG_C6_RES) | BIT(PERF_CSTATE_PKG_C6_RES) |
BIT(PERF_CSTATE_PKG_C7_RES) |
BIT(PERF_CSTATE_PKG_C8_RES) | BIT(PERF_CSTATE_PKG_C8_RES) |
BIT(PERF_CSTATE_PKG_C9_RES) | BIT(PERF_CSTATE_PKG_C10_RES),
};
static const struct cstate_model lnl_cstates __initconst = {
.core_events = BIT(PERF_CSTATE_CORE_C1_RES) |
BIT(PERF_CSTATE_CORE_C6_RES) |
BIT(PERF_CSTATE_CORE_C7_RES),
.pkg_events = BIT(PERF_CSTATE_PKG_C2_RES) |
BIT(PERF_CSTATE_PKG_C3_RES) |
BIT(PERF_CSTATE_PKG_C6_RES) |
BIT(PERF_CSTATE_PKG_C10_RES), BIT(PERF_CSTATE_PKG_C10_RES),
}; };
...@@ -763,6 +772,10 @@ static const struct x86_cpu_id intel_cstates_match[] __initconst = { ...@@ -763,6 +772,10 @@ static const struct x86_cpu_id intel_cstates_match[] __initconst = {
X86_MATCH_VFM(INTEL_RAPTORLAKE_S, &adl_cstates), X86_MATCH_VFM(INTEL_RAPTORLAKE_S, &adl_cstates),
X86_MATCH_VFM(INTEL_METEORLAKE, &adl_cstates), X86_MATCH_VFM(INTEL_METEORLAKE, &adl_cstates),
X86_MATCH_VFM(INTEL_METEORLAKE_L, &adl_cstates), X86_MATCH_VFM(INTEL_METEORLAKE_L, &adl_cstates),
X86_MATCH_VFM(INTEL_ARROWLAKE, &adl_cstates),
X86_MATCH_VFM(INTEL_ARROWLAKE_H, &adl_cstates),
X86_MATCH_VFM(INTEL_ARROWLAKE_U, &adl_cstates),
X86_MATCH_VFM(INTEL_LUNARLAKE_M, &lnl_cstates),
{ }, { },
}; };
MODULE_DEVICE_TABLE(x86cpu, intel_cstates_match); MODULE_DEVICE_TABLE(x86cpu, intel_cstates_match);
......
This diff is collapsed.
...@@ -303,7 +303,7 @@ static const struct x86_pmu knc_pmu __initconst = { ...@@ -303,7 +303,7 @@ static const struct x86_pmu knc_pmu __initconst = {
.apic = 1, .apic = 1,
.max_period = (1ULL << 39) - 1, .max_period = (1ULL << 39) - 1,
.version = 0, .version = 0,
.num_counters = 2, .cntr_mask64 = 0x3,
.cntval_bits = 40, .cntval_bits = 40,
.cntval_mask = (1ULL << 40) - 1, .cntval_mask = (1ULL << 40) - 1,
.get_event_constraints = x86_get_event_constraints, .get_event_constraints = x86_get_event_constraints,
......
...@@ -919,7 +919,7 @@ static void p4_pmu_disable_all(void) ...@@ -919,7 +919,7 @@ static void p4_pmu_disable_all(void)
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
int idx; int idx;
for (idx = 0; idx < x86_pmu.num_counters; idx++) { for_each_set_bit(idx, x86_pmu.cntr_mask, X86_PMC_IDX_MAX) {
struct perf_event *event = cpuc->events[idx]; struct perf_event *event = cpuc->events[idx];
if (!test_bit(idx, cpuc->active_mask)) if (!test_bit(idx, cpuc->active_mask))
continue; continue;
...@@ -998,7 +998,7 @@ static void p4_pmu_enable_all(int added) ...@@ -998,7 +998,7 @@ static void p4_pmu_enable_all(int added)
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
int idx; int idx;
for (idx = 0; idx < x86_pmu.num_counters; idx++) { for_each_set_bit(idx, x86_pmu.cntr_mask, X86_PMC_IDX_MAX) {
struct perf_event *event = cpuc->events[idx]; struct perf_event *event = cpuc->events[idx];
if (!test_bit(idx, cpuc->active_mask)) if (!test_bit(idx, cpuc->active_mask))
continue; continue;
...@@ -1040,7 +1040,7 @@ static int p4_pmu_handle_irq(struct pt_regs *regs) ...@@ -1040,7 +1040,7 @@ static int p4_pmu_handle_irq(struct pt_regs *regs)
cpuc = this_cpu_ptr(&cpu_hw_events); cpuc = this_cpu_ptr(&cpu_hw_events);
for (idx = 0; idx < x86_pmu.num_counters; idx++) { for_each_set_bit(idx, x86_pmu.cntr_mask, X86_PMC_IDX_MAX) {
int overflow; int overflow;
if (!test_bit(idx, cpuc->active_mask)) { if (!test_bit(idx, cpuc->active_mask)) {
...@@ -1353,7 +1353,7 @@ static __initconst const struct x86_pmu p4_pmu = { ...@@ -1353,7 +1353,7 @@ static __initconst const struct x86_pmu p4_pmu = {
* though leave it restricted at moment assuming * though leave it restricted at moment assuming
* HT is on * HT is on
*/ */
.num_counters = ARCH_P4_MAX_CCCR, .cntr_mask64 = GENMASK_ULL(ARCH_P4_MAX_CCCR - 1, 0),
.apic = 1, .apic = 1,
.cntval_bits = ARCH_P4_CNTRVAL_BITS, .cntval_bits = ARCH_P4_CNTRVAL_BITS,
.cntval_mask = ARCH_P4_CNTRVAL_MASK, .cntval_mask = ARCH_P4_CNTRVAL_MASK,
...@@ -1395,7 +1395,7 @@ __init int p4_pmu_init(void) ...@@ -1395,7 +1395,7 @@ __init int p4_pmu_init(void)
* *
* Solve this by zero'ing out the registers to mimic a reset. * Solve this by zero'ing out the registers to mimic a reset.
*/ */
for (i = 0; i < x86_pmu.num_counters; i++) { for_each_set_bit(i, x86_pmu.cntr_mask, X86_PMC_IDX_MAX) {
reg = x86_pmu_config_addr(i); reg = x86_pmu_config_addr(i);
wrmsrl_safe(reg, 0ULL); wrmsrl_safe(reg, 0ULL);
} }
......
...@@ -214,7 +214,7 @@ static __initconst const struct x86_pmu p6_pmu = { ...@@ -214,7 +214,7 @@ static __initconst const struct x86_pmu p6_pmu = {
.apic = 1, .apic = 1,
.max_period = (1ULL << 31) - 1, .max_period = (1ULL << 31) - 1,
.version = 0, .version = 0,
.num_counters = 2, .cntr_mask64 = 0x3,
/* /*
* Events have 40 bits implemented. However they are designed such * Events have 40 bits implemented. However they are designed such
* that bits [32-39] are sign extensions of bit 31. As such the * that bits [32-39] are sign extensions of bit 31. As such the
......
...@@ -878,7 +878,7 @@ static void pt_update_head(struct pt *pt) ...@@ -878,7 +878,7 @@ static void pt_update_head(struct pt *pt)
*/ */
static void *pt_buffer_region(struct pt_buffer *buf) static void *pt_buffer_region(struct pt_buffer *buf)
{ {
return phys_to_virt(TOPA_ENTRY(buf->cur, buf->cur_idx)->base << TOPA_SHIFT); return phys_to_virt((phys_addr_t)TOPA_ENTRY(buf->cur, buf->cur_idx)->base << TOPA_SHIFT);
} }
/** /**
...@@ -990,7 +990,7 @@ pt_topa_entry_for_page(struct pt_buffer *buf, unsigned int pg) ...@@ -990,7 +990,7 @@ pt_topa_entry_for_page(struct pt_buffer *buf, unsigned int pg)
* order allocations, there shouldn't be many of these. * order allocations, there shouldn't be many of these.
*/ */
list_for_each_entry(topa, &buf->tables, list) { list_for_each_entry(topa, &buf->tables, list) {
if (topa->offset + topa->size > pg << PAGE_SHIFT) if (topa->offset + topa->size > (unsigned long)pg << PAGE_SHIFT)
goto found; goto found;
} }
......
...@@ -33,8 +33,8 @@ struct topa_entry { ...@@ -33,8 +33,8 @@ struct topa_entry {
u64 rsvd2 : 1; u64 rsvd2 : 1;
u64 size : 4; u64 size : 4;
u64 rsvd3 : 2; u64 rsvd3 : 2;
u64 base : 36; u64 base : 40;
u64 rsvd4 : 16; u64 rsvd4 : 12;
}; };
/* TSC to Core Crystal Clock Ratio */ /* TSC to Core Crystal Clock Ratio */
......
...@@ -264,6 +264,9 @@ static void uncore_assign_hw_event(struct intel_uncore_box *box, ...@@ -264,6 +264,9 @@ static void uncore_assign_hw_event(struct intel_uncore_box *box,
return; return;
} }
if (intel_generic_uncore_assign_hw_event(event, box))
return;
hwc->config_base = uncore_event_ctl(box, hwc->idx); hwc->config_base = uncore_event_ctl(box, hwc->idx);
hwc->event_base = uncore_perf_ctr(box, hwc->idx); hwc->event_base = uncore_perf_ctr(box, hwc->idx);
} }
...@@ -844,7 +847,9 @@ static void uncore_pmu_disable(struct pmu *pmu) ...@@ -844,7 +847,9 @@ static void uncore_pmu_disable(struct pmu *pmu)
static ssize_t uncore_get_attr_cpumask(struct device *dev, static ssize_t uncore_get_attr_cpumask(struct device *dev,
struct device_attribute *attr, char *buf) struct device_attribute *attr, char *buf)
{ {
return cpumap_print_to_pagebuf(true, buf, &uncore_cpu_mask); struct intel_uncore_pmu *pmu = container_of(dev_get_drvdata(dev), struct intel_uncore_pmu, pmu);
return cpumap_print_to_pagebuf(true, buf, &pmu->cpu_mask);
} }
static DEVICE_ATTR(cpumask, S_IRUGO, uncore_get_attr_cpumask, NULL); static DEVICE_ATTR(cpumask, S_IRUGO, uncore_get_attr_cpumask, NULL);
...@@ -861,7 +866,10 @@ static const struct attribute_group uncore_pmu_attr_group = { ...@@ -861,7 +866,10 @@ static const struct attribute_group uncore_pmu_attr_group = {
static inline int uncore_get_box_id(struct intel_uncore_type *type, static inline int uncore_get_box_id(struct intel_uncore_type *type,
struct intel_uncore_pmu *pmu) struct intel_uncore_pmu *pmu)
{ {
return type->box_ids ? type->box_ids[pmu->pmu_idx] : pmu->pmu_idx; if (type->boxes)
return intel_uncore_find_discovery_unit_id(type->boxes, -1, pmu->pmu_idx);
return pmu->pmu_idx;
} }
void uncore_get_alias_name(char *pmu_name, struct intel_uncore_pmu *pmu) void uncore_get_alias_name(char *pmu_name, struct intel_uncore_pmu *pmu)
...@@ -962,6 +970,9 @@ static void uncore_type_exit(struct intel_uncore_type *type) ...@@ -962,6 +970,9 @@ static void uncore_type_exit(struct intel_uncore_type *type)
if (type->cleanup_mapping) if (type->cleanup_mapping)
type->cleanup_mapping(type); type->cleanup_mapping(type);
if (type->cleanup_extra_boxes)
type->cleanup_extra_boxes(type);
if (pmu) { if (pmu) {
for (i = 0; i < type->num_boxes; i++, pmu++) { for (i = 0; i < type->num_boxes; i++, pmu++) {
uncore_pmu_unregister(pmu); uncore_pmu_unregister(pmu);
...@@ -970,10 +981,7 @@ static void uncore_type_exit(struct intel_uncore_type *type) ...@@ -970,10 +981,7 @@ static void uncore_type_exit(struct intel_uncore_type *type)
kfree(type->pmus); kfree(type->pmus);
type->pmus = NULL; type->pmus = NULL;
} }
if (type->box_ids) {
kfree(type->box_ids);
type->box_ids = NULL;
}
kfree(type->events_group); kfree(type->events_group);
type->events_group = NULL; type->events_group = NULL;
} }
...@@ -1077,22 +1085,19 @@ static struct intel_uncore_pmu * ...@@ -1077,22 +1085,19 @@ static struct intel_uncore_pmu *
uncore_pci_find_dev_pmu_from_types(struct pci_dev *pdev) uncore_pci_find_dev_pmu_from_types(struct pci_dev *pdev)
{ {
struct intel_uncore_type **types = uncore_pci_uncores; struct intel_uncore_type **types = uncore_pci_uncores;
struct intel_uncore_discovery_unit *unit;
struct intel_uncore_type *type; struct intel_uncore_type *type;
u64 box_ctl; struct rb_node *node;
int i, die;
for (; *types; types++) { for (; *types; types++) {
type = *types; type = *types;
for (die = 0; die < __uncore_max_dies; die++) {
for (i = 0; i < type->num_boxes; i++) { for (node = rb_first(type->boxes); node; node = rb_next(node)) {
if (!type->box_ctls[die]) unit = rb_entry(node, struct intel_uncore_discovery_unit, node);
continue; if (pdev->devfn == UNCORE_DISCOVERY_PCI_DEVFN(unit->addr) &&
box_ctl = type->box_ctls[die] + type->pci_offsets[i]; pdev->bus->number == UNCORE_DISCOVERY_PCI_BUS(unit->addr) &&
if (pdev->devfn == UNCORE_DISCOVERY_PCI_DEVFN(box_ctl) && pci_domain_nr(pdev->bus) == UNCORE_DISCOVERY_PCI_DOMAIN(unit->addr))
pdev->bus->number == UNCORE_DISCOVERY_PCI_BUS(box_ctl) && return &type->pmus[unit->pmu_idx];
pci_domain_nr(pdev->bus) == UNCORE_DISCOVERY_PCI_DOMAIN(box_ctl))
return &type->pmus[i];
}
} }
} }
...@@ -1368,28 +1373,25 @@ static struct notifier_block uncore_pci_notifier = { ...@@ -1368,28 +1373,25 @@ static struct notifier_block uncore_pci_notifier = {
static void uncore_pci_pmus_register(void) static void uncore_pci_pmus_register(void)
{ {
struct intel_uncore_type **types = uncore_pci_uncores; struct intel_uncore_type **types = uncore_pci_uncores;
struct intel_uncore_discovery_unit *unit;
struct intel_uncore_type *type; struct intel_uncore_type *type;
struct intel_uncore_pmu *pmu; struct intel_uncore_pmu *pmu;
struct rb_node *node;
struct pci_dev *pdev; struct pci_dev *pdev;
u64 box_ctl;
int i, die;
for (; *types; types++) { for (; *types; types++) {
type = *types; type = *types;
for (die = 0; die < __uncore_max_dies; die++) {
for (i = 0; i < type->num_boxes; i++) { for (node = rb_first(type->boxes); node; node = rb_next(node)) {
if (!type->box_ctls[die]) unit = rb_entry(node, struct intel_uncore_discovery_unit, node);
continue; pdev = pci_get_domain_bus_and_slot(UNCORE_DISCOVERY_PCI_DOMAIN(unit->addr),
box_ctl = type->box_ctls[die] + type->pci_offsets[i]; UNCORE_DISCOVERY_PCI_BUS(unit->addr),
pdev = pci_get_domain_bus_and_slot(UNCORE_DISCOVERY_PCI_DOMAIN(box_ctl), UNCORE_DISCOVERY_PCI_DEVFN(unit->addr));
UNCORE_DISCOVERY_PCI_BUS(box_ctl),
UNCORE_DISCOVERY_PCI_DEVFN(box_ctl)); if (!pdev)
if (!pdev) continue;
continue; pmu = &type->pmus[unit->pmu_idx];
pmu = &type->pmus[i]; uncore_pci_pmu_register(pdev, type, pmu, unit->die);
uncore_pci_pmu_register(pdev, type, pmu, die);
}
} }
} }
...@@ -1454,6 +1456,18 @@ static void uncore_pci_exit(void) ...@@ -1454,6 +1456,18 @@ static void uncore_pci_exit(void)
} }
} }
static bool uncore_die_has_box(struct intel_uncore_type *type,
int die, unsigned int pmu_idx)
{
if (!type->boxes)
return true;
if (intel_uncore_find_discovery_unit_id(type->boxes, die, pmu_idx) < 0)
return false;
return true;
}
static void uncore_change_type_ctx(struct intel_uncore_type *type, int old_cpu, static void uncore_change_type_ctx(struct intel_uncore_type *type, int old_cpu,
int new_cpu) int new_cpu)
{ {
...@@ -1469,18 +1483,25 @@ static void uncore_change_type_ctx(struct intel_uncore_type *type, int old_cpu, ...@@ -1469,18 +1483,25 @@ static void uncore_change_type_ctx(struct intel_uncore_type *type, int old_cpu,
if (old_cpu < 0) { if (old_cpu < 0) {
WARN_ON_ONCE(box->cpu != -1); WARN_ON_ONCE(box->cpu != -1);
box->cpu = new_cpu; if (uncore_die_has_box(type, die, pmu->pmu_idx)) {
box->cpu = new_cpu;
cpumask_set_cpu(new_cpu, &pmu->cpu_mask);
}
continue; continue;
} }
WARN_ON_ONCE(box->cpu != old_cpu); WARN_ON_ONCE(box->cpu != -1 && box->cpu != old_cpu);
box->cpu = -1; box->cpu = -1;
cpumask_clear_cpu(old_cpu, &pmu->cpu_mask);
if (new_cpu < 0) if (new_cpu < 0)
continue; continue;
if (!uncore_die_has_box(type, die, pmu->pmu_idx))
continue;
uncore_pmu_cancel_hrtimer(box); uncore_pmu_cancel_hrtimer(box);
perf_pmu_migrate_context(&pmu->pmu, old_cpu, new_cpu); perf_pmu_migrate_context(&pmu->pmu, old_cpu, new_cpu);
box->cpu = new_cpu; box->cpu = new_cpu;
cpumask_set_cpu(new_cpu, &pmu->cpu_mask);
} }
} }
...@@ -1503,7 +1524,7 @@ static void uncore_box_unref(struct intel_uncore_type **types, int id) ...@@ -1503,7 +1524,7 @@ static void uncore_box_unref(struct intel_uncore_type **types, int id)
pmu = type->pmus; pmu = type->pmus;
for (i = 0; i < type->num_boxes; i++, pmu++) { for (i = 0; i < type->num_boxes; i++, pmu++) {
box = pmu->boxes[id]; box = pmu->boxes[id];
if (box && atomic_dec_return(&box->refcnt) == 0) if (box && box->cpu >= 0 && atomic_dec_return(&box->refcnt) == 0)
uncore_box_exit(box); uncore_box_exit(box);
} }
} }
...@@ -1593,7 +1614,7 @@ static int uncore_box_ref(struct intel_uncore_type **types, ...@@ -1593,7 +1614,7 @@ static int uncore_box_ref(struct intel_uncore_type **types,
pmu = type->pmus; pmu = type->pmus;
for (i = 0; i < type->num_boxes; i++, pmu++) { for (i = 0; i < type->num_boxes; i++, pmu++) {
box = pmu->boxes[id]; box = pmu->boxes[id];
if (box && atomic_inc_return(&box->refcnt) == 1) if (box && box->cpu >= 0 && atomic_inc_return(&box->refcnt) == 1)
uncore_box_init(box); uncore_box_init(box);
} }
} }
......
...@@ -62,7 +62,6 @@ struct intel_uncore_type { ...@@ -62,7 +62,6 @@ struct intel_uncore_type {
unsigned fixed_ctr; unsigned fixed_ctr;
unsigned fixed_ctl; unsigned fixed_ctl;
unsigned box_ctl; unsigned box_ctl;
u64 *box_ctls; /* Unit ctrl addr of the first box of each die */
union { union {
unsigned msr_offset; unsigned msr_offset;
unsigned mmio_offset; unsigned mmio_offset;
...@@ -76,7 +75,6 @@ struct intel_uncore_type { ...@@ -76,7 +75,6 @@ struct intel_uncore_type {
u64 *pci_offsets; u64 *pci_offsets;
u64 *mmio_offsets; u64 *mmio_offsets;
}; };
unsigned *box_ids;
struct event_constraint unconstrainted; struct event_constraint unconstrainted;
struct event_constraint *constraints; struct event_constraint *constraints;
struct intel_uncore_pmu *pmus; struct intel_uncore_pmu *pmus;
...@@ -86,6 +84,7 @@ struct intel_uncore_type { ...@@ -86,6 +84,7 @@ struct intel_uncore_type {
const struct attribute_group *attr_groups[4]; const struct attribute_group *attr_groups[4];
const struct attribute_group **attr_update; const struct attribute_group **attr_update;
struct pmu *pmu; /* for custom pmu ops */ struct pmu *pmu; /* for custom pmu ops */
struct rb_root *boxes;
/* /*
* Uncore PMU would store relevant platform topology configuration here * Uncore PMU would store relevant platform topology configuration here
* to identify which platform component each PMON block of that type is * to identify which platform component each PMON block of that type is
...@@ -98,6 +97,10 @@ struct intel_uncore_type { ...@@ -98,6 +97,10 @@ struct intel_uncore_type {
int (*get_topology)(struct intel_uncore_type *type); int (*get_topology)(struct intel_uncore_type *type);
void (*set_mapping)(struct intel_uncore_type *type); void (*set_mapping)(struct intel_uncore_type *type);
void (*cleanup_mapping)(struct intel_uncore_type *type); void (*cleanup_mapping)(struct intel_uncore_type *type);
/*
* Optional callbacks for extra uncore units cleanup
*/
void (*cleanup_extra_boxes)(struct intel_uncore_type *type);
}; };
#define pmu_group attr_groups[0] #define pmu_group attr_groups[0]
...@@ -125,6 +128,7 @@ struct intel_uncore_pmu { ...@@ -125,6 +128,7 @@ struct intel_uncore_pmu {
int func_id; int func_id;
bool registered; bool registered;
atomic_t activeboxes; atomic_t activeboxes;
cpumask_t cpu_mask;
struct intel_uncore_type *type; struct intel_uncore_type *type;
struct intel_uncore_box **boxes; struct intel_uncore_box **boxes;
}; };
......
This diff is collapsed.
...@@ -113,19 +113,24 @@ struct uncore_unit_discovery { ...@@ -113,19 +113,24 @@ struct uncore_unit_discovery {
}; };
}; };
struct intel_uncore_discovery_unit {
struct rb_node node;
unsigned int pmu_idx; /* The idx of the corresponding PMU */
unsigned int id; /* Unit ID */
unsigned int die; /* Die ID */
u64 addr; /* Unit Control Address */
};
struct intel_uncore_discovery_type { struct intel_uncore_discovery_type {
struct rb_node node; struct rb_node node;
enum uncore_access_type access_type; enum uncore_access_type access_type;
u64 box_ctrl; /* Unit ctrl addr of the first box */ struct rb_root units; /* Unit ctrl addr for all units */
u64 *box_ctrl_die; /* Unit ctrl addr of the first box of each die */
u16 type; /* Type ID of the uncore block */ u16 type; /* Type ID of the uncore block */
u8 num_counters; u8 num_counters;
u8 counter_width; u8 counter_width;
u8 ctl_offset; /* Counter Control 0 offset */ u8 ctl_offset; /* Counter Control 0 offset */
u8 ctr_offset; /* Counter 0 offset */ u8 ctr_offset; /* Counter 0 offset */
u16 num_boxes; /* number of boxes for the uncore block */ u16 num_units; /* number of units */
unsigned int *ids; /* Box IDs */
u64 *box_offset; /* Box offset */
}; };
bool intel_uncore_has_discovery_tables(int *ignore); bool intel_uncore_has_discovery_tables(int *ignore);
...@@ -156,3 +161,10 @@ u64 intel_generic_uncore_pci_read_counter(struct intel_uncore_box *box, ...@@ -156,3 +161,10 @@ u64 intel_generic_uncore_pci_read_counter(struct intel_uncore_box *box,
struct intel_uncore_type ** struct intel_uncore_type **
intel_uncore_generic_init_uncores(enum uncore_access_type type_id, int num_extra); intel_uncore_generic_init_uncores(enum uncore_access_type type_id, int num_extra);
int intel_uncore_find_discovery_unit_id(struct rb_root *units, int die,
unsigned int pmu_idx);
bool intel_generic_uncore_assign_hw_event(struct perf_event *event,
struct intel_uncore_box *box);
void uncore_find_add_unit(struct intel_uncore_discovery_unit *node,
struct rb_root *root, u16 *num_units);
...@@ -462,6 +462,7 @@ ...@@ -462,6 +462,7 @@
#define SPR_UBOX_DID 0x3250 #define SPR_UBOX_DID 0x3250
/* SPR CHA */ /* SPR CHA */
#define SPR_CHA_EVENT_MASK_EXT 0xffffffff
#define SPR_CHA_PMON_CTL_TID_EN (1 << 16) #define SPR_CHA_PMON_CTL_TID_EN (1 << 16)
#define SPR_CHA_PMON_EVENT_MASK (SNBEP_PMON_RAW_EVENT_MASK | \ #define SPR_CHA_PMON_EVENT_MASK (SNBEP_PMON_RAW_EVENT_MASK | \
SPR_CHA_PMON_CTL_TID_EN) SPR_CHA_PMON_CTL_TID_EN)
...@@ -478,6 +479,7 @@ DEFINE_UNCORE_FORMAT_ATTR(umask_ext, umask, "config:8-15,32-43,45-55"); ...@@ -478,6 +479,7 @@ DEFINE_UNCORE_FORMAT_ATTR(umask_ext, umask, "config:8-15,32-43,45-55");
DEFINE_UNCORE_FORMAT_ATTR(umask_ext2, umask, "config:8-15,32-57"); DEFINE_UNCORE_FORMAT_ATTR(umask_ext2, umask, "config:8-15,32-57");
DEFINE_UNCORE_FORMAT_ATTR(umask_ext3, umask, "config:8-15,32-39"); DEFINE_UNCORE_FORMAT_ATTR(umask_ext3, umask, "config:8-15,32-39");
DEFINE_UNCORE_FORMAT_ATTR(umask_ext4, umask, "config:8-15,32-55"); DEFINE_UNCORE_FORMAT_ATTR(umask_ext4, umask, "config:8-15,32-55");
DEFINE_UNCORE_FORMAT_ATTR(umask_ext5, umask, "config:8-15,32-63");
DEFINE_UNCORE_FORMAT_ATTR(qor, qor, "config:16"); DEFINE_UNCORE_FORMAT_ATTR(qor, qor, "config:16");
DEFINE_UNCORE_FORMAT_ATTR(edge, edge, "config:18"); DEFINE_UNCORE_FORMAT_ATTR(edge, edge, "config:18");
DEFINE_UNCORE_FORMAT_ATTR(tid_en, tid_en, "config:19"); DEFINE_UNCORE_FORMAT_ATTR(tid_en, tid_en, "config:19");
...@@ -5933,10 +5935,11 @@ static int spr_cha_hw_config(struct intel_uncore_box *box, struct perf_event *ev ...@@ -5933,10 +5935,11 @@ static int spr_cha_hw_config(struct intel_uncore_box *box, struct perf_event *ev
struct hw_perf_event_extra *reg1 = &event->hw.extra_reg; struct hw_perf_event_extra *reg1 = &event->hw.extra_reg;
bool tie_en = !!(event->hw.config & SPR_CHA_PMON_CTL_TID_EN); bool tie_en = !!(event->hw.config & SPR_CHA_PMON_CTL_TID_EN);
struct intel_uncore_type *type = box->pmu->type; struct intel_uncore_type *type = box->pmu->type;
int id = intel_uncore_find_discovery_unit_id(type->boxes, -1, box->pmu->pmu_idx);
if (tie_en) { if (tie_en) {
reg1->reg = SPR_C0_MSR_PMON_BOX_FILTER0 + reg1->reg = SPR_C0_MSR_PMON_BOX_FILTER0 +
HSWEP_CBO_MSR_OFFSET * type->box_ids[box->pmu->pmu_idx]; HSWEP_CBO_MSR_OFFSET * id;
reg1->config = event->attr.config1 & SPR_CHA_PMON_BOX_FILTER_TID; reg1->config = event->attr.config1 & SPR_CHA_PMON_BOX_FILTER_TID;
reg1->idx = 0; reg1->idx = 0;
} }
...@@ -5958,7 +5961,7 @@ static struct intel_uncore_ops spr_uncore_chabox_ops = { ...@@ -5958,7 +5961,7 @@ static struct intel_uncore_ops spr_uncore_chabox_ops = {
static struct attribute *spr_uncore_cha_formats_attr[] = { static struct attribute *spr_uncore_cha_formats_attr[] = {
&format_attr_event.attr, &format_attr_event.attr,
&format_attr_umask_ext4.attr, &format_attr_umask_ext5.attr,
&format_attr_tid_en2.attr, &format_attr_tid_en2.attr,
&format_attr_edge.attr, &format_attr_edge.attr,
&format_attr_inv.attr, &format_attr_inv.attr,
...@@ -5994,7 +5997,7 @@ ATTRIBUTE_GROUPS(uncore_alias); ...@@ -5994,7 +5997,7 @@ ATTRIBUTE_GROUPS(uncore_alias);
static struct intel_uncore_type spr_uncore_chabox = { static struct intel_uncore_type spr_uncore_chabox = {
.name = "cha", .name = "cha",
.event_mask = SPR_CHA_PMON_EVENT_MASK, .event_mask = SPR_CHA_PMON_EVENT_MASK,
.event_mask_ext = SPR_RAW_EVENT_MASK_EXT, .event_mask_ext = SPR_CHA_EVENT_MASK_EXT,
.num_shared_regs = 1, .num_shared_regs = 1,
.constraints = skx_uncore_chabox_constraints, .constraints = skx_uncore_chabox_constraints,
.ops = &spr_uncore_chabox_ops, .ops = &spr_uncore_chabox_ops,
...@@ -6162,7 +6165,55 @@ static struct intel_uncore_type spr_uncore_mdf = { ...@@ -6162,7 +6165,55 @@ static struct intel_uncore_type spr_uncore_mdf = {
.name = "mdf", .name = "mdf",
}; };
#define UNCORE_SPR_NUM_UNCORE_TYPES 12 static void spr_uncore_mmio_offs8_init_box(struct intel_uncore_box *box)
{
__set_bit(UNCORE_BOX_FLAG_CTL_OFFS8, &box->flags);
intel_generic_uncore_mmio_init_box(box);
}
static struct intel_uncore_ops spr_uncore_mmio_offs8_ops = {
.init_box = spr_uncore_mmio_offs8_init_box,
.exit_box = uncore_mmio_exit_box,
.disable_box = intel_generic_uncore_mmio_disable_box,
.enable_box = intel_generic_uncore_mmio_enable_box,
.disable_event = intel_generic_uncore_mmio_disable_event,
.enable_event = spr_uncore_mmio_enable_event,
.read_counter = uncore_mmio_read_counter,
};
#define SPR_UNCORE_MMIO_OFFS8_COMMON_FORMAT() \
SPR_UNCORE_COMMON_FORMAT(), \
.ops = &spr_uncore_mmio_offs8_ops
static struct event_constraint spr_uncore_cxlcm_constraints[] = {
UNCORE_EVENT_CONSTRAINT(0x02, 0x0f),
UNCORE_EVENT_CONSTRAINT(0x05, 0x0f),
UNCORE_EVENT_CONSTRAINT(0x40, 0xf0),
UNCORE_EVENT_CONSTRAINT(0x41, 0xf0),
UNCORE_EVENT_CONSTRAINT(0x42, 0xf0),
UNCORE_EVENT_CONSTRAINT(0x43, 0xf0),
UNCORE_EVENT_CONSTRAINT(0x4b, 0xf0),
UNCORE_EVENT_CONSTRAINT(0x52, 0xf0),
EVENT_CONSTRAINT_END
};
static struct intel_uncore_type spr_uncore_cxlcm = {
SPR_UNCORE_MMIO_OFFS8_COMMON_FORMAT(),
.name = "cxlcm",
.constraints = spr_uncore_cxlcm_constraints,
};
static struct intel_uncore_type spr_uncore_cxldp = {
SPR_UNCORE_MMIO_OFFS8_COMMON_FORMAT(),
.name = "cxldp",
};
static struct intel_uncore_type spr_uncore_hbm = {
SPR_UNCORE_COMMON_FORMAT(),
.name = "hbm",
};
#define UNCORE_SPR_NUM_UNCORE_TYPES 15
#define UNCORE_SPR_CHA 0 #define UNCORE_SPR_CHA 0
#define UNCORE_SPR_IIO 1 #define UNCORE_SPR_IIO 1
#define UNCORE_SPR_IMC 6 #define UNCORE_SPR_IMC 6
...@@ -6186,6 +6237,9 @@ static struct intel_uncore_type *spr_uncores[UNCORE_SPR_NUM_UNCORE_TYPES] = { ...@@ -6186,6 +6237,9 @@ static struct intel_uncore_type *spr_uncores[UNCORE_SPR_NUM_UNCORE_TYPES] = {
NULL, NULL,
NULL, NULL,
&spr_uncore_mdf, &spr_uncore_mdf,
&spr_uncore_cxlcm,
&spr_uncore_cxldp,
&spr_uncore_hbm,
}; };
/* /*
...@@ -6198,6 +6252,24 @@ static u64 spr_upi_pci_offsets[SPR_UNCORE_UPI_NUM_BOXES] = { ...@@ -6198,6 +6252,24 @@ static u64 spr_upi_pci_offsets[SPR_UNCORE_UPI_NUM_BOXES] = {
0, 0x8000, 0x10000, 0x18000 0, 0x8000, 0x10000, 0x18000
}; };
static void spr_extra_boxes_cleanup(struct intel_uncore_type *type)
{
struct intel_uncore_discovery_unit *pos;
struct rb_node *node;
if (!type->boxes)
return;
while (!RB_EMPTY_ROOT(type->boxes)) {
node = rb_first(type->boxes);
pos = rb_entry(node, struct intel_uncore_discovery_unit, node);
rb_erase(node, type->boxes);
kfree(pos);
}
kfree(type->boxes);
type->boxes = NULL;
}
static struct intel_uncore_type spr_uncore_upi = { static struct intel_uncore_type spr_uncore_upi = {
.event_mask = SNBEP_PMON_RAW_EVENT_MASK, .event_mask = SNBEP_PMON_RAW_EVENT_MASK,
.event_mask_ext = SPR_RAW_EVENT_MASK_EXT, .event_mask_ext = SPR_RAW_EVENT_MASK_EXT,
...@@ -6212,10 +6284,11 @@ static struct intel_uncore_type spr_uncore_upi = { ...@@ -6212,10 +6284,11 @@ static struct intel_uncore_type spr_uncore_upi = {
.num_counters = 4, .num_counters = 4,
.num_boxes = SPR_UNCORE_UPI_NUM_BOXES, .num_boxes = SPR_UNCORE_UPI_NUM_BOXES,
.perf_ctr_bits = 48, .perf_ctr_bits = 48,
.perf_ctr = ICX_UPI_PCI_PMON_CTR0, .perf_ctr = ICX_UPI_PCI_PMON_CTR0 - ICX_UPI_PCI_PMON_BOX_CTL,
.event_ctl = ICX_UPI_PCI_PMON_CTL0, .event_ctl = ICX_UPI_PCI_PMON_CTL0 - ICX_UPI_PCI_PMON_BOX_CTL,
.box_ctl = ICX_UPI_PCI_PMON_BOX_CTL, .box_ctl = ICX_UPI_PCI_PMON_BOX_CTL,
.pci_offsets = spr_upi_pci_offsets, .pci_offsets = spr_upi_pci_offsets,
.cleanup_extra_boxes = spr_extra_boxes_cleanup,
}; };
static struct intel_uncore_type spr_uncore_m3upi = { static struct intel_uncore_type spr_uncore_m3upi = {
...@@ -6225,11 +6298,12 @@ static struct intel_uncore_type spr_uncore_m3upi = { ...@@ -6225,11 +6298,12 @@ static struct intel_uncore_type spr_uncore_m3upi = {
.num_counters = 4, .num_counters = 4,
.num_boxes = SPR_UNCORE_UPI_NUM_BOXES, .num_boxes = SPR_UNCORE_UPI_NUM_BOXES,
.perf_ctr_bits = 48, .perf_ctr_bits = 48,
.perf_ctr = ICX_M3UPI_PCI_PMON_CTR0, .perf_ctr = ICX_M3UPI_PCI_PMON_CTR0 - ICX_M3UPI_PCI_PMON_BOX_CTL,
.event_ctl = ICX_M3UPI_PCI_PMON_CTL0, .event_ctl = ICX_M3UPI_PCI_PMON_CTL0 - ICX_M3UPI_PCI_PMON_BOX_CTL,
.box_ctl = ICX_M3UPI_PCI_PMON_BOX_CTL, .box_ctl = ICX_M3UPI_PCI_PMON_BOX_CTL,
.pci_offsets = spr_upi_pci_offsets, .pci_offsets = spr_upi_pci_offsets,
.constraints = icx_uncore_m3upi_constraints, .constraints = icx_uncore_m3upi_constraints,
.cleanup_extra_boxes = spr_extra_boxes_cleanup,
}; };
enum perf_uncore_spr_iio_freerunning_type_id { enum perf_uncore_spr_iio_freerunning_type_id {
...@@ -6460,18 +6534,21 @@ uncore_find_type_by_id(struct intel_uncore_type **types, int type_id) ...@@ -6460,18 +6534,21 @@ uncore_find_type_by_id(struct intel_uncore_type **types, int type_id)
static int uncore_type_max_boxes(struct intel_uncore_type **types, static int uncore_type_max_boxes(struct intel_uncore_type **types,
int type_id) int type_id)
{ {
struct intel_uncore_discovery_unit *unit;
struct intel_uncore_type *type; struct intel_uncore_type *type;
int i, max = 0; struct rb_node *node;
int max = 0;
type = uncore_find_type_by_id(types, type_id); type = uncore_find_type_by_id(types, type_id);
if (!type) if (!type)
return 0; return 0;
for (i = 0; i < type->num_boxes; i++) { for (node = rb_first(type->boxes); node; node = rb_next(node)) {
if (type->box_ids[i] > max) unit = rb_entry(node, struct intel_uncore_discovery_unit, node);
max = type->box_ids[i];
}
if (unit->id > max)
max = unit->id;
}
return max + 1; return max + 1;
} }
...@@ -6513,10 +6590,11 @@ void spr_uncore_cpu_init(void) ...@@ -6513,10 +6590,11 @@ void spr_uncore_cpu_init(void)
static void spr_update_device_location(int type_id) static void spr_update_device_location(int type_id)
{ {
struct intel_uncore_discovery_unit *unit;
struct intel_uncore_type *type; struct intel_uncore_type *type;
struct pci_dev *dev = NULL; struct pci_dev *dev = NULL;
struct rb_root *root;
u32 device, devfn; u32 device, devfn;
u64 *ctls;
int die; int die;
if (type_id == UNCORE_SPR_UPI) { if (type_id == UNCORE_SPR_UPI) {
...@@ -6530,27 +6608,35 @@ static void spr_update_device_location(int type_id) ...@@ -6530,27 +6608,35 @@ static void spr_update_device_location(int type_id)
} else } else
return; return;
ctls = kcalloc(__uncore_max_dies, sizeof(u64), GFP_KERNEL); root = kzalloc(sizeof(struct rb_root), GFP_KERNEL);
if (!ctls) { if (!root) {
type->num_boxes = 0; type->num_boxes = 0;
return; return;
} }
*root = RB_ROOT;
while ((dev = pci_get_device(PCI_VENDOR_ID_INTEL, device, dev)) != NULL) { while ((dev = pci_get_device(PCI_VENDOR_ID_INTEL, device, dev)) != NULL) {
if (devfn != dev->devfn)
continue;
die = uncore_device_to_die(dev); die = uncore_device_to_die(dev);
if (die < 0) if (die < 0)
continue; continue;
ctls[die] = pci_domain_nr(dev->bus) << UNCORE_DISCOVERY_PCI_DOMAIN_OFFSET | unit = kzalloc(sizeof(*unit), GFP_KERNEL);
dev->bus->number << UNCORE_DISCOVERY_PCI_BUS_OFFSET | if (!unit)
devfn << UNCORE_DISCOVERY_PCI_DEVFN_OFFSET | continue;
type->box_ctl; unit->die = die;
unit->id = PCI_SLOT(dev->devfn) - PCI_SLOT(devfn);
unit->addr = pci_domain_nr(dev->bus) << UNCORE_DISCOVERY_PCI_DOMAIN_OFFSET |
dev->bus->number << UNCORE_DISCOVERY_PCI_BUS_OFFSET |
devfn << UNCORE_DISCOVERY_PCI_DEVFN_OFFSET |
type->box_ctl;
unit->pmu_idx = unit->id;
uncore_find_add_unit(unit, root, NULL);
} }
type->box_ctls = ctls; type->boxes = root;
} }
int spr_uncore_pci_init(void) int spr_uncore_pci_init(void)
...@@ -6623,7 +6709,7 @@ static struct intel_uncore_type gnr_uncore_b2cmi = { ...@@ -6623,7 +6709,7 @@ static struct intel_uncore_type gnr_uncore_b2cmi = {
}; };
static struct intel_uncore_type gnr_uncore_b2cxl = { static struct intel_uncore_type gnr_uncore_b2cxl = {
SPR_UNCORE_MMIO_COMMON_FORMAT(), SPR_UNCORE_MMIO_OFFS8_COMMON_FORMAT(),
.name = "b2cxl", .name = "b2cxl",
}; };
......
...@@ -476,6 +476,14 @@ struct cpu_hw_events { ...@@ -476,6 +476,14 @@ struct cpu_hw_events {
__EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \ __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \
HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_LAT_HYBRID) HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_LAT_HYBRID)
#define INTEL_HYBRID_LDLAT_CONSTRAINT(c, n) \
__EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \
HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_LAT_HYBRID|PERF_X86_EVENT_PEBS_LD_HSW)
#define INTEL_HYBRID_STLAT_CONSTRAINT(c, n) \
__EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \
HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_LAT_HYBRID|PERF_X86_EVENT_PEBS_ST_HSW)
/* Event constraint, but match on all event flags too. */ /* Event constraint, but match on all event flags too. */
#define INTEL_FLAGS_EVENT_CONSTRAINT(c, n) \ #define INTEL_FLAGS_EVENT_CONSTRAINT(c, n) \
EVENT_CONSTRAINT(c, n, ARCH_PERFMON_EVENTSEL_EVENT|X86_ALL_EVENT_FLAGS) EVENT_CONSTRAINT(c, n, ARCH_PERFMON_EVENTSEL_EVENT|X86_ALL_EVENT_FLAGS)
...@@ -655,8 +663,10 @@ enum { ...@@ -655,8 +663,10 @@ enum {
x86_lbr_exclusive_max, x86_lbr_exclusive_max,
}; };
#define PERF_PEBS_DATA_SOURCE_MAX 0x10 #define PERF_PEBS_DATA_SOURCE_MAX 0x100
#define PERF_PEBS_DATA_SOURCE_MASK (PERF_PEBS_DATA_SOURCE_MAX - 1) #define PERF_PEBS_DATA_SOURCE_MASK (PERF_PEBS_DATA_SOURCE_MAX - 1)
#define PERF_PEBS_DATA_SOURCE_GRT_MAX 0x10
#define PERF_PEBS_DATA_SOURCE_GRT_MASK (PERF_PEBS_DATA_SOURCE_GRT_MAX - 1)
enum hybrid_cpu_type { enum hybrid_cpu_type {
HYBRID_INTEL_NONE, HYBRID_INTEL_NONE,
...@@ -684,9 +694,16 @@ struct x86_hybrid_pmu { ...@@ -684,9 +694,16 @@ struct x86_hybrid_pmu {
cpumask_t supported_cpus; cpumask_t supported_cpus;
union perf_capabilities intel_cap; union perf_capabilities intel_cap;
u64 intel_ctrl; u64 intel_ctrl;
int max_pebs_events; u64 pebs_events_mask;
int num_counters; u64 config_mask;
int num_counters_fixed; union {
u64 cntr_mask64;
unsigned long cntr_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
};
union {
u64 fixed_cntr_mask64;
unsigned long fixed_cntr_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
};
struct event_constraint unconstrained; struct event_constraint unconstrained;
u64 hw_cache_event_ids u64 hw_cache_event_ids
...@@ -770,12 +787,20 @@ struct x86_pmu { ...@@ -770,12 +787,20 @@ struct x86_pmu {
int (*schedule_events)(struct cpu_hw_events *cpuc, int n, int *assign); int (*schedule_events)(struct cpu_hw_events *cpuc, int n, int *assign);
unsigned eventsel; unsigned eventsel;
unsigned perfctr; unsigned perfctr;
unsigned fixedctr;
int (*addr_offset)(int index, bool eventsel); int (*addr_offset)(int index, bool eventsel);
int (*rdpmc_index)(int index); int (*rdpmc_index)(int index);
u64 (*event_map)(int); u64 (*event_map)(int);
int max_events; int max_events;
int num_counters; u64 config_mask;
int num_counters_fixed; union {
u64 cntr_mask64;
unsigned long cntr_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
};
union {
u64 fixed_cntr_mask64;
unsigned long fixed_cntr_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
};
int cntval_bits; int cntval_bits;
u64 cntval_mask; u64 cntval_mask;
union { union {
...@@ -852,7 +877,7 @@ struct x86_pmu { ...@@ -852,7 +877,7 @@ struct x86_pmu {
pebs_ept :1; pebs_ept :1;
int pebs_record_size; int pebs_record_size;
int pebs_buffer_size; int pebs_buffer_size;
int max_pebs_events; u64 pebs_events_mask;
void (*drain_pebs)(struct pt_regs *regs, struct perf_sample_data *data); void (*drain_pebs)(struct pt_regs *regs, struct perf_sample_data *data);
struct event_constraint *pebs_constraints; struct event_constraint *pebs_constraints;
void (*pebs_aliases)(struct perf_event *event); void (*pebs_aliases)(struct perf_event *event);
...@@ -1120,13 +1145,19 @@ static inline unsigned int x86_pmu_event_addr(int index) ...@@ -1120,13 +1145,19 @@ static inline unsigned int x86_pmu_event_addr(int index)
x86_pmu.addr_offset(index, false) : index); x86_pmu.addr_offset(index, false) : index);
} }
static inline unsigned int x86_pmu_fixed_ctr_addr(int index)
{
return x86_pmu.fixedctr + (x86_pmu.addr_offset ?
x86_pmu.addr_offset(index, false) : index);
}
static inline int x86_pmu_rdpmc_index(int index) static inline int x86_pmu_rdpmc_index(int index)
{ {
return x86_pmu.rdpmc_index ? x86_pmu.rdpmc_index(index) : index; return x86_pmu.rdpmc_index ? x86_pmu.rdpmc_index(index) : index;
} }
bool check_hw_exists(struct pmu *pmu, int num_counters, bool check_hw_exists(struct pmu *pmu, unsigned long *cntr_mask,
int num_counters_fixed); unsigned long *fixed_cntr_mask);
int x86_add_exclusive(unsigned int what); int x86_add_exclusive(unsigned int what);
...@@ -1197,8 +1228,32 @@ void x86_pmu_enable_event(struct perf_event *event); ...@@ -1197,8 +1228,32 @@ void x86_pmu_enable_event(struct perf_event *event);
int x86_pmu_handle_irq(struct pt_regs *regs); int x86_pmu_handle_irq(struct pt_regs *regs);
void x86_pmu_show_pmu_cap(int num_counters, int num_counters_fixed, void x86_pmu_show_pmu_cap(struct pmu *pmu);
u64 intel_ctrl);
static inline int x86_pmu_num_counters(struct pmu *pmu)
{
return hweight64(hybrid(pmu, cntr_mask64));
}
static inline int x86_pmu_max_num_counters(struct pmu *pmu)
{
return fls64(hybrid(pmu, cntr_mask64));
}
static inline int x86_pmu_num_counters_fixed(struct pmu *pmu)
{
return hweight64(hybrid(pmu, fixed_cntr_mask64));
}
static inline int x86_pmu_max_num_counters_fixed(struct pmu *pmu)
{
return fls64(hybrid(pmu, fixed_cntr_mask64));
}
static inline u64 x86_pmu_get_event_config(struct perf_event *event)
{
return event->attr.config & hybrid(event->pmu, config_mask);
}
extern struct event_constraint emptyconstraint; extern struct event_constraint emptyconstraint;
...@@ -1517,9 +1572,11 @@ void intel_pmu_disable_bts(void); ...@@ -1517,9 +1572,11 @@ void intel_pmu_disable_bts(void);
int intel_pmu_drain_bts_buffer(void); int intel_pmu_drain_bts_buffer(void);
u64 adl_latency_data_small(struct perf_event *event, u64 status); u64 grt_latency_data(struct perf_event *event, u64 status);
u64 mtl_latency_data_small(struct perf_event *event, u64 status); u64 cmt_latency_data(struct perf_event *event, u64 status);
u64 lnl_latency_data(struct perf_event *event, u64 status);
extern struct event_constraint intel_core2_pebs_event_constraints[]; extern struct event_constraint intel_core2_pebs_event_constraints[];
...@@ -1551,6 +1608,8 @@ extern struct event_constraint intel_icl_pebs_event_constraints[]; ...@@ -1551,6 +1608,8 @@ extern struct event_constraint intel_icl_pebs_event_constraints[];
extern struct event_constraint intel_glc_pebs_event_constraints[]; extern struct event_constraint intel_glc_pebs_event_constraints[];
extern struct event_constraint intel_lnc_pebs_event_constraints[];
struct event_constraint *intel_pebs_constraints(struct perf_event *event); struct event_constraint *intel_pebs_constraints(struct perf_event *event);
void intel_pmu_pebs_add(struct perf_event *event); void intel_pmu_pebs_add(struct perf_event *event);
...@@ -1640,6 +1699,8 @@ void intel_pmu_pebs_data_source_mtl(void); ...@@ -1640,6 +1699,8 @@ void intel_pmu_pebs_data_source_mtl(void);
void intel_pmu_pebs_data_source_cmt(void); void intel_pmu_pebs_data_source_cmt(void);
void intel_pmu_pebs_data_source_lnl(void);
int intel_pmu_setup_lbr_filter(struct perf_event *event); int intel_pmu_setup_lbr_filter(struct perf_event *event);
void intel_pt_interrupt(void); void intel_pt_interrupt(void);
...@@ -1661,6 +1722,17 @@ static inline int is_ht_workaround_enabled(void) ...@@ -1661,6 +1722,17 @@ static inline int is_ht_workaround_enabled(void)
return !!(x86_pmu.flags & PMU_FL_EXCL_ENABLED); return !!(x86_pmu.flags & PMU_FL_EXCL_ENABLED);
} }
static inline u64 intel_pmu_pebs_mask(u64 cntr_mask)
{
return MAX_PEBS_EVENTS_MASK & cntr_mask;
}
static inline int intel_pmu_max_num_pebs(struct pmu *pmu)
{
static_assert(MAX_PEBS_EVENTS == 32);
return fls((u32)hybrid(pmu, pebs_events_mask));
}
#else /* CONFIG_CPU_SUP_INTEL */ #else /* CONFIG_CPU_SUP_INTEL */
static inline void reserve_ds_buffers(void) static inline void reserve_ds_buffers(void)
......
...@@ -530,13 +530,13 @@ __init int zhaoxin_pmu_init(void) ...@@ -530,13 +530,13 @@ __init int zhaoxin_pmu_init(void)
pr_info("Version check pass!\n"); pr_info("Version check pass!\n");
x86_pmu.version = version; x86_pmu.version = version;
x86_pmu.num_counters = eax.split.num_counters; x86_pmu.cntr_mask64 = GENMASK_ULL(eax.split.num_counters - 1, 0);
x86_pmu.cntval_bits = eax.split.bit_width; x86_pmu.cntval_bits = eax.split.bit_width;
x86_pmu.cntval_mask = (1ULL << eax.split.bit_width) - 1; x86_pmu.cntval_mask = (1ULL << eax.split.bit_width) - 1;
x86_pmu.events_maskl = ebx.full; x86_pmu.events_maskl = ebx.full;
x86_pmu.events_mask_len = eax.split.mask_length; x86_pmu.events_mask_len = eax.split.mask_length;
x86_pmu.num_counters_fixed = edx.split.num_counters_fixed; x86_pmu.fixed_cntr_mask64 = GENMASK_ULL(edx.split.num_counters_fixed - 1, 0);
x86_add_quirk(zhaoxin_arch_events_quirk); x86_add_quirk(zhaoxin_arch_events_quirk);
switch (boot_cpu_data.x86) { switch (boot_cpu_data.x86) {
...@@ -604,13 +604,13 @@ __init int zhaoxin_pmu_init(void) ...@@ -604,13 +604,13 @@ __init int zhaoxin_pmu_init(void)
return -ENODEV; return -ENODEV;
} }
x86_pmu.intel_ctrl = (1 << (x86_pmu.num_counters)) - 1; x86_pmu.intel_ctrl = x86_pmu.cntr_mask64;
x86_pmu.intel_ctrl |= ((1LL << x86_pmu.num_counters_fixed)-1) << INTEL_PMC_IDX_FIXED; x86_pmu.intel_ctrl |= x86_pmu.fixed_cntr_mask64 << INTEL_PMC_IDX_FIXED;
if (x86_pmu.event_constraints) { if (x86_pmu.event_constraints) {
for_each_event_constraint(c, x86_pmu.event_constraints) { for_each_event_constraint(c, x86_pmu.event_constraints) {
c->idxmsk64 |= (1ULL << x86_pmu.num_counters) - 1; c->idxmsk64 |= x86_pmu.cntr_mask64;
c->weight += x86_pmu.num_counters; c->weight += x86_pmu_num_counters(NULL);
} }
} }
......
...@@ -9,6 +9,7 @@ ...@@ -9,6 +9,7 @@
/* The maximal number of PEBS events: */ /* The maximal number of PEBS events: */
#define MAX_PEBS_EVENTS_FMT4 8 #define MAX_PEBS_EVENTS_FMT4 8
#define MAX_PEBS_EVENTS 32 #define MAX_PEBS_EVENTS 32
#define MAX_PEBS_EVENTS_MASK GENMASK_ULL(MAX_PEBS_EVENTS - 1, 0)
#define MAX_FIXED_PEBS_EVENTS 16 #define MAX_FIXED_PEBS_EVENTS 16
/* /*
......
...@@ -566,6 +566,12 @@ ...@@ -566,6 +566,12 @@
#define MSR_RELOAD_PMC0 0x000014c1 #define MSR_RELOAD_PMC0 0x000014c1
#define MSR_RELOAD_FIXED_CTR0 0x00001309 #define MSR_RELOAD_FIXED_CTR0 0x00001309
/* V6 PMON MSR range */
#define MSR_IA32_PMC_V6_GP0_CTR 0x1900
#define MSR_IA32_PMC_V6_GP0_CFG_A 0x1901
#define MSR_IA32_PMC_V6_FX0_CTR 0x1980
#define MSR_IA32_PMC_V6_STEP 4
/* KeyID partitioning between MKTME and TDX */ /* KeyID partitioning between MKTME and TDX */
#define MSR_IA32_MKTME_KEYID_PARTITIONING 0x00000087 #define MSR_IA32_MKTME_KEYID_PARTITIONING 0x00000087
......
...@@ -32,6 +32,8 @@ ...@@ -32,6 +32,8 @@
#define ARCH_PERFMON_EVENTSEL_INV (1ULL << 23) #define ARCH_PERFMON_EVENTSEL_INV (1ULL << 23)
#define ARCH_PERFMON_EVENTSEL_CMASK 0xFF000000ULL #define ARCH_PERFMON_EVENTSEL_CMASK 0xFF000000ULL
#define ARCH_PERFMON_EVENTSEL_BR_CNTR (1ULL << 35) #define ARCH_PERFMON_EVENTSEL_BR_CNTR (1ULL << 35)
#define ARCH_PERFMON_EVENTSEL_EQ (1ULL << 36)
#define ARCH_PERFMON_EVENTSEL_UMASK2 (0xFFULL << 40)
#define INTEL_FIXED_BITS_MASK 0xFULL #define INTEL_FIXED_BITS_MASK 0xFULL
#define INTEL_FIXED_BITS_STRIDE 4 #define INTEL_FIXED_BITS_STRIDE 4
...@@ -185,6 +187,8 @@ union cpuid10_edx { ...@@ -185,6 +187,8 @@ union cpuid10_edx {
* detection/enumeration details: * detection/enumeration details:
*/ */
#define ARCH_PERFMON_EXT_LEAF 0x00000023 #define ARCH_PERFMON_EXT_LEAF 0x00000023
#define ARCH_PERFMON_EXT_UMASK2 0x1
#define ARCH_PERFMON_EXT_EQ 0x2
#define ARCH_PERFMON_NUM_COUNTER_LEAF_BIT 0x1 #define ARCH_PERFMON_NUM_COUNTER_LEAF_BIT 0x1
#define ARCH_PERFMON_NUM_COUNTER_LEAF 0x1 #define ARCH_PERFMON_NUM_COUNTER_LEAF 0x1
...@@ -307,6 +311,10 @@ struct x86_pmu_capability { ...@@ -307,6 +311,10 @@ struct x86_pmu_capability {
#define INTEL_PMC_IDX_FIXED_SLOTS (INTEL_PMC_IDX_FIXED + 3) #define INTEL_PMC_IDX_FIXED_SLOTS (INTEL_PMC_IDX_FIXED + 3)
#define INTEL_PMC_MSK_FIXED_SLOTS (1ULL << INTEL_PMC_IDX_FIXED_SLOTS) #define INTEL_PMC_MSK_FIXED_SLOTS (1ULL << INTEL_PMC_IDX_FIXED_SLOTS)
/* TOPDOWN_BAD_SPECULATION.ALL: fixed counter 4 (Atom only) */
/* TOPDOWN_FE_BOUND.ALL: fixed counter 5 (Atom only) */
/* TOPDOWN_RETIRING.ALL: fixed counter 6 (Atom only) */
static inline bool use_fixed_pseudo_encoding(u64 code) static inline bool use_fixed_pseudo_encoding(u64 code)
{ {
return !(code & 0xff); return !(code & 0xff);
......
...@@ -781,11 +781,12 @@ struct perf_event { ...@@ -781,11 +781,12 @@ struct perf_event {
unsigned int pending_wakeup; unsigned int pending_wakeup;
unsigned int pending_kill; unsigned int pending_kill;
unsigned int pending_disable; unsigned int pending_disable;
unsigned int pending_sigtrap;
unsigned long pending_addr; /* SIGTRAP */ unsigned long pending_addr; /* SIGTRAP */
struct irq_work pending_irq; struct irq_work pending_irq;
struct irq_work pending_disable_irq;
struct callback_head pending_task; struct callback_head pending_task;
unsigned int pending_work; unsigned int pending_work;
struct rcuwait pending_work_wait;
atomic_t event_limit; atomic_t event_limit;
...@@ -962,7 +963,7 @@ struct perf_event_context { ...@@ -962,7 +963,7 @@ struct perf_event_context {
struct rcu_head rcu_head; struct rcu_head rcu_head;
/* /*
* Sum (event->pending_sigtrap + event->pending_work) * Sum (event->pending_work + event->pending_work)
* *
* The SIGTRAP is targeted at ctx->task, as such it won't do changing * The SIGTRAP is targeted at ctx->task, as such it won't do changing
* that until the signal is delivered. * that until the signal is delivered.
...@@ -970,12 +971,6 @@ struct perf_event_context { ...@@ -970,12 +971,6 @@ struct perf_event_context {
local_t nr_pending; local_t nr_pending;
}; };
/*
* Number of contexts where an event can trigger:
* task, softirq, hardirq, nmi.
*/
#define PERF_NR_CONTEXTS 4
struct perf_cpu_pmu_context { struct perf_cpu_pmu_context {
struct perf_event_pmu_context epc; struct perf_event_pmu_context epc;
struct perf_event_pmu_context *task_epc; struct perf_event_pmu_context *task_epc;
......
...@@ -734,6 +734,12 @@ enum perf_event_task_context { ...@@ -734,6 +734,12 @@ enum perf_event_task_context {
perf_nr_task_contexts, perf_nr_task_contexts,
}; };
/*
* Number of contexts where an event can trigger:
* task, softirq, hardirq, nmi.
*/
#define PERF_NR_CONTEXTS 4
struct wake_q_node { struct wake_q_node {
struct wake_q_node *next; struct wake_q_node *next;
}; };
...@@ -1256,6 +1262,7 @@ struct task_struct { ...@@ -1256,6 +1262,7 @@ struct task_struct {
unsigned int futex_state; unsigned int futex_state;
#endif #endif
#ifdef CONFIG_PERF_EVENTS #ifdef CONFIG_PERF_EVENTS
u8 perf_recursion[PERF_NR_CONTEXTS];
struct perf_event_context *perf_event_ctxp; struct perf_event_context *perf_event_ctxp;
struct mutex perf_event_mutex; struct mutex perf_event_mutex;
struct list_head perf_event_list; struct list_head perf_event_list;
......
...@@ -18,6 +18,7 @@ enum task_work_notify_mode { ...@@ -18,6 +18,7 @@ enum task_work_notify_mode {
TWA_RESUME, TWA_RESUME,
TWA_SIGNAL, TWA_SIGNAL,
TWA_SIGNAL_NO_IPI, TWA_SIGNAL_NO_IPI,
TWA_NMI_CURRENT,
}; };
static inline bool task_work_pending(struct task_struct *task) static inline bool task_work_pending(struct task_struct *task)
...@@ -30,7 +31,8 @@ int task_work_add(struct task_struct *task, struct callback_head *twork, ...@@ -30,7 +31,8 @@ int task_work_add(struct task_struct *task, struct callback_head *twork,
struct callback_head *task_work_cancel_match(struct task_struct *task, struct callback_head *task_work_cancel_match(struct task_struct *task,
bool (*match)(struct callback_head *, void *data), void *data); bool (*match)(struct callback_head *, void *data), void *data);
struct callback_head *task_work_cancel(struct task_struct *, task_work_func_t); struct callback_head *task_work_cancel_func(struct task_struct *, task_work_func_t);
bool task_work_cancel(struct task_struct *task, struct callback_head *cb);
void task_work_run(void); void task_work_run(void);
static inline void exit_task_work(struct task_struct *task) static inline void exit_task_work(struct task_struct *task)
......
...@@ -1349,12 +1349,14 @@ union perf_mem_data_src { ...@@ -1349,12 +1349,14 @@ union perf_mem_data_src {
#define PERF_MEM_LVLNUM_L2 0x02 /* L2 */ #define PERF_MEM_LVLNUM_L2 0x02 /* L2 */
#define PERF_MEM_LVLNUM_L3 0x03 /* L3 */ #define PERF_MEM_LVLNUM_L3 0x03 /* L3 */
#define PERF_MEM_LVLNUM_L4 0x04 /* L4 */ #define PERF_MEM_LVLNUM_L4 0x04 /* L4 */
/* 5-0x7 available */ #define PERF_MEM_LVLNUM_L2_MHB 0x05 /* L2 Miss Handling Buffer */
#define PERF_MEM_LVLNUM_MSC 0x06 /* Memory-side Cache */
/* 0x7 available */
#define PERF_MEM_LVLNUM_UNC 0x08 /* Uncached */ #define PERF_MEM_LVLNUM_UNC 0x08 /* Uncached */
#define PERF_MEM_LVLNUM_CXL 0x09 /* CXL */ #define PERF_MEM_LVLNUM_CXL 0x09 /* CXL */
#define PERF_MEM_LVLNUM_IO 0x0a /* I/O */ #define PERF_MEM_LVLNUM_IO 0x0a /* I/O */
#define PERF_MEM_LVLNUM_ANY_CACHE 0x0b /* Any cache */ #define PERF_MEM_LVLNUM_ANY_CACHE 0x0b /* Any cache */
#define PERF_MEM_LVLNUM_LFB 0x0c /* LFB */ #define PERF_MEM_LVLNUM_LFB 0x0c /* LFB / L1 Miss Handling Buffer */
#define PERF_MEM_LVLNUM_RAM 0x0d /* RAM */ #define PERF_MEM_LVLNUM_RAM 0x0d /* RAM */
#define PERF_MEM_LVLNUM_PMEM 0x0e /* PMEM */ #define PERF_MEM_LVLNUM_PMEM 0x0e /* PMEM */
#define PERF_MEM_LVLNUM_NA 0x0f /* N/A */ #define PERF_MEM_LVLNUM_NA 0x0f /* N/A */
......
...@@ -29,7 +29,7 @@ static inline size_t perf_callchain_entry__sizeof(void) ...@@ -29,7 +29,7 @@ static inline size_t perf_callchain_entry__sizeof(void)
sysctl_perf_event_max_contexts_per_stack)); sysctl_perf_event_max_contexts_per_stack));
} }
static DEFINE_PER_CPU(int, callchain_recursion[PERF_NR_CONTEXTS]); static DEFINE_PER_CPU(u8, callchain_recursion[PERF_NR_CONTEXTS]);
static atomic_t nr_callchain_events; static atomic_t nr_callchain_events;
static DEFINE_MUTEX(callchain_mutex); static DEFINE_MUTEX(callchain_mutex);
static struct callchain_cpus_entries *callchain_cpus_entries; static struct callchain_cpus_entries *callchain_cpus_entries;
......
...@@ -2283,21 +2283,6 @@ event_sched_out(struct perf_event *event, struct perf_event_context *ctx) ...@@ -2283,21 +2283,6 @@ event_sched_out(struct perf_event *event, struct perf_event_context *ctx)
state = PERF_EVENT_STATE_OFF; state = PERF_EVENT_STATE_OFF;
} }
if (event->pending_sigtrap) {
bool dec = true;
event->pending_sigtrap = 0;
if (state != PERF_EVENT_STATE_OFF &&
!event->pending_work) {
event->pending_work = 1;
dec = false;
WARN_ON_ONCE(!atomic_long_inc_not_zero(&event->refcount));
task_work_add(current, &event->pending_task, TWA_RESUME);
}
if (dec)
local_dec(&event->ctx->nr_pending);
}
perf_event_set_state(event, state); perf_event_set_state(event, state);
if (!is_software_event(event)) if (!is_software_event(event))
...@@ -2466,7 +2451,7 @@ static void __perf_event_disable(struct perf_event *event, ...@@ -2466,7 +2451,7 @@ static void __perf_event_disable(struct perf_event *event,
* hold the top-level event's child_mutex, so any descendant that * hold the top-level event's child_mutex, so any descendant that
* goes to exit will block in perf_event_exit_event(). * goes to exit will block in perf_event_exit_event().
* *
* When called from perf_pending_irq it's OK because event->ctx * When called from perf_pending_disable it's OK because event->ctx
* is the current context on this CPU and preemption is disabled, * is the current context on this CPU and preemption is disabled,
* hence we can't get into perf_event_task_sched_out for this context. * hence we can't get into perf_event_task_sched_out for this context.
*/ */
...@@ -2506,7 +2491,7 @@ EXPORT_SYMBOL_GPL(perf_event_disable); ...@@ -2506,7 +2491,7 @@ EXPORT_SYMBOL_GPL(perf_event_disable);
void perf_event_disable_inatomic(struct perf_event *event) void perf_event_disable_inatomic(struct perf_event *event)
{ {
event->pending_disable = 1; event->pending_disable = 1;
irq_work_queue(&event->pending_irq); irq_work_queue(&event->pending_disable_irq);
} }
#define MAX_INTERRUPTS (~0ULL) #define MAX_INTERRUPTS (~0ULL)
...@@ -5206,9 +5191,35 @@ static bool exclusive_event_installable(struct perf_event *event, ...@@ -5206,9 +5191,35 @@ static bool exclusive_event_installable(struct perf_event *event,
static void perf_addr_filters_splice(struct perf_event *event, static void perf_addr_filters_splice(struct perf_event *event,
struct list_head *head); struct list_head *head);
/*
 * Make sure the event's pending task work (event->pending_task) is no longer
 * outstanding: either cancel it, or wait until it has run.
 */
static void perf_pending_task_sync(struct perf_event *event)
{
	/* No task work outstanding for this event: nothing to synchronize. */
	if (!event->pending_work)
		return;

	if (!task_work_cancel(current, &event->pending_task)) {
		/*
		 * The work is not on the current task's queue, so it could not
		 * be cancelled from here; wait for perf_pending_task() to clear
		 * ->pending_work instead.
		 *
		 * Every access perf_pending_task() makes to the event sits in
		 * the same RCU read-side section, so the RCU grace period that
		 * precedes freeing the event guarantees those accesses have
		 * finished by then.
		 */
		rcuwait_wait_event(&event->pending_work_wait, !event->pending_work,
				   TASK_UNINTERRUPTIBLE);
		return;
	}

	/*
	 * The work was queued on the current task. Waiting for it here is
	 * obviously impossible, so it has simply been cancelled; undo the
	 * accounting that queueing it had done.
	 */
	event->pending_work = 0;
	local_dec(&event->ctx->nr_pending);
}
static void _free_event(struct perf_event *event) static void _free_event(struct perf_event *event)
{ {
irq_work_sync(&event->pending_irq); irq_work_sync(&event->pending_irq);
irq_work_sync(&event->pending_disable_irq);
perf_pending_task_sync(event);
unaccount_event(event); unaccount_event(event);
...@@ -6509,6 +6520,8 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma) ...@@ -6509,6 +6520,8 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)
return -EINVAL; return -EINVAL;
nr_pages = vma_size / PAGE_SIZE; nr_pages = vma_size / PAGE_SIZE;
if (nr_pages > INT_MAX)
return -ENOMEM;
mutex_lock(&event->mmap_mutex); mutex_lock(&event->mmap_mutex);
ret = -EINVAL; ret = -EINVAL;
...@@ -6750,7 +6763,7 @@ static void perf_sigtrap(struct perf_event *event) ...@@ -6750,7 +6763,7 @@ static void perf_sigtrap(struct perf_event *event)
/* /*
* Deliver the pending work in-event-context or follow the context. * Deliver the pending work in-event-context or follow the context.
*/ */
static void __perf_pending_irq(struct perf_event *event) static void __perf_pending_disable(struct perf_event *event)
{ {
int cpu = READ_ONCE(event->oncpu); int cpu = READ_ONCE(event->oncpu);
...@@ -6765,11 +6778,6 @@ static void __perf_pending_irq(struct perf_event *event) ...@@ -6765,11 +6778,6 @@ static void __perf_pending_irq(struct perf_event *event)
* Yay, we hit home and are in the context of the event. * Yay, we hit home and are in the context of the event.
*/ */
if (cpu == smp_processor_id()) { if (cpu == smp_processor_id()) {
if (event->pending_sigtrap) {
event->pending_sigtrap = 0;
perf_sigtrap(event);
local_dec(&event->ctx->nr_pending);
}
if (event->pending_disable) { if (event->pending_disable) {
event->pending_disable = 0; event->pending_disable = 0;
perf_event_disable_local(event); perf_event_disable_local(event);
...@@ -6793,11 +6801,26 @@ static void __perf_pending_irq(struct perf_event *event) ...@@ -6793,11 +6801,26 @@ static void __perf_pending_irq(struct perf_event *event)
* irq_work_queue(); // FAILS * irq_work_queue(); // FAILS
* *
* irq_work_run() * irq_work_run()
* perf_pending_irq() * perf_pending_disable()
* *
* But the event runs on CPU-B and wants disabling there. * But the event runs on CPU-B and wants disabling there.
*/ */
irq_work_queue_on(&event->pending_irq, cpu); irq_work_queue_on(&event->pending_disable_irq, cpu);
}
static void perf_pending_disable(struct irq_work *entry)
{
struct perf_event *event = container_of(entry, struct perf_event, pending_disable_irq);
int rctx;
/*
* If we 'fail' here, that's OK, it means recursion is already disabled
* and we won't recurse 'further'.
*/
rctx = perf_swevent_get_recursion_context();
__perf_pending_disable(event);
if (rctx >= 0)
perf_swevent_put_recursion_context(rctx);
} }
static void perf_pending_irq(struct irq_work *entry) static void perf_pending_irq(struct irq_work *entry)
...@@ -6820,8 +6843,6 @@ static void perf_pending_irq(struct irq_work *entry) ...@@ -6820,8 +6843,6 @@ static void perf_pending_irq(struct irq_work *entry)
perf_event_wakeup(event); perf_event_wakeup(event);
} }
__perf_pending_irq(event);
if (rctx >= 0) if (rctx >= 0)
perf_swevent_put_recursion_context(rctx); perf_swevent_put_recursion_context(rctx);
} }
...@@ -6831,24 +6852,28 @@ static void perf_pending_task(struct callback_head *head) ...@@ -6831,24 +6852,28 @@ static void perf_pending_task(struct callback_head *head)
struct perf_event *event = container_of(head, struct perf_event, pending_task); struct perf_event *event = container_of(head, struct perf_event, pending_task);
int rctx; int rctx;
/*
* All accesses to the event must belong to the same implicit RCU read-side
* critical section as the ->pending_work reset. See comment in
* perf_pending_task_sync().
*/
rcu_read_lock();
/* /*
* If we 'fail' here, that's OK, it means recursion is already disabled * If we 'fail' here, that's OK, it means recursion is already disabled
* and we won't recurse 'further'. * and we won't recurse 'further'.
*/ */
preempt_disable_notrace();
rctx = perf_swevent_get_recursion_context(); rctx = perf_swevent_get_recursion_context();
if (event->pending_work) { if (event->pending_work) {
event->pending_work = 0; event->pending_work = 0;
perf_sigtrap(event); perf_sigtrap(event);
local_dec(&event->ctx->nr_pending); local_dec(&event->ctx->nr_pending);
rcuwait_wake_up(&event->pending_work_wait);
} }
rcu_read_unlock();
if (rctx >= 0) if (rctx >= 0)
perf_swevent_put_recursion_context(rctx); perf_swevent_put_recursion_context(rctx);
preempt_enable_notrace();
put_event(event);
} }
#ifdef CONFIG_GUEST_PERF_EVENTS #ifdef CONFIG_GUEST_PERF_EVENTS
...@@ -9706,16 +9731,26 @@ static int __perf_event_overflow(struct perf_event *event, ...@@ -9706,16 +9731,26 @@ static int __perf_event_overflow(struct perf_event *event,
*/ */
bool valid_sample = sample_is_allowed(event, regs); bool valid_sample = sample_is_allowed(event, regs);
unsigned int pending_id = 1; unsigned int pending_id = 1;
enum task_work_notify_mode notify_mode;
if (regs) if (regs)
pending_id = hash32_ptr((void *)instruction_pointer(regs)) ?: 1; pending_id = hash32_ptr((void *)instruction_pointer(regs)) ?: 1;
if (!event->pending_sigtrap) {
event->pending_sigtrap = pending_id; notify_mode = in_nmi() ? TWA_NMI_CURRENT : TWA_RESUME;
if (!event->pending_work &&
!task_work_add(current, &event->pending_task, notify_mode)) {
event->pending_work = pending_id;
local_inc(&event->ctx->nr_pending); local_inc(&event->ctx->nr_pending);
event->pending_addr = 0;
if (valid_sample && (data->sample_flags & PERF_SAMPLE_ADDR))
event->pending_addr = data->addr;
} else if (event->attr.exclude_kernel && valid_sample) { } else if (event->attr.exclude_kernel && valid_sample) {
/* /*
* Should not be able to return to user space without * Should not be able to return to user space without
* consuming pending_sigtrap; with exceptions: * consuming pending_work; with exceptions:
* *
* 1. Where !exclude_kernel, events can overflow again * 1. Where !exclude_kernel, events can overflow again
* in the kernel without returning to user space. * in the kernel without returning to user space.
...@@ -9725,13 +9760,8 @@ static int __perf_event_overflow(struct perf_event *event, ...@@ -9725,13 +9760,8 @@ static int __perf_event_overflow(struct perf_event *event,
* To approximate progress (with false negatives), * To approximate progress (with false negatives),
* check 32-bit hash of the current IP. * check 32-bit hash of the current IP.
*/ */
WARN_ON_ONCE(event->pending_sigtrap != pending_id); WARN_ON_ONCE(event->pending_work != pending_id);
} }
event->pending_addr = 0;
if (valid_sample && (data->sample_flags & PERF_SAMPLE_ADDR))
event->pending_addr = data->addr;
irq_work_queue(&event->pending_irq);
} }
READ_ONCE(event->overflow_handler)(event, data, regs); READ_ONCE(event->overflow_handler)(event, data, regs);
...@@ -9759,11 +9789,7 @@ struct swevent_htable { ...@@ -9759,11 +9789,7 @@ struct swevent_htable {
struct swevent_hlist *swevent_hlist; struct swevent_hlist *swevent_hlist;
struct mutex hlist_mutex; struct mutex hlist_mutex;
int hlist_refcount; int hlist_refcount;
/* Recursion avoidance in each contexts */
int recursion[PERF_NR_CONTEXTS];
}; };
static DEFINE_PER_CPU(struct swevent_htable, swevent_htable); static DEFINE_PER_CPU(struct swevent_htable, swevent_htable);
/* /*
...@@ -9961,17 +9987,13 @@ DEFINE_PER_CPU(struct pt_regs, __perf_regs[4]); ...@@ -9961,17 +9987,13 @@ DEFINE_PER_CPU(struct pt_regs, __perf_regs[4]);
int perf_swevent_get_recursion_context(void) int perf_swevent_get_recursion_context(void)
{ {
struct swevent_htable *swhash = this_cpu_ptr(&swevent_htable); return get_recursion_context(current->perf_recursion);
return get_recursion_context(swhash->recursion);
} }
EXPORT_SYMBOL_GPL(perf_swevent_get_recursion_context); EXPORT_SYMBOL_GPL(perf_swevent_get_recursion_context);
void perf_swevent_put_recursion_context(int rctx) void perf_swevent_put_recursion_context(int rctx)
{ {
struct swevent_htable *swhash = this_cpu_ptr(&swevent_htable); put_recursion_context(current->perf_recursion, rctx);
put_recursion_context(swhash->recursion, rctx);
} }
void ___perf_sw_event(u32 event_id, u64 nr, struct pt_regs *regs, u64 addr) void ___perf_sw_event(u32 event_id, u64 nr, struct pt_regs *regs, u64 addr)
...@@ -11961,7 +11983,9 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu, ...@@ -11961,7 +11983,9 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu,
init_waitqueue_head(&event->waitq); init_waitqueue_head(&event->waitq);
init_irq_work(&event->pending_irq, perf_pending_irq); init_irq_work(&event->pending_irq, perf_pending_irq);
event->pending_disable_irq = IRQ_WORK_INIT_HARD(perf_pending_disable);
init_task_work(&event->pending_task, perf_pending_task); init_task_work(&event->pending_task, perf_pending_task);
rcuwait_init(&event->pending_work_wait);
mutex_init(&event->mmap_mutex); mutex_init(&event->mmap_mutex);
raw_spin_lock_init(&event->addr_filters.lock); raw_spin_lock_init(&event->addr_filters.lock);
...@@ -13637,6 +13661,7 @@ int perf_event_init_task(struct task_struct *child, u64 clone_flags) ...@@ -13637,6 +13661,7 @@ int perf_event_init_task(struct task_struct *child, u64 clone_flags)
{ {
int ret; int ret;
memset(child->perf_recursion, 0, sizeof(child->perf_recursion));
child->perf_event_ctxp = NULL; child->perf_event_ctxp = NULL;
mutex_init(&child->perf_event_mutex); mutex_init(&child->perf_event_mutex);
INIT_LIST_HEAD(&child->perf_event_list); INIT_LIST_HEAD(&child->perf_event_list);
......
...@@ -128,7 +128,7 @@ static inline unsigned long perf_data_size(struct perf_buffer *rb) ...@@ -128,7 +128,7 @@ static inline unsigned long perf_data_size(struct perf_buffer *rb)
static inline unsigned long perf_aux_size(struct perf_buffer *rb) static inline unsigned long perf_aux_size(struct perf_buffer *rb)
{ {
return rb->aux_nr_pages << PAGE_SHIFT; return (unsigned long)rb->aux_nr_pages << PAGE_SHIFT;
} }
#define __DEFINE_OUTPUT_COPY_BODY(advance_buf, memcpy_func, ...) \ #define __DEFINE_OUTPUT_COPY_BODY(advance_buf, memcpy_func, ...) \
...@@ -208,7 +208,7 @@ arch_perf_out_copy_user(void *dst, const void *src, unsigned long n) ...@@ -208,7 +208,7 @@ arch_perf_out_copy_user(void *dst, const void *src, unsigned long n)
DEFINE_OUTPUT_COPY(__output_copy_user, arch_perf_out_copy_user) DEFINE_OUTPUT_COPY(__output_copy_user, arch_perf_out_copy_user)
static inline int get_recursion_context(int *recursion) static inline int get_recursion_context(u8 *recursion)
{ {
unsigned char rctx = interrupt_context_level(); unsigned char rctx = interrupt_context_level();
...@@ -221,7 +221,7 @@ static inline int get_recursion_context(int *recursion) ...@@ -221,7 +221,7 @@ static inline int get_recursion_context(int *recursion)
return rctx; return rctx;
} }
static inline void put_recursion_context(int *recursion, int rctx) static inline void put_recursion_context(u8 *recursion, unsigned char rctx)
{ {
barrier(); barrier();
recursion[rctx]--; recursion[rctx]--;
......
...@@ -682,13 +682,18 @@ int rb_alloc_aux(struct perf_buffer *rb, struct perf_event *event, ...@@ -682,13 +682,18 @@ int rb_alloc_aux(struct perf_buffer *rb, struct perf_event *event,
if (!has_aux(event)) if (!has_aux(event))
return -EOPNOTSUPP; return -EOPNOTSUPP;
if (nr_pages <= 0)
return -EINVAL;
if (!overwrite) { if (!overwrite) {
/* /*
* Watermark defaults to half the buffer, and so does the * Watermark defaults to half the buffer, and so does the
* max_order, to aid PMU drivers in double buffering. * max_order, to aid PMU drivers in double buffering.
*/ */
if (!watermark) if (!watermark)
watermark = nr_pages << (PAGE_SHIFT - 1); watermark = min_t(unsigned long,
U32_MAX,
(unsigned long)nr_pages << (PAGE_SHIFT - 1));
/* /*
* Use aux_watermark as the basis for chunking to * Use aux_watermark as the basis for chunking to
......
...@@ -1337,7 +1337,7 @@ static int irq_thread(void *data) ...@@ -1337,7 +1337,7 @@ static int irq_thread(void *data)
* synchronize_hardirq(). So neither IRQTF_RUNTHREAD nor the * synchronize_hardirq(). So neither IRQTF_RUNTHREAD nor the
* oneshot mask bit can be set. * oneshot mask bit can be set.
*/ */
task_work_cancel(current, irq_thread_dtor); task_work_cancel_func(current, irq_thread_dtor);
return 0; return 0;
} }
......
// SPDX-License-Identifier: GPL-2.0 // SPDX-License-Identifier: GPL-2.0
#include <linux/irq_work.h>
#include <linux/spinlock.h> #include <linux/spinlock.h>
#include <linux/task_work.h> #include <linux/task_work.h>
#include <linux/resume_user_mode.h> #include <linux/resume_user_mode.h>
static struct callback_head work_exited; /* all we need is ->next == NULL */ static struct callback_head work_exited; /* all we need is ->next == NULL */
/*
 * irq_work handler: set TIF_NOTIFY_RESUME on the current task. The @entry
 * argument is unused and the flag's previous value is not needed.
 */
static void task_work_set_notify_irq(struct irq_work *entry)
{
	(void)test_and_set_tsk_thread_flag(current, TIF_NOTIFY_RESUME);
}
/*
 * Per-CPU irq_work that task_work_add() queues for the TWA_NMI_CURRENT
 * notify mode; its handler is task_work_set_notify_irq().
 */
static DEFINE_PER_CPU(struct irq_work, irq_work_NMI_resume) =
IRQ_WORK_INIT_HARD(task_work_set_notify_irq);
/** /**
* task_work_add - ask the @task to execute @work->func() * task_work_add - ask the @task to execute @work->func()
* @task: the task which should run the callback * @task: the task which should run the callback
...@@ -12,7 +20,7 @@ static struct callback_head work_exited; /* all we need is ->next == NULL */ ...@@ -12,7 +20,7 @@ static struct callback_head work_exited; /* all we need is ->next == NULL */
* @notify: how to notify the targeted task * @notify: how to notify the targeted task
* *
* Queue @work for task_work_run() below and notify the @task if @notify * Queue @work for task_work_run() below and notify the @task if @notify
* is @TWA_RESUME, @TWA_SIGNAL, or @TWA_SIGNAL_NO_IPI. * is @TWA_RESUME, @TWA_SIGNAL, @TWA_SIGNAL_NO_IPI or @TWA_NMI_CURRENT.
* *
* @TWA_SIGNAL works like signals, in that it will interrupt the targeted * @TWA_SIGNAL works like signals, in that it will interrupt the targeted
* task and run the task_work, regardless of whether the task is currently * task and run the task_work, regardless of whether the task is currently
...@@ -24,6 +32,8 @@ static struct callback_head work_exited; /* all we need is ->next == NULL */ ...@@ -24,6 +32,8 @@ static struct callback_head work_exited; /* all we need is ->next == NULL */
* kernel anyway. * kernel anyway.
* @TWA_RESUME work is run only when the task exits the kernel and returns to * @TWA_RESUME work is run only when the task exits the kernel and returns to
* user mode, or before entering guest mode. * user mode, or before entering guest mode.
* @TWA_NMI_CURRENT works like @TWA_RESUME, except it can only be used for the
* current @task and if the current context is NMI.
* *
* Fails if the @task is exiting/exited and thus it can't process this @work. * Fails if the @task is exiting/exited and thus it can't process this @work.
* Otherwise @work->func() will be called when the @task goes through one of * Otherwise @work->func() will be called when the @task goes through one of
...@@ -44,8 +54,13 @@ int task_work_add(struct task_struct *task, struct callback_head *work, ...@@ -44,8 +54,13 @@ int task_work_add(struct task_struct *task, struct callback_head *work,
{ {
struct callback_head *head; struct callback_head *head;
/* record the work call stack in order to print it in KASAN reports */ if (notify == TWA_NMI_CURRENT) {
kasan_record_aux_stack(work); if (WARN_ON_ONCE(task != current))
return -EINVAL;
} else {
/* record the work call stack in order to print it in KASAN reports */
kasan_record_aux_stack(work);
}
head = READ_ONCE(task->task_works); head = READ_ONCE(task->task_works);
do { do {
...@@ -66,6 +81,9 @@ int task_work_add(struct task_struct *task, struct callback_head *work, ...@@ -66,6 +81,9 @@ int task_work_add(struct task_struct *task, struct callback_head *work,
case TWA_SIGNAL_NO_IPI: case TWA_SIGNAL_NO_IPI:
__set_notify_signal(task); __set_notify_signal(task);
break; break;
case TWA_NMI_CURRENT:
irq_work_queue(this_cpu_ptr(&irq_work_NMI_resume));
break;
default: default:
WARN_ON_ONCE(1); WARN_ON_ONCE(1);
break; break;
...@@ -120,9 +138,9 @@ static bool task_work_func_match(struct callback_head *cb, void *data) ...@@ -120,9 +138,9 @@ static bool task_work_func_match(struct callback_head *cb, void *data)
} }
/** /**
* task_work_cancel - cancel a pending work added by task_work_add() * task_work_cancel_func - cancel a pending work matching a function added by task_work_add()
* @task: the task which should execute the work * @task: the task which should execute the func's work
* @func: identifies the work to remove * @func: identifies the func to match with a work to remove
* *
* Find the last queued pending work with ->func == @func and remove * Find the last queued pending work with ->func == @func and remove
* it from queue. * it from queue.
...@@ -131,11 +149,35 @@ static bool task_work_func_match(struct callback_head *cb, void *data) ...@@ -131,11 +149,35 @@ static bool task_work_func_match(struct callback_head *cb, void *data)
* The found work or NULL if not found. * The found work or NULL if not found.
*/ */
struct callback_head * struct callback_head *
task_work_cancel(struct task_struct *task, task_work_func_t func) task_work_cancel_func(struct task_struct *task, task_work_func_t func)
{ {
return task_work_cancel_match(task, task_work_func_match, func); return task_work_cancel_match(task, task_work_func_match, func);
} }
/*
 * Match helper for task_work_cancel_match(): accept only the queue entry
 * whose address equals the pointer handed in through @data.
 */
static bool task_work_match(struct callback_head *cb, void *data)
{
	struct callback_head *wanted = data;

	return wanted == cb;
}
/**
 * task_work_cancel - cancel a pending work added by task_work_add()
 * @task: the task on whose queue @cb may be pending
 * @cb: the exact callback to dequeue
 *
 * The queue is searched for @cb itself (pointer identity), in contrast to
 * task_work_cancel_func(), which matches on the work's ->func.
 *
 * RETURNS:
 * True if @cb was queued and has been removed, false otherwise.
 */
bool task_work_cancel(struct task_struct *task, struct callback_head *cb)
{
	return task_work_cancel_match(task, task_work_match, cb) == cb;
}
/** /**
* task_work_run - execute the works added by task_work_add() * task_work_run - execute the works added by task_work_add()
* *
...@@ -168,7 +210,7 @@ void task_work_run(void) ...@@ -168,7 +210,7 @@ void task_work_run(void)
if (!work) if (!work)
break; break;
/* /*
* Synchronize with task_work_cancel(). It can not remove * Synchronize with task_work_cancel_match(). It can not remove
* the first entry == work, cmpxchg(task_works) must fail. * the first entry == work, cmpxchg(task_works) must fail.
* But it can remove another entry from the ->next list. * But it can remove another entry from the ->next list.
*/ */
......
...@@ -1694,7 +1694,7 @@ long keyctl_session_to_parent(void) ...@@ -1694,7 +1694,7 @@ long keyctl_session_to_parent(void)
goto unlock; goto unlock;
/* cancel an already pending keyring replacement */ /* cancel an already pending keyring replacement */
oldwork = task_work_cancel(parent, key_change_session_keyring); oldwork = task_work_cancel_func(parent, key_change_session_keyring);
/* the replacement session keyring is applied just prior to userspace /* the replacement session keyring is applied just prior to userspace
* restarting */ * restarting */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment