Commit bceb7acc authored by Linus Torvalds

Merge tag 'perf-core-2023-10-28' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull performance event updates from Ingo Molnar:
 - Add AMD Unified Memory Controller (UMC) events introduced with Zen 4
 - Simplify & clean up the uncore management code
 - Fall back from RDPMC to RDMSR on certain uncore PMUs
 - Improve per-package and cstate event reading
 - Extend the Intel ref-cycles event to GP counters
 - Fix Intel MTL event constraints
 - Improve the Intel hybrid CPU handling code
 - Micro-optimize the RAPL code
 - Optimize perf_cgroup_switch()
 - Improve large AUX area error handling
 - Misc fixes and cleanups

* tag 'perf-core-2023-10-28' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (26 commits)
  perf/x86/amd/uncore: Pass through error code for initialization failures, instead of -ENODEV
  perf/x86/amd/uncore: Fix uninitialized return value in amd_uncore_init()
  x86/cpu: Fix the AMD Fam 17h, Fam 19h, Zen2 and Zen4 MSR enumerations
  perf: Optimize perf_cgroup_switch()
  perf/x86/amd/uncore: Add memory controller support
  perf/x86/amd/uncore: Add group exclusivity
  perf/x86/amd/uncore: Use rdmsr if rdpmc is unavailable
  perf/x86/amd/uncore: Move discovery and registration
  perf/x86/amd/uncore: Refactor uncore management
  perf/core: Allow reading package events from perf_event_read_local
  perf/x86/cstate: Allow reading the package statistics from local CPU
  perf/x86/intel/pt: Fix kernel-doc comments
  perf/x86/rapl: Annotate 'struct rapl_pmus' with __counted_by
  perf/core: Rename perf_proc_update_handler() -> perf_event_max_sample_rate_handler(), for readability
  perf/x86/rapl: Fix "Using plain integer as NULL pointer" Sparse warning
  perf/x86/rapl: Use local64_try_cmpxchg in rapl_event_update()
  perf/x86/rapl: Stop doing cpu_relax() in the local64_cmpxchg() loop in rapl_event_update()
  perf/core: Bail out early if the request AUX area is out of bound
  perf/x86/intel: Extend the ref-cycles event to GP counters
  perf/x86/intel: Fix broken fixed event constraints extension
  ...
parents cd063c8b 744940f1
@@ -1887,9 +1887,9 @@ ssize_t events_hybrid_sysfs_show(struct device *dev,
 	str = pmu_attr->event_str;
 	for (i = 0; i < x86_pmu.num_hybrid_pmus; i++) {
-		if (!(x86_pmu.hybrid_pmu[i].cpu_type & pmu_attr->pmu_type))
+		if (!(x86_pmu.hybrid_pmu[i].pmu_type & pmu_attr->pmu_type))
 			continue;
-		if (x86_pmu.hybrid_pmu[i].cpu_type & pmu->cpu_type) {
+		if (x86_pmu.hybrid_pmu[i].pmu_type & pmu->pmu_type) {
 			next_str = strchr(str, ';');
 			if (next_str)
 				return snprintf(page, next_str - str + 1, "%s", str);
@@ -2169,7 +2169,7 @@ static int __init init_hw_perf_events(void)
 			hybrid_pmu->pmu.capabilities |= PERF_PMU_CAP_EXTENDED_HW_TYPE;
 			err = perf_pmu_register(&hybrid_pmu->pmu, hybrid_pmu->name,
-						(hybrid_pmu->cpu_type == hybrid_big) ? PERF_TYPE_RAW : -1);
+						(hybrid_pmu->pmu_type == hybrid_big) ? PERF_TYPE_RAW : -1);
 			if (err)
 				break;
 		}
@@ -336,6 +336,9 @@ static int cstate_pmu_event_init(struct perf_event *event)
 		cfg = array_index_nospec((unsigned long)cfg, PERF_CSTATE_PKG_EVENT_MAX);
 		if (!(pkg_msr_mask & (1 << cfg)))
 			return -EINVAL;
+
+		event->event_caps |= PERF_EV_CAP_READ_ACTIVE_PKG;
+
 		event->hw.event_base = pkg_msr[cfg].msr;
 		cpu = cpumask_any_and(&cstate_pkg_cpu_mask,
 				      topology_die_cpumask(event->cpu));
@@ -261,7 +261,7 @@ static u64 __adl_latency_data_small(struct perf_event *event, u64 status,
 {
 	u64 val;
 
-	WARN_ON_ONCE(hybrid_pmu(event->pmu)->cpu_type == hybrid_big);
+	WARN_ON_ONCE(hybrid_pmu(event->pmu)->pmu_type == hybrid_big);
 
 	dse &= PERF_PEBS_DATA_SOURCE_MASK;
 	val = hybrid_var(event->pmu, pebs_data_source)[dse];
@@ -1058,7 +1058,7 @@ struct event_constraint intel_icl_pebs_event_constraints[] = {
 	EVENT_CONSTRAINT_END
 };
 
-struct event_constraint intel_spr_pebs_event_constraints[] = {
+struct event_constraint intel_glc_pebs_event_constraints[] = {
 	INTEL_FLAGS_UEVENT_CONSTRAINT(0x100, 0x100000000ULL),	/* INST_RETIRED.PREC_DIST */
 	INTEL_FLAGS_UEVENT_CONSTRAINT(0x0400, 0x800000000ULL),
@@ -736,6 +736,7 @@ static bool topa_table_full(struct topa *topa)
 /**
  * topa_insert_pages() - create a list of ToPA tables
  * @buf:	PT buffer being initialized.
+ * @cpu:	CPU on which to allocate.
  * @gfp:	Allocation flags.
  *
  * This initializes a list of ToPA tables with entries from
@@ -1207,8 +1208,11 @@ static void pt_buffer_fini_topa(struct pt_buffer *buf)
 /**
  * pt_buffer_init_topa() - initialize ToPA table for pt buffer
  * @buf:	PT buffer.
- * @size:	Total size of all regions within this ToPA.
+ * @cpu:	CPU on which to allocate.
+ * @nr_pages:	No. of pages to allocate.
+ * @gfp:	Allocation flags.
  *
+ * Return:	0 on success or error code.
  */
 static int pt_buffer_init_topa(struct pt_buffer *buf, int cpu,
 			       unsigned long nr_pages, gfp_t gfp)
@@ -1281,7 +1285,7 @@ static int pt_buffer_try_single(struct pt_buffer *buf, int nr_pages)
 /**
  * pt_buffer_setup_aux() - set up topa tables for a PT buffer
- * @cpu:	Cpu on which to allocate, -1 means current.
+ * @event:	Performance event
  * @pages:	Array of pointers to buffer pages passed from perf core.
  * @nr_pages:	Number of pages in the buffer.
  * @snapshot:	If this is a snapshot/overwrite counter.
@@ -652,10 +652,29 @@ enum {
 #define PERF_PEBS_DATA_SOURCE_MAX	0x10
 #define PERF_PEBS_DATA_SOURCE_MASK	(PERF_PEBS_DATA_SOURCE_MAX - 1)
 
+enum hybrid_cpu_type {
+	HYBRID_INTEL_NONE,
+	HYBRID_INTEL_ATOM	= 0x20,
+	HYBRID_INTEL_CORE	= 0x40,
+};
+
+enum hybrid_pmu_type {
+	not_hybrid,
+	hybrid_small		= BIT(0),
+	hybrid_big		= BIT(1),
+
+	hybrid_big_small	= hybrid_big | hybrid_small, /* only used for matching */
+};
+
+#define X86_HYBRID_PMU_ATOM_IDX		0
+#define X86_HYBRID_PMU_CORE_IDX		1
+
+#define X86_HYBRID_NUM_PMUS		2
+
 struct x86_hybrid_pmu {
 	struct pmu			pmu;
 	const char			*name;
-	u8				cpu_type;
+	enum hybrid_pmu_type		pmu_type;
 	cpumask_t			supported_cpus;
 	union perf_capabilities		intel_cap;
 	u64				intel_ctrl;
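
The old mixed-purpose cpu_type byte is split here into hybrid_cpu_type (the raw CPUID core-type values) and hybrid_pmu_type (bit flags used only for matching), which is what lets a hybrid_big_small attribute match either PMU with a plain AND, as in the events_hybrid_sysfs_show() hunk earlier on this page. A minimal standalone sketch of that matching logic (userspace C; fake_pmu and fake_attr are simplified stand-ins for the kernel's x86_hybrid_pmu and hybrid events attribute, not the real types):

#include <stdio.h>

#define BIT(n) (1U << (n))

enum hybrid_pmu_type {
	not_hybrid,
	hybrid_small     = BIT(0),
	hybrid_big       = BIT(1),
	hybrid_big_small = hybrid_big | hybrid_small, /* only used for matching */
};

/* Simplified stand-ins for struct x86_hybrid_pmu and the hybrid events attribute. */
struct fake_pmu  { const char *name; enum hybrid_pmu_type pmu_type; };
struct fake_attr { const char *event_str; enum hybrid_pmu_type pmu_type; };

/*
 * Mirrors the check in the events_hybrid_sysfs_show() hunk above: an attribute
 * is shown on a PMU when the bit sets intersect, so hybrid_big_small matches
 * both the big and the small PMU.
 */
static int attr_matches_pmu(const struct fake_attr *attr, const struct fake_pmu *pmu)
{
	return (attr->pmu_type & pmu->pmu_type) != 0;
}

int main(void)
{
	struct fake_pmu cpus[] = {
		{ "cpu_core", hybrid_big   },
		{ "cpu_atom", hybrid_small },
	};
	struct fake_attr attr = { "event=0x3c", hybrid_big_small };

	for (unsigned int i = 0; i < 2; i++)
		printf("%s: %s\n", cpus[i].name,
		       attr_matches_pmu(&attr, &cpus[i]) ? "visible" : "hidden");
	return 0;
}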
@@ -721,18 +740,6 @@ extern struct static_key_false perf_is_hybrid;
 	__Fp;						\
 })
 
-enum hybrid_pmu_type {
-	hybrid_big		= 0x40,
-	hybrid_small		= 0x20,
-
-	hybrid_big_small	= hybrid_big | hybrid_small,
-};
-
-#define X86_HYBRID_PMU_ATOM_IDX		0
-#define X86_HYBRID_PMU_CORE_IDX		1
-
-#define X86_HYBRID_NUM_PMUS		2
-
 /*
  * struct x86_pmu - generic x86 pmu
  */
@@ -940,7 +947,7 @@ struct x86_pmu {
 	 */
 	int				num_hybrid_pmus;
 	struct x86_hybrid_pmu		*hybrid_pmu;
-	u8				(*get_hybrid_cpu_type)	(void);
+	enum hybrid_cpu_type		(*get_hybrid_cpu_type)	(void);
 };
 
 struct x86_perf_task_context_opt {
@@ -1521,7 +1528,7 @@ extern struct event_constraint intel_skl_pebs_event_constraints[];
 
 extern struct event_constraint intel_icl_pebs_event_constraints[];
 
-extern struct event_constraint intel_spr_pebs_event_constraints[];
+extern struct event_constraint intel_glc_pebs_event_constraints[];
 
 struct event_constraint *intel_pebs_constraints(struct perf_event *event);
@@ -115,7 +115,7 @@ struct rapl_pmu {
 struct rapl_pmus {
 	struct pmu		pmu;
 	unsigned int		maxdie;
-	struct rapl_pmu		*pmus[];
+	struct rapl_pmu		*pmus[] __counted_by(maxdie);
 };
 
 enum rapl_unit_quirk {
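
__counted_by(maxdie) tells the compiler (and the kernel's FORTIFY/UBSAN bounds checking) that the flexible array pmus[] holds maxdie elements. A minimal standalone sketch of the idiom, with a fallback definition of the macro for compilers that lack the attribute (pmus_box is an illustrative stand-in, not the kernel's rapl_pmus):

#include <stdio.h>
#include <stdlib.h>

/*
 * Rough equivalent of the kernel's __counted_by() wrapper: newer GCC/Clang
 * understand the attribute and can bounds-check accesses to the array.
 */
#if defined(__has_attribute)
# if __has_attribute(__counted_by__)
#  define __counted_by(member) __attribute__((__counted_by__(member)))
# endif
#endif
#ifndef __counted_by
# define __counted_by(member)
#endif

struct pmus_box {
	unsigned int	maxdie;
	int		*pmus[] __counted_by(maxdie);	/* one slot per die */
};

int main(void)
{
	unsigned int nr = 4;
	struct pmus_box *box = malloc(sizeof(*box) + nr * sizeof(box->pmus[0]));

	if (!box)
		return 1;
	/*
	 * The counter member must be set before the array is accessed,
	 * otherwise the instrumentation sees a zero-sized array.
	 */
	box->maxdie = nr;
	for (unsigned int i = 0; i < nr; i++)
		box->pmus[i] = NULL;
	printf("allocated %u slots\n", box->maxdie);
	free(box);
	return 0;
}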
@@ -179,15 +179,11 @@ static u64 rapl_event_update(struct perf_event *event)
 	s64 delta, sdelta;
 	int shift = RAPL_CNTR_WIDTH;
 
-again:
 	prev_raw_count = local64_read(&hwc->prev_count);
-	rdmsrl(event->hw.event_base, new_raw_count);
-
-	if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
-			    new_raw_count) != prev_raw_count) {
-		cpu_relax();
-		goto again;
-	}
+	do {
+		rdmsrl(event->hw.event_base, new_raw_count);
+	} while (!local64_try_cmpxchg(&hwc->prev_count,
+				      &prev_raw_count, new_raw_count));
 
 	/*
 	 * Now we have the new raw value and have updated the prev
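
local64_try_cmpxchg() refreshes the expected value in place when the compare fails, which is why the explicit re-read of prev_count, the cpu_relax() call and the again: label can all go away. A rough userspace model of the new loop using C11 atomics (read_counter() is a hypothetical stand-in for the rdmsrl() of the energy-status MSR; this is a sketch, not the kernel API):

#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

static _Atomic uint64_t prev_count;

/* Hypothetical stand-in for rdmsrl() on a RAPL energy-status MSR. */
static uint64_t read_counter(void)
{
	static uint64_t fake = 1000;
	return fake += 7;	/* monotonically increasing, like an energy counter */
}

static uint64_t event_update(void)
{
	uint64_t prev, new;

	prev = atomic_load(&prev_count);
	do {
		new = read_counter();
		/*
		 * compare_exchange refreshes 'prev' on failure, so there is no
		 * explicit re-read and no cpu_relax()/goto retry path.
		 */
	} while (!atomic_compare_exchange_weak(&prev_count, &prev, new));

	return new - prev;	/* delta since the last update */
}

int main(void)
{
	printf("delta = %llu\n", (unsigned long long)event_update());
	return 0;
}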
@@ -537,11 +533,11 @@ static struct perf_msr intel_rapl_spr_msrs[] = {
  * - want to use same event codes across both architectures
  */
 static struct perf_msr amd_rapl_msrs[] = {
-	[PERF_RAPL_PP0]  = { 0, &rapl_events_cores_group, 0, false, 0 },
+	[PERF_RAPL_PP0]  = { 0, &rapl_events_cores_group, NULL, false, 0 },
 	[PERF_RAPL_PKG]  = { MSR_AMD_PKG_ENERGY_STATUS,  &rapl_events_pkg_group,   test_msr, false, RAPL_MSR_MASK },
-	[PERF_RAPL_RAM]  = { 0, &rapl_events_ram_group,   0, false, 0 },
-	[PERF_RAPL_PP1]  = { 0, &rapl_events_gpu_group,   0, false, 0 },
-	[PERF_RAPL_PSYS] = { 0, &rapl_events_psys_group,  0, false, 0 },
+	[PERF_RAPL_RAM]  = { 0, &rapl_events_ram_group,   NULL, false, 0 },
+	[PERF_RAPL_PP1]  = { 0, &rapl_events_gpu_group,   NULL, false, 0 },
+	[PERF_RAPL_PSYS] = { 0, &rapl_events_psys_group,  NULL, false, 0 },
 };
 
 static int rapl_cpu_offline(unsigned int cpu)
@@ -638,12 +638,16 @@
 #define MSR_AMD64_LBR_SELECT			0xc000010e
 
 /* Zen4 */
-#define MSR_ZEN4_BP_CFG                 0xc001102e
+#define MSR_ZEN4_BP_CFG				0xc001102e
 #define MSR_ZEN4_BP_CFG_SHARED_BTB_FIX_BIT	5
 
+/* Fam 19h MSRs */
+#define MSR_F19H_UMC_PERF_CTL			0xc0010800
+#define MSR_F19H_UMC_PERF_CTR			0xc0010801
+
 /* Zen 2 */
-#define MSR_ZEN2_SPECTRAL_CHICKEN       0xc00110e3
-#define MSR_ZEN2_SPECTRAL_CHICKEN_BIT   BIT_ULL(1)
+#define MSR_ZEN2_SPECTRAL_CHICKEN		0xc00110e3
+#define MSR_ZEN2_SPECTRAL_CHICKEN_BIT		BIT_ULL(1)
 
 /* Fam 17h MSRs */
 #define MSR_F17H_IRPERF				0xc00000e9
@@ -112,6 +112,13 @@
 	(AMD64_PERFMON_V2_EVENTSEL_EVENT_NB	|	\
 	 AMD64_PERFMON_V2_EVENTSEL_UMASK_NB)
 
+#define AMD64_PERFMON_V2_ENABLE_UMC			BIT_ULL(31)
+#define AMD64_PERFMON_V2_EVENTSEL_EVENT_UMC		GENMASK_ULL(7, 0)
+#define AMD64_PERFMON_V2_EVENTSEL_RDWRMASK_UMC		GENMASK_ULL(9, 8)
+#define AMD64_PERFMON_V2_RAW_EVENT_MASK_UMC	\
+	(AMD64_PERFMON_V2_EVENTSEL_EVENT_UMC	|	\
+	 AMD64_PERFMON_V2_EVENTSEL_RDWRMASK_UMC)
+
 #define AMD64_NUM_COUNTERS				4
 #define AMD64_NUM_COUNTERS_CORE				6
 #define AMD64_NUM_COUNTERS_NB				4
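
The new UMC event-select layout packs an 8-bit event number and a 2-bit read/write mask into the low bits of the control register, with bit 31 as the enable bit the driver sets when it starts the counter. A small standalone sketch of the field packing (userspace C; umc_config() is an illustrative helper, not a kernel function, and the BIT_ULL/GENMASK_ULL stand-ins mimic the kernel macros):

#include <stdint.h>
#include <stdio.h>

/* Userspace stand-ins for the kernel's BIT_ULL()/GENMASK_ULL() helpers. */
#define BIT_ULL(n)		(1ULL << (n))
#define GENMASK_ULL(h, l)	(((~0ULL) << (l)) & (~0ULL >> (63 - (h))))

#define AMD64_PERFMON_V2_ENABLE_UMC		BIT_ULL(31)
#define AMD64_PERFMON_V2_EVENTSEL_EVENT_UMC	GENMASK_ULL(7, 0)
#define AMD64_PERFMON_V2_EVENTSEL_RDWRMASK_UMC	GENMASK_ULL(9, 8)

/* Illustrative helper: pack an event number and read/write mask into config. */
static uint64_t umc_config(uint64_t event, uint64_t rdwr_mask)
{
	return (event & AMD64_PERFMON_V2_EVENTSEL_EVENT_UMC) |
	       ((rdwr_mask << 8) & AMD64_PERFMON_V2_EVENTSEL_RDWRMASK_UMC);
}

int main(void)
{
	/* Hypothetical event 0x01, counting both reads (bit 8) and writes (bit 9). */
	uint64_t cfg = umc_config(0x01, 0x3);

	printf("event config = 0x%llx\n", (unsigned long long)cfg);
	printf("PERF_CTL     = 0x%llx (with enable bit)\n",
	       (unsigned long long)(cfg | AMD64_PERFMON_V2_ENABLE_UMC));
	return 0;
}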
@@ -232,6 +239,8 @@ union cpuid_0x80000022_ebx {
 		unsigned int	lbr_v2_stack_sz:6;
 		/* Number of Data Fabric Counters */
 		unsigned int	num_df_pmc:6;
+		/* Number of Unified Memory Controller Counters */
+		unsigned int	num_umc_pmc:6;
 	} split;
 	unsigned int		full;
 };
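
num_umc_pmc extends CPUID leaf 0x80000022 EBX, whose bitfields are allocated from bit 0 upward on x86: core PMCs in bits 0-3, LBRv2 stack size in 4-9, DF counters in 10-15 and UMC counters in 16-21. A hedged userspace sketch that reads the leaf directly via GCC/Clang's <cpuid.h> (the bit positions are inferred from the bitfield layout above; this is not how the kernel consumes the leaf):

#include <cpuid.h>
#include <stdio.h>

int main(void)
{
	unsigned int eax, ebx, ecx, edx;

	if (!__get_cpuid_count(0x80000022, 0, &eax, &ebx, &ecx, &edx)) {
		puts("CPUID leaf 0x80000022 not available");
		return 1;
	}
	/* Field extraction mirrors the union cpuid_0x80000022_ebx bitfields. */
	printf("num_df_pmc  = %u\n", (ebx >> 10) & 0x3f);
	printf("num_umc_pmc = %u\n", (ebx >> 16) & 0x3f);
	return 0;
}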
@@ -879,6 +879,7 @@ struct perf_event_pmu_context {
 	unsigned int			embedded : 1;
 
 	unsigned int			nr_events;
 	unsigned int			nr_cgroups;
+	atomic_t			refcount; /* event <-> epc */
 
 	struct rcu_head			rcu_head;
@@ -1574,7 +1575,7 @@ extern int sysctl_perf_cpu_time_max_percent;
 
 extern void perf_sample_event_took(u64 sample_len_ns);
 
-int perf_proc_update_handler(struct ctl_table *table, int write,
+int perf_event_max_sample_rate_handler(struct ctl_table *table, int write,
 		void *buffer, size_t *lenp, loff_t *ppos);
 int perf_cpu_time_max_percent_handler(struct ctl_table *table, int write,
 		void *buffer, size_t *lenp, loff_t *ppos);
@@ -700,6 +700,12 @@ int rb_alloc_aux(struct perf_buffer *rb, struct perf_event *event,
 			watermark = 0;
 	}
 
+	/*
+	 * kcalloc_node() is unable to allocate buffer if the size is larger
+	 * than: PAGE_SIZE << MAX_ORDER; directly bail out in this case.
+	 */
+	if (get_order((unsigned long)nr_pages * sizeof(void *)) > MAX_ORDER)
+		return -ENOMEM;
 	rb->aux_pages = kcalloc_node(nr_pages, sizeof(void *), GFP_KERNEL,
 				     node);
 	if (!rb->aux_pages)
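
The bound applies to the array of page pointers that tracks the AUX buffer, not to the AUX data itself: as the comment says, kcalloc_node() cannot hand out more than PAGE_SIZE << MAX_ORDER bytes in a single allocation. A small standalone sketch of the arithmetic (assuming 4 KiB pages and MAX_ORDER = 10, typical x86-64 values at the time; both are assumptions, not taken from the diff):

#include <stdio.h>

int main(void)
{
	const unsigned long page_size = 4096;	/* assumed 4 KiB pages */
	const unsigned int  max_order = 10;	/* assumed MAX_ORDER */
	const unsigned long ptr_size  = sizeof(void *);

	/* Largest allocation the page allocator can satisfy in one go. */
	unsigned long max_alloc = page_size << max_order;
	/* Largest nr_pages whose void* array still fits in one allocation. */
	unsigned long max_pages = max_alloc / ptr_size;

	printf("max kcalloc size : %lu bytes\n", max_alloc);
	printf("max AUX pages    : %lu (= %lu MiB of AUX buffer)\n",
	       max_pages, max_pages * page_size >> 20);
	return 0;
}

With those assumed values the pointer array tops out at 4 MiB, so the check rejects AUX area requests above roughly 2 GiB instead of letting kcalloc_node() fail later.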
@@ -1983,7 +1983,7 @@ static struct ctl_table kern_table[] = {
 		.data		= &sysctl_perf_event_sample_rate,
 		.maxlen		= sizeof(sysctl_perf_event_sample_rate),
 		.mode		= 0644,
-		.proc_handler	= perf_proc_update_handler,
+		.proc_handler	= perf_event_max_sample_rate_handler,
 		.extra1		= SYSCTL_ONE,
 	},
 	{