Commit a64e697c authored by Athira Rajeev, committed by Michael Ellerman

powerpc/perf: power10 Performance Monitoring support

Base enablement patch to register performance monitoring hardware
support for power10. The patch introduces the raw event encoding
format, defines the supported list of events and the config fields
for the event attributes, and exports their corresponding bit values
via sysfs.

The patch also enhances the support functions in isa207_common.c to
cover the power10 PMU hardware.
Reported-by: kernel test robot <lkp@intel.com>
Signed-off-by: Madhavan Srinivasan <maddy@linux.ibm.com>
Signed-off-by: Athira Rajeev <atrajeev@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/1594996707-3727-9-git-send-email-atrajeev@linux.vnet.ibm.com
parent 9908c826
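For context (an illustrative sketch, not part of this commit): once the
power10 PMU is registered, the raw encodings defined below are usable
from user space through the standard perf_event_open(2) interface, e.g.
counting PM_RUN_CYC (0x600f4) from the event list in this patch:

#include <linux/perf_event.h>
#include <sys/ioctl.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <string.h>
#include <stdio.h>

int main(void)
{
        struct perf_event_attr attr;
        long long count;
        int fd;

        memset(&attr, 0, sizeof(attr));
        attr.size = sizeof(attr);
        attr.type = PERF_TYPE_RAW;      /* raw PMU encoding */
        attr.config = 0x600f4;          /* PM_RUN_CYC */
        attr.disabled = 1;

        fd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
        if (fd < 0)
                return 1;

        ioctl(fd, PERF_EVENT_IOC_ENABLE, 0);
        /* ... workload under measurement ... */
        ioctl(fd, PERF_EVENT_IOC_DISABLE, 0);

        if (read(fd, &count, sizeof(count)) == sizeof(count))
                printf("PM_RUN_CYC: %lld\n", count);
        close(fd);
        return 0;
}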
@@ -9,7 +9,7 @@ obj-$(CONFIG_PPC_PERF_CTRS)    += core-book3s.o bhrb.o
 obj64-$(CONFIG_PPC_PERF_CTRS)  += ppc970-pmu.o power5-pmu.o \
                                   power5+-pmu.o power6-pmu.o power7-pmu.o \
                                   isa207-common.o power8-pmu.o power9-pmu.o \
-                                  generic-compat-pmu.o
+                                  generic-compat-pmu.o power10-pmu.o
 obj32-$(CONFIG_PPC_PERF_CTRS)  += mpc7450-pmu.o
 obj-$(CONFIG_PPC_POWERNV)      += imc-pmu.o
...
@@ -2333,6 +2333,8 @@ static int __init init_ppc64_pmu(void)
                return 0;
        else if (!init_power9_pmu())
                return 0;
+       else if (!init_power10_pmu())
+               return 0;
        else if (!init_ppc970_pmu())
                return 0;
        else
...
@@ -9,4 +9,5 @@ extern int init_power6_pmu(void);
 extern int init_power7_pmu(void);
 extern int init_power8_pmu(void);
 extern int init_power9_pmu(void);
+extern int init_power10_pmu(void);
 extern int init_generic_compat_pmu(void);
@@ -55,7 +55,9 @@ static bool is_event_valid(u64 event)
 {
        u64 valid_mask = EVENT_VALID_MASK;

-       if (cpu_has_feature(CPU_FTR_ARCH_300))
+       if (cpu_has_feature(CPU_FTR_ARCH_31))
+               valid_mask = p10_EVENT_VALID_MASK;
+       else if (cpu_has_feature(CPU_FTR_ARCH_300))
                valid_mask = p9_EVENT_VALID_MASK;

        return !(event & ~valid_mask);
@@ -69,6 +71,14 @@ static inline bool is_event_marked(u64 event)
                return false;
 }

+static unsigned long sdar_mod_val(u64 event)
+{
+       if (cpu_has_feature(CPU_FTR_ARCH_31))
+               return p10_SDAR_MODE(event);
+
+       return p9_SDAR_MODE(event);
+}
+
 static void mmcra_sdar_mode(u64 event, unsigned long *mmcra)
 {
        /*
@@ -79,7 +89,7 @@ static void mmcra_sdar_mode(u64 event, unsigned long *mmcra)
         * MMCRA[SDAR_MODE] will be programmed as "0b01" for continuous sampling
         * mode and will be unchanged when setting MMCRA[63] (Marked events).
         *
-        * In case of Power9:
+        * In case of Power9/Power10:
         * Marked event: MMCRA[SDAR_MODE] will be set to 0b00 ('No Updates'),
         *               or if the group already has any marked events.
         * For rest
@@ -90,8 +100,8 @@ static void mmcra_sdar_mode(u64 event, unsigned long *mmcra)
        if (cpu_has_feature(CPU_FTR_ARCH_300)) {
                if (is_event_marked(event) || (*mmcra & MMCRA_SAMPLE_ENABLE))
                        *mmcra &= MMCRA_SDAR_MODE_NO_UPDATES;
-               else if (p9_SDAR_MODE(event))
-                       *mmcra |= p9_SDAR_MODE(event) << MMCRA_SDAR_MODE_SHIFT;
+               else if (sdar_mod_val(event))
+                       *mmcra |= sdar_mod_val(event) << MMCRA_SDAR_MODE_SHIFT;
                else
                        *mmcra |= MMCRA_SDAR_MODE_DCACHE;
        } else
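An illustrative stand-alone mirror (not from the patch) of the power10
branch of sdar_mod_val(): the SDAR mode comes from event bits 22:23
(p10_SDAR_MODE_SHIFT/p10_SDAR_MODE_MASK in the header hunk further
down), and mmcra_sdar_mode() shifts a non-zero value into
MMCRA[SDAR_MODE], falling back to MMCRA_SDAR_MODE_DCACHE for zero:

#include <stdint.h>

static uint64_t p10_sdar_mode(uint64_t event)
{
        return (event >> 22) & 0x3;     /* p10_SDAR_MODE_SHIFT/MASK */
}

/* e.g. p10_sdar_mode(0x600f4 | (2ULL << 22)) == 2; that value is then
 * placed at MMCRA_SDAR_MODE_SHIFT by mmcra_sdar_mode(). */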
@@ -134,7 +144,11 @@ static bool is_thresh_cmp_valid(u64 event)
        /*
         * Check the mantissa upper two bits are not zero, unless the
         * exponent is also zero. See the THRESH_CMP_MANTISSA doc.
+        * Power10: thresh_cmp is replaced by l2_l3 event select.
         */
+       if (cpu_has_feature(CPU_FTR_ARCH_31))
+               return false;
+
        cmp = (event >> EVENT_THR_CMP_SHIFT) & EVENT_THR_CMP_MASK;
        exp = cmp >> 7;
@@ -251,7 +265,12 @@ int isa207_get_constraint(u64 event, unsigned long *maskp, unsigned long *valp)
        pmc   = (event >> EVENT_PMC_SHIFT)        & EVENT_PMC_MASK;
        unit  = (event >> EVENT_UNIT_SHIFT)       & EVENT_UNIT_MASK;
-       cache = (event >> EVENT_CACHE_SEL_SHIFT)  & EVENT_CACHE_SEL_MASK;
+       if (cpu_has_feature(CPU_FTR_ARCH_31))
+               cache = (event >> EVENT_CACHE_SEL_SHIFT) &
+                       p10_EVENT_CACHE_SEL_MASK;
+       else
+               cache = (event >> EVENT_CACHE_SEL_SHIFT) &
+                       EVENT_CACHE_SEL_MASK;
        ebb   = (event >> EVENT_EBB_SHIFT)        & EVENT_EBB_MASK;

        if (pmc) {
@@ -283,7 +302,10 @@ int isa207_get_constraint(u64 event, unsigned long *maskp, unsigned long *valp)
        }

        if (unit >= 6 && unit <= 9) {
-               if (cpu_has_feature(CPU_FTR_ARCH_300)) {
+               if (cpu_has_feature(CPU_FTR_ARCH_31) && (unit == 6)) {
+                       mask |= CNST_L2L3_GROUP_MASK;
+                       value |= CNST_L2L3_GROUP_VAL(event >> p10_L2L3_EVENT_SHIFT);
+               } else if (cpu_has_feature(CPU_FTR_ARCH_300)) {
                        mask |= CNST_CACHE_GROUP_MASK;
                        value |= CNST_CACHE_GROUP_VAL(event & 0xff);
@@ -367,6 +389,7 @@ int isa207_compute_mmcr(u64 event[], int n_ev,
                               struct perf_event *pevents[])
 {
        unsigned long mmcra, mmcr1, mmcr2, unit, combine, psel, cache, val;
+       unsigned long mmcr3;
        unsigned int pmc, pmc_inuse;
        int i;
@@ -379,7 +402,7 @@ int isa207_compute_mmcr(u64 event[], int n_ev,
                pmc_inuse |= 1 << pmc;
        }

-       mmcra = mmcr1 = mmcr2 = 0;
+       mmcra = mmcr1 = mmcr2 = mmcr3 = 0;

        /* Second pass: assign PMCs, set all MMCR1 fields */
        for (i = 0; i < n_ev; ++i) {
@@ -438,9 +461,18 @@ int isa207_compute_mmcr(u64 event[], int n_ev,
                        mmcra |= val << MMCRA_THR_CTL_SHIFT;
                        val = (event[i] >> EVENT_THR_SEL_SHIFT) & EVENT_THR_SEL_MASK;
                        mmcra |= val << MMCRA_THR_SEL_SHIFT;
-                       val = (event[i] >> EVENT_THR_CMP_SHIFT) & EVENT_THR_CMP_MASK;
-                       mmcra |= thresh_cmp_val(val);
+                       if (!cpu_has_feature(CPU_FTR_ARCH_31)) {
+                               val = (event[i] >> EVENT_THR_CMP_SHIFT) &
+                                       EVENT_THR_CMP_MASK;
+                               mmcra |= thresh_cmp_val(val);
+                       }
+               }
+
+               if (cpu_has_feature(CPU_FTR_ARCH_31) && (unit == 6)) {
+                       val = (event[i] >> p10_L2L3_EVENT_SHIFT) &
+                               p10_EVENT_L2L3_SEL_MASK;
+                       mmcr2 |= val << p10_L2L3_SEL_SHIFT;
                }

                if (event[i] & EVENT_WANTS_BHRB) {
                        val = (event[i] >> EVENT_IFM_SHIFT) & EVENT_IFM_MASK;
@@ -460,6 +492,14 @@ int isa207_compute_mmcr(u64 event[], int n_ev,
                        mmcr2 |= MMCR2_FCS(pmc);
                }

+               if (cpu_has_feature(CPU_FTR_ARCH_31)) {
+                       if (pmc <= 4) {
+                               val = (event[i] >> p10_EVENT_MMCR3_SHIFT) &
+                                       p10_EVENT_MMCR3_MASK;
+                               mmcr3 |= val << MMCR3_SHIFT(pmc);
+                       }
+               }
+
                hwc[i] = pmc - 1;
        }
@@ -480,6 +520,7 @@ int isa207_compute_mmcr(u64 event[], int n_ev,
        mmcr->mmcr1 = mmcr1;
        mmcr->mmcra = mmcra;
        mmcr->mmcr2 = mmcr2;
+       mmcr->mmcr3 = mmcr3;

        return 0;
 }
...
@@ -87,6 +87,31 @@
         EVENT_LINUX_MASK       |                               \
         EVENT_PSEL_MASK))

+/* Constants to support power10 raw encoding format */
+#define p10_SDAR_MODE_SHIFT            22
+#define p10_SDAR_MODE_MASK             0x3ull
+#define p10_SDAR_MODE(v)               (((v) >> p10_SDAR_MODE_SHIFT) & \
+                                       p10_SDAR_MODE_MASK)
+#define p10_EVENT_L2L3_SEL_MASK        0x1f
+#define p10_L2L3_SEL_SHIFT             3
+#define p10_L2L3_EVENT_SHIFT           40
+#define p10_EVENT_THRESH_MASK          0xffffull
+#define p10_EVENT_CACHE_SEL_MASK       0x3ull
+#define p10_EVENT_MMCR3_MASK           0x7fffull
+#define p10_EVENT_MMCR3_SHIFT          45
+
+#define p10_EVENT_VALID_MASK           \
+       ((p10_SDAR_MODE_MASK << p10_SDAR_MODE_SHIFT             |       \
+       (p10_EVENT_THRESH_MASK << EVENT_THRESH_SHIFT)           |       \
+       (EVENT_SAMPLE_MASK << EVENT_SAMPLE_SHIFT)               |       \
+       (p10_EVENT_CACHE_SEL_MASK << EVENT_CACHE_SEL_SHIFT)     |       \
+       (EVENT_PMC_MASK << EVENT_PMC_SHIFT)                     |       \
+       (EVENT_UNIT_MASK << EVENT_UNIT_SHIFT)                   |       \
+       (p9_EVENT_COMBINE_MASK << p9_EVENT_COMBINE_SHIFT)       |       \
+       (p10_EVENT_MMCR3_MASK << p10_EVENT_MMCR3_SHIFT)         |       \
+       (EVENT_MARKED_MASK << EVENT_MARKED_SHIFT)               |       \
+        EVENT_LINUX_MASK                                       |       \
+        EVENT_PSEL_MASK))
+
 /*
  * Layout of constraint bits:
  *
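As a quick illustration of the raw layout (not part of the commit; the
shift/mask values mirror the generic EVENT_* constants defined earlier
in this header), a power10 event code splits into PMC, unit and
event-select fields like this:

#include <stdint.h>
#include <stdio.h>

#define EVENT_PMC_SHIFT         16
#define EVENT_PMC_MASK          0xfULL
#define EVENT_UNIT_SHIFT        12
#define EVENT_UNIT_MASK         0xfULL
#define EVENT_PSEL_MASK         0xffULL

int main(void)
{
        uint64_t ev = 0x600f4;  /* PM_RUN_CYC from the event list below */

        /* prints: pmc=6 unit=0 psel=0xf4 */
        printf("pmc=%llu unit=%llu psel=0x%llx\n",
               (unsigned long long)((ev >> EVENT_PMC_SHIFT) & EVENT_PMC_MASK),
               (unsigned long long)((ev >> EVENT_UNIT_SHIFT) & EVENT_UNIT_MASK),
               (unsigned long long)(ev & EVENT_PSEL_MASK));
        return 0;
}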
@@ -135,6 +160,9 @@
 #define CNST_CACHE_PMC4_VAL    (1ull << 54)
 #define CNST_CACHE_PMC4_MASK   CNST_CACHE_PMC4_VAL

+#define CNST_L2L3_GROUP_VAL(v) (((v) & 0x1full) << 55)
+#define CNST_L2L3_GROUP_MASK   CNST_L2L3_GROUP_VAL(0x1f)
+
 /*
  * For NC we are counting up to 4 events. This requires three bits, and we need
  * the fifth event to overflow and set the 4th bit. To achieve that we bias the
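To make the new constraint concrete (an illustration, not patch code):
for a power10 unit-6 event, isa207_get_constraint() passes
event >> p10_L2L3_EVENT_SHIFT into CNST_L2L3_GROUP_VAL(), so the 5-bit
L2/L3 event select occupies constraint bits 55-59; two such events can
only share a group when this field matches:

#include <stdint.h>

/* Mirrors CNST_L2L3_GROUP_VAL() and p10_L2L3_EVENT_SHIFT (40) */
static uint64_t l2l3_group_cnst(uint64_t event)
{
        return ((event >> 40) & 0x1fULL) << 55;
}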
@@ -191,7 +219,7 @@
 #define MMCRA_THR_CTR_EXP(v)   (((v) >> MMCRA_THR_CTR_EXP_SHIFT) &\
                                MMCRA_THR_CTR_EXP_MASK)

-/* MMCR1 Threshold Compare bit constant for power9 */
+/* MMCRA Threshold Compare bit constant for power9 */
 #define p9_MMCRA_THR_CMP_SHIFT 45

 /* Bits in MMCR2 for PowerISA v2.07 */
@@ -202,6 +230,9 @@
 #define MAX_ALT                        2
 #define MAX_PMU_COUNTERS               6

+/* Bits in MMCR3 for PowerISA v3.10 */
+#define MMCR3_SHIFT(pmc)               (49 - (15 * ((pmc) - 1)))
+
 #define ISA207_SIER_TYPE_SHIFT         15
 #define ISA207_SIER_TYPE_MASK          (0x7ull << ISA207_SIER_TYPE_SHIFT)
...
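A worked example of the new MMCR3_SHIFT() arithmetic (illustrative,
not patch code): each of PMC1-4 owns a 15-bit MMCR3 slot, packed from
the high end of the register, since 49 - 15*(pmc-1) evaluates to 49,
34, 19 and 4:

#include <stdint.h>

/* Stand-alone mirror of the MMCR3 packing in isa207_compute_mmcr():
 * event bits 45..59 (p10_EVENT_MMCR3_SHIFT/MASK) land in the per-PMC
 * slot selected by MMCR3_SHIFT(pmc). */
static uint64_t mmcr3_slot(uint64_t event, int pmc)
{
        uint64_t val = (event >> 45) & 0x7fffULL;

        return val << (49 - 15 * (pmc - 1));
}

/* pmc=1 -> bits 49..63, pmc=2 -> 34..48, pmc=3 -> 19..33, pmc=4 -> 4..18 */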
/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* Performance counter support for POWER10 processors.
*
* Copyright 2020 Madhavan Srinivasan, IBM Corporation.
* Copyright 2020 Athira Rajeev, IBM Corporation.
*/
/*
* Power10 event codes.
*/
EVENT(PM_RUN_CYC, 0x600f4);
EVENT(PM_DISP_STALL_CYC, 0x100f8);
EVENT(PM_EXEC_STALL, 0x30008);
EVENT(PM_RUN_INST_CMPL, 0x500fa);
EVENT(PM_BR_CMPL, 0x4d05e);
EVENT(PM_BR_MPRED_CMPL, 0x400f6);
/* All L1 D cache load references counted at finish, gated by reject */
EVENT(PM_LD_REF_L1, 0x100fc);
/* Load Missed L1 */
EVENT(PM_LD_MISS_L1, 0x3e054);
/* Store Missed L1 */
EVENT(PM_ST_MISS_L1, 0x300f0);
/* L1 cache data prefetches */
EVENT(PM_LD_PREFETCH_CACHE_LINE_MISS, 0x1002c);
/* Demand iCache Miss */
EVENT(PM_L1_ICACHE_MISS, 0x200fc);
/* Instruction fetches from L1 */
EVENT(PM_INST_FROM_L1, 0x04080);
/* Instruction demand sectors written into IL1 */
EVENT(PM_INST_FROM_L1MISS, 0x03f00000001c040);
/* Instruction prefetch written into IL1 */
EVENT(PM_IC_PREF_REQ, 0x040a0);
/* The data cache was reloaded from local core's L3 due to a demand load */
EVENT(PM_DATA_FROM_L3, 0x01340000001c040);
/* Demand LD - L3 Miss (not L2 hit and not L3 hit) */
EVENT(PM_DATA_FROM_L3MISS, 0x300fe);
/* Data PTEG reload */
EVENT(PM_DTLB_MISS, 0x300fc);
/* ITLB Reloaded */
EVENT(PM_ITLB_MISS, 0x400fc);
EVENT(PM_RUN_CYC_ALT, 0x0001e);
EVENT(PM_RUN_INST_CMPL_ALT, 0x00002);
/*
* Memory Access Events
*
* The primary PMU event used here is PM_MRK_INST_CMPL (0x401e0).
* To enable capturing of memory profiling, these MMCRA bits
* need to be programmed via the corresponding raw event format
* encoding.
*
* The MMCRA bits needed are:
* SM (Sampling Mode)
* EM (Eligibility for Random Sampling)
* TECE (Threshold Event Counter Event)
* TS (Threshold Start Event)
* TE (Threshold End Event)
*
* Corresponding Raw Encoding bits:
* sample [EM,SM]
* thresh_sel (TECE)
* thresh start (TS)
* thresh end (TE)
*/
EVENT(MEM_LOADS, 0x34340401e0);
EVENT(MEM_STORES, 0x343c0401e0);
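A small decode (illustrative; it assumes the sample field sits at
event bits 24-28 as in isa207-common.h) showing that MEM_LOADS and
MEM_STORES wrap the same marked-instruction event and differ only in
the sampling-mode bits described above:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
        uint64_t loads = 0x34340401e0ULL, stores = 0x343c0401e0ULL;

        /* low 20 bits: both are PM_MRK_INST_CMPL (0x401e0) */
        printf("base:   0x%llx 0x%llx\n",
               (unsigned long long)(loads & 0xfffff),
               (unsigned long long)(stores & 0xfffff));

        /* sample [EM,SM] bits: 0x14 (loads) vs 0x1c (stores) */
        printf("sample: 0x%llx 0x%llx\n",
               (unsigned long long)((loads >> 24) & 0x1f),
               (unsigned long long)((stores >> 24) & 0x1f));
        return 0;
}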
This diff is collapsed.