Commit 3bff6112 authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'perf-core-2020-10-12' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull performance events updates from Ingo Molnar:
 "x86 Intel updates:

   - Add Jasper Lake support

   - Add support for TopDown metrics on Ice Lake

   - Fix Ice Lake & Tiger Lake uncore support, add Snow Ridge support

   - Add a PCI sub driver to support uncore PMUs where the PCI resources
     have been claimed already - extending the range of supported
     systems.

  x86 AMD updates:

   - Restore 'perf stat -a' behaviour to program the uncore PMU to count
     all CPU threads.

   - Fix setting the proper count when sampling Large Increment per
     Cycle events / 'paired' events.

   - Fix IBS Fetch sampling on F17h and some other IBS fine tuning,
     greatly reducing the number of interrupts when large sample periods
     are specified.

   - Extends Family 17h RAPL support to also work on compatible F19h
     machines.

  Core code updates:

   - Fix race in perf_mmap_close()

   - Add PERF_EV_CAP_SIBLING, to denote that sibling events should be
     closed if the leader is removed.

   - Smaller fixes and updates"

* tag 'perf-core-2020-10-12' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (45 commits)
  perf/core: Fix race in the perf_mmap_close() function
  perf/x86: Fix n_metric for cancelled txn
  perf/x86: Fix n_pair for cancelled txn
  x86/events/amd/iommu: Fix sizeof mismatch
  perf/x86/intel: Check perf metrics feature for each CPU
  perf/x86/intel: Fix Ice Lake event constraint table
  perf/x86/intel/uncore: Fix the scale of the IMC free-running events
  perf/x86/intel/uncore: Fix for iio mapping on Skylake Server
  perf/x86/msr: Add Jasper Lake support
  perf/x86/intel: Add Jasper Lake support
  perf/x86/intel/uncore: Reduce the number of CBOX counters
  perf/x86/intel/uncore: Update Ice Lake uncore units
  perf/x86/intel/uncore: Split the Ice Lake and Tiger Lake MSR uncore support
  perf/x86/intel/uncore: Support PCIe3 unit on Snow Ridge
  perf/x86/intel/uncore: Generic support for the PCI sub driver
  perf/x86/intel/uncore: Factor out uncore_pci_pmu_unregister()
  perf/x86/intel/uncore: Factor out uncore_pci_pmu_register()
  perf/x86/intel/uncore: Factor out uncore_pci_find_dev_pmu()
  perf/x86/intel/uncore: Factor out uncore_pci_get_dev_die_info()
  perf/amd/uncore: Inform the user how many counters each uncore PMU has
  ...
parents dd502a81 f91072ed
......@@ -89,6 +89,7 @@ struct perf_ibs {
u64 max_period;
unsigned long offset_mask[1];
int offset_max;
unsigned int fetch_count_reset_broken : 1;
struct cpu_perf_ibs __percpu *pcpu;
struct attribute **format_attrs;
......@@ -334,11 +335,18 @@ static u64 get_ibs_op_count(u64 config)
{
u64 count = 0;
if (config & IBS_OP_VAL)
count += (config & IBS_OP_MAX_CNT) << 4; /* cnt rolled over */
if (ibs_caps & IBS_CAPS_RDWROPCNT)
count += (config & IBS_OP_CUR_CNT) >> 32;
/*
* If the internal 27-bit counter rolled over, the count is MaxCnt
* and the lower 7 bits of CurCnt are randomized.
* Otherwise CurCnt has the full 27-bit current counter value.
*/
if (config & IBS_OP_VAL) {
count = (config & IBS_OP_MAX_CNT) << 4;
if (ibs_caps & IBS_CAPS_OPCNTEXT)
count += config & IBS_OP_MAX_CNT_EXT_MASK;
} else if (ibs_caps & IBS_CAPS_RDWROPCNT) {
count = (config & IBS_OP_CUR_CNT) >> 32;
}
return count;
}
......@@ -363,7 +371,12 @@ perf_ibs_event_update(struct perf_ibs *perf_ibs, struct perf_event *event,
static inline void perf_ibs_enable_event(struct perf_ibs *perf_ibs,
struct hw_perf_event *hwc, u64 config)
{
wrmsrl(hwc->config_base, hwc->config | config | perf_ibs->enable_mask);
u64 tmp = hwc->config | config;
if (perf_ibs->fetch_count_reset_broken)
wrmsrl(hwc->config_base, tmp & ~perf_ibs->enable_mask);
wrmsrl(hwc->config_base, tmp | perf_ibs->enable_mask);
}
/*
......@@ -394,7 +407,7 @@ static void perf_ibs_start(struct perf_event *event, int flags)
struct hw_perf_event *hwc = &event->hw;
struct perf_ibs *perf_ibs = container_of(event->pmu, struct perf_ibs, pmu);
struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu);
u64 period;
u64 period, config = 0;
if (WARN_ON_ONCE(!(hwc->state & PERF_HES_STOPPED)))
return;
......@@ -403,13 +416,19 @@ static void perf_ibs_start(struct perf_event *event, int flags)
hwc->state = 0;
perf_ibs_set_period(perf_ibs, hwc, &period);
if (perf_ibs == &perf_ibs_op && (ibs_caps & IBS_CAPS_OPCNTEXT)) {
config |= period & IBS_OP_MAX_CNT_EXT_MASK;
period &= ~IBS_OP_MAX_CNT_EXT_MASK;
}
config |= period >> 4;
/*
* Set STARTED before enabling the hardware, such that a subsequent NMI
* must observe it.
*/
set_bit(IBS_STARTED, pcpu->state);
clear_bit(IBS_STOPPING, pcpu->state);
perf_ibs_enable_event(perf_ibs, hwc, period >> 4);
perf_ibs_enable_event(perf_ibs, hwc, config);
perf_event_update_userpage(event);
}
......@@ -577,7 +596,7 @@ static int perf_ibs_handle_irq(struct perf_ibs *perf_ibs, struct pt_regs *iregs)
struct perf_ibs_data ibs_data;
int offset, size, check_rip, offset_max, throttle = 0;
unsigned int msr;
u64 *buf, *config, period;
u64 *buf, *config, period, new_config = 0;
if (!test_bit(IBS_STARTED, pcpu->state)) {
fail:
......@@ -626,18 +645,24 @@ static int perf_ibs_handle_irq(struct perf_ibs *perf_ibs, struct pt_regs *iregs)
perf_ibs->offset_max,
offset + 1);
} while (offset < offset_max);
/*
* Read IbsBrTarget, IbsOpData4, and IbsExtdCtl separately
* depending on their availability.
* Can't add to offset_max as they are staggered
*/
if (event->attr.sample_type & PERF_SAMPLE_RAW) {
/*
* Read IbsBrTarget and IbsOpData4 separately
* depending on their availability.
* Can't add to offset_max as they are staggered
*/
if (ibs_caps & IBS_CAPS_BRNTRGT) {
rdmsrl(MSR_AMD64_IBSBRTARGET, *buf++);
size++;
if (perf_ibs == &perf_ibs_op) {
if (ibs_caps & IBS_CAPS_BRNTRGT) {
rdmsrl(MSR_AMD64_IBSBRTARGET, *buf++);
size++;
}
if (ibs_caps & IBS_CAPS_OPDATA4) {
rdmsrl(MSR_AMD64_IBSOPDATA4, *buf++);
size++;
}
}
if (ibs_caps & IBS_CAPS_OPDATA4) {
rdmsrl(MSR_AMD64_IBSOPDATA4, *buf++);
if (perf_ibs == &perf_ibs_fetch && (ibs_caps & IBS_CAPS_FETCHCTLEXTD)) {
rdmsrl(MSR_AMD64_ICIBSEXTDCTL, *buf++);
size++;
}
}
......@@ -666,13 +691,17 @@ static int perf_ibs_handle_irq(struct perf_ibs *perf_ibs, struct pt_regs *iregs)
if (throttle) {
perf_ibs_stop(event, 0);
} else {
period >>= 4;
if ((ibs_caps & IBS_CAPS_RDWROPCNT) &&
(*config & IBS_OP_CNT_CTL))
period |= *config & IBS_OP_CUR_CNT_RAND;
if (perf_ibs == &perf_ibs_op) {
if (ibs_caps & IBS_CAPS_OPCNTEXT) {
new_config = period & IBS_OP_MAX_CNT_EXT_MASK;
period &= ~IBS_OP_MAX_CNT_EXT_MASK;
}
if ((ibs_caps & IBS_CAPS_RDWROPCNT) && (*config & IBS_OP_CNT_CTL))
new_config |= *config & IBS_OP_CUR_CNT_RAND;
}
new_config |= period >> 4;
perf_ibs_enable_event(perf_ibs, hwc, period);
perf_ibs_enable_event(perf_ibs, hwc, new_config);
}
perf_event_update_userpage(event);
......@@ -733,12 +762,26 @@ static __init void perf_event_ibs_init(void)
{
struct attribute **attr = ibs_op_format_attrs;
/*
* Some chips fail to reset the fetch count when it is written; instead
* they need a 0-1 transition of IbsFetchEn.
*/
if (boot_cpu_data.x86 >= 0x16 && boot_cpu_data.x86 <= 0x18)
perf_ibs_fetch.fetch_count_reset_broken = 1;
perf_ibs_pmu_init(&perf_ibs_fetch, "ibs_fetch");
if (ibs_caps & IBS_CAPS_OPCNT) {
perf_ibs_op.config_mask |= IBS_OP_CNT_CTL;
*attr++ = &format_attr_cnt_ctl.attr;
}
if (ibs_caps & IBS_CAPS_OPCNTEXT) {
perf_ibs_op.max_period |= IBS_OP_MAX_CNT_EXT_MASK;
perf_ibs_op.config_mask |= IBS_OP_MAX_CNT_EXT_MASK;
perf_ibs_op.cnt_mask |= IBS_OP_MAX_CNT_EXT_MASK;
}
perf_ibs_pmu_init(&perf_ibs_op, "ibs_op");
register_nmi_handler(NMI_LOCAL, perf_ibs_nmi_handler, 0, "perf_ibs");
......
......@@ -379,7 +379,7 @@ static __init int _init_events_attrs(void)
while (amd_iommu_v2_event_descs[i].attr.attr.name)
i++;
attrs = kcalloc(i + 1, sizeof(struct attribute **), GFP_KERNEL);
attrs = kcalloc(i + 1, sizeof(*attrs), GFP_KERNEL);
if (!attrs)
return -ENOMEM;
......
......@@ -181,28 +181,28 @@ static void amd_uncore_del(struct perf_event *event, int flags)
}
/*
* Convert logical CPU number to L3 PMC Config ThreadMask format
* Return a full thread and slice mask unless user
* has provided them
*/
static u64 l3_thread_slice_mask(int cpu)
static u64 l3_thread_slice_mask(u64 config)
{
u64 thread_mask, core = topology_core_id(cpu);
unsigned int shift, thread = 0;
if (boot_cpu_data.x86 <= 0x18)
return ((config & AMD64_L3_SLICE_MASK) ? : AMD64_L3_SLICE_MASK) |
((config & AMD64_L3_THREAD_MASK) ? : AMD64_L3_THREAD_MASK);
if (topology_smt_supported() && !topology_is_primary_thread(cpu))
thread = 1;
if (boot_cpu_data.x86 <= 0x18) {
shift = AMD64_L3_THREAD_SHIFT + 2 * (core % 4) + thread;
thread_mask = BIT_ULL(shift);
return AMD64_L3_SLICE_MASK | thread_mask;
}
core = (core << AMD64_L3_COREID_SHIFT) & AMD64_L3_COREID_MASK;
shift = AMD64_L3_THREAD_SHIFT + thread;
thread_mask = BIT_ULL(shift);
/*
* If the user doesn't specify a threadmask, they're not trying to
* count core 0, so we enable all cores & threads.
* We'll also assume that they want to count slice 0 if they specify
* a threadmask and leave sliceid and enallslices unpopulated.
*/
if (!(config & AMD64_L3_F19H_THREAD_MASK))
return AMD64_L3_F19H_THREAD_MASK | AMD64_L3_EN_ALL_SLICES |
AMD64_L3_EN_ALL_CORES;
return AMD64_L3_EN_ALL_SLICES | core | thread_mask;
return config & (AMD64_L3_F19H_THREAD_MASK | AMD64_L3_SLICEID_MASK |
AMD64_L3_EN_ALL_CORES | AMD64_L3_EN_ALL_SLICES |
AMD64_L3_COREID_MASK);
}
static int amd_uncore_event_init(struct perf_event *event)
......@@ -232,7 +232,7 @@ static int amd_uncore_event_init(struct perf_event *event)
* For other events, the two fields do not affect the count.
*/
if (l3_mask && is_llc_event(event))
hwc->config |= l3_thread_slice_mask(event->cpu);
hwc->config |= l3_thread_slice_mask(event->attr.config);
uncore = event_to_amd_uncore(event);
if (!uncore)
......@@ -274,47 +274,72 @@ static struct attribute_group amd_uncore_attr_group = {
.attrs = amd_uncore_attrs,
};
/*
* Similar to PMU_FORMAT_ATTR but allowing for format_attr to be assigned based
* on family
*/
#define AMD_FORMAT_ATTR(_dev, _name, _format) \
static ssize_t \
_dev##_show##_name(struct device *dev, \
struct device_attribute *attr, \
char *page) \
{ \
BUILD_BUG_ON(sizeof(_format) >= PAGE_SIZE); \
return sprintf(page, _format "\n"); \
} \
static struct device_attribute format_attr_##_dev##_name = __ATTR_RO(_dev);
/* Used for each uncore counter type */
#define AMD_ATTRIBUTE(_name) \
static struct attribute *amd_uncore_format_attr_##_name[] = { \
&format_attr_event_##_name.attr, \
&format_attr_umask.attr, \
NULL, \
}; \
static struct attribute_group amd_uncore_format_group_##_name = { \
.name = "format", \
.attrs = amd_uncore_format_attr_##_name, \
}; \
static const struct attribute_group *amd_uncore_attr_groups_##_name[] = { \
&amd_uncore_attr_group, \
&amd_uncore_format_group_##_name, \
NULL, \
#define DEFINE_UNCORE_FORMAT_ATTR(_var, _name, _format) \
static ssize_t __uncore_##_var##_show(struct kobject *kobj, \
struct kobj_attribute *attr, \
char *page) \
{ \
BUILD_BUG_ON(sizeof(_format) >= PAGE_SIZE); \
return sprintf(page, _format "\n"); \
} \
static struct kobj_attribute format_attr_##_var = \
__ATTR(_name, 0444, __uncore_##_var##_show, NULL)
DEFINE_UNCORE_FORMAT_ATTR(event12, event, "config:0-7,32-35");
DEFINE_UNCORE_FORMAT_ATTR(event14, event, "config:0-7,32-35,59-60"); /* F17h+ DF */
DEFINE_UNCORE_FORMAT_ATTR(event8, event, "config:0-7"); /* F17h+ L3 */
DEFINE_UNCORE_FORMAT_ATTR(umask, umask, "config:8-15");
DEFINE_UNCORE_FORMAT_ATTR(coreid, coreid, "config:42-44"); /* F19h L3 */
DEFINE_UNCORE_FORMAT_ATTR(slicemask, slicemask, "config:48-51"); /* F17h L3 */
DEFINE_UNCORE_FORMAT_ATTR(threadmask8, threadmask, "config:56-63"); /* F17h L3 */
DEFINE_UNCORE_FORMAT_ATTR(threadmask2, threadmask, "config:56-57"); /* F19h L3 */
DEFINE_UNCORE_FORMAT_ATTR(enallslices, enallslices, "config:46"); /* F19h L3 */
DEFINE_UNCORE_FORMAT_ATTR(enallcores, enallcores, "config:47"); /* F19h L3 */
DEFINE_UNCORE_FORMAT_ATTR(sliceid, sliceid, "config:48-50"); /* F19h L3 */
static struct attribute *amd_uncore_df_format_attr[] = {
&format_attr_event12.attr, /* event14 if F17h+ */
&format_attr_umask.attr,
NULL,
};
static struct attribute *amd_uncore_l3_format_attr[] = {
&format_attr_event12.attr, /* event8 if F17h+ */
&format_attr_umask.attr,
NULL, /* slicemask if F17h, coreid if F19h */
NULL, /* threadmask8 if F17h, enallslices if F19h */
NULL, /* enallcores if F19h */
NULL, /* sliceid if F19h */
NULL, /* threadmask2 if F19h */
NULL,
};
static struct attribute_group amd_uncore_df_format_group = {
.name = "format",
.attrs = amd_uncore_df_format_attr,
};
AMD_FORMAT_ATTR(event, , "config:0-7,32-35");
AMD_FORMAT_ATTR(umask, , "config:8-15");
AMD_FORMAT_ATTR(event, _df, "config:0-7,32-35,59-60");
AMD_FORMAT_ATTR(event, _l3, "config:0-7");
AMD_ATTRIBUTE(df);
AMD_ATTRIBUTE(l3);
static struct attribute_group amd_uncore_l3_format_group = {
.name = "format",
.attrs = amd_uncore_l3_format_attr,
};
static const struct attribute_group *amd_uncore_df_attr_groups[] = {
&amd_uncore_attr_group,
&amd_uncore_df_format_group,
NULL,
};
static const struct attribute_group *amd_uncore_l3_attr_groups[] = {
&amd_uncore_attr_group,
&amd_uncore_l3_format_group,
NULL,
};
static struct pmu amd_nb_pmu = {
.task_ctx_nr = perf_invalid_context,
.attr_groups = amd_uncore_df_attr_groups,
.name = "amd_nb",
.event_init = amd_uncore_event_init,
.add = amd_uncore_add,
.del = amd_uncore_del,
......@@ -326,6 +351,8 @@ static struct pmu amd_nb_pmu = {
static struct pmu amd_llc_pmu = {
.task_ctx_nr = perf_invalid_context,
.attr_groups = amd_uncore_l3_attr_groups,
.name = "amd_l2",
.event_init = amd_uncore_event_init,
.add = amd_uncore_add,
.del = amd_uncore_del,
......@@ -529,6 +556,8 @@ static int amd_uncore_cpu_dead(unsigned int cpu)
static int __init amd_uncore_init(void)
{
struct attribute **df_attr = amd_uncore_df_format_attr;
struct attribute **l3_attr = amd_uncore_l3_format_attr;
int ret = -ENODEV;
if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD &&
......@@ -538,6 +567,8 @@ static int __init amd_uncore_init(void)
if (!boot_cpu_has(X86_FEATURE_TOPOEXT))
return -ENODEV;
num_counters_nb = NUM_COUNTERS_NB;
num_counters_llc = NUM_COUNTERS_L2;
if (boot_cpu_data.x86 >= 0x17) {
/*
* For F17h and above, the Northbridge counters are
......@@ -545,27 +576,16 @@ static int __init amd_uncore_init(void)
* counters are supported too. The PMUs are exported
* based on family as either L2 or L3 and NB or DF.
*/
num_counters_nb = NUM_COUNTERS_NB;
num_counters_llc = NUM_COUNTERS_L3;
amd_nb_pmu.name = "amd_df";
amd_llc_pmu.name = "amd_l3";
format_attr_event_df.show = &event_show_df;
format_attr_event_l3.show = &event_show_l3;
l3_mask = true;
} else {
num_counters_nb = NUM_COUNTERS_NB;
num_counters_llc = NUM_COUNTERS_L2;
amd_nb_pmu.name = "amd_nb";
amd_llc_pmu.name = "amd_l2";
format_attr_event_df = format_attr_event;
format_attr_event_l3 = format_attr_event;
l3_mask = false;
}
amd_nb_pmu.attr_groups = amd_uncore_attr_groups_df;
amd_llc_pmu.attr_groups = amd_uncore_attr_groups_l3;
if (boot_cpu_has(X86_FEATURE_PERFCTR_NB)) {
if (boot_cpu_data.x86 >= 0x17)
*df_attr = &format_attr_event14.attr;
amd_uncore_nb = alloc_percpu(struct amd_uncore *);
if (!amd_uncore_nb) {
ret = -ENOMEM;
......@@ -575,13 +595,29 @@ static int __init amd_uncore_init(void)
if (ret)
goto fail_nb;
pr_info("%s NB counters detected\n",
boot_cpu_data.x86_vendor == X86_VENDOR_HYGON ?
"HYGON" : "AMD");
pr_info("%d %s %s counters detected\n", num_counters_nb,
boot_cpu_data.x86_vendor == X86_VENDOR_HYGON ? "HYGON" : "",
amd_nb_pmu.name);
ret = 0;
}
if (boot_cpu_has(X86_FEATURE_PERFCTR_LLC)) {
if (boot_cpu_data.x86 >= 0x19) {
*l3_attr++ = &format_attr_event8.attr;
*l3_attr++ = &format_attr_umask.attr;
*l3_attr++ = &format_attr_coreid.attr;
*l3_attr++ = &format_attr_enallslices.attr;
*l3_attr++ = &format_attr_enallcores.attr;
*l3_attr++ = &format_attr_sliceid.attr;
*l3_attr++ = &format_attr_threadmask2.attr;
} else if (boot_cpu_data.x86 >= 0x17) {
*l3_attr++ = &format_attr_event8.attr;
*l3_attr++ = &format_attr_umask.attr;
*l3_attr++ = &format_attr_slicemask.attr;
*l3_attr++ = &format_attr_threadmask8.attr;
}
amd_uncore_llc = alloc_percpu(struct amd_uncore *);
if (!amd_uncore_llc) {
ret = -ENOMEM;
......@@ -591,9 +627,9 @@ static int __init amd_uncore_init(void)
if (ret)
goto fail_llc;
pr_info("%s LLC counters detected\n",
boot_cpu_data.x86_vendor == X86_VENDOR_HYGON ?
"HYGON" : "AMD");
pr_info("%d %s %s counters detected\n", num_counters_llc,
boot_cpu_data.x86_vendor == X86_VENDOR_HYGON ? "HYGON" : "",
amd_llc_pmu.name);
ret = 0;
}
......
......@@ -105,6 +105,9 @@ u64 x86_perf_event_update(struct perf_event *event)
if (unlikely(!hwc->event_base))
return 0;
if (unlikely(is_topdown_count(event)) && x86_pmu.update_topdown_event)
return x86_pmu.update_topdown_event(event);
/*
* Careful: an NMI might modify the previous event value.
*
......@@ -1056,6 +1059,45 @@ int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
return unsched ? -EINVAL : 0;
}
static int add_nr_metric_event(struct cpu_hw_events *cpuc,
struct perf_event *event)
{
if (is_metric_event(event)) {
if (cpuc->n_metric == INTEL_TD_METRIC_NUM)
return -EINVAL;
cpuc->n_metric++;
cpuc->n_txn_metric++;
}
return 0;
}
static void del_nr_metric_event(struct cpu_hw_events *cpuc,
struct perf_event *event)
{
if (is_metric_event(event))
cpuc->n_metric--;
}
static int collect_event(struct cpu_hw_events *cpuc, struct perf_event *event,
int max_count, int n)
{
if (x86_pmu.intel_cap.perf_metrics && add_nr_metric_event(cpuc, event))
return -EINVAL;
if (n >= max_count + cpuc->n_metric)
return -EINVAL;
cpuc->event_list[n] = event;
if (is_counter_pair(&event->hw)) {
cpuc->n_pair++;
cpuc->n_txn_pair++;
}
return 0;
}
/*
* dogrp: true if must collect siblings events (group)
* returns total number of events and error code
......@@ -1092,28 +1134,22 @@ static int collect_events(struct cpu_hw_events *cpuc, struct perf_event *leader,
}
if (is_x86_event(leader)) {
if (n >= max_count)
if (collect_event(cpuc, leader, max_count, n))
return -EINVAL;
cpuc->event_list[n] = leader;
n++;
if (is_counter_pair(&leader->hw))
cpuc->n_pair++;
}
if (!dogrp)
return n;
for_each_sibling_event(event, leader) {
if (!is_x86_event(event) ||
event->state <= PERF_EVENT_STATE_OFF)
if (!is_x86_event(event) || event->state <= PERF_EVENT_STATE_OFF)
continue;
if (n >= max_count)
if (collect_event(cpuc, event, max_count, n))
return -EINVAL;
cpuc->event_list[n] = event;
n++;
if (is_counter_pair(&event->hw))
cpuc->n_pair++;
}
return n;
}
......@@ -1135,11 +1171,16 @@ static inline void x86_assign_hw_event(struct perf_event *event,
hwc->event_base = 0;
break;
case INTEL_PMC_IDX_METRIC_BASE ... INTEL_PMC_IDX_METRIC_END:
/* All the metric events are mapped onto the fixed counter 3. */
idx = INTEL_PMC_IDX_FIXED_SLOTS;
/* fall through */
case INTEL_PMC_IDX_FIXED ... INTEL_PMC_IDX_FIXED_BTS-1:
hwc->config_base = MSR_ARCH_PERFMON_FIXED_CTR_CTRL;
hwc->event_base = MSR_ARCH_PERFMON_FIXED_CTR0 +
(idx - INTEL_PMC_IDX_FIXED);
hwc->event_base_rdpmc = (idx - INTEL_PMC_IDX_FIXED) | 1<<30;
hwc->event_base_rdpmc = (idx - INTEL_PMC_IDX_FIXED) |
INTEL_PMC_FIXED_RDPMC_BASE;
break;
default:
......@@ -1270,6 +1311,10 @@ int x86_perf_event_set_period(struct perf_event *event)
if (unlikely(!hwc->event_base))
return 0;
if (unlikely(is_topdown_count(event)) &&
x86_pmu.set_topdown_event_period)
return x86_pmu.set_topdown_event_period(event);
/*
* If we are way outside a reasonable range then just skip forward:
*/
......@@ -1309,11 +1354,11 @@ int x86_perf_event_set_period(struct perf_event *event)
wrmsrl(hwc->event_base, (u64)(-left) & x86_pmu.cntval_mask);
/*
* Clear the Merge event counter's upper 16 bits since
* Sign extend the Merge event counter's upper 16 bits since
* we currently declare a 48-bit counter width
*/
if (is_counter_pair(hwc))
wrmsrl(x86_pmu_event_addr(idx + 1), 0);
wrmsrl(x86_pmu_event_addr(idx + 1), 0xffff);
/*
* Due to erratum on certan cpu we need
......@@ -1551,6 +1596,8 @@ static void x86_pmu_del(struct perf_event *event, int flags)
}
cpuc->event_constraint[i-1] = NULL;
--cpuc->n_events;
if (x86_pmu.intel_cap.perf_metrics)
del_nr_metric_event(cpuc, event);
perf_event_update_userpage(event);
......@@ -2018,6 +2065,8 @@ static void x86_pmu_start_txn(struct pmu *pmu, unsigned int txn_flags)
perf_pmu_disable(pmu);
__this_cpu_write(cpu_hw_events.n_txn, 0);
__this_cpu_write(cpu_hw_events.n_txn_pair, 0);
__this_cpu_write(cpu_hw_events.n_txn_metric, 0);
}
/*
......@@ -2043,6 +2092,8 @@ static void x86_pmu_cancel_txn(struct pmu *pmu)
*/
__this_cpu_sub(cpu_hw_events.n_added, __this_cpu_read(cpu_hw_events.n_txn));
__this_cpu_sub(cpu_hw_events.n_events, __this_cpu_read(cpu_hw_events.n_txn));
__this_cpu_sub(cpu_hw_events.n_pair, __this_cpu_read(cpu_hw_events.n_txn_pair));
__this_cpu_sub(cpu_hw_events.n_metric, __this_cpu_read(cpu_hw_events.n_txn_metric));
perf_pmu_enable(pmu);
}
......@@ -2264,17 +2315,15 @@ static void x86_pmu_event_unmapped(struct perf_event *event, struct mm_struct *m
static int x86_pmu_event_idx(struct perf_event *event)
{
int idx = event->hw.idx;
struct hw_perf_event *hwc = &event->hw;
if (!(event->hw.flags & PERF_X86_EVENT_RDPMC_ALLOWED))
if (!(hwc->flags & PERF_X86_EVENT_RDPMC_ALLOWED))
return 0;
if (x86_pmu.num_counters_fixed && idx >= INTEL_PMC_IDX_FIXED) {
idx -= INTEL_PMC_IDX_FIXED;
idx |= 1 << 30;
}
return idx + 1;
if (is_metric_idx(hwc->idx))
return INTEL_PMC_FIXED_RDPMC_METRICS + 1;
else
return hwc->event_base_rdpmc + 1;
}
static ssize_t get_attr_rdpmc(struct device *cdev,
......
This diff is collapsed.
......@@ -670,9 +670,7 @@ int intel_pmu_drain_bts_buffer(void)
static inline void intel_pmu_drain_pebs_buffer(void)
{
struct pt_regs regs;
x86_pmu.drain_pebs(&regs);
x86_pmu.drain_pebs(NULL);
}
/*
......@@ -1737,6 +1735,7 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
struct x86_perf_regs perf_regs;
struct pt_regs *regs = &perf_regs.regs;
void *at = get_next_pebs_record_by_bit(base, top, bit);
struct pt_regs dummy_iregs;
if (hwc->flags & PERF_X86_EVENT_AUTO_RELOAD) {
/*
......@@ -1749,6 +1748,9 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
} else if (!intel_pmu_save_and_restart(event))
return;
if (!iregs)
iregs = &dummy_iregs;
while (count > 1) {
setup_sample(event, iregs, at, &data, regs);
perf_event_output(event, &data, regs);
......@@ -1758,16 +1760,22 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
}
setup_sample(event, iregs, at, &data, regs);
/*
* All but the last records are processed.
* The last one is left to be able to call the overflow handler.
*/
if (perf_event_overflow(event, &data, regs)) {
x86_pmu_stop(event, 0);
return;
if (iregs == &dummy_iregs) {
/*
* The PEBS records may be drained in the non-overflow context,
* e.g., large PEBS + context switch. Perf should treat the
* last record the same as other PEBS records, and doesn't
* invoke the generic overflow handler.
*/
perf_event_output(event, &data, regs);
} else {
/*
* All but the last records are processed.
* The last one is left to be able to call the overflow handler.
*/
if (perf_event_overflow(event, &data, regs))
x86_pmu_stop(event, 0);
}
}
static void intel_pmu_drain_pebs_core(struct pt_regs *iregs)
......
This diff is collapsed.
......@@ -552,6 +552,7 @@ extern struct intel_uncore_type **uncore_msr_uncores;
extern struct intel_uncore_type **uncore_pci_uncores;
extern struct intel_uncore_type **uncore_mmio_uncores;
extern struct pci_driver *uncore_pci_driver;
extern struct pci_driver *uncore_pci_sub_driver;
extern raw_spinlock_t pci2phy_map_lock;
extern struct list_head pci2phy_map_head;
extern struct pci_extra_dev *uncore_extra_pci_dev;
......@@ -567,6 +568,7 @@ void snb_uncore_cpu_init(void);
void nhm_uncore_cpu_init(void);
void skl_uncore_cpu_init(void);
void icl_uncore_cpu_init(void);
void tgl_uncore_cpu_init(void);
void tgl_uncore_mmio_init(void);
void tgl_l_uncore_mmio_init(void);
int snb_pci2phy_map_init(int devid);
......
......@@ -126,6 +126,10 @@
#define ICL_UNC_CBO_0_PER_CTR0 0x702
#define ICL_UNC_CBO_MSR_OFFSET 0x8
/* ICL ARB register */
#define ICL_UNC_ARB_PER_CTR 0x3b1
#define ICL_UNC_ARB_PERFEVTSEL 0x3b3
DEFINE_UNCORE_FORMAT_ATTR(event, event, "config:0-7");
DEFINE_UNCORE_FORMAT_ATTR(umask, umask, "config:8-15");
DEFINE_UNCORE_FORMAT_ATTR(edge, edge, "config:18");
......@@ -313,15 +317,21 @@ void skl_uncore_cpu_init(void)
snb_uncore_arb.ops = &skl_uncore_msr_ops;
}
static struct intel_uncore_ops icl_uncore_msr_ops = {
.disable_event = snb_uncore_msr_disable_event,
.enable_event = snb_uncore_msr_enable_event,
.read_counter = uncore_msr_read_counter,
};
static struct intel_uncore_type icl_uncore_cbox = {
.name = "cbox",
.num_counters = 4,
.num_counters = 2,
.perf_ctr_bits = 44,
.perf_ctr = ICL_UNC_CBO_0_PER_CTR0,
.event_ctl = SNB_UNC_CBO_0_PERFEVTSEL0,
.event_mask = SNB_UNC_RAW_EVENT_MASK,
.msr_offset = ICL_UNC_CBO_MSR_OFFSET,
.ops = &skl_uncore_msr_ops,
.ops = &icl_uncore_msr_ops,
.format_group = &snb_uncore_format_group,
};
......@@ -350,13 +360,25 @@ static struct intel_uncore_type icl_uncore_clockbox = {
.single_fixed = 1,
.event_mask = SNB_UNC_CTL_EV_SEL_MASK,
.format_group = &icl_uncore_clock_format_group,
.ops = &skl_uncore_msr_ops,
.ops = &icl_uncore_msr_ops,
.event_descs = icl_uncore_events,
};
static struct intel_uncore_type icl_uncore_arb = {
.name = "arb",
.num_counters = 1,
.num_boxes = 1,
.perf_ctr_bits = 44,
.perf_ctr = ICL_UNC_ARB_PER_CTR,
.event_ctl = ICL_UNC_ARB_PERFEVTSEL,
.event_mask = SNB_UNC_RAW_EVENT_MASK,
.ops = &icl_uncore_msr_ops,
.format_group = &snb_uncore_format_group,
};
static struct intel_uncore_type *icl_msr_uncores[] = {
&icl_uncore_cbox,
&snb_uncore_arb,
&icl_uncore_arb,
&icl_uncore_clockbox,
NULL,
};
......@@ -374,6 +396,21 @@ void icl_uncore_cpu_init(void)
{
uncore_msr_uncores = icl_msr_uncores;
icl_uncore_cbox.num_boxes = icl_get_cbox_num();
}
static struct intel_uncore_type *tgl_msr_uncores[] = {
&icl_uncore_cbox,
&snb_uncore_arb,
&icl_uncore_clockbox,
NULL,
};
void tgl_uncore_cpu_init(void)
{
uncore_msr_uncores = tgl_msr_uncores;
icl_uncore_cbox.num_boxes = icl_get_cbox_num();
icl_uncore_cbox.ops = &skl_uncore_msr_ops;
icl_uncore_clockbox.ops = &skl_uncore_msr_ops;
snb_uncore_arb.ops = &skl_uncore_msr_ops;
}
......
......@@ -393,6 +393,11 @@
#define SNR_M2M_PCI_PMON_BOX_CTL 0x438
#define SNR_M2M_PCI_PMON_UMASK_EXT 0xff
/* SNR PCIE3 */
#define SNR_PCIE3_PCI_PMON_CTL0 0x508
#define SNR_PCIE3_PCI_PMON_CTR0 0x4e8
#define SNR_PCIE3_PCI_PMON_BOX_CTL 0x4e0
/* SNR IMC */
#define SNR_IMC_MMIO_PMON_FIXED_CTL 0x54
#define SNR_IMC_MMIO_PMON_FIXED_CTR 0x38
......@@ -3749,7 +3754,9 @@ static int skx_iio_set_mapping(struct intel_uncore_type *type)
ret = skx_iio_get_topology(type);
if (ret)
return ret;
goto clear_attr_update;
ret = -ENOMEM;
/* One more for NULL. */
attrs = kcalloc((uncore_max_dies() + 1), sizeof(*attrs), GFP_KERNEL);
......@@ -3781,8 +3788,9 @@ static int skx_iio_set_mapping(struct intel_uncore_type *type)
kfree(eas);
kfree(attrs);
kfree(type->topology);
clear_attr_update:
type->attr_update = NULL;
return -ENOMEM;
return ret;
}
static void skx_iio_cleanup_mapping(struct intel_uncore_type *type)
......@@ -4551,12 +4559,46 @@ static struct intel_uncore_type snr_uncore_m2m = {
.format_group = &snr_m2m_uncore_format_group,
};
static void snr_uncore_pci_enable_event(struct intel_uncore_box *box, struct perf_event *event)
{
struct pci_dev *pdev = box->pci_dev;
struct hw_perf_event *hwc = &event->hw;
pci_write_config_dword(pdev, hwc->config_base, (u32)(hwc->config | SNBEP_PMON_CTL_EN));
pci_write_config_dword(pdev, hwc->config_base + 4, (u32)(hwc->config >> 32));
}
static struct intel_uncore_ops snr_pcie3_uncore_pci_ops = {
.init_box = snr_m2m_uncore_pci_init_box,
.disable_box = snbep_uncore_pci_disable_box,
.enable_box = snbep_uncore_pci_enable_box,
.disable_event = snbep_uncore_pci_disable_event,
.enable_event = snr_uncore_pci_enable_event,
.read_counter = snbep_uncore_pci_read_counter,
};
static struct intel_uncore_type snr_uncore_pcie3 = {
.name = "pcie3",
.num_counters = 4,
.num_boxes = 1,
.perf_ctr_bits = 48,
.perf_ctr = SNR_PCIE3_PCI_PMON_CTR0,
.event_ctl = SNR_PCIE3_PCI_PMON_CTL0,
.event_mask = SKX_IIO_PMON_RAW_EVENT_MASK,
.event_mask_ext = SKX_IIO_PMON_RAW_EVENT_MASK_EXT,
.box_ctl = SNR_PCIE3_PCI_PMON_BOX_CTL,
.ops = &snr_pcie3_uncore_pci_ops,
.format_group = &skx_uncore_iio_format_group,
};
enum {
SNR_PCI_UNCORE_M2M,
SNR_PCI_UNCORE_PCIE3,
};
static struct intel_uncore_type *snr_pci_uncores[] = {
[SNR_PCI_UNCORE_M2M] = &snr_uncore_m2m,
[SNR_PCI_UNCORE_PCIE3] = &snr_uncore_pcie3,
NULL,
};
......@@ -4573,6 +4615,19 @@ static struct pci_driver snr_uncore_pci_driver = {
.id_table = snr_uncore_pci_ids,
};
static const struct pci_device_id snr_uncore_pci_sub_ids[] = {
{ /* PCIe3 RP */
PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x334a),
.driver_data = UNCORE_PCI_DEV_FULL_DATA(4, 0, SNR_PCI_UNCORE_PCIE3, 0),
},
{ /* end: all zeroes */ }
};
static struct pci_driver snr_uncore_pci_sub_driver = {
.name = "snr_uncore_sub",
.id_table = snr_uncore_pci_sub_ids,
};
int snr_uncore_pci_init(void)
{
/* SNR UBOX DID */
......@@ -4584,6 +4639,7 @@ int snr_uncore_pci_init(void)
uncore_pci_uncores = snr_pci_uncores;
uncore_pci_driver = &snr_uncore_pci_driver;
uncore_pci_sub_driver = &snr_uncore_pci_sub_driver;
return 0;
}
......@@ -4751,10 +4807,10 @@ static struct uncore_event_desc snr_uncore_imc_freerunning_events[] = {
INTEL_UNCORE_EVENT_DESC(dclk, "event=0xff,umask=0x10"),
INTEL_UNCORE_EVENT_DESC(read, "event=0xff,umask=0x20"),
INTEL_UNCORE_EVENT_DESC(read.scale, "3.814697266e-6"),
INTEL_UNCORE_EVENT_DESC(read.scale, "6.103515625e-5"),
INTEL_UNCORE_EVENT_DESC(read.unit, "MiB"),
INTEL_UNCORE_EVENT_DESC(write, "event=0xff,umask=0x21"),
INTEL_UNCORE_EVENT_DESC(write.scale, "3.814697266e-6"),
INTEL_UNCORE_EVENT_DESC(write.scale, "6.103515625e-5"),
INTEL_UNCORE_EVENT_DESC(write.unit, "MiB"),
{ /* end: all zeroes */ },
};
......@@ -5212,17 +5268,17 @@ static struct uncore_event_desc icx_uncore_imc_freerunning_events[] = {
INTEL_UNCORE_EVENT_DESC(dclk, "event=0xff,umask=0x10"),
INTEL_UNCORE_EVENT_DESC(read, "event=0xff,umask=0x20"),
INTEL_UNCORE_EVENT_DESC(read.scale, "3.814697266e-6"),
INTEL_UNCORE_EVENT_DESC(read.scale, "6.103515625e-5"),
INTEL_UNCORE_EVENT_DESC(read.unit, "MiB"),
INTEL_UNCORE_EVENT_DESC(write, "event=0xff,umask=0x21"),
INTEL_UNCORE_EVENT_DESC(write.scale, "3.814697266e-6"),
INTEL_UNCORE_EVENT_DESC(write.scale, "6.103515625e-5"),
INTEL_UNCORE_EVENT_DESC(write.unit, "MiB"),
INTEL_UNCORE_EVENT_DESC(ddrt_read, "event=0xff,umask=0x30"),
INTEL_UNCORE_EVENT_DESC(ddrt_read.scale, "3.814697266e-6"),
INTEL_UNCORE_EVENT_DESC(ddrt_read.scale, "6.103515625e-5"),
INTEL_UNCORE_EVENT_DESC(ddrt_read.unit, "MiB"),
INTEL_UNCORE_EVENT_DESC(ddrt_write, "event=0xff,umask=0x31"),
INTEL_UNCORE_EVENT_DESC(ddrt_write.scale, "3.814697266e-6"),
INTEL_UNCORE_EVENT_DESC(ddrt_write.scale, "6.103515625e-5"),
INTEL_UNCORE_EVENT_DESC(ddrt_write.unit, "MiB"),
{ /* end: all zeroes */ },
};
......
......@@ -78,6 +78,7 @@ static bool test_intel(int idx, void *data)
case INTEL_FAM6_ATOM_GOLDMONT_PLUS:
case INTEL_FAM6_ATOM_TREMONT_D:
case INTEL_FAM6_ATOM_TREMONT:
case INTEL_FAM6_ATOM_TREMONT_L:
case INTEL_FAM6_XEON_PHI_KNL:
case INTEL_FAM6_XEON_PHI_KNM:
......
......@@ -79,6 +79,31 @@ static inline bool constraint_match(struct event_constraint *c, u64 ecode)
#define PERF_X86_EVENT_PEBS_VIA_PT 0x0800 /* use PT buffer for PEBS */
#define PERF_X86_EVENT_PAIR 0x1000 /* Large Increment per Cycle */
#define PERF_X86_EVENT_LBR_SELECT 0x2000 /* Save/Restore MSR_LBR_SELECT */
#define PERF_X86_EVENT_TOPDOWN 0x4000 /* Count Topdown slots/metrics events */
static inline bool is_topdown_count(struct perf_event *event)
{
return event->hw.flags & PERF_X86_EVENT_TOPDOWN;
}
static inline bool is_metric_event(struct perf_event *event)
{
u64 config = event->attr.config;
return ((config & ARCH_PERFMON_EVENTSEL_EVENT) == 0) &&
((config & INTEL_ARCH_EVENT_MASK) >= INTEL_TD_METRIC_RETIRING) &&
((config & INTEL_ARCH_EVENT_MASK) <= INTEL_TD_METRIC_MAX);
}
static inline bool is_slots_event(struct perf_event *event)
{
return (event->attr.config & INTEL_ARCH_EVENT_MASK) == INTEL_TD_SLOTS;
}
static inline bool is_topdown_event(struct perf_event *event)
{
return is_metric_event(event) || is_slots_event(event);
}
struct amd_nb {
int nb_id; /* NorthBridge id */
......@@ -210,6 +235,8 @@ struct cpu_hw_events {
they've never been enabled yet */
int n_txn; /* the # last events in the below arrays;
added in the current transaction */
int n_txn_pair;
int n_txn_metric;
int assign[X86_PMC_IDX_MAX]; /* event to counter assignment */
u64 tags[X86_PMC_IDX_MAX];
......@@ -284,6 +311,12 @@ struct cpu_hw_events {
*/
u64 tfa_shadow;
/*
* Perf Metrics
*/
/* number of accepted metrics events */
int n_metric;
/*
* AMD specific bits
*/
......@@ -375,6 +408,19 @@ struct cpu_hw_events {
#define FIXED_EVENT_CONSTRAINT(c, n) \
EVENT_CONSTRAINT(c, (1ULL << (32+n)), FIXED_EVENT_FLAGS)
/*
* The special metric counters do not actually exist. They are calculated from
* the combination of the FxCtr3 + MSR_PERF_METRICS.
*
* The special metric counters are mapped to a dummy offset for the scheduler.
* The sharing between multiple users of the same metric without multiplexing
* is not allowed, even though the hardware supports that in principle.
*/
#define METRIC_EVENT_CONSTRAINT(c, n) \
EVENT_CONSTRAINT(c, (1ULL << (INTEL_PMC_IDX_METRIC_BASE + n)), \
INTEL_ARCH_EVENT_MASK)
/*
* Constraint on the Event code + UMask
*/
......@@ -537,7 +583,7 @@ union perf_capabilities {
*/
u64 full_width_write:1;
u64 pebs_baseline:1;
u64 pebs_metrics_available:1;
u64 perf_metrics:1;
u64 pebs_output_pt_available:1;
};
u64 capabilities;
......@@ -726,6 +772,12 @@ struct x86_pmu {
*/
atomic_t lbr_exclusive[x86_lbr_exclusive_max];
/*
* Intel perf metrics
*/
u64 (*update_topdown_event)(struct perf_event *event);
int (*set_topdown_event_period)(struct perf_event *event);
/*
* perf task context (i.e. struct perf_event_context::task_ctx_data)
* switch helper to bridge calls from perf/core to perf/x86.
......
......@@ -815,6 +815,7 @@ static const struct x86_cpu_id rapl_model_match[] __initconst = {
X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X, &model_spr),
X86_MATCH_VENDOR_FAM(AMD, 0x17, &model_amd_fam17h),
X86_MATCH_VENDOR_FAM(HYGON, 0x18, &model_amd_fam17h),
X86_MATCH_VENDOR_FAM(AMD, 0x19, &model_amd_fam17h),
{},
};
MODULE_DEVICE_TABLE(x86cpu, rapl_model_match);
......
......@@ -467,6 +467,7 @@
#define MSR_AMD64_IBSOP_REG_MASK ((1UL<<MSR_AMD64_IBSOP_REG_COUNT)-1)
#define MSR_AMD64_IBSCTL 0xc001103a
#define MSR_AMD64_IBSBRTARGET 0xc001103b
#define MSR_AMD64_ICIBSEXTDCTL 0xc001103c
#define MSR_AMD64_IBSOPDATA4 0xc001103d
#define MSR_AMD64_IBS_REG_COUNT_MAX 8 /* includes MSR_AMD64_IBSBRTARGET */
#define MSR_AMD64_SEV 0xc0010131
......@@ -860,11 +861,14 @@
#define MSR_CORE_PERF_FIXED_CTR0 0x00000309
#define MSR_CORE_PERF_FIXED_CTR1 0x0000030a
#define MSR_CORE_PERF_FIXED_CTR2 0x0000030b
#define MSR_CORE_PERF_FIXED_CTR3 0x0000030c
#define MSR_CORE_PERF_FIXED_CTR_CTRL 0x0000038d
#define MSR_CORE_PERF_GLOBAL_STATUS 0x0000038e
#define MSR_CORE_PERF_GLOBAL_CTRL 0x0000038f
#define MSR_CORE_PERF_GLOBAL_OVF_CTRL 0x00000390
#define MSR_PERF_METRICS 0x00000329
/* PERF_GLOBAL_OVF_CTL bits */
#define MSR_CORE_PERF_GLOBAL_OVF_CTRL_TRACE_TOPA_PMI_BIT 55
#define MSR_CORE_PERF_GLOBAL_OVF_CTRL_TRACE_TOPA_PMI (1ULL << MSR_CORE_PERF_GLOBAL_OVF_CTRL_TRACE_TOPA_PMI_BIT)
......
......@@ -196,13 +196,29 @@ struct x86_pmu_capability {
* Fixed-purpose performance events:
*/
/* RDPMC offset for Fixed PMCs */
#define INTEL_PMC_FIXED_RDPMC_BASE (1 << 30)
#define INTEL_PMC_FIXED_RDPMC_METRICS (1 << 29)
/*
* All 3 fixed-mode PMCs are configured via this single MSR:
* All the fixed-mode PMCs are configured via this single MSR:
*/
#define MSR_ARCH_PERFMON_FIXED_CTR_CTRL 0x38d
/*
* The counts are available in three separate MSRs:
* There is no event-code assigned to the fixed-mode PMCs.
*
* For a fixed-mode PMC, which has an equivalent event on a general-purpose
* PMC, the event-code of the equivalent event is used for the fixed-mode PMC,
* e.g., Instr_Retired.Any and CPU_CLK_Unhalted.Core.
*
* For a fixed-mode PMC, which doesn't have an equivalent event, a
* pseudo-encoding is used, e.g., CPU_CLK_Unhalted.Ref and TOPDOWN.SLOTS.
* The pseudo event-code for a fixed-mode PMC must be 0x00.
* The pseudo umask-code is 0xX. The X equals the index of the fixed
* counter + 1, e.g., the fixed counter 2 has the pseudo-encoding 0x0300.
*
* The counts are available in separate MSRs:
*/
/* Instr_Retired.Any: */
......@@ -213,29 +229,84 @@ struct x86_pmu_capability {
#define MSR_ARCH_PERFMON_FIXED_CTR1 0x30a
#define INTEL_PMC_IDX_FIXED_CPU_CYCLES (INTEL_PMC_IDX_FIXED + 1)
/* CPU_CLK_Unhalted.Ref: */
/* CPU_CLK_Unhalted.Ref: event=0x00,umask=0x3 (pseudo-encoding) */
#define MSR_ARCH_PERFMON_FIXED_CTR2 0x30b
#define INTEL_PMC_IDX_FIXED_REF_CYCLES (INTEL_PMC_IDX_FIXED + 2)
#define INTEL_PMC_MSK_FIXED_REF_CYCLES (1ULL << INTEL_PMC_IDX_FIXED_REF_CYCLES)
/* TOPDOWN.SLOTS: event=0x00,umask=0x4 (pseudo-encoding) */
#define MSR_ARCH_PERFMON_FIXED_CTR3 0x30c
#define INTEL_PMC_IDX_FIXED_SLOTS (INTEL_PMC_IDX_FIXED + 3)
#define INTEL_PMC_MSK_FIXED_SLOTS (1ULL << INTEL_PMC_IDX_FIXED_SLOTS)
/*
* We model BTS tracing as another fixed-mode PMC.
*
* We choose a value in the middle of the fixed event range, since lower
* We choose the value 47 for the fixed index of BTS, since lower
* values are used by actual fixed events and higher values are used
* to indicate other overflow conditions in the PERF_GLOBAL_STATUS msr.
*/
#define INTEL_PMC_IDX_FIXED_BTS (INTEL_PMC_IDX_FIXED + 16)
#define INTEL_PMC_IDX_FIXED_BTS (INTEL_PMC_IDX_FIXED + 15)
/*
* The PERF_METRICS MSR is modeled as several magic fixed-mode PMCs, one for
* each TopDown metric event.
*
* Internally the TopDown metric events are mapped to the FxCtr 3 (SLOTS).
*/
#define INTEL_PMC_IDX_METRIC_BASE (INTEL_PMC_IDX_FIXED + 16)
#define INTEL_PMC_IDX_TD_RETIRING (INTEL_PMC_IDX_METRIC_BASE + 0)
#define INTEL_PMC_IDX_TD_BAD_SPEC (INTEL_PMC_IDX_METRIC_BASE + 1)
#define INTEL_PMC_IDX_TD_FE_BOUND (INTEL_PMC_IDX_METRIC_BASE + 2)
#define INTEL_PMC_IDX_TD_BE_BOUND (INTEL_PMC_IDX_METRIC_BASE + 3)
#define INTEL_PMC_IDX_METRIC_END INTEL_PMC_IDX_TD_BE_BOUND
#define INTEL_PMC_MSK_TOPDOWN ((0xfull << INTEL_PMC_IDX_METRIC_BASE) | \
INTEL_PMC_MSK_FIXED_SLOTS)
#define GLOBAL_STATUS_COND_CHG BIT_ULL(63)
#define GLOBAL_STATUS_BUFFER_OVF BIT_ULL(62)
#define GLOBAL_STATUS_UNC_OVF BIT_ULL(61)
#define GLOBAL_STATUS_ASIF BIT_ULL(60)
#define GLOBAL_STATUS_COUNTERS_FROZEN BIT_ULL(59)
#define GLOBAL_STATUS_LBRS_FROZEN_BIT 58
#define GLOBAL_STATUS_LBRS_FROZEN BIT_ULL(GLOBAL_STATUS_LBRS_FROZEN_BIT)
#define GLOBAL_STATUS_TRACE_TOPAPMI BIT_ULL(55)
/*
* There is no event-code assigned to the TopDown events.
*
* For the slots event, use the pseudo code of the fixed counter 3.
*
* For the metric events, the pseudo event-code is 0x00.
* The pseudo umask-code starts from the middle of the pseudo event
* space, 0x80.
*/
#define INTEL_TD_SLOTS 0x0400 /* TOPDOWN.SLOTS */
/* Level 1 metrics */
#define INTEL_TD_METRIC_RETIRING 0x8000 /* Retiring metric */
#define INTEL_TD_METRIC_BAD_SPEC 0x8100 /* Bad speculation metric */
#define INTEL_TD_METRIC_FE_BOUND 0x8200 /* FE bound metric */
#define INTEL_TD_METRIC_BE_BOUND 0x8300 /* BE bound metric */
#define INTEL_TD_METRIC_MAX INTEL_TD_METRIC_BE_BOUND
#define INTEL_TD_METRIC_NUM 4
static inline bool is_metric_idx(int idx)
{
return (unsigned)(idx - INTEL_PMC_IDX_METRIC_BASE) < INTEL_TD_METRIC_NUM;
}
static inline bool is_topdown_idx(int idx)
{
return is_metric_idx(idx) || idx == INTEL_PMC_IDX_FIXED_SLOTS;
}
#define INTEL_PMC_OTHER_TOPDOWN_BITS(bit) \
(~(0x1ull << bit) & INTEL_PMC_MSK_TOPDOWN)
#define GLOBAL_STATUS_COND_CHG BIT_ULL(63)
#define GLOBAL_STATUS_BUFFER_OVF_BIT 62
#define GLOBAL_STATUS_BUFFER_OVF BIT_ULL(GLOBAL_STATUS_BUFFER_OVF_BIT)
#define GLOBAL_STATUS_UNC_OVF BIT_ULL(61)
#define GLOBAL_STATUS_ASIF BIT_ULL(60)
#define GLOBAL_STATUS_COUNTERS_FROZEN BIT_ULL(59)
#define GLOBAL_STATUS_LBRS_FROZEN_BIT 58
#define GLOBAL_STATUS_LBRS_FROZEN BIT_ULL(GLOBAL_STATUS_LBRS_FROZEN_BIT)
#define GLOBAL_STATUS_TRACE_TOPAPMI_BIT 55
#define GLOBAL_STATUS_TRACE_TOPAPMI BIT_ULL(GLOBAL_STATUS_TRACE_TOPAPMI_BIT)
#define GLOBAL_STATUS_PERF_METRICS_OVF_BIT 48
#define GLOBAL_CTRL_EN_PERF_METRICS 48
/*
* We model guest LBR event tracing as another fixed-mode PMC like BTS.
*
......@@ -334,6 +405,7 @@ struct pebs_xmm {
#define IBS_OP_ENABLE (1ULL<<17)
#define IBS_OP_MAX_CNT 0x0000FFFFULL
#define IBS_OP_MAX_CNT_EXT 0x007FFFFFULL /* not a register bit mask */
#define IBS_OP_MAX_CNT_EXT_MASK (0x7FULL<<20) /* separate upper 7 bits */
#define IBS_RIP_INVALID (1ULL<<38)
#ifdef CONFIG_X86_LOCAL_APIC
......
......@@ -212,17 +212,26 @@ struct hw_perf_event {
*/
u64 sample_period;
/*
* The period we started this sample with.
*/
u64 last_period;
union {
struct { /* Sampling */
/*
* The period we started this sample with.
*/
u64 last_period;
/*
* However much is left of the current period; note that this is
* a full 64bit value and allows for generation of periods longer
* than hardware might allow.
*/
local64_t period_left;
/*
* However much is left of the current period;
* note that this is a full 64bit value and
* allows for generation of periods longer
* than hardware might allow.
*/
local64_t period_left;
};
struct { /* Topdown events counting for context switch */
u64 saved_metric;
u64 saved_slots;
};
};
/*
* State for throttling the event, see __perf_event_overflow() and
......@@ -576,9 +585,13 @@ typedef void (*perf_overflow_handler_t)(struct perf_event *,
* PERF_EV_CAP_SOFTWARE: Is a software event.
* PERF_EV_CAP_READ_ACTIVE_PKG: A CPU event (or cgroup event) that can be read
* from any CPU in the package where it is active.
* PERF_EV_CAP_SIBLING: An event with this flag must be a group sibling and
* cannot be a group leader. If an event with this flag is detached from the
* group it is scheduled out and moved into an unrecoverable ERROR state.
*/
#define PERF_EV_CAP_SOFTWARE BIT(0)
#define PERF_EV_CAP_READ_ACTIVE_PKG BIT(1)
#define PERF_EV_CAP_SIBLING BIT(2)
#define SWEVENT_HLIST_BITS 8
#define SWEVENT_HLIST_SIZE (1 << SWEVENT_HLIST_BITS)
......@@ -859,7 +872,6 @@ struct perf_cpu_context {
struct list_head cgrp_cpuctx_entry;
#endif
struct list_head sched_cb_entry;
int sched_cb_usage;
int online;
......
......@@ -383,7 +383,6 @@ static DEFINE_MUTEX(perf_sched_mutex);
static atomic_t perf_sched_count;
static DEFINE_PER_CPU(atomic_t, perf_cgroup_events);
static DEFINE_PER_CPU(int, perf_sched_cb_usages);
static DEFINE_PER_CPU(struct pmu_event_list, pmu_sb_events);
static atomic_t nr_mmap_events __read_mostly;
......@@ -2134,8 +2133,24 @@ static inline struct list_head *get_event_list(struct perf_event *event)
return event->attr.pinned ? &ctx->pinned_active : &ctx->flexible_active;
}
/*
* Events that have PERF_EV_CAP_SIBLING require being part of a group and
* cannot exist on their own, schedule them out and move them into the ERROR
* state. Also see _perf_event_enable(), it will not be able to recover
* this ERROR state.
*/
static inline void perf_remove_sibling_event(struct perf_event *event)
{
struct perf_event_context *ctx = event->ctx;
struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);
event_sched_out(event, cpuctx, ctx);
perf_event_set_state(event, PERF_EVENT_STATE_ERROR);
}
static void perf_group_detach(struct perf_event *event)
{
struct perf_event *leader = event->group_leader;
struct perf_event *sibling, *tmp;
struct perf_event_context *ctx = event->ctx;
......@@ -2154,7 +2169,7 @@ static void perf_group_detach(struct perf_event *event)
/*
* If this is a sibling, remove it from its group.
*/
if (event->group_leader != event) {
if (leader != event) {
list_del_init(&event->sibling_list);
event->group_leader->nr_siblings--;
goto out;
......@@ -2167,6 +2182,9 @@ static void perf_group_detach(struct perf_event *event)
*/
list_for_each_entry_safe(sibling, tmp, &event->sibling_list, sibling_list) {
if (sibling->event_caps & PERF_EV_CAP_SIBLING)
perf_remove_sibling_event(sibling);
sibling->group_leader = sibling;
list_del_init(&sibling->sibling_list);
......@@ -2184,10 +2202,10 @@ static void perf_group_detach(struct perf_event *event)
}
out:
perf_event__header_size(event->group_leader);
for_each_sibling_event(tmp, event->group_leader)
for_each_sibling_event(tmp, leader)
perf_event__header_size(tmp);
perf_event__header_size(leader);
}
static bool is_orphaned_event(struct perf_event *event)
......@@ -2980,6 +2998,7 @@ static void _perf_event_enable(struct perf_event *event)
raw_spin_lock_irq(&ctx->lock);
if (event->state >= PERF_EVENT_STATE_INACTIVE ||
event->state < PERF_EVENT_STATE_ERROR) {
out:
raw_spin_unlock_irq(&ctx->lock);
return;
}
......@@ -2991,8 +3010,16 @@ static void _perf_event_enable(struct perf_event *event)
* has gone back into error state, as distinct from the task having
* been scheduled away before the cross-call arrived.
*/
if (event->state == PERF_EVENT_STATE_ERROR)
if (event->state == PERF_EVENT_STATE_ERROR) {
/*
* Detached SIBLING events cannot leave ERROR state.
*/
if (event->event_caps & PERF_EV_CAP_SIBLING &&
event->group_leader == event)
goto out;
event->state = PERF_EVENT_STATE_OFF;
}
raw_spin_unlock_irq(&ctx->lock);
event_function_call(event, __perf_event_enable, NULL);
......@@ -3357,10 +3384,12 @@ static void perf_event_context_sched_out(struct task_struct *task, int ctxn,
struct perf_event_context *parent, *next_parent;
struct perf_cpu_context *cpuctx;
int do_switch = 1;
struct pmu *pmu;
if (likely(!ctx))
return;
pmu = ctx->pmu;
cpuctx = __get_cpu_context(ctx);
if (!cpuctx->task_ctx)
return;
......@@ -3390,11 +3419,15 @@ static void perf_event_context_sched_out(struct task_struct *task, int ctxn,
raw_spin_lock(&ctx->lock);
raw_spin_lock_nested(&next_ctx->lock, SINGLE_DEPTH_NESTING);
if (context_equiv(ctx, next_ctx)) {
struct pmu *pmu = ctx->pmu;
WRITE_ONCE(ctx->task, next);
WRITE_ONCE(next_ctx->task, task);
perf_pmu_disable(pmu);
if (cpuctx->sched_cb_usage && pmu->sched_task)
pmu->sched_task(ctx, false);
/*
* PMU specific parts of task perf context can require
* additional synchronization. As an example of such
......@@ -3406,6 +3439,8 @@ static void perf_event_context_sched_out(struct task_struct *task, int ctxn,
else
swap(ctx->task_ctx_data, next_ctx->task_ctx_data);
perf_pmu_enable(pmu);
/*
* RCU_INIT_POINTER here is safe because we've not
* modified the ctx and the above modification of
......@@ -3428,21 +3463,22 @@ static void perf_event_context_sched_out(struct task_struct *task, int ctxn,
if (do_switch) {
raw_spin_lock(&ctx->lock);
perf_pmu_disable(pmu);
if (cpuctx->sched_cb_usage && pmu->sched_task)
pmu->sched_task(ctx, false);
task_ctx_sched_out(cpuctx, ctx, EVENT_ALL);
perf_pmu_enable(pmu);
raw_spin_unlock(&ctx->lock);
}
}
static DEFINE_PER_CPU(struct list_head, sched_cb_list);
void perf_sched_cb_dec(struct pmu *pmu)
{
struct perf_cpu_context *cpuctx = this_cpu_ptr(pmu->pmu_cpu_context);
this_cpu_dec(perf_sched_cb_usages);
if (!--cpuctx->sched_cb_usage)
list_del(&cpuctx->sched_cb_entry);
--cpuctx->sched_cb_usage;
}
......@@ -3450,10 +3486,7 @@ void perf_sched_cb_inc(struct pmu *pmu)
{
struct perf_cpu_context *cpuctx = this_cpu_ptr(pmu->pmu_cpu_context);
if (!cpuctx->sched_cb_usage++)
list_add(&cpuctx->sched_cb_entry, this_cpu_ptr(&sched_cb_list));
this_cpu_inc(perf_sched_cb_usages);
cpuctx->sched_cb_usage++;
}
/*
......@@ -3464,30 +3497,22 @@ void perf_sched_cb_inc(struct pmu *pmu)
* PEBS requires this to provide PID/TID information. This requires we flush
* all queued PEBS records before we context switch to a new task.
*/
static void perf_pmu_sched_task(struct task_struct *prev,
struct task_struct *next,
bool sched_in)
static void __perf_pmu_sched_task(struct perf_cpu_context *cpuctx, bool sched_in)
{
struct perf_cpu_context *cpuctx;
struct pmu *pmu;
if (prev == next)
return;
pmu = cpuctx->ctx.pmu; /* software PMUs will not have sched_task */
list_for_each_entry(cpuctx, this_cpu_ptr(&sched_cb_list), sched_cb_entry) {
pmu = cpuctx->ctx.pmu; /* software PMUs will not have sched_task */
if (WARN_ON_ONCE(!pmu->sched_task))
continue;
if (WARN_ON_ONCE(!pmu->sched_task))
return;
perf_ctx_lock(cpuctx, cpuctx->task_ctx);
perf_pmu_disable(pmu);
perf_ctx_lock(cpuctx, cpuctx->task_ctx);
perf_pmu_disable(pmu);
pmu->sched_task(cpuctx->task_ctx, sched_in);
pmu->sched_task(cpuctx->task_ctx, sched_in);
perf_pmu_enable(pmu);
perf_ctx_unlock(cpuctx, cpuctx->task_ctx);
}
perf_pmu_enable(pmu);
perf_ctx_unlock(cpuctx, cpuctx->task_ctx);
}
static void perf_event_switch(struct task_struct *task,
......@@ -3512,9 +3537,6 @@ void __perf_event_task_sched_out(struct task_struct *task,
{
int ctxn;
if (__this_cpu_read(perf_sched_cb_usages))
perf_pmu_sched_task(task, next, false);
if (atomic_read(&nr_switch_events))
perf_event_switch(task, next, false);
......@@ -3746,10 +3768,14 @@ static void perf_event_context_sched_in(struct perf_event_context *ctx,
struct task_struct *task)
{
struct perf_cpu_context *cpuctx;
struct pmu *pmu = ctx->pmu;
cpuctx = __get_cpu_context(ctx);
if (cpuctx->task_ctx == ctx)
if (cpuctx->task_ctx == ctx) {
if (cpuctx->sched_cb_usage)
__perf_pmu_sched_task(cpuctx, true);
return;
}
perf_ctx_lock(cpuctx, ctx);
/*
......@@ -3759,7 +3785,7 @@ static void perf_event_context_sched_in(struct perf_event_context *ctx,
if (!ctx->nr_events)
goto unlock;
perf_pmu_disable(ctx->pmu);
perf_pmu_disable(pmu);
/*
* We want to keep the following priority order:
* cpu pinned (that don't need to move), task pinned,
......@@ -3771,7 +3797,11 @@ static void perf_event_context_sched_in(struct perf_event_context *ctx,
if (!RB_EMPTY_ROOT(&ctx->pinned_groups.tree))
cpu_ctx_sched_out(cpuctx, EVENT_FLEXIBLE);
perf_event_sched_in(cpuctx, ctx, task);
perf_pmu_enable(ctx->pmu);
if (cpuctx->sched_cb_usage && pmu->sched_task)
pmu->sched_task(cpuctx->task_ctx, true);
perf_pmu_enable(pmu);
unlock:
perf_ctx_unlock(cpuctx, ctx);
......@@ -3814,9 +3844,6 @@ void __perf_event_task_sched_in(struct task_struct *prev,
if (atomic_read(&nr_switch_events))
perf_event_switch(task, prev, true);
if (__this_cpu_read(perf_sched_cb_usages))
perf_pmu_sched_task(prev, task, true);
}
static u64 perf_calculate_period(struct perf_event *event, u64 nsec, u64 count)
......@@ -5869,11 +5896,11 @@ static void perf_pmu_output_stop(struct perf_event *event);
static void perf_mmap_close(struct vm_area_struct *vma)
{
struct perf_event *event = vma->vm_file->private_data;
struct perf_buffer *rb = ring_buffer_get(event);
struct user_struct *mmap_user = rb->mmap_user;
int mmap_locked = rb->mmap_locked;
unsigned long size = perf_data_size(rb);
bool detach_rest = false;
if (event->pmu->event_unmapped)
event->pmu->event_unmapped(event, vma->vm_mm);
......@@ -5904,7 +5931,8 @@ static void perf_mmap_close(struct vm_area_struct *vma)
mutex_unlock(&event->mmap_mutex);
}
atomic_dec(&rb->mmap_count);
if (atomic_dec_and_test(&rb->mmap_count))
detach_rest = true;
if (!atomic_dec_and_mutex_lock(&event->mmap_count, &event->mmap_mutex))
goto out_put;
......@@ -5913,7 +5941,7 @@ static void perf_mmap_close(struct vm_area_struct *vma)
mutex_unlock(&event->mmap_mutex);
/* If there's still other mmap()s of this buffer, we're done. */
if (atomic_read(&rb->mmap_count))
if (!detach_rest)
goto out_put;
/*
......@@ -12829,7 +12857,6 @@ static void __init perf_event_init_all_cpus(void)
#ifdef CONFIG_CGROUP_PERF
INIT_LIST_HEAD(&per_cpu(cgrp_cpuctx_list, cpu));
#endif
INIT_LIST_HEAD(&per_cpu(sched_cb_list, cpu));
}
}
......
......@@ -106,9 +106,10 @@ static nokprobe_inline bool trace_kprobe_has_gone(struct trace_kprobe *tk)
static nokprobe_inline bool trace_kprobe_within_module(struct trace_kprobe *tk,
struct module *mod)
{
int len = strlen(mod->name);
int len = strlen(module_name(mod));
const char *name = trace_kprobe_symbol(tk);
return strncmp(mod->name, name, len) == 0 && name[len] == ':';
return strncmp(module_name(mod), name, len) == 0 && name[len] == ':';
}
static nokprobe_inline bool trace_kprobe_module_exist(struct trace_kprobe *tk)
......@@ -688,7 +689,7 @@ static int trace_kprobe_module_callback(struct notifier_block *nb,
if (ret)
pr_warn("Failed to re-register probe %s on %s: %d\n",
trace_probe_name(&tk->tp),
mod->name, ret);
module_name(mod), ret);
}
}
mutex_unlock(&event_mutex);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment