Commit 430daf2d, authored by Andi Kleen, committed by Arnaldo Carvalho de Melo

perf stat: Collapse identically named events

The uncore PMU has a lot of duplicated PMUs for different subsystems.
When expanding an uncore alias we usually end up with a large
number of identically named aliases, which makes perf stat
output difficult to read.

Automatically sum them up in perf stat, unless --no-merge is specified.

This can be default because only the uncores generally have duplicated
aliases. Other PMUs have unique names.

Before:

  % perf stat --no-merge -a -e unc_c_llc_lookup.any sleep 1

  Performance counter stats for 'system wide':

           694,976 Bytes unc_c_llc_lookup.any
           706,304 Bytes unc_c_llc_lookup.any
           956,608 Bytes unc_c_llc_lookup.any
           782,720 Bytes unc_c_llc_lookup.any
           605,696 Bytes unc_c_llc_lookup.any
           442,816 Bytes unc_c_llc_lookup.any
           659,328 Bytes unc_c_llc_lookup.any
           509,312 Bytes unc_c_llc_lookup.any
           263,936 Bytes unc_c_llc_lookup.any
           592,448 Bytes unc_c_llc_lookup.any
           672,448 Bytes unc_c_llc_lookup.any
           608,640 Bytes unc_c_llc_lookup.any
           641,024 Bytes unc_c_llc_lookup.any
           856,896 Bytes unc_c_llc_lookup.any
           808,832 Bytes unc_c_llc_lookup.any
           684,864 Bytes unc_c_llc_lookup.any
           710,464 Bytes unc_c_llc_lookup.any
           538,304 Bytes unc_c_llc_lookup.any

       1.002577660 seconds time elapsed

After:

  % perf stat -a -e unc_c_llc_lookup.any sleep 1

  Performance counter stats for 'system wide':

         2,685,120 Bytes unc_c_llc_lookup.any

       1.002648032 seconds time elapsed

v2: Split collect_aliases. Rename alias flag.
v3: Make sure unsupported/not counted is always printed.
v4: Factor out callback change into separate patch.
v5: Move check for bad results here
    Move merged check into collect_data
Signed-off-by: Andi Kleen <ak@linux.intel.com>
Acked-by: Jiri Olsa <jolsa@kernel.org>
Link: http://lkml.kernel.org/r/20170320201711.14142-3-andi@firstfloor.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
parent fbe51fba
...@@ -236,6 +236,9 @@ To interpret the results it is usually needed to know on which ...@@ -236,6 +236,9 @@ To interpret the results it is usually needed to know on which
CPUs the workload runs on. If needed the CPUs can be forced using CPUs the workload runs on. If needed the CPUs can be forced using
taskset. taskset.
--no-merge::
Do not merge results from same PMUs.
EXAMPLES EXAMPLES
-------- --------
......
...@@ -140,6 +140,7 @@ static unsigned int unit_width = 4; /* strlen("unit") */ ...@@ -140,6 +140,7 @@ static unsigned int unit_width = 4; /* strlen("unit") */
static bool forever = false; static bool forever = false;
static bool metric_only = false; static bool metric_only = false;
static bool force_metric_only = false; static bool force_metric_only = false;
static bool no_merge = false;
static struct timespec ref_time; static struct timespec ref_time;
static struct cpu_map *aggr_map; static struct cpu_map *aggr_map;
static aggr_get_id_t aggr_get_id; static aggr_get_id_t aggr_get_id;
...@@ -1182,12 +1183,37 @@ static void aggr_update_shadow(void) ...@@ -1182,12 +1183,37 @@ static void aggr_update_shadow(void)
} }
} }
static void collect_data(struct perf_evsel *counter, static void collect_all_aliases(struct perf_evsel *counter,
void (*cb)(struct perf_evsel *counter, void *data, void (*cb)(struct perf_evsel *counter, void *data,
bool first), bool first),
void *data) void *data)
{ {
struct perf_evsel *alias;
alias = list_prepare_entry(counter, &(evsel_list->entries), node);
list_for_each_entry_continue (alias, &evsel_list->entries, node) {
if (strcmp(perf_evsel__name(alias), perf_evsel__name(counter)) ||
alias->scale != counter->scale ||
alias->cgrp != counter->cgrp ||
strcmp(alias->unit, counter->unit) ||
nsec_counter(alias) != nsec_counter(counter))
break;
alias->merged_stat = true;
cb(alias, data, false);
}
}
static bool collect_data(struct perf_evsel *counter,
void (*cb)(struct perf_evsel *counter, void *data,
bool first),
void *data)
{
if (counter->merged_stat)
return false;
cb(counter, data, true); cb(counter, data, true);
if (!no_merge)
collect_all_aliases(counter, cb, data);
return true;
} }
struct aggr_data { struct aggr_data {
...@@ -1245,7 +1271,8 @@ static void print_aggr(char *prefix) ...@@ -1245,7 +1271,8 @@ static void print_aggr(char *prefix)
evlist__for_each_entry(evsel_list, counter) { evlist__for_each_entry(evsel_list, counter) {
ad.val = ad.ena = ad.run = 0; ad.val = ad.ena = ad.run = 0;
ad.nr = 0; ad.nr = 0;
collect_data(counter, aggr_cb, &ad); if (!collect_data(counter, aggr_cb, &ad))
continue;
nr = ad.nr; nr = ad.nr;
ena = ad.ena; ena = ad.ena;
run = ad.run; run = ad.run;
...@@ -1318,7 +1345,8 @@ static void print_counter_aggr(struct perf_evsel *counter, char *prefix) ...@@ -1318,7 +1345,8 @@ static void print_counter_aggr(struct perf_evsel *counter, char *prefix)
double uval; double uval;
struct caggr_data cd = { .avg = 0.0 }; struct caggr_data cd = { .avg = 0.0 };
collect_data(counter, counter_aggr_cb, &cd); if (!collect_data(counter, counter_aggr_cb, &cd))
return;
if (prefix && !metric_only) if (prefix && !metric_only)
fprintf(output, "%s", prefix); fprintf(output, "%s", prefix);
...@@ -1353,7 +1381,8 @@ static void print_counter(struct perf_evsel *counter, char *prefix) ...@@ -1353,7 +1381,8 @@ static void print_counter(struct perf_evsel *counter, char *prefix)
for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) { for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) {
struct aggr_data ad = { .cpu = cpu }; struct aggr_data ad = { .cpu = cpu };
collect_data(counter, counter_cb, &ad); if (!collect_data(counter, counter_cb, &ad))
return;
val = ad.val; val = ad.val;
ena = ad.ena; ena = ad.ena;
run = ad.run; run = ad.run;
...@@ -1701,6 +1730,7 @@ static const struct option stat_options[] = { ...@@ -1701,6 +1730,7 @@ static const struct option stat_options[] = {
"list of cpus to monitor in system-wide"), "list of cpus to monitor in system-wide"),
OPT_SET_UINT('A', "no-aggr", &stat_config.aggr_mode, OPT_SET_UINT('A', "no-aggr", &stat_config.aggr_mode,
"disable CPU count aggregation", AGGR_NONE), "disable CPU count aggregation", AGGR_NONE),
OPT_BOOLEAN(0, "no-merge", &no_merge, "Do not merge identical named events"),
OPT_STRING('x', "field-separator", &csv_sep, "separator", OPT_STRING('x', "field-separator", &csv_sep, "separator",
"print counts with custom separator"), "print counts with custom separator"),
OPT_CALLBACK('G', "cgroup", &evsel_list, "name", OPT_CALLBACK('G', "cgroup", &evsel_list, "name",
......
...@@ -131,6 +131,7 @@ struct perf_evsel { ...@@ -131,6 +131,7 @@ struct perf_evsel {
bool cmdline_group_boundary; bool cmdline_group_boundary;
struct list_head config_terms; struct list_head config_terms;
int bpf_fd; int bpf_fd;
bool merged_stat;
}; };
union u64_swap { union u64_swap {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment