Commit a59fb796 authored by Ian Rogers, committed by Namhyung Kim

perf metrics: Compute unmerged uncore metrics individually

When counts from multiple uncore PMUs are merged, the metric is computed
only for the metric leader. Prior to this patch, when merging/aggregation
was disabled, still only the leader's metric was computed. Fix this by
computing the metric for each PMU.

On a SkylakeX:
Before:
```
$ perf stat -A -M memory_bandwidth_total -a sleep 1

 Performance counter stats for 'system wide':

CPU0               82,217      UNC_M_CAS_COUNT.RD [uncore_imc_0] #      9.2 MB/s  memory_bandwidth_total
CPU18                   0      UNC_M_CAS_COUNT.RD [uncore_imc_0] #      0.0 MB/s  memory_bandwidth_total
CPU0               61,395      UNC_M_CAS_COUNT.WR [uncore_imc_0]
CPU18                   0      UNC_M_CAS_COUNT.WR [uncore_imc_0]
CPU0                    0      UNC_M_CAS_COUNT.RD [uncore_imc_1]
CPU18                   0      UNC_M_CAS_COUNT.RD [uncore_imc_1]
CPU0                    0      UNC_M_CAS_COUNT.WR [uncore_imc_1]
CPU18                   0      UNC_M_CAS_COUNT.WR [uncore_imc_1]
CPU0               81,570      UNC_M_CAS_COUNT.RD [uncore_imc_2]
CPU18             113,886      UNC_M_CAS_COUNT.RD [uncore_imc_2]
CPU0               62,330      UNC_M_CAS_COUNT.WR [uncore_imc_2]
CPU18              66,942      UNC_M_CAS_COUNT.WR [uncore_imc_2]
CPU0               75,489      UNC_M_CAS_COUNT.RD [uncore_imc_3]
CPU18              27,958      UNC_M_CAS_COUNT.RD [uncore_imc_3]
CPU0               55,864      UNC_M_CAS_COUNT.WR [uncore_imc_3]
CPU18              38,727      UNC_M_CAS_COUNT.WR [uncore_imc_3]
CPU0                    0      UNC_M_CAS_COUNT.RD [uncore_imc_4]
CPU18                   0      UNC_M_CAS_COUNT.RD [uncore_imc_4]
CPU0                    0      UNC_M_CAS_COUNT.WR [uncore_imc_4]
CPU18                   0      UNC_M_CAS_COUNT.WR [uncore_imc_4]
CPU0               75,423      UNC_M_CAS_COUNT.RD [uncore_imc_5]
CPU18             104,527      UNC_M_CAS_COUNT.RD [uncore_imc_5]
CPU0               57,596      UNC_M_CAS_COUNT.WR [uncore_imc_5]
CPU18              56,777      UNC_M_CAS_COUNT.WR [uncore_imc_5]
CPU0        1,003,440,851 ns   duration_time

       1.003440851 seconds time elapsed
```

After:
```
$ perf stat -A -M memory_bandwidth_total -a sleep 1

 Performance counter stats for 'system wide':

CPU0               88,968      UNC_M_CAS_COUNT.RD [uncore_imc_0] #      9.5 MB/s  memory_bandwidth_total
CPU18                   0      UNC_M_CAS_COUNT.RD [uncore_imc_0] #      0.0 MB/s  memory_bandwidth_total
CPU0               59,498      UNC_M_CAS_COUNT.WR [uncore_imc_0]
CPU18                   0      UNC_M_CAS_COUNT.WR [uncore_imc_0]
CPU0                    0      UNC_M_CAS_COUNT.RD [uncore_imc_1] #      0.0 MB/s  memory_bandwidth_total
CPU18                   0      UNC_M_CAS_COUNT.RD [uncore_imc_1] #      0.0 MB/s  memory_bandwidth_total
CPU0                    0      UNC_M_CAS_COUNT.WR [uncore_imc_1]
CPU18                   0      UNC_M_CAS_COUNT.WR [uncore_imc_1]
CPU0               88,635      UNC_M_CAS_COUNT.RD [uncore_imc_2] #      9.5 MB/s  memory_bandwidth_total
CPU18             117,975      UNC_M_CAS_COUNT.RD [uncore_imc_2] #     11.5 MB/s  memory_bandwidth_total
CPU0               60,829      UNC_M_CAS_COUNT.WR [uncore_imc_2]
CPU18              62,105      UNC_M_CAS_COUNT.WR [uncore_imc_2]
CPU0               82,238      UNC_M_CAS_COUNT.RD [uncore_imc_3] #      8.7 MB/s  memory_bandwidth_total
CPU18              22,906      UNC_M_CAS_COUNT.RD [uncore_imc_3] #      3.6 MB/s  memory_bandwidth_total
CPU0               53,959      UNC_M_CAS_COUNT.WR [uncore_imc_3]
CPU18              32,990      UNC_M_CAS_COUNT.WR [uncore_imc_3]
CPU0                    0      UNC_M_CAS_COUNT.RD [uncore_imc_4] #      0.0 MB/s  memory_bandwidth_total
CPU18                   0      UNC_M_CAS_COUNT.RD [uncore_imc_4] #      0.0 MB/s  memory_bandwidth_total
CPU0                    0      UNC_M_CAS_COUNT.WR [uncore_imc_4]
CPU18                   0      UNC_M_CAS_COUNT.WR [uncore_imc_4]
CPU0               83,595      UNC_M_CAS_COUNT.RD [uncore_imc_5] #      8.9 MB/s  memory_bandwidth_total
CPU18             110,151      UNC_M_CAS_COUNT.RD [uncore_imc_5] #     10.5 MB/s  memory_bandwidth_total
CPU0               56,540      UNC_M_CAS_COUNT.WR [uncore_imc_5]
CPU18              53,816      UNC_M_CAS_COUNT.WR [uncore_imc_5]
CPU0        1,003,353,416 ns   duration_time
```

Signed-off-by: Ian Rogers <irogers@google.com>
Acked-by: Namhyung Kim <namhyung@kernel.org>
Cc: K Prateek Nayak <kprateek.nayak@amd.com>
Cc: Stephane Eranian <eranian@google.com>
Cc: Kaige Ye <ye@kaige.org>
Cc: Kajol Jain <kjain@linux.ibm.com>
Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: John Garry <john.g.garry@oracle.com>
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Link: https://lore.kernel.org/r/20240221070754.4163916-2-irogers@google.com
parent eee41e6b
...@@ -44,6 +44,8 @@ struct metric_event *metricgroup__lookup(struct rblist *metric_events, ...@@ -44,6 +44,8 @@ struct metric_event *metricgroup__lookup(struct rblist *metric_events,
if (!metric_events) if (!metric_events)
return NULL; return NULL;
if (evsel->metric_leader)
me.evsel = evsel->metric_leader;
nd = rblist__find(metric_events, &me); nd = rblist__find(metric_events, &me);
if (nd) if (nd)
return container_of(nd, struct metric_event, nd); return container_of(nd, struct metric_event, nd);
......
...@@ -356,6 +356,7 @@ static void print_nsecs(struct perf_stat_config *config, ...@@ -356,6 +356,7 @@ static void print_nsecs(struct perf_stat_config *config,
} }
static int prepare_metric(const struct metric_expr *mexp, static int prepare_metric(const struct metric_expr *mexp,
const struct evsel *evsel,
struct expr_parse_ctx *pctx, struct expr_parse_ctx *pctx,
int aggr_idx) int aggr_idx)
{ {
...@@ -399,8 +400,29 @@ static int prepare_metric(const struct metric_expr *mexp, ...@@ -399,8 +400,29 @@ static int prepare_metric(const struct metric_expr *mexp,
source_count = 1; source_count = 1;
} else { } else {
struct perf_stat_evsel *ps = metric_events[i]->stats; struct perf_stat_evsel *ps = metric_events[i]->stats;
struct perf_stat_aggr *aggr = &ps->aggr[aggr_idx]; struct perf_stat_aggr *aggr;
/*
* If there are multiple uncore PMUs and we're not
* reading the leader's stats, determine the stats for
* the appropriate uncore PMU.
*/
if (evsel && evsel->metric_leader &&
evsel->pmu != evsel->metric_leader->pmu &&
mexp->metric_events[i]->pmu == evsel->metric_leader->pmu) {
struct evsel *pos;
evlist__for_each_entry(evsel->evlist, pos) {
if (pos->pmu != evsel->pmu)
continue;
if (pos->metric_leader != mexp->metric_events[i])
continue;
ps = pos->stats;
source_count = 1;
break;
}
}
aggr = &ps->aggr[aggr_idx];
if (!aggr) if (!aggr)
break; break;
...@@ -416,6 +438,7 @@ static int prepare_metric(const struct metric_expr *mexp, ...@@ -416,6 +438,7 @@ static int prepare_metric(const struct metric_expr *mexp,
source_count = 0; source_count = 0;
} else { } else {
val = aggr->counts.val; val = aggr->counts.val;
if (!source_count)
source_count = evsel__source_count(metric_events[i]); source_count = evsel__source_count(metric_events[i]);
} }
} }
...@@ -463,7 +486,7 @@ static void generic_metric(struct perf_stat_config *config, ...@@ -463,7 +486,7 @@ static void generic_metric(struct perf_stat_config *config,
pctx->sctx.user_requested_cpu_list = strdup(config->user_requested_cpu_list); pctx->sctx.user_requested_cpu_list = strdup(config->user_requested_cpu_list);
pctx->sctx.runtime = runtime; pctx->sctx.runtime = runtime;
pctx->sctx.system_wide = config->system_wide; pctx->sctx.system_wide = config->system_wide;
i = prepare_metric(mexp, pctx, aggr_idx); i = prepare_metric(mexp, evsel, pctx, aggr_idx);
if (i < 0) { if (i < 0) {
expr__ctx_free(pctx); expr__ctx_free(pctx);
return; return;
...@@ -524,7 +547,7 @@ double test_generic_metric(struct metric_expr *mexp, int aggr_idx) ...@@ -524,7 +547,7 @@ double test_generic_metric(struct metric_expr *mexp, int aggr_idx)
if (!pctx) if (!pctx)
return NAN; return NAN;
if (prepare_metric(mexp, pctx, aggr_idx) < 0) if (prepare_metric(mexp, /*evsel=*/NULL, pctx, aggr_idx) < 0)
goto out; goto out;
if (expr__parse(&ratio, pctx, mexp->metric_expr)) if (expr__parse(&ratio, pctx, mexp->metric_expr))
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment