Commit 4b49ab70 authored by Andi Kleen, committed by Arnaldo Carvalho de Melo

perf stat: Use affinity for reading

Restructure event reading to use affinity to minimize the number of IPIs
needed.

Before on a large test case with 94 CPUs:

  % time     seconds  usecs/call     calls    errors syscall
  ------ ----------- ----------- --------- --------- ----------------
    3.16    0.106079           4     22082           read

After:

    3.43    0.081295           3     22082           read
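
The savings come from where each read() runs. Reading a perf counter that
lives on another CPU normally makes the kernel interrupt that CPU (an IPI);
if the reading thread first migrates itself onto the event's CPU, the PMU
read happens locally. A minimal standalone sketch of that idea — not the
perf tool code itself; the fd is assumed to come from perf_event_open()
opened for 'cpu':

  #define _GNU_SOURCE
  #include <sched.h>
  #include <unistd.h>

  /* Sketch: pin this thread to 'cpu', then read the counter fd locally. */
  static int read_counter_pinned(int fd, int cpu, unsigned long long *val)
  {
  	cpu_set_t mask;

  	CPU_ZERO(&mask);
  	CPU_SET(cpu, &mask);
  	/* one migration per CPU; reads done after it avoid an IPI */
  	if (sched_setaffinity(0, sizeof(mask), &mask))
  		return -1;

  	return read(fd, val, sizeof(*val)) == sizeof(*val) ? 0 : -1;
  }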
Signed-off-by: Andi Kleen <ak@linux.intel.com>
Acked-by: Jiri Olsa <jolsa@kernel.org>
Link: http://lore.kernel.org/lkml/20191121001522.180827-11-andi@firstfloor.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
parent 4804e011
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -266,15 +266,10 @@ static int read_single_counter(struct evsel *counter, int cpu,
  * Read out the results of a single counter:
  * do not aggregate counts across CPUs in system-wide mode
  */
-static int read_counter(struct evsel *counter, struct timespec *rs)
+static int read_counter_cpu(struct evsel *counter, struct timespec *rs, int cpu)
 {
 	int nthreads = perf_thread_map__nr(evsel_list->core.threads);
-	int ncpus, cpu, thread;
-
-	if (target__has_cpu(&target) && !target__has_per_thread(&target))
-		ncpus = perf_evsel__nr_cpus(counter);
-	else
-		ncpus = 1;
+	int thread;
 
 	if (!counter->supported)
 		return -ENOENT;
@@ -283,40 +278,38 @@ static int read_counter(struct evsel *counter, struct timespec *rs)
 		nthreads = 1;
 
 	for (thread = 0; thread < nthreads; thread++) {
-		for (cpu = 0; cpu < ncpus; cpu++) {
-			struct perf_counts_values *count;
+		struct perf_counts_values *count;
 
-			count = perf_counts(counter->counts, cpu, thread);
-
-			/*
-			 * The leader's group read loads data into its group members
-			 * (via perf_evsel__read_counter) and sets threir count->loaded.
-			 */
-			if (!perf_counts__is_loaded(counter->counts, cpu, thread) &&
-			    read_single_counter(counter, cpu, thread, rs)) {
-				counter->counts->scaled = -1;
-				perf_counts(counter->counts, cpu, thread)->ena = 0;
-				perf_counts(counter->counts, cpu, thread)->run = 0;
-				return -1;
-			}
+		count = perf_counts(counter->counts, cpu, thread);
 
-			perf_counts__set_loaded(counter->counts, cpu, thread, false);
+		/*
+		 * The leader's group read loads data into its group members
+		 * (via perf_evsel__read_counter()) and sets their count->loaded.
+		 */
+		if (!perf_counts__is_loaded(counter->counts, cpu, thread) &&
+		    read_single_counter(counter, cpu, thread, rs)) {
+			counter->counts->scaled = -1;
+			perf_counts(counter->counts, cpu, thread)->ena = 0;
+			perf_counts(counter->counts, cpu, thread)->run = 0;
+			return -1;
+		}
 
-			if (STAT_RECORD) {
-				if (perf_evsel__write_stat_event(counter, cpu, thread, count)) {
-					pr_err("failed to write stat event\n");
-					return -1;
-				}
-			}
+		perf_counts__set_loaded(counter->counts, cpu, thread, false);
+
+		if (STAT_RECORD) {
+			if (perf_evsel__write_stat_event(counter, cpu, thread, count)) {
+				pr_err("failed to write stat event\n");
+				return -1;
 			}
 		}
+
+		if (verbose > 1) {
+			fprintf(stat_config.output,
+				"%s: %d: %" PRIu64 " %" PRIu64 " %" PRIu64 "\n",
+				perf_evsel__name(counter),
+				cpu,
+				count->val, count->ena, count->run);
+		}
 	}
 
 	return 0;
@@ -325,15 +318,37 @@ static int read_counter(struct evsel *counter, struct timespec *rs)
 static void read_counters(struct timespec *rs)
 {
 	struct evsel *counter;
-	int ret;
+	struct affinity affinity;
+	int i, ncpus, cpu;
+
+	if (affinity__setup(&affinity) < 0)
+		return;
+
+	ncpus = perf_cpu_map__nr(evsel_list->core.all_cpus);
+	if (!target__has_cpu(&target) || target__has_per_thread(&target))
+		ncpus = 1;
+	evlist__for_each_cpu(evsel_list, i, cpu) {
+		if (i >= ncpus)
+			break;
+		affinity__set(&affinity, cpu);
+
+		evlist__for_each_entry(evsel_list, counter) {
+			if (evsel__cpu_iter_skip(counter, cpu))
+				continue;
+			if (!counter->err) {
+				counter->err = read_counter_cpu(counter, rs,
+								counter->cpu_iter - 1);
+			}
+		}
+	}
+	affinity__cleanup(&affinity);
 
 	evlist__for_each_entry(evsel_list, counter) {
-		ret = read_counter(counter, rs);
-		if (ret)
+		if (counter->err)
 			pr_debug("failed to read counter %s\n", counter->name);
-
-		if (ret == 0 && perf_stat_process_counter(&stat_config, counter))
+		if (counter->err == 0 && perf_stat_process_counter(&stat_config, counter))
 			pr_warning("failed to process counter %s\n", counter->name);
+		counter->err = 0;
 	}
 }
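
With the loops inverted (outer loop over CPUs, inner loop over events), a
read failure can no longer abort a single event's pass on the spot, so the
first failure is latched into the new per-event 'err' field (added to
struct evsel below) and only reported, processed, and cleared in the second
loop. A simplified self-contained model of that iteration order, using
made-up toy events rather than the real evlist types:

  #define _GNU_SOURCE
  #include <sched.h>
  #include <stdio.h>

  #define NR_CPUS 4

  /* toy stand-in for struct evsel: which CPUs the event runs on, plus err */
  struct toy_event {
  	const char *name;
  	int on_cpu[NR_CPUS];	/* 1 if the event is active on this CPU */
  	int err;
  };

  static int toy_read(struct toy_event *ev, int cpu)
  {
  	printf("read %s on CPU %d\n", ev->name, cpu);
  	return 0;	/* a real read would return -1 on failure */
  }

  int main(void)
  {
  	struct toy_event evs[] = {
  		{ "cycles",     { 1, 1, 1, 1 }, 0 },
  		{ "uncore-ish", { 1, 0, 0, 0 }, 0 },
  	};
  	int nev = sizeof(evs) / sizeof(evs[0]);
  	int cpu, i;

  	for (cpu = 0; cpu < NR_CPUS; cpu++) {	/* outer: CPUs */
  		cpu_set_t mask;

  		CPU_ZERO(&mask);
  		CPU_SET(cpu, &mask);
  		sched_setaffinity(0, sizeof(mask), &mask);	/* one migration per CPU */

  		for (i = 0; i < nev; i++)	/* inner: events on this CPU */
  			if (evs[i].on_cpu[cpu] && !evs[i].err)
  				evs[i].err = toy_read(&evs[i], cpu);
  	}

  	for (i = 0; i < nev; i++) {		/* second pass: report and clear */
  		if (evs[i].err)
  			fprintf(stderr, "failed to read %s\n", evs[i].name);
  		evs[i].err = 0;
  	}
  	return 0;
  }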
--- a/tools/perf/util/evsel.h
+++ b/tools/perf/util/evsel.h
@@ -86,6 +86,7 @@ struct evsel {
 	struct list_head	config_terms;
 	struct bpf_object	*bpf_obj;
 	int			bpf_fd;
+	int			err;
 	bool			auto_merge_stats;
 	bool			merged_stat;
 	const char *		metric_expr;