Commit 8db5cabc authored by Weilin Wang, committed by Arnaldo Carvalho de Melo

perf stat: Fork and launch 'perf record' when 'perf stat' needs to get retire latency value for a metric.

When a retire_latency value is used in a metric formula, evsel forks a
'perf record' process with the "-e" and "-W" options. 'perf record'
collects the required retire_latency values in parallel while 'perf stat'
collects the counting values.

When 'perf stat' stops counting, evsel stops 'perf record' by sending a
SIGTERM signal to the 'perf record' process. The sampled data is then
processed to obtain the retire latency values. An additional thread is
required to synchronize 'perf stat' and 'perf record' because the data is
passed through a pipe.
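
For illustration, the fork/SIGTERM pattern described above boils down to
something like the minimal sketch below. This is not the actual
implementation (that lives in the new tools/perf/util/intel-tpebs.c); the
event string and the use of 'perf record -o -' to feed the pipe are
assumptions made only for this example.

#include <signal.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>

static pid_t sketch_start_perf_record(const char *event, int *out_fd)
{
	int fds[2];
	pid_t pid;

	if (pipe(fds) < 0)
		return -1;

	pid = fork();
	if (pid == 0) {
		/* Child: stream sampled data to the parent through the pipe. */
		char *argv[] = { "perf", "record", "-W", "-e", (char *)event,
				 "-o", "-", NULL };

		close(fds[0]);
		dup2(fds[1], STDOUT_FILENO);
		execvp("perf", argv);
		_exit(127);
	}
	if (pid < 0) {
		close(fds[0]);
		close(fds[1]);
		return -1;
	}
	close(fds[1]);
	*out_fd = fds[0];	/* Parent: a reader thread parses samples from here. */
	return pid;
}

static void sketch_stop_perf_record(pid_t pid)
{
	/* 'perf stat' has stopped counting: tell 'perf record' to stop too. */
	kill(pid, SIGTERM);
	waitpid(pid, NULL, 0);
}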

A retire_latency evsel is not opened by 'perf stat', so no hardware
counter is wasted on it. This commit includes code suggested by Namhyung
to adjust the read size for groups that include retire_latency evsels.
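
For reference, the read-size adjustment follows from the PERF_FORMAT_GROUP
read layout documented in perf_event_open(2). The sketch below is
illustrative only: because a retire_latency evsel is never opened, the
kernel returns one fewer values[] entry than the evlist group membership
would suggest, which is what evsel__group_read_size() and
evsel__group_read_nr_members() in the evsel.c hunk below account for.

#include <linux/types.h>

struct sketch_group_read_format {
	__u64 nr;		/* number of events actually read	*/
	__u64 time_enabled;	/* if PERF_FORMAT_TOTAL_TIME_ENABLED	*/
	__u64 time_running;	/* if PERF_FORMAT_TOTAL_TIME_RUNNING	*/
	struct {
		__u64 value;
		__u64 id;	/* if PERF_FORMAT_ID			*/
		__u64 lost;	/* if PERF_FORMAT_LOST			*/
	} values[];		/* one entry per opened group member	*/
};

/*
 * Example: a 3-member group containing one retire_latency evsel, with
 * read_format = GROUP | ID | TOTAL_TIME_ENABLED | TOTAL_TIME_RUNNING,
 * reads back nr(8) + time_enabled(8) + time_running(8)
 * + 2 * (value(8) + id(8)) = 56 bytes, not the 72 bytes that three
 * opened members would need.
 */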

In the current :R parsing implementation, the parser recognizes events
with the retire_latency modifier and inserts them into the evlist like
normal events. Ideally, counting these events should be avoided.

In this commit, when a retire_latency evsel is read, the retire latency
value computed from the sampled data is stored as its count value. This
sampled retire latency value is then used for metric calculation and the
final event count printout, so no special metric-calculation or
event-printout code is needed for retire_latency events.
Reviewed-by: Namhyung Kim <namhyung@kernel.org>
Signed-off-by: Weilin Wang <weilin.wang@intel.com>
Acked-by: Ian Rogers <irogers@google.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Caleb Biggers <caleb.biggers@intel.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Perry Taylor <perry.taylor@intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Samantha Alt <samantha.alt@intel.com>
Link: https://lore.kernel.org/r/20240720062102.444578-4-weilin.wang@intel.com
[ Squashed the 3rd and 4th commits in the series to keep it building patch by patch ]
[ Constified the 'struct perf_tool' pointer in process_sample_event() ]
[ Use perf_tool__init(&tool, false) to address a segfault I reported and Ian/Weilin diagnosed ]
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
parent a9a4ca57
......@@ -89,6 +89,12 @@ int arch_evlist__cmp(const struct evsel *lhs, const struct evsel *rhs)
return 1;
}
/* Retire latency event should not be group leader. */
if (lhs->retire_lat && !rhs->retire_lat)
return 1;
if (!lhs->retire_lat && rhs->retire_lat)
return -1;
/* Default ordering by insertion index. */
return lhs->core.idx - rhs->core.idx;
}
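
The effect of the comparator can be seen with a standalone toy model
(illustrative only; it uses qsort() instead of perf's evlist sorting): a
retire_latency event is pushed behind its sibling so that it never ends up
as the group leader.

#include <stdio.h>
#include <stdlib.h>

struct toy_evsel {
	int retire_lat;
	int idx;
};

static int toy_cmp(const void *a, const void *b)
{
	const struct toy_evsel *lhs = a, *rhs = b;

	/* Retire latency event should not be group leader. */
	if (lhs->retire_lat && !rhs->retire_lat)
		return 1;
	if (!lhs->retire_lat && rhs->retire_lat)
		return -1;

	/* Default ordering by insertion index. */
	return lhs->idx - rhs->idx;
}

int main(void)
{
	struct toy_evsel evs[] = {
		{ .retire_lat = 1, .idx = 0 },	/* e.g. an event with :R */
		{ .retire_lat = 0, .idx = 1 },	/* a normal event        */
	};

	qsort(evs, 2, sizeof(evs[0]), toy_cmp);
	printf("group leader candidate: idx %d\n", evs[0].idx);	/* prints 1 */
	return 0;
}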
......@@ -70,6 +70,7 @@
#include "util/bpf_counter.h"
#include "util/iostat.h"
#include "util/util.h"
#include "util/intel-tpebs.h"
#include "asm/bug.h"
#include <linux/time64.h>
......@@ -683,6 +684,9 @@ static enum counter_recovery stat_handle_error(struct evsel *counter)
if (child_pid != -1)
kill(child_pid, SIGTERM);
tpebs_delete();
return COUNTER_FATAL;
}
......
......@@ -156,6 +156,7 @@ perf-util-y += clockid.o
perf-util-y += list_sort.o
perf-util-y += mutex.o
perf-util-y += sharded_mutex.o
perf-util-$(CONFIG_X86_64) += intel-tpebs.o
perf-util-$(CONFIG_LIBBPF) += bpf_map.o
perf-util-$(CONFIG_PERF_BPF_SKEL) += bpf_counter.o
......
......@@ -33,6 +33,7 @@
#include "util/bpf-filter.h"
#include "util/stat.h"
#include "util/util.h"
#include "util/intel-tpebs.h"
#include <signal.h>
#include <unistd.h>
#include <sched.h>
......@@ -179,6 +180,7 @@ void evlist__delete(struct evlist *evlist)
if (evlist == NULL)
return;
tpebs_delete();
evlist__free_stats(evlist);
evlist__munmap(evlist);
evlist__close(evlist);
......
......@@ -59,6 +59,7 @@
#include <internal/xyarray.h>
#include <internal/lib.h>
#include <internal/threadmap.h>
#include "util/intel-tpebs.h"
#include <linux/ctype.h>
......@@ -1539,6 +1540,11 @@ static int evsel__read_one(struct evsel *evsel, int cpu_map_idx, int thread)
return perf_evsel__read(&evsel->core, cpu_map_idx, thread, count);
}
static int evsel__read_retire_lat(struct evsel *evsel, int cpu_map_idx, int thread)
{
return tpebs_set_evsel(evsel, cpu_map_idx, thread);
}
static void evsel__set_count(struct evsel *counter, int cpu_map_idx, int thread,
u64 val, u64 ena, u64 run, u64 lost)
{
......@@ -1546,6 +1552,12 @@ static void evsel__set_count(struct evsel *counter, int cpu_map_idx, int thread,
count = perf_counts(counter->counts, cpu_map_idx, thread);
if (counter->retire_lat) {
evsel__read_retire_lat(counter, cpu_map_idx, thread);
perf_counts__set_loaded(counter->counts, cpu_map_idx, thread, true);
return;
}
count->val = val;
count->ena = ena;
count->run = run;
......@@ -1554,6 +1566,60 @@ static void evsel__set_count(struct evsel *counter, int cpu_map_idx, int thread,
perf_counts__set_loaded(counter->counts, cpu_map_idx, thread, true);
}
static bool evsel__group_has_tpebs(struct evsel *leader)
{
struct evsel *evsel;
for_each_group_evsel(evsel, leader) {
if (evsel__is_retire_lat(evsel))
return true;
}
return false;
}
static u64 evsel__group_read_nr_members(struct evsel *leader)
{
u64 nr = leader->core.nr_members;
struct evsel *evsel;
for_each_group_evsel(evsel, leader) {
if (evsel__is_retire_lat(evsel))
nr--;
}
return nr;
}
static u64 evsel__group_read_size(struct evsel *leader)
{
u64 read_format = leader->core.attr.read_format;
int entry = sizeof(u64); /* value */
int size = 0;
int nr = 1;
if (!evsel__group_has_tpebs(leader))
return perf_evsel__read_size(&leader->core);
if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
size += sizeof(u64);
if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
size += sizeof(u64);
if (read_format & PERF_FORMAT_ID)
entry += sizeof(u64);
if (read_format & PERF_FORMAT_LOST)
entry += sizeof(u64);
if (read_format & PERF_FORMAT_GROUP) {
nr = evsel__group_read_nr_members(leader);
size += sizeof(u64);
}
size += entry * nr;
return size;
}
static int evsel__process_group_data(struct evsel *leader, int cpu_map_idx, int thread, u64 *data)
{
u64 read_format = leader->core.attr.read_format;
......@@ -1562,7 +1628,7 @@ static int evsel__process_group_data(struct evsel *leader, int cpu_map_idx, int
nr = *data++;
if (nr != (u64) leader->core.nr_members)
if (nr != evsel__group_read_nr_members(leader))
return -EINVAL;
if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
......@@ -1592,7 +1658,7 @@ static int evsel__read_group(struct evsel *leader, int cpu_map_idx, int thread)
{
struct perf_stat_evsel *ps = leader->stats;
u64 read_format = leader->core.attr.read_format;
int size = perf_evsel__read_size(&leader->core);
int size = evsel__group_read_size(leader);
u64 *data = ps->group_data;
if (!(read_format & PERF_FORMAT_ID))
......@@ -1784,6 +1850,9 @@ int evsel__read_counter(struct evsel *evsel, int cpu_map_idx, int thread)
if (evsel__is_tool(evsel))
return evsel__read_tool(evsel, cpu_map_idx, thread);
if (evsel__is_retire_lat(evsel))
return evsel__read_retire_lat(evsel, cpu_map_idx, thread);
if (evsel->core.attr.read_format & PERF_FORMAT_GROUP)
return evsel__read_group(evsel, cpu_map_idx, thread);
......@@ -2200,6 +2269,9 @@ static int evsel__open_cpu(struct evsel *evsel, struct perf_cpu_map *cpus,
return 0;
}
if (evsel__is_retire_lat(evsel))
return tpebs_start(evsel->evlist);
err = __evsel__prepare_open(evsel, cpus, threads);
if (err)
return err;
......@@ -2392,6 +2464,8 @@ int evsel__open(struct evsel *evsel, struct perf_cpu_map *cpus,
void evsel__close(struct evsel *evsel)
{
if (evsel__is_retire_lat(evsel))
tpebs_delete();
perf_evsel__close(&evsel->core);
perf_evsel__free_id(&evsel->core);
}
......@@ -3357,6 +3431,9 @@ static int store_evsel_ids(struct evsel *evsel, struct evlist *evlist)
{
int cpu_map_idx, thread;
if (evsel__is_retire_lat(evsel))
return 0;
for (cpu_map_idx = 0; cpu_map_idx < xyarray__max_x(evsel->core.fd); cpu_map_idx++) {
for (thread = 0; thread < xyarray__max_y(evsel->core.fd);
thread++) {
......
......@@ -311,6 +311,11 @@ static inline bool evsel__is_tool(const struct evsel *evsel)
return evsel->tool_event != PERF_TOOL_NONE;
}
static inline bool evsel__is_retire_lat(const struct evsel *evsel)
{
return evsel->retire_lat;
}
const char *evsel__group_name(struct evsel *evsel);
int evsel__group_desc(struct evsel *evsel, char *buf, size_t size);
......
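The TPEBS helpers themselves are implemented in the new
tools/perf/util/intel-tpebs.c, whose diff is not shown in this view.
Conceptually, the "read" step described in the commit message reduces to
copying the retire latency aggregated from the recorded samples into the
evsel's counts. Below is a heavily simplified sketch; the aggregation,
reader thread and locking are omitted, and the helper name and parameters
are chosen only for this example.

#include <linux/types.h>

#include "util/counts.h"
#include "util/evsel.h"

static int sketch_set_retire_lat_count(struct evsel *evsel, int cpu_map_idx,
				       int thread, __u64 mean_retire_latency)
{
	struct perf_counts_values *count;

	count = perf_counts(evsel->counts, cpu_map_idx, thread);

	/* The sampled retire latency becomes the event's "count". */
	count->val = mean_retire_latency;

	/*
	 * Mark the count as enabled/running; the exact bookkeeping is up to
	 * the real intel-tpebs.c implementation.
	 */
	count->ena = 1;
	count->run = 1;

	return 0;
}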
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* intel_tpebs.h: Intel TPEBS support
*/
#ifndef INCLUDE__PERF_INTEL_TPEBS_H__
#define INCLUDE__PERF_INTEL_TPEBS_H__
#include "stat.h"
#include "evsel.h"
#ifdef HAVE_ARCH_X86_64_SUPPORT
extern bool tpebs_recording;
int tpebs_start(struct evlist *evsel_list);
void tpebs_delete(void);
int tpebs_set_evsel(struct evsel *evsel, int cpu_map_idx, int thread);
#else
static inline int tpebs_start(struct evlist *evsel_list __maybe_unused)
{
return 0;
}
static inline void tpebs_delete(void) {};
static inline int tpebs_set_evsel(struct evsel *evsel __maybe_unused,
int cpu_map_idx __maybe_unused,
int thread __maybe_unused)
{
return 0;
}
#endif
#endif
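
For orientation, the call sequence for this API mirrors the evsel.c hooks
earlier in the patch: tpebs_start() at open time, tpebs_set_evsel() at
read time, and tpebs_delete() at close time. A condensed, illustrative
sketch follows; the real callers are evsel__open_cpu(),
evsel__read_counter() and evsel__close(), and the wrapper function below
is hypothetical.

#include "util/evlist.h"
#include "util/evsel.h"
#include "util/intel-tpebs.h"

static int sketch_retire_lat_lifecycle(struct evlist *evlist, struct evsel *evsel,
				       int cpu_map_idx, int thread)
{
	int err;

	/* Open: fork 'perf record' instead of opening a hardware counter. */
	err = tpebs_start(evlist);
	if (err)
		return err;

	/* Read: pull the sampled retire latency into the evsel's counts. */
	err = tpebs_set_evsel(evsel, cpu_map_idx, thread);

	/* Close: SIGTERM 'perf record' and release the TPEBS state. */
	tpebs_delete();
	return err;
}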