Commit fa853c4b authored by Song Liu's avatar Song Liu Committed by Arnaldo Carvalho de Melo

perf stat: Enable counting events for BPF programs

Introduce 'perf stat -b' option, which counts events for BPF programs, like:

  [root@localhost ~]# ~/perf stat -e ref-cycles,cycles -b 254 -I 1000
     1.487903822            115,200      ref-cycles
     1.487903822             86,012      cycles
     2.489147029             80,560      ref-cycles
     2.489147029             73,784      cycles
     3.490341825             60,720      ref-cycles
     3.490341825             37,797      cycles
     4.491540887             37,120      ref-cycles
     4.491540887             31,963      cycles

The example above counts 'cycles' and 'ref-cycles' of BPF program of id
254.  This is similar to bpftool-prog-profile command, but more
flexible.

'perf stat -b' creates per-cpu perf_event and loads fentry/fexit BPF
programs (monitor-progs) to the target BPF program (target-prog). The
monitor-progs read perf_event before and after the target-prog, and
aggregate the difference in a BPF map. Then the user space reads data
from these maps.

A new 'struct bpf_counter' is introduced to provide a common interface
that uses BPF programs/maps to count perf events.

Committer notes:

Removed all but bpf_counter.h includes from evsel.h, not needed at all.

Also BPF map lookups for PERCPU_ARRAYs need to have as its value receive
buffer passed to the kernel libbpf_num_possible_cpus() entries, not
evsel__nr_cpus(evsel), as the former uses
/sys/devices/system/cpu/possible while the later uses
/sys/devices/system/cpu/online, which may be less than the 'possible'
number making the bpf map lookup overwrite memory and cause hard to
debug memory corruption.

We need to continue using evsel__nr_cpus(evsel) when accessing the
perf_counts array tho, not to overwrite another are of memory :-)
Signed-off-by: default avatarSong Liu <songliubraving@fb.com>
Tested-by: default avatarArnaldo Carvalho de Melo <acme@redhat.com>
Link: https://lore.kernel.org/lkml/20210120163031.GU12699@kernel.org/Acked-by: default avatarNamhyung Kim <namhyung@kernel.org>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: kernel-team@fb.com
Link: http://lore.kernel.org/lkml/20201229214214.3413833-4-songliubraving@fb.comSigned-off-by: default avatarArnaldo Carvalho de Melo <acme@redhat.com>
parent fbcdaa19
...@@ -75,6 +75,24 @@ report:: ...@@ -75,6 +75,24 @@ report::
--tid=<tid>:: --tid=<tid>::
stat events on existing thread id (comma separated list) stat events on existing thread id (comma separated list)
-b::
--bpf-prog::
stat events on existing bpf program id (comma separated list),
requiring root rights. bpftool-prog could be used to find program
id all bpf programs in the system. For example:
# bpftool prog | head -n 1
17247: tracepoint name sys_enter tag 192d548b9d754067 gpl
# perf stat -e cycles,instructions --bpf-prog 17247 --timeout 1000
Performance counter stats for 'BPF program(s) 17247':
85,967 cycles
28,982 instructions # 0.34 insn per cycle
1.102235068 seconds time elapsed
ifdef::HAVE_LIBPFM[] ifdef::HAVE_LIBPFM[]
--pfm-events events:: --pfm-events events::
Select a PMU event using libpfm4 syntax (see http://perfmon2.sf.net) Select a PMU event using libpfm4 syntax (see http://perfmon2.sf.net)
......
...@@ -1015,7 +1015,7 @@ python-clean: ...@@ -1015,7 +1015,7 @@ python-clean:
SKEL_OUT := $(abspath $(OUTPUT)util/bpf_skel) SKEL_OUT := $(abspath $(OUTPUT)util/bpf_skel)
SKEL_TMP_OUT := $(abspath $(SKEL_OUT)/.tmp) SKEL_TMP_OUT := $(abspath $(SKEL_OUT)/.tmp)
SKELETONS := SKELETONS := $(SKEL_OUT)/bpf_prog_profiler.skel.h
ifdef BUILD_BPF_SKEL ifdef BUILD_BPF_SKEL
BPFTOOL := $(SKEL_TMP_OUT)/bootstrap/bpftool BPFTOOL := $(SKEL_TMP_OUT)/bootstrap/bpftool
......
...@@ -67,6 +67,7 @@ ...@@ -67,6 +67,7 @@
#include "util/top.h" #include "util/top.h"
#include "util/affinity.h" #include "util/affinity.h"
#include "util/pfm.h" #include "util/pfm.h"
#include "util/bpf_counter.h"
#include "asm/bug.h" #include "asm/bug.h"
#include <linux/time64.h> #include <linux/time64.h>
...@@ -409,12 +410,32 @@ static int read_affinity_counters(struct timespec *rs) ...@@ -409,12 +410,32 @@ static int read_affinity_counters(struct timespec *rs)
return 0; return 0;
} }
static int read_bpf_map_counters(void)
{
struct evsel *counter;
int err;
evlist__for_each_entry(evsel_list, counter) {
err = bpf_counter__read(counter);
if (err)
return err;
}
return 0;
}
static void read_counters(struct timespec *rs) static void read_counters(struct timespec *rs)
{ {
struct evsel *counter; struct evsel *counter;
int err;
if (!stat_config.stop_read_counter && (read_affinity_counters(rs) < 0)) if (!stat_config.stop_read_counter) {
return; if (target__has_bpf(&target))
err = read_bpf_map_counters();
else
err = read_affinity_counters(rs);
if (err < 0)
return;
}
evlist__for_each_entry(evsel_list, counter) { evlist__for_each_entry(evsel_list, counter) {
if (counter->err) if (counter->err)
...@@ -496,11 +517,22 @@ static bool handle_interval(unsigned int interval, int *times) ...@@ -496,11 +517,22 @@ static bool handle_interval(unsigned int interval, int *times)
return false; return false;
} }
static void enable_counters(void) static int enable_counters(void)
{ {
struct evsel *evsel;
int err;
if (target__has_bpf(&target)) {
evlist__for_each_entry(evsel_list, evsel) {
err = bpf_counter__enable(evsel);
if (err)
return err;
}
}
if (stat_config.initial_delay < 0) { if (stat_config.initial_delay < 0) {
pr_info(EVLIST_DISABLED_MSG); pr_info(EVLIST_DISABLED_MSG);
return; return 0;
} }
if (stat_config.initial_delay > 0) { if (stat_config.initial_delay > 0) {
...@@ -518,6 +550,7 @@ static void enable_counters(void) ...@@ -518,6 +550,7 @@ static void enable_counters(void)
if (stat_config.initial_delay > 0) if (stat_config.initial_delay > 0)
pr_info(EVLIST_ENABLED_MSG); pr_info(EVLIST_ENABLED_MSG);
} }
return 0;
} }
static void disable_counters(void) static void disable_counters(void)
...@@ -720,7 +753,7 @@ static int __run_perf_stat(int argc, const char **argv, int run_idx) ...@@ -720,7 +753,7 @@ static int __run_perf_stat(int argc, const char **argv, int run_idx)
const bool forks = (argc > 0); const bool forks = (argc > 0);
bool is_pipe = STAT_RECORD ? perf_stat.data.is_pipe : false; bool is_pipe = STAT_RECORD ? perf_stat.data.is_pipe : false;
struct affinity affinity; struct affinity affinity;
int i, cpu; int i, cpu, err;
bool second_pass = false; bool second_pass = false;
if (forks) { if (forks) {
...@@ -737,6 +770,13 @@ static int __run_perf_stat(int argc, const char **argv, int run_idx) ...@@ -737,6 +770,13 @@ static int __run_perf_stat(int argc, const char **argv, int run_idx)
if (affinity__setup(&affinity) < 0) if (affinity__setup(&affinity) < 0)
return -1; return -1;
if (target__has_bpf(&target)) {
evlist__for_each_entry(evsel_list, counter) {
if (bpf_counter__load(counter, &target))
return -1;
}
}
evlist__for_each_cpu (evsel_list, i, cpu) { evlist__for_each_cpu (evsel_list, i, cpu) {
affinity__set(&affinity, cpu); affinity__set(&affinity, cpu);
...@@ -850,7 +890,7 @@ static int __run_perf_stat(int argc, const char **argv, int run_idx) ...@@ -850,7 +890,7 @@ static int __run_perf_stat(int argc, const char **argv, int run_idx)
} }
if (STAT_RECORD) { if (STAT_RECORD) {
int err, fd = perf_data__fd(&perf_stat.data); int fd = perf_data__fd(&perf_stat.data);
if (is_pipe) { if (is_pipe) {
err = perf_header__write_pipe(perf_data__fd(&perf_stat.data)); err = perf_header__write_pipe(perf_data__fd(&perf_stat.data));
...@@ -876,7 +916,9 @@ static int __run_perf_stat(int argc, const char **argv, int run_idx) ...@@ -876,7 +916,9 @@ static int __run_perf_stat(int argc, const char **argv, int run_idx)
if (forks) { if (forks) {
evlist__start_workload(evsel_list); evlist__start_workload(evsel_list);
enable_counters(); err = enable_counters();
if (err)
return -1;
if (interval || timeout || evlist__ctlfd_initialized(evsel_list)) if (interval || timeout || evlist__ctlfd_initialized(evsel_list))
status = dispatch_events(forks, timeout, interval, &times); status = dispatch_events(forks, timeout, interval, &times);
...@@ -895,7 +937,9 @@ static int __run_perf_stat(int argc, const char **argv, int run_idx) ...@@ -895,7 +937,9 @@ static int __run_perf_stat(int argc, const char **argv, int run_idx)
if (WIFSIGNALED(status)) if (WIFSIGNALED(status))
psignal(WTERMSIG(status), argv[0]); psignal(WTERMSIG(status), argv[0]);
} else { } else {
enable_counters(); err = enable_counters();
if (err)
return -1;
status = dispatch_events(forks, timeout, interval, &times); status = dispatch_events(forks, timeout, interval, &times);
} }
...@@ -1085,6 +1129,10 @@ static struct option stat_options[] = { ...@@ -1085,6 +1129,10 @@ static struct option stat_options[] = {
"stat events on existing process id"), "stat events on existing process id"),
OPT_STRING('t', "tid", &target.tid, "tid", OPT_STRING('t', "tid", &target.tid, "tid",
"stat events on existing thread id"), "stat events on existing thread id"),
#ifdef HAVE_BPF_SKEL
OPT_STRING('b', "bpf-prog", &target.bpf_str, "bpf-prog-id",
"stat events on existing bpf program id"),
#endif
OPT_BOOLEAN('a', "all-cpus", &target.system_wide, OPT_BOOLEAN('a', "all-cpus", &target.system_wide,
"system-wide collection from all CPUs"), "system-wide collection from all CPUs"),
OPT_BOOLEAN('g', "group", &group, OPT_BOOLEAN('g', "group", &group,
...@@ -2064,11 +2112,12 @@ int cmd_stat(int argc, const char **argv) ...@@ -2064,11 +2112,12 @@ int cmd_stat(int argc, const char **argv)
"perf stat [<options>] [<command>]", "perf stat [<options>] [<command>]",
NULL NULL
}; };
int status = -EINVAL, run_idx; int status = -EINVAL, run_idx, err;
const char *mode; const char *mode;
FILE *output = stderr; FILE *output = stderr;
unsigned int interval, timeout; unsigned int interval, timeout;
const char * const stat_subcommands[] = { "record", "report" }; const char * const stat_subcommands[] = { "record", "report" };
char errbuf[BUFSIZ];
setlocale(LC_ALL, ""); setlocale(LC_ALL, "");
...@@ -2179,6 +2228,12 @@ int cmd_stat(int argc, const char **argv) ...@@ -2179,6 +2228,12 @@ int cmd_stat(int argc, const char **argv)
} else if (big_num_opt == 0) /* User passed --no-big-num */ } else if (big_num_opt == 0) /* User passed --no-big-num */
stat_config.big_num = false; stat_config.big_num = false;
err = target__validate(&target);
if (err) {
target__strerror(&target, err, errbuf, BUFSIZ);
pr_warning("%s\n", errbuf);
}
setup_system_wide(argc); setup_system_wide(argc);
/* /*
...@@ -2252,8 +2307,6 @@ int cmd_stat(int argc, const char **argv) ...@@ -2252,8 +2307,6 @@ int cmd_stat(int argc, const char **argv)
} }
} }
target__validate(&target);
if ((stat_config.aggr_mode == AGGR_THREAD) && (target.system_wide)) if ((stat_config.aggr_mode == AGGR_THREAD) && (target.system_wide))
target.per_thread = true; target.per_thread = true;
...@@ -2384,9 +2437,10 @@ int cmd_stat(int argc, const char **argv) ...@@ -2384,9 +2437,10 @@ int cmd_stat(int argc, const char **argv)
* tools remain -acme * tools remain -acme
*/ */
int fd = perf_data__fd(&perf_stat.data); int fd = perf_data__fd(&perf_stat.data);
int err = perf_event__synthesize_kernel_mmap((void *)&perf_stat,
process_synthesized_event, err = perf_event__synthesize_kernel_mmap((void *)&perf_stat,
&perf_stat.session->machines.host); process_synthesized_event,
&perf_stat.session->machines.host);
if (err) { if (err) {
pr_warning("Couldn't synthesize the kernel mmap record, harmless, " pr_warning("Couldn't synthesize the kernel mmap record, harmless, "
"older tools may produce warnings about this file\n."); "older tools may produce warnings about this file\n.");
......
...@@ -135,6 +135,7 @@ perf-y += clockid.o ...@@ -135,6 +135,7 @@ perf-y += clockid.o
perf-$(CONFIG_LIBBPF) += bpf-loader.o perf-$(CONFIG_LIBBPF) += bpf-loader.o
perf-$(CONFIG_LIBBPF) += bpf_map.o perf-$(CONFIG_LIBBPF) += bpf_map.o
perf-$(CONFIG_PERF_BPF_SKEL) += bpf_counter.o
perf-$(CONFIG_BPF_PROLOGUE) += bpf-prologue.o perf-$(CONFIG_BPF_PROLOGUE) += bpf-prologue.o
perf-$(CONFIG_LIBELF) += symbol-elf.o perf-$(CONFIG_LIBELF) += symbol-elf.o
perf-$(CONFIG_LIBELF) += probe-file.o perf-$(CONFIG_LIBELF) += probe-file.o
......
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2019 Facebook */
#include <assert.h>
#include <limits.h>
#include <unistd.h>
#include <sys/time.h>
#include <sys/resource.h>
#include <linux/err.h>
#include <linux/zalloc.h>
#include <bpf/bpf.h>
#include <bpf/btf.h>
#include <bpf/libbpf.h>
#include "bpf_counter.h"
#include "counts.h"
#include "debug.h"
#include "evsel.h"
#include "target.h"
#include "bpf_skel/bpf_prog_profiler.skel.h"
static inline void *u64_to_ptr(__u64 ptr)
{
return (void *)(unsigned long)ptr;
}
static void set_max_rlimit(void)
{
struct rlimit rinf = { RLIM_INFINITY, RLIM_INFINITY };
setrlimit(RLIMIT_MEMLOCK, &rinf);
}
static struct bpf_counter *bpf_counter_alloc(void)
{
struct bpf_counter *counter;
counter = zalloc(sizeof(*counter));
if (counter)
INIT_LIST_HEAD(&counter->list);
return counter;
}
static int bpf_program_profiler__destroy(struct evsel *evsel)
{
struct bpf_counter *counter, *tmp;
list_for_each_entry_safe(counter, tmp,
&evsel->bpf_counter_list, list) {
list_del_init(&counter->list);
bpf_prog_profiler_bpf__destroy(counter->skel);
free(counter);
}
assert(list_empty(&evsel->bpf_counter_list));
return 0;
}
static char *bpf_target_prog_name(int tgt_fd)
{
struct bpf_prog_info_linear *info_linear;
struct bpf_func_info *func_info;
const struct btf_type *t;
char *name = NULL;
struct btf *btf;
info_linear = bpf_program__get_prog_info_linear(
tgt_fd, 1UL << BPF_PROG_INFO_FUNC_INFO);
if (IS_ERR_OR_NULL(info_linear)) {
pr_debug("failed to get info_linear for prog FD %d\n", tgt_fd);
return NULL;
}
if (info_linear->info.btf_id == 0 ||
btf__get_from_id(info_linear->info.btf_id, &btf)) {
pr_debug("prog FD %d doesn't have valid btf\n", tgt_fd);
goto out;
}
func_info = u64_to_ptr(info_linear->info.func_info);
t = btf__type_by_id(btf, func_info[0].type_id);
if (!t) {
pr_debug("btf %d doesn't have type %d\n",
info_linear->info.btf_id, func_info[0].type_id);
goto out;
}
name = strdup(btf__name_by_offset(btf, t->name_off));
out:
free(info_linear);
return name;
}
static int bpf_program_profiler_load_one(struct evsel *evsel, u32 prog_id)
{
struct bpf_prog_profiler_bpf *skel;
struct bpf_counter *counter;
struct bpf_program *prog;
char *prog_name;
int prog_fd;
int err;
prog_fd = bpf_prog_get_fd_by_id(prog_id);
if (prog_fd < 0) {
pr_err("Failed to open fd for bpf prog %u\n", prog_id);
return -1;
}
counter = bpf_counter_alloc();
if (!counter) {
close(prog_fd);
return -1;
}
skel = bpf_prog_profiler_bpf__open();
if (!skel) {
pr_err("Failed to open bpf skeleton\n");
goto err_out;
}
skel->rodata->num_cpu = evsel__nr_cpus(evsel);
bpf_map__resize(skel->maps.events, evsel__nr_cpus(evsel));
bpf_map__resize(skel->maps.fentry_readings, 1);
bpf_map__resize(skel->maps.accum_readings, 1);
prog_name = bpf_target_prog_name(prog_fd);
if (!prog_name) {
pr_err("Failed to get program name for bpf prog %u. Does it have BTF?\n", prog_id);
goto err_out;
}
bpf_object__for_each_program(prog, skel->obj) {
err = bpf_program__set_attach_target(prog, prog_fd, prog_name);
if (err) {
pr_err("bpf_program__set_attach_target failed.\n"
"Does bpf prog %u have BTF?\n", prog_id);
goto err_out;
}
}
set_max_rlimit();
err = bpf_prog_profiler_bpf__load(skel);
if (err) {
pr_err("bpf_prog_profiler_bpf__load failed\n");
goto err_out;
}
assert(skel != NULL);
counter->skel = skel;
list_add(&counter->list, &evsel->bpf_counter_list);
close(prog_fd);
return 0;
err_out:
bpf_prog_profiler_bpf__destroy(skel);
free(counter);
close(prog_fd);
return -1;
}
static int bpf_program_profiler__load(struct evsel *evsel, struct target *target)
{
char *bpf_str, *bpf_str_, *tok, *saveptr = NULL, *p;
u32 prog_id;
int ret;
bpf_str_ = bpf_str = strdup(target->bpf_str);
if (!bpf_str)
return -1;
while ((tok = strtok_r(bpf_str, ",", &saveptr)) != NULL) {
prog_id = strtoul(tok, &p, 10);
if (prog_id == 0 || prog_id == UINT_MAX ||
(*p != '\0' && *p != ',')) {
pr_err("Failed to parse bpf prog ids %s\n",
target->bpf_str);
return -1;
}
ret = bpf_program_profiler_load_one(evsel, prog_id);
if (ret) {
bpf_program_profiler__destroy(evsel);
free(bpf_str_);
return -1;
}
bpf_str = NULL;
}
free(bpf_str_);
return 0;
}
static int bpf_program_profiler__enable(struct evsel *evsel)
{
struct bpf_counter *counter;
int ret;
list_for_each_entry(counter, &evsel->bpf_counter_list, list) {
assert(counter->skel != NULL);
ret = bpf_prog_profiler_bpf__attach(counter->skel);
if (ret) {
bpf_program_profiler__destroy(evsel);
return ret;
}
}
return 0;
}
static int bpf_program_profiler__read(struct evsel *evsel)
{
// perf_cpu_map uses /sys/devices/system/cpu/online
int num_cpu = evsel__nr_cpus(evsel);
// BPF_MAP_TYPE_PERCPU_ARRAY uses /sys/devices/system/cpu/possible
// Sometimes possible > online, like on a Ryzen 3900X that has 24
// threads but its possible showed 0-31 -acme
int num_cpu_bpf = libbpf_num_possible_cpus();
struct bpf_perf_event_value values[num_cpu_bpf];
struct bpf_counter *counter;
int reading_map_fd;
__u32 key = 0;
int err, cpu;
if (list_empty(&evsel->bpf_counter_list))
return -EAGAIN;
for (cpu = 0; cpu < num_cpu; cpu++) {
perf_counts(evsel->counts, cpu, 0)->val = 0;
perf_counts(evsel->counts, cpu, 0)->ena = 0;
perf_counts(evsel->counts, cpu, 0)->run = 0;
}
list_for_each_entry(counter, &evsel->bpf_counter_list, list) {
struct bpf_prog_profiler_bpf *skel = counter->skel;
assert(skel != NULL);
reading_map_fd = bpf_map__fd(skel->maps.accum_readings);
err = bpf_map_lookup_elem(reading_map_fd, &key, values);
if (err) {
pr_err("failed to read value\n");
return err;
}
for (cpu = 0; cpu < num_cpu; cpu++) {
perf_counts(evsel->counts, cpu, 0)->val += values[cpu].counter;
perf_counts(evsel->counts, cpu, 0)->ena += values[cpu].enabled;
perf_counts(evsel->counts, cpu, 0)->run += values[cpu].running;
}
}
return 0;
}
static int bpf_program_profiler__install_pe(struct evsel *evsel, int cpu,
int fd)
{
struct bpf_prog_profiler_bpf *skel;
struct bpf_counter *counter;
int ret;
list_for_each_entry(counter, &evsel->bpf_counter_list, list) {
skel = counter->skel;
assert(skel != NULL);
ret = bpf_map_update_elem(bpf_map__fd(skel->maps.events),
&cpu, &fd, BPF_ANY);
if (ret)
return ret;
}
return 0;
}
struct bpf_counter_ops bpf_program_profiler_ops = {
.load = bpf_program_profiler__load,
.enable = bpf_program_profiler__enable,
.read = bpf_program_profiler__read,
.destroy = bpf_program_profiler__destroy,
.install_pe = bpf_program_profiler__install_pe,
};
int bpf_counter__install_pe(struct evsel *evsel, int cpu, int fd)
{
if (list_empty(&evsel->bpf_counter_list))
return 0;
return evsel->bpf_counter_ops->install_pe(evsel, cpu, fd);
}
int bpf_counter__load(struct evsel *evsel, struct target *target)
{
if (target__has_bpf(target))
evsel->bpf_counter_ops = &bpf_program_profiler_ops;
if (evsel->bpf_counter_ops)
return evsel->bpf_counter_ops->load(evsel, target);
return 0;
}
int bpf_counter__enable(struct evsel *evsel)
{
if (list_empty(&evsel->bpf_counter_list))
return 0;
return evsel->bpf_counter_ops->enable(evsel);
}
int bpf_counter__read(struct evsel *evsel)
{
if (list_empty(&evsel->bpf_counter_list))
return -EAGAIN;
return evsel->bpf_counter_ops->read(evsel);
}
void bpf_counter__destroy(struct evsel *evsel)
{
if (list_empty(&evsel->bpf_counter_list))
return;
evsel->bpf_counter_ops->destroy(evsel);
evsel->bpf_counter_ops = NULL;
}
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __PERF_BPF_COUNTER_H
#define __PERF_BPF_COUNTER_H 1
#include <linux/list.h>
struct evsel;
struct target;
struct bpf_counter;
typedef int (*bpf_counter_evsel_op)(struct evsel *evsel);
typedef int (*bpf_counter_evsel_target_op)(struct evsel *evsel,
struct target *target);
typedef int (*bpf_counter_evsel_install_pe_op)(struct evsel *evsel,
int cpu,
int fd);
struct bpf_counter_ops {
bpf_counter_evsel_target_op load;
bpf_counter_evsel_op enable;
bpf_counter_evsel_op read;
bpf_counter_evsel_op destroy;
bpf_counter_evsel_install_pe_op install_pe;
};
struct bpf_counter {
void *skel;
struct list_head list;
};
#ifdef HAVE_BPF_SKEL
int bpf_counter__load(struct evsel *evsel, struct target *target);
int bpf_counter__enable(struct evsel *evsel);
int bpf_counter__read(struct evsel *evsel);
void bpf_counter__destroy(struct evsel *evsel);
int bpf_counter__install_pe(struct evsel *evsel, int cpu, int fd);
#else /* HAVE_BPF_SKEL */
#include<linux/err.h>
static inline int bpf_counter__load(struct evsel *evsel __maybe_unused,
struct target *target __maybe_unused)
{
return 0;
}
static inline int bpf_counter__enable(struct evsel *evsel __maybe_unused)
{
return 0;
}
static inline int bpf_counter__read(struct evsel *evsel __maybe_unused)
{
return -EAGAIN;
}
static inline void bpf_counter__destroy(struct evsel *evsel __maybe_unused)
{
}
static inline int bpf_counter__install_pe(struct evsel *evsel __maybe_unused,
int cpu __maybe_unused,
int fd __maybe_unused)
{
return 0;
}
#endif /* HAVE_BPF_SKEL */
#endif /* __PERF_BPF_COUNTER_H */
// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
// Copyright (c) 2020 Facebook
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
/* map of perf event fds, num_cpu * num_metric entries */
struct {
__uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
__uint(key_size, sizeof(__u32));
__uint(value_size, sizeof(int));
} events SEC(".maps");
/* readings at fentry */
struct {
__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
__uint(key_size, sizeof(__u32));
__uint(value_size, sizeof(struct bpf_perf_event_value));
__uint(max_entries, 1);
} fentry_readings SEC(".maps");
/* accumulated readings */
struct {
__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
__uint(key_size, sizeof(__u32));
__uint(value_size, sizeof(struct bpf_perf_event_value));
__uint(max_entries, 1);
} accum_readings SEC(".maps");
const volatile __u32 num_cpu = 1;
SEC("fentry/XXX")
int BPF_PROG(fentry_XXX)
{
__u32 key = bpf_get_smp_processor_id();
struct bpf_perf_event_value *ptr;
__u32 zero = 0;
long err;
/* look up before reading, to reduce error */
ptr = bpf_map_lookup_elem(&fentry_readings, &zero);
if (!ptr)
return 0;
err = bpf_perf_event_read_value(&events, key, ptr, sizeof(*ptr));
if (err)
return 0;
return 0;
}
static inline void
fexit_update_maps(struct bpf_perf_event_value *after)
{
struct bpf_perf_event_value *before, diff, *accum;
__u32 zero = 0;
before = bpf_map_lookup_elem(&fentry_readings, &zero);
/* only account samples with a valid fentry_reading */
if (before && before->counter) {
struct bpf_perf_event_value *accum;
diff.counter = after->counter - before->counter;
diff.enabled = after->enabled - before->enabled;
diff.running = after->running - before->running;
accum = bpf_map_lookup_elem(&accum_readings, &zero);
if (accum) {
accum->counter += diff.counter;
accum->enabled += diff.enabled;
accum->running += diff.running;
}
}
}
SEC("fexit/XXX")
int BPF_PROG(fexit_XXX)
{
struct bpf_perf_event_value reading;
__u32 cpu = bpf_get_smp_processor_id();
__u32 one = 1, zero = 0;
int err;
/* read all events before updating the maps, to reduce error */
err = bpf_perf_event_read_value(&events, cpu, &reading, sizeof(reading));
if (err)
return 0;
fexit_update_maps(&reading);
return 0;
}
char LICENSE[] SEC("license") = "Dual BSD/GPL";
...@@ -25,6 +25,7 @@ ...@@ -25,6 +25,7 @@
#include <stdlib.h> #include <stdlib.h>
#include <perf/evsel.h> #include <perf/evsel.h>
#include "asm/bug.h" #include "asm/bug.h"
#include "bpf_counter.h"
#include "callchain.h" #include "callchain.h"
#include "cgroup.h" #include "cgroup.h"
#include "counts.h" #include "counts.h"
...@@ -247,6 +248,7 @@ void evsel__init(struct evsel *evsel, ...@@ -247,6 +248,7 @@ void evsel__init(struct evsel *evsel,
evsel->bpf_obj = NULL; evsel->bpf_obj = NULL;
evsel->bpf_fd = -1; evsel->bpf_fd = -1;
INIT_LIST_HEAD(&evsel->config_terms); INIT_LIST_HEAD(&evsel->config_terms);
INIT_LIST_HEAD(&evsel->bpf_counter_list);
perf_evsel__object.init(evsel); perf_evsel__object.init(evsel);
evsel->sample_size = __evsel__sample_size(attr->sample_type); evsel->sample_size = __evsel__sample_size(attr->sample_type);
evsel__calc_id_pos(evsel); evsel__calc_id_pos(evsel);
...@@ -1368,6 +1370,7 @@ void evsel__exit(struct evsel *evsel) ...@@ -1368,6 +1370,7 @@ void evsel__exit(struct evsel *evsel)
{ {
assert(list_empty(&evsel->core.node)); assert(list_empty(&evsel->core.node));
assert(evsel->evlist == NULL); assert(evsel->evlist == NULL);
bpf_counter__destroy(evsel);
evsel__free_counts(evsel); evsel__free_counts(evsel);
perf_evsel__free_fd(&evsel->core); perf_evsel__free_fd(&evsel->core);
perf_evsel__free_id(&evsel->core); perf_evsel__free_id(&evsel->core);
...@@ -1783,6 +1786,8 @@ static int evsel__open_cpu(struct evsel *evsel, struct perf_cpu_map *cpus, ...@@ -1783,6 +1786,8 @@ static int evsel__open_cpu(struct evsel *evsel, struct perf_cpu_map *cpus,
FD(evsel, cpu, thread) = fd; FD(evsel, cpu, thread) = fd;
bpf_counter__install_pe(evsel, cpu, fd);
if (unlikely(test_attr__enabled)) { if (unlikely(test_attr__enabled)) {
test_attr__open(&evsel->core.attr, pid, cpus->map[cpu], test_attr__open(&evsel->core.attr, pid, cpus->map[cpu],
fd, group_fd, flags); fd, group_fd, flags);
......
...@@ -17,6 +17,8 @@ struct cgroup; ...@@ -17,6 +17,8 @@ struct cgroup;
struct perf_counts; struct perf_counts;
struct perf_stat_evsel; struct perf_stat_evsel;
union perf_event; union perf_event;
struct bpf_counter_ops;
struct target;
typedef int (evsel__sb_cb_t)(union perf_event *event, void *data); typedef int (evsel__sb_cb_t)(union perf_event *event, void *data);
...@@ -127,6 +129,8 @@ struct evsel { ...@@ -127,6 +129,8 @@ struct evsel {
* See also evsel__has_callchain(). * See also evsel__has_callchain().
*/ */
__u64 synth_sample_type; __u64 synth_sample_type;
struct list_head bpf_counter_list;
struct bpf_counter_ops *bpf_counter_ops;
}; };
struct perf_missing_features { struct perf_missing_features {
...@@ -424,4 +428,5 @@ static inline bool evsel__is_dummy_event(struct evsel *evsel) ...@@ -424,4 +428,5 @@ static inline bool evsel__is_dummy_event(struct evsel *evsel)
struct perf_env *evsel__env(struct evsel *evsel); struct perf_env *evsel__env(struct evsel *evsel);
int evsel__store_ids(struct evsel *evsel, struct evlist *evlist); int evsel__store_ids(struct evsel *evsel, struct evlist *evlist);
#endif /* __PERF_EVSEL_H */ #endif /* __PERF_EVSEL_H */
...@@ -79,6 +79,27 @@ int metricgroup__copy_metric_events(struct evlist *evlist, struct cgroup *cgrp, ...@@ -79,6 +79,27 @@ int metricgroup__copy_metric_events(struct evlist *evlist, struct cgroup *cgrp,
return 0; return 0;
} }
/*
* XXX: All these evsel destructors need some better mechanism, like a linked
* list of destructors registered when the relevant code indeed is used instead
* of having more and more calls in perf_evsel__delete(). -- acme
*
* For now, add some more:
*
* Not to drag the BPF bandwagon...
*/
void bpf_counter__destroy(struct evsel *evsel);
int bpf_counter__install_pe(struct evsel *evsel, int cpu, int fd);
void bpf_counter__destroy(struct evsel *evsel __maybe_unused)
{
}
int bpf_counter__install_pe(struct evsel *evsel __maybe_unused, int cpu __maybe_unused, int fd __maybe_unused)
{
return 0;
}
/* /*
* Support debug printing even though util/debug.c is not linked. That means * Support debug printing even though util/debug.c is not linked. That means
* implementing 'verbose' and 'eprintf'. * implementing 'verbose' and 'eprintf'.
......
...@@ -1045,7 +1045,9 @@ static void print_header(struct perf_stat_config *config, ...@@ -1045,7 +1045,9 @@ static void print_header(struct perf_stat_config *config,
if (!config->csv_output) { if (!config->csv_output) {
fprintf(output, "\n"); fprintf(output, "\n");
fprintf(output, " Performance counter stats for "); fprintf(output, " Performance counter stats for ");
if (_target->system_wide) if (_target->bpf_str)
fprintf(output, "\'BPF program(s) %s", _target->bpf_str);
else if (_target->system_wide)
fprintf(output, "\'system wide"); fprintf(output, "\'system wide");
else if (_target->cpu_list) else if (_target->cpu_list)
fprintf(output, "\'CPU(s) %s", _target->cpu_list); fprintf(output, "\'CPU(s) %s", _target->cpu_list);
......
...@@ -527,7 +527,7 @@ int create_perf_stat_counter(struct evsel *evsel, ...@@ -527,7 +527,7 @@ int create_perf_stat_counter(struct evsel *evsel,
if (leader->core.nr_members > 1) if (leader->core.nr_members > 1)
attr->read_format |= PERF_FORMAT_ID|PERF_FORMAT_GROUP; attr->read_format |= PERF_FORMAT_ID|PERF_FORMAT_GROUP;
attr->inherit = !config->no_inherit; attr->inherit = !config->no_inherit && list_empty(&evsel->bpf_counter_list);
/* /*
* Some events get initialized with sample_(period/type) set, * Some events get initialized with sample_(period/type) set,
......
...@@ -56,6 +56,34 @@ enum target_errno target__validate(struct target *target) ...@@ -56,6 +56,34 @@ enum target_errno target__validate(struct target *target)
ret = TARGET_ERRNO__UID_OVERRIDE_SYSTEM; ret = TARGET_ERRNO__UID_OVERRIDE_SYSTEM;
} }
/* BPF and CPU are mutually exclusive */
if (target->bpf_str && target->cpu_list) {
target->cpu_list = NULL;
if (ret == TARGET_ERRNO__SUCCESS)
ret = TARGET_ERRNO__BPF_OVERRIDE_CPU;
}
/* BPF and PID/TID are mutually exclusive */
if (target->bpf_str && target->tid) {
target->tid = NULL;
if (ret == TARGET_ERRNO__SUCCESS)
ret = TARGET_ERRNO__BPF_OVERRIDE_PID;
}
/* BPF and UID are mutually exclusive */
if (target->bpf_str && target->uid_str) {
target->uid_str = NULL;
if (ret == TARGET_ERRNO__SUCCESS)
ret = TARGET_ERRNO__BPF_OVERRIDE_UID;
}
/* BPF and THREADS are mutually exclusive */
if (target->bpf_str && target->per_thread) {
target->per_thread = false;
if (ret == TARGET_ERRNO__SUCCESS)
ret = TARGET_ERRNO__BPF_OVERRIDE_THREAD;
}
/* THREAD and SYSTEM/CPU are mutually exclusive */ /* THREAD and SYSTEM/CPU are mutually exclusive */
if (target->per_thread && (target->system_wide || target->cpu_list)) { if (target->per_thread && (target->system_wide || target->cpu_list)) {
target->per_thread = false; target->per_thread = false;
...@@ -109,6 +137,10 @@ static const char *target__error_str[] = { ...@@ -109,6 +137,10 @@ static const char *target__error_str[] = {
"PID/TID switch overriding SYSTEM", "PID/TID switch overriding SYSTEM",
"UID switch overriding SYSTEM", "UID switch overriding SYSTEM",
"SYSTEM/CPU switch overriding PER-THREAD", "SYSTEM/CPU switch overriding PER-THREAD",
"BPF switch overriding CPU",
"BPF switch overriding PID/TID",
"BPF switch overriding UID",
"BPF switch overriding THREAD",
"Invalid User: %s", "Invalid User: %s",
"Problems obtaining information for user %s", "Problems obtaining information for user %s",
}; };
...@@ -134,7 +166,7 @@ int target__strerror(struct target *target, int errnum, ...@@ -134,7 +166,7 @@ int target__strerror(struct target *target, int errnum,
switch (errnum) { switch (errnum) {
case TARGET_ERRNO__PID_OVERRIDE_CPU ... case TARGET_ERRNO__PID_OVERRIDE_CPU ...
TARGET_ERRNO__SYSTEM_OVERRIDE_THREAD: TARGET_ERRNO__BPF_OVERRIDE_THREAD:
snprintf(buf, buflen, "%s", msg); snprintf(buf, buflen, "%s", msg);
break; break;
......
...@@ -10,6 +10,7 @@ struct target { ...@@ -10,6 +10,7 @@ struct target {
const char *tid; const char *tid;
const char *cpu_list; const char *cpu_list;
const char *uid_str; const char *uid_str;
const char *bpf_str;
uid_t uid; uid_t uid;
bool system_wide; bool system_wide;
bool uses_mmap; bool uses_mmap;
...@@ -36,6 +37,10 @@ enum target_errno { ...@@ -36,6 +37,10 @@ enum target_errno {
TARGET_ERRNO__PID_OVERRIDE_SYSTEM, TARGET_ERRNO__PID_OVERRIDE_SYSTEM,
TARGET_ERRNO__UID_OVERRIDE_SYSTEM, TARGET_ERRNO__UID_OVERRIDE_SYSTEM,
TARGET_ERRNO__SYSTEM_OVERRIDE_THREAD, TARGET_ERRNO__SYSTEM_OVERRIDE_THREAD,
TARGET_ERRNO__BPF_OVERRIDE_CPU,
TARGET_ERRNO__BPF_OVERRIDE_PID,
TARGET_ERRNO__BPF_OVERRIDE_UID,
TARGET_ERRNO__BPF_OVERRIDE_THREAD,
/* for target__parse_uid() */ /* for target__parse_uid() */
TARGET_ERRNO__INVALID_UID, TARGET_ERRNO__INVALID_UID,
...@@ -59,6 +64,11 @@ static inline bool target__has_cpu(struct target *target) ...@@ -59,6 +64,11 @@ static inline bool target__has_cpu(struct target *target)
return target->system_wide || target->cpu_list; return target->system_wide || target->cpu_list;
} }
static inline bool target__has_bpf(struct target *target)
{
return target->bpf_str;
}
static inline bool target__none(struct target *target) static inline bool target__none(struct target *target)
{ {
return !target__has_task(target) && !target__has_cpu(target); return !target__has_task(target) && !target__has_cpu(target);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment