Commit d2850ce0 authored by Daniel Borkmann

Merge branch 'bpf-libbpf-perf-rb-api'

Andrii Nakryiko says:

====================
This patchset adds a high-level API for setting up and polling perf buffers
associated with a BPF_MAP_TYPE_PERF_EVENT_ARRAY map. Details of the APIs are
described in the corresponding commit.

Patch #1 adds a set of APIs to set up and work with a perf buffer.
Patch #2 enhances libbpf to support auto-sizing of PERF_EVENT_ARRAY maps.
Patch #3 adds a test.
Patch #4 converts bpftool map event_pipe to the new API.
Patch #5 updates the README to mention the perf_buffer_ prefix.

v6->v7:
- __x64_ syscall prefix (Yonghong);
v5->v6:
- fix C99 for loop variable initialization usage (Yonghong);
v4->v5:
- initialize perf_buffer_raw_opts in bpftool map event_pipe (Jakub);
- add perf_buffer_ to README;
v3->v4:
- fixed bpftool event_pipe cmd error handling (Jakub);
v2->v3:
- added perf_buffer__new_raw for more low-level control;
- converted bpftool map event_pipe to new API (Daniel);
- fixed bug with error handling in create_maps (Song);
v1->v2:
- add auto-sizing of PERF_EVENT_ARRAY maps;
====================
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
parents c3ec002e cd07a95f
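
For orientation, a minimal consumer of the new high-level API could look like
the sketch below. It is illustrative only and not part of the patchset:
handle_sample/handle_lost are hypothetical callbacks and map_fd is assumed to
be the fd of an already-loaded BPF_MAP_TYPE_PERF_EVENT_ARRAY map; the
perf_buffer__* functions and perf_buffer_opts fields are the ones declared in
the libbpf.h hunks further down.

#include <stdio.h>
#include <bpf/libbpf.h>

/* hypothetical callback; signature matches perf_buffer_sample_fn */
static void handle_sample(void *ctx, int cpu, void *data, __u32 size)
{
	printf("got %u-byte sample on CPU %d\n", size, cpu);
}

/* hypothetical callback; signature matches perf_buffer_lost_fn */
static void handle_lost(void *ctx, int cpu, __u64 cnt)
{
	fprintf(stderr, "lost %llu samples on CPU %d\n",
		(unsigned long long)cnt, cpu);
}

/* map_fd: fd of a BPF_MAP_TYPE_PERF_EVENT_ARRAY map from a loaded object */
static int consume_samples(int map_fd)
{
	struct perf_buffer_opts pb_opts = {};
	struct perf_buffer *pb;
	int err;

	pb_opts.sample_cb = handle_sample;
	pb_opts.lost_cb = handle_lost;
	pb = perf_buffer__new(map_fd, 8 /* pages per per-CPU ring */, &pb_opts);
	err = libbpf_get_error(pb);
	if (err)
		return err;

	/* callbacks are invoked from within perf_buffer__poll() */
	while ((err = perf_buffer__poll(pb, 100 /* timeout, ms */)) >= 0)
		;

	perf_buffer__free(pb);
	return err;
}

The raw API used by the bpftool conversion below offers the same polling loop
but with caller-supplied perf_event_attr, CPUs and map keys.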
@@ -28,7 +28,7 @@
 
 #define MMAP_PAGE_CNT	16
 
-static bool stop;
+static volatile bool stop;
 
 struct event_ring_info {
 	int fd;
@@ -44,32 +44,44 @@ struct perf_event_sample {
 	unsigned char data[];
 };
 
+struct perf_event_lost {
+	struct perf_event_header header;
+	__u64 id;
+	__u64 lost;
+};
+
 static void int_exit(int signo)
 {
 	fprintf(stderr, "Stopping...\n");
 	stop = true;
 }
 
+struct event_pipe_ctx {
+	bool all_cpus;
+	int cpu;
+	int idx;
+};
+
 static enum bpf_perf_event_ret
-print_bpf_output(struct perf_event_header *event, void *private_data)
+print_bpf_output(void *private_data, int cpu, struct perf_event_header *event)
 {
-	struct perf_event_sample *e = container_of(event, struct perf_event_sample,
+	struct perf_event_sample *e = container_of(event,
+						   struct perf_event_sample,
 						   header);
-	struct event_ring_info *ring = private_data;
-	struct {
-		struct perf_event_header header;
-		__u64 id;
-		__u64 lost;
-	} *lost = (typeof(lost))event;
+	struct perf_event_lost *lost = container_of(event,
+						    struct perf_event_lost,
+						    header);
+	struct event_pipe_ctx *ctx = private_data;
+	int idx = ctx->all_cpus ? cpu : ctx->idx;
 
 	if (json_output) {
 		jsonw_start_object(json_wtr);
 		jsonw_name(json_wtr, "type");
 		jsonw_uint(json_wtr, e->header.type);
 		jsonw_name(json_wtr, "cpu");
-		jsonw_uint(json_wtr, ring->cpu);
+		jsonw_uint(json_wtr, cpu);
 		jsonw_name(json_wtr, "index");
-		jsonw_uint(json_wtr, ring->key);
+		jsonw_uint(json_wtr, idx);
 		if (e->header.type == PERF_RECORD_SAMPLE) {
 			jsonw_name(json_wtr, "timestamp");
 			jsonw_uint(json_wtr, e->time);
@@ -89,7 +101,7 @@ print_bpf_output(struct perf_event_header *event, void *private_data)
 	if (e->header.type == PERF_RECORD_SAMPLE) {
 		printf("== @%lld.%09lld CPU: %d index: %d =====\n",
 		       e->time / 1000000000ULL, e->time % 1000000000ULL,
-		       ring->cpu, ring->key);
+		       cpu, idx);
 		fprint_hex(stdout, e->data, e->size, " ");
 		printf("\n");
 	} else if (e->header.type == PERF_RECORD_LOST) {
@@ -103,87 +115,25 @@ print_bpf_output(struct perf_event_header *event, void *private_data)
 	return LIBBPF_PERF_EVENT_CONT;
 }
 
-static void
-perf_event_read(struct event_ring_info *ring, void **buf, size_t *buf_len)
-{
-	enum bpf_perf_event_ret ret;
-
-	ret = bpf_perf_event_read_simple(ring->mem,
-					 MMAP_PAGE_CNT * get_page_size(),
-					 get_page_size(), buf, buf_len,
-					 print_bpf_output, ring);
-	if (ret != LIBBPF_PERF_EVENT_CONT) {
-		fprintf(stderr, "perf read loop failed with %d\n", ret);
-		stop = true;
-	}
-}
-
-static int perf_mmap_size(void)
-{
-	return get_page_size() * (MMAP_PAGE_CNT + 1);
-}
-
-static void *perf_event_mmap(int fd)
-{
-	int mmap_size = perf_mmap_size();
-	void *base;
-
-	base = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
-	if (base == MAP_FAILED) {
-		p_err("event mmap failed: %s\n", strerror(errno));
-		return NULL;
-	}
-
-	return base;
-}
-
-static void perf_event_unmap(void *mem)
-{
-	if (munmap(mem, perf_mmap_size()))
-		fprintf(stderr, "Can't unmap ring memory!\n");
-}
-
-static int bpf_perf_event_open(int map_fd, int key, int cpu)
+int do_event_pipe(int argc, char **argv)
 {
-	struct perf_event_attr attr = {
+	struct perf_event_attr perf_attr = {
 		.sample_type = PERF_SAMPLE_RAW | PERF_SAMPLE_TIME,
 		.type = PERF_TYPE_SOFTWARE,
 		.config = PERF_COUNT_SW_BPF_OUTPUT,
 		.sample_period = 1,
 		.wakeup_events = 1,
 	};
-	int pmu_fd;
-
-	pmu_fd = sys_perf_event_open(&attr, -1, cpu, -1, 0);
-	if (pmu_fd < 0) {
-		p_err("failed to open perf event %d for CPU %d", key, cpu);
-		return -1;
-	}
-
-	if (bpf_map_update_elem(map_fd, &key, &pmu_fd, BPF_ANY)) {
-		p_err("failed to update map for event %d for CPU %d", key, cpu);
-		goto err_close;
-	}
-
-	if (ioctl(pmu_fd, PERF_EVENT_IOC_ENABLE, 0)) {
-		p_err("failed to enable event %d for CPU %d", key, cpu);
-		goto err_close;
-	}
-
-	return pmu_fd;
-
-err_close:
-	close(pmu_fd);
-	return -1;
-}
-
-int do_event_pipe(int argc, char **argv)
-{
-	int i, nfds, map_fd, index = -1, cpu = -1;
 	struct bpf_map_info map_info = {};
-	struct event_ring_info *rings;
-	size_t tmp_buf_sz = 0;
-	void *tmp_buf = NULL;
-	struct pollfd *pfds;
+	struct perf_buffer_raw_opts opts = {};
+	struct event_pipe_ctx ctx = {
+		.all_cpus = true,
+		.cpu = -1,
+		.idx = -1,
+	};
+	struct perf_buffer *pb;
 	__u32 map_info_len;
-	bool do_all = true;
+	int err, map_fd;
 
 	map_info_len = sizeof(map_info);
 	map_fd = map_parse_fd_and_info(&argc, &argv, &map_info, &map_info_len);
@@ -205,7 +155,7 @@ int do_event_pipe(int argc, char **argv)
 			char *endptr;
 
 			NEXT_ARG();
-			cpu = strtoul(*argv, &endptr, 0);
+			ctx.cpu = strtoul(*argv, &endptr, 0);
 			if (*endptr) {
 				p_err("can't parse %s as CPU ID", **argv);
 				goto err_close_map;
@@ -216,7 +166,7 @@ int do_event_pipe(int argc, char **argv)
 			char *endptr;
 
 			NEXT_ARG();
-			index = strtoul(*argv, &endptr, 0);
+			ctx.idx = strtoul(*argv, &endptr, 0);
 			if (*endptr) {
 				p_err("can't parse %s as index", **argv);
 				goto err_close_map;
@@ -228,45 +178,32 @@ int do_event_pipe(int argc, char **argv)
 			goto err_close_map;
 		}
 
-		do_all = false;
+		ctx.all_cpus = false;
 	}
 
-	if (!do_all) {
-		if (index == -1 || cpu == -1) {
+	if (!ctx.all_cpus) {
+		if (ctx.idx == -1 || ctx.cpu == -1) {
 			p_err("cpu and index must be specified together");
 			goto err_close_map;
 		}
-
-		nfds = 1;
 	} else {
-		nfds = min(get_possible_cpus(), map_info.max_entries);
-		cpu = 0;
-		index = 0;
+		ctx.cpu = 0;
+		ctx.idx = 0;
 	}
 
-	rings = calloc(nfds, sizeof(rings[0]));
-	if (!rings)
+	opts.attr = &perf_attr;
+	opts.event_cb = print_bpf_output;
+	opts.ctx = &ctx;
+	opts.cpu_cnt = ctx.all_cpus ? 0 : 1;
+	opts.cpus = &ctx.cpu;
+	opts.map_keys = &ctx.idx;
+	pb = perf_buffer__new_raw(map_fd, MMAP_PAGE_CNT, &opts);
+	err = libbpf_get_error(pb);
+	if (err) {
+		p_err("failed to create perf buffer: %s (%d)",
+		      strerror(err), err);
 		goto err_close_map;
-
-	pfds = calloc(nfds, sizeof(pfds[0]));
-	if (!pfds)
-		goto err_free_rings;
-
-	for (i = 0; i < nfds; i++) {
-		rings[i].cpu = cpu + i;
-		rings[i].key = index + i;
-
-		rings[i].fd = bpf_perf_event_open(map_fd, rings[i].key,
-						  rings[i].cpu);
-		if (rings[i].fd < 0)
-			goto err_close_fds_prev;
-
-		rings[i].mem = perf_event_mmap(rings[i].fd);
-		if (!rings[i].mem)
-			goto err_close_fds_current;
-
-		pfds[i].fd = rings[i].fd;
-		pfds[i].events = POLLIN;
 	}
 
 	signal(SIGINT, int_exit);
@@ -277,34 +214,24 @@ int do_event_pipe(int argc, char **argv)
 		jsonw_start_array(json_wtr);
 
 	while (!stop) {
-		poll(pfds, nfds, 200);
-		for (i = 0; i < nfds; i++)
-			perf_event_read(&rings[i], &tmp_buf, &tmp_buf_sz);
+		err = perf_buffer__poll(pb, 200);
+		if (err < 0 && err != -EINTR) {
+			p_err("perf buffer polling failed: %s (%d)",
+			      strerror(err), err);
+			goto err_close_pb;
+		}
 	}
-	free(tmp_buf);
 
 	if (json_output)
 		jsonw_end_array(json_wtr);
 
-	for (i = 0; i < nfds; i++) {
-		perf_event_unmap(rings[i].mem);
-		close(rings[i].fd);
-	}
-	free(pfds);
-	free(rings);
+	perf_buffer__free(pb);
 	close(map_fd);
 
 	return 0;
 
-err_close_fds_prev:
-	while (i--) {
-		perf_event_unmap(rings[i].mem);
-err_close_fds_current:
-		close(rings[i].fd);
-	}
-	free(pfds);
-err_free_rings:
-	free(rings);
+err_close_pb:
+	perf_buffer__free(pb);
 err_close_map:
 	close(map_fd);
 	return -1;
@@ -9,7 +9,8 @@ described here. It's recommended to follow these conventions whenever a
 new function or type is added to keep libbpf API clean and consistent.
 
 All types and functions provided by libbpf API should have one of the
-following prefixes: ``bpf_``, ``btf_``, ``libbpf_``, ``xsk_``.
+following prefixes: ``bpf_``, ``btf_``, ``libbpf_``, ``xsk_``,
+``perf_buffer_``.
 
 System call wrappers
 --------------------
This diff is collapsed.
@@ -358,6 +358,26 @@ LIBBPF_API int bpf_prog_load(const char *file, enum bpf_prog_type type,
 LIBBPF_API int bpf_set_link_xdp_fd(int ifindex, int fd, __u32 flags);
 LIBBPF_API int bpf_get_link_xdp_id(int ifindex, __u32 *prog_id, __u32 flags);
 
+struct perf_buffer;
+
+typedef void (*perf_buffer_sample_fn)(void *ctx, int cpu,
+				      void *data, __u32 size);
+typedef void (*perf_buffer_lost_fn)(void *ctx, int cpu, __u64 cnt);
+
+/* common use perf buffer options */
+struct perf_buffer_opts {
+	/* if specified, sample_cb is called for each sample */
+	perf_buffer_sample_fn sample_cb;
+	/* if specified, lost_cb is called for each batch of lost samples */
+	perf_buffer_lost_fn lost_cb;
+	/* ctx is provided to sample_cb and lost_cb */
+	void *ctx;
+};
+
+LIBBPF_API struct perf_buffer *
+perf_buffer__new(int map_fd, size_t page_cnt,
+		 const struct perf_buffer_opts *opts);
+
 enum bpf_perf_event_ret {
 	LIBBPF_PERF_EVENT_DONE	= 0,
 	LIBBPF_PERF_EVENT_ERROR	= -1,
@@ -365,6 +385,35 @@ enum bpf_perf_event_ret {
 };
 
 struct perf_event_header;
+
+typedef enum bpf_perf_event_ret
+(*perf_buffer_event_fn)(void *ctx, int cpu, struct perf_event_header *event);
+
+/* raw perf buffer options, giving most power and control */
+struct perf_buffer_raw_opts {
+	/* perf event attrs passed directly into perf_event_open() */
+	struct perf_event_attr *attr;
+	/* raw event callback */
+	perf_buffer_event_fn event_cb;
+	/* ctx is provided to event_cb */
+	void *ctx;
+	/* if cpu_cnt == 0, open all on all possible CPUs (up to the number of
+	 * max_entries of given PERF_EVENT_ARRAY map)
+	 */
+	int cpu_cnt;
+	/* if cpu_cnt > 0, cpus is an array of CPUs to open ring buffers on */
+	int *cpus;
+	/* if cpu_cnt > 0, map_keys specify map keys to set per-CPU FDs for */
+	int *map_keys;
+};
+
+LIBBPF_API struct perf_buffer *
+perf_buffer__new_raw(int map_fd, size_t page_cnt,
+		     const struct perf_buffer_raw_opts *opts);
+
+LIBBPF_API void perf_buffer__free(struct perf_buffer *pb);
+LIBBPF_API int perf_buffer__poll(struct perf_buffer *pb, int timeout_ms);
+
 typedef enum bpf_perf_event_ret
 	(*bpf_perf_event_print_t)(struct perf_event_header *hdr,
 				  void *private_data);
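
The bpftool conversion above is the in-tree user of this raw API. As an
additional hedged sketch of the cpu_cnt/cpus/map_keys semantics, opening a
single ring for one CPU and one map slot might look as follows;
open_one_ring and handle_event are hypothetical names, not part of this
series:

#include <linux/perf_event.h>
#include <bpf/libbpf.h>

/* hypothetical raw event callback; must match perf_buffer_event_fn */
static enum bpf_perf_event_ret
handle_event(void *ctx, int cpu, struct perf_event_header *event)
{
	return LIBBPF_PERF_EVENT_CONT;
}

/* illustrative: one ring buffer for a single CPU and map slot */
static struct perf_buffer *open_one_ring(int map_fd, int cpu, int map_key)
{
	struct perf_event_attr attr = {
		.sample_type = PERF_SAMPLE_RAW,
		.type = PERF_TYPE_SOFTWARE,
		.config = PERF_COUNT_SW_BPF_OUTPUT,
		.sample_period = 1,
		.wakeup_events = 1,
	};
	struct perf_buffer_raw_opts raw_opts = {
		.attr = &attr,
		.event_cb = handle_event,
		.cpu_cnt = 1,	/* 0 would mean "all possible CPUs" */
		.cpus = &cpu,
		.map_keys = &map_key,
	};

	return perf_buffer__new_raw(map_fd, 16 /* pages */, &raw_opts);
}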
@@ -179,4 +179,8 @@ LIBBPF_0.0.4 {
 		btf_dump__new;
 		btf__parse_elf;
 		libbpf_num_possible_cpus;
+		perf_buffer__free;
+		perf_buffer__new;
+		perf_buffer__new_raw;
+		perf_buffer__poll;
 } LIBBPF_0.0.3;
// SPDX-License-Identifier: GPL-2.0
#define _GNU_SOURCE
#include <pthread.h>
#include <sched.h>
#include <sys/socket.h>
#include <test_progs.h>
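
/* on x86-64, syscall kprobes must use the arch-specific __x64_ wrapper
 * name (the prefix change noted in the v6->v7 changelog above)
 */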
#ifdef __x86_64__
#define SYS_KPROBE_NAME "__x64_sys_nanosleep"
#else
#define SYS_KPROBE_NAME "sys_nanosleep"
#endif
static void on_sample(void *ctx, int cpu, void *data, __u32 size)
{
	int cpu_data = *(int *)data, duration = 0;
	cpu_set_t *cpu_seen = ctx;

	if (cpu_data != cpu)
		CHECK(cpu_data != cpu, "check_cpu_data",
		      "cpu_data %d != cpu %d\n", cpu_data, cpu);

	CPU_SET(cpu, cpu_seen);
}

void test_perf_buffer(void)
{
	int err, prog_fd, nr_cpus, i, duration = 0;
	const char *prog_name = "kprobe/sys_nanosleep";
	const char *file = "./test_perf_buffer.o";
	struct perf_buffer_opts pb_opts = {};
	struct bpf_map *perf_buf_map;
	cpu_set_t cpu_set, cpu_seen;
	struct bpf_program *prog;
	struct bpf_object *obj;
	struct perf_buffer *pb;
	struct bpf_link *link;

	nr_cpus = libbpf_num_possible_cpus();
	if (CHECK(nr_cpus < 0, "nr_cpus", "err %d\n", nr_cpus))
		return;

	/* load program */
	err = bpf_prog_load(file, BPF_PROG_TYPE_KPROBE, &obj, &prog_fd);
	if (CHECK(err, "obj_load", "err %d errno %d\n", err, errno))
		return;

	prog = bpf_object__find_program_by_title(obj, prog_name);
	if (CHECK(!prog, "find_probe", "prog '%s' not found\n", prog_name))
		goto out_close;

	/* load map */
	perf_buf_map = bpf_object__find_map_by_name(obj, "perf_buf_map");
	if (CHECK(!perf_buf_map, "find_perf_buf_map", "not found\n"))
		goto out_close;

	/* attach kprobe */
	link = bpf_program__attach_kprobe(prog, false /* retprobe */,
					  SYS_KPROBE_NAME);
	if (CHECK(IS_ERR(link), "attach_kprobe", "err %ld\n", PTR_ERR(link)))
		goto out_close;

	/* set up perf buffer */
	pb_opts.sample_cb = on_sample;
	pb_opts.ctx = &cpu_seen;
	pb = perf_buffer__new(bpf_map__fd(perf_buf_map), 1, &pb_opts);
	if (CHECK(IS_ERR(pb), "perf_buf__new", "err %ld\n", PTR_ERR(pb)))
		goto out_detach;

	/* trigger kprobe on every CPU */
	CPU_ZERO(&cpu_seen);
	for (i = 0; i < nr_cpus; i++) {
		CPU_ZERO(&cpu_set);
		CPU_SET(i, &cpu_set);

		err = pthread_setaffinity_np(pthread_self(), sizeof(cpu_set),
					     &cpu_set);
		if (err && CHECK(err, "set_affinity", "cpu #%d, err %d\n",
				 i, err))
			goto out_detach;

		usleep(1);
	}

	/* read perf buffer */
	err = perf_buffer__poll(pb, 100);
	if (CHECK(err < 0, "perf_buffer__poll", "err %d\n", err))
		goto out_free_pb;

	if (CHECK(CPU_COUNT(&cpu_seen) != nr_cpus, "seen_cpu_cnt",
		  "expect %d, seen %d\n", nr_cpus, CPU_COUNT(&cpu_seen)))
		goto out_free_pb;

out_free_pb:
	perf_buffer__free(pb);
out_detach:
	bpf_link__destroy(link);
out_close:
	bpf_object__close(obj);
}
// SPDX-License-Identifier: GPL-2.0
// Copyright (c) 2019 Facebook
#include <linux/ptrace.h>
#include <linux/bpf.h>
#include "bpf_helpers.h"
struct {
	__uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
	__uint(key_size, sizeof(int));
	__uint(value_size, sizeof(int));
} perf_buf_map SEC(".maps");

SEC("kprobe/sys_nanosleep")
int handle_sys_nanosleep_entry(struct pt_regs *ctx)
{
int cpu = bpf_get_smp_processor_id();
bpf_perf_event_output(ctx, &perf_buf_map, BPF_F_CURRENT_CPU,
&cpu, sizeof(cpu));
return 0;
}
char _license[] SEC("license") = "GPL";
__u32 _version SEC("version") = 1;