Commit a84880ef authored by Daniel Borkmann

Merge branch 'bpf-perf-rb-libbpf'

Jakub Kicinski says:

====================
This series started out as a follow-up to the bpftool perf event dumping
patches.

As suggested by Daniel, patch 1 makes use of PERF_SAMPLE_TIME to simplify
the code and improve the accuracy of timestamps.

The remaining patches move the perf event loop into libbpf, as suggested
by Alexei.  One user of this new function is bpftool, which links with
libbpf nicely; the other, unfortunately, is in samples/bpf.  These
patches therefore make samples/bpf link against the full libbpf.a (not
just a handful of its objects).  Once we have the full power of libbpf
at our disposal, we can convert some of the XDP samples to use the
libbpf loader instead of bpf_load.c.  My understanding is that this is
the desired direction, at least for networking code.
====================
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
parents cb9c28ef be5bca44
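
For orientation before the diff: a minimal sketch of how a caller might
drive the new ring-buffer reader this series adds to libbpf.  The callback
shape and the helper signature are taken from the diff below; the
perf_event_open()/mmap() setup and the ring_mem/ring_size names are
hypothetical.

#include <linux/perf_event.h>
#include <stdio.h>
#include <stdlib.h>
#include "bpf/libbpf.h"

/* Called once per event in the mmap'ed ring; returning
 * LIBBPF_PERF_EVENT_CONT keeps the read loop going.
 */
static enum bpf_perf_event_ret handle_event(void *event, void *priv)
{
	struct perf_event_header *hdr = event;

	if (hdr->type == PERF_RECORD_SAMPLE)
		printf("sample of %u bytes\n", hdr->size);
	return LIBBPF_PERF_EVENT_CONT;
}

/* ring_mem, ring_size and page_size come from the caller's
 * perf_event_open() + mmap() code (not shown).
 */
static int drain_ring(void *ring_mem, unsigned long ring_size,
		      unsigned long page_size)
{
	void *buf = NULL;	/* scratch space for events that wrap */
	size_t buf_len = 0;
	int ret;

	ret = bpf_perf_event_read_simple(ring_mem, ring_size, page_size,
					 &buf, &buf_len, handle_event, NULL);
	free(buf);
	return ret;
}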
@@ -49,7 +49,7 @@ hostprogs-y += xdpsock
hostprogs-y += xdp_fwd
# Libbpf dependencies
LIBBPF := ../../tools/lib/bpf/bpf.o ../../tools/lib/bpf/nlattr.o
LIBBPF := ../../tools/lib/bpf/libbpf.a
CGROUP_HELPERS := ../../tools/testing/selftests/bpf/cgroup_helpers.o
TRACE_HELPERS := ../../tools/testing/selftests/bpf/trace_helpers.o
@@ -74,14 +74,14 @@ offwaketime-objs := bpf_load.o $(LIBBPF) offwaketime_user.o $(TRACE_HELPERS)
spintest-objs := bpf_load.o $(LIBBPF) spintest_user.o $(TRACE_HELPERS)
map_perf_test-objs := bpf_load.o $(LIBBPF) map_perf_test_user.o
test_overhead-objs := bpf_load.o $(LIBBPF) test_overhead_user.o
test_cgrp2_array_pin-objs := $(LIBBPF) test_cgrp2_array_pin.o
test_cgrp2_attach-objs := $(LIBBPF) test_cgrp2_attach.o
test_cgrp2_attach2-objs := $(LIBBPF) test_cgrp2_attach2.o $(CGROUP_HELPERS)
test_cgrp2_sock-objs := $(LIBBPF) test_cgrp2_sock.o
test_cgrp2_array_pin-objs := test_cgrp2_array_pin.o $(LIBBPF)
test_cgrp2_attach-objs := test_cgrp2_attach.o $(LIBBPF)
test_cgrp2_attach2-objs := test_cgrp2_attach2.o $(LIBBPF) $(CGROUP_HELPERS)
test_cgrp2_sock-objs := test_cgrp2_sock.o $(LIBBPF)
test_cgrp2_sock2-objs := bpf_load.o $(LIBBPF) test_cgrp2_sock2.o
xdp1-objs := bpf_load.o $(LIBBPF) xdp1_user.o
xdp1-objs := xdp1_user.o $(LIBBPF)
# reuse xdp1 source intentionally
xdp2-objs := bpf_load.o $(LIBBPF) xdp1_user.o
xdp2-objs := xdp1_user.o $(LIBBPF)
xdp_router_ipv4-objs := bpf_load.o $(LIBBPF) xdp_router_ipv4_user.o
test_current_task_under_cgroup-objs := bpf_load.o $(LIBBPF) $(CGROUP_HELPERS) \
test_current_task_under_cgroup_user.o
@@ -91,15 +91,15 @@ tc_l2_redirect-objs := bpf_load.o $(LIBBPF) tc_l2_redirect_user.o
lwt_len_hist-objs := bpf_load.o $(LIBBPF) lwt_len_hist_user.o
xdp_tx_iptunnel-objs := bpf_load.o $(LIBBPF) xdp_tx_iptunnel_user.o
test_map_in_map-objs := bpf_load.o $(LIBBPF) test_map_in_map_user.o
per_socket_stats_example-objs := $(LIBBPF) cookie_uid_helper_example.o
per_socket_stats_example-objs := cookie_uid_helper_example.o $(LIBBPF)
xdp_redirect-objs := bpf_load.o $(LIBBPF) xdp_redirect_user.o
xdp_redirect_map-objs := bpf_load.o $(LIBBPF) xdp_redirect_map_user.o
xdp_redirect_cpu-objs := bpf_load.o $(LIBBPF) xdp_redirect_cpu_user.o
xdp_monitor-objs := bpf_load.o $(LIBBPF) xdp_monitor_user.o
xdp_rxq_info-objs := bpf_load.o $(LIBBPF) xdp_rxq_info_user.o
xdp_rxq_info-objs := xdp_rxq_info_user.o $(LIBBPF)
syscall_tp-objs := bpf_load.o $(LIBBPF) syscall_tp_user.o
cpustat-objs := bpf_load.o $(LIBBPF) cpustat_user.o
xdp_adjust_tail-objs := bpf_load.o $(LIBBPF) xdp_adjust_tail_user.o
xdp_adjust_tail-objs := xdp_adjust_tail_user.o $(LIBBPF)
xdpsock-objs := bpf_load.o $(LIBBPF) xdpsock_user.o
xdp_fwd-objs := bpf_load.o $(LIBBPF) xdp_fwd_user.o
@@ -165,6 +165,16 @@ HOSTCFLAGS += -I$(srctree)/tools/lib/ -I$(srctree)/tools/include
HOSTCFLAGS += -I$(srctree)/tools/perf
HOSTCFLAGS_bpf_load.o += -I$(objtree)/usr/include -Wno-unused-variable
HOSTCFLAGS_trace_helpers.o += -I$(srctree)/tools/lib/bpf/
HOSTCFLAGS_trace_output_user.o += -I$(srctree)/tools/lib/bpf/
HOSTCFLAGS_offwaketime_user.o += -I$(srctree)/tools/lib/bpf/
HOSTCFLAGS_spintest_user.o += -I$(srctree)/tools/lib/bpf/
HOSTCFLAGS_trace_event_user.o += -I$(srctree)/tools/lib/bpf/
HOSTCFLAGS_sampleip_user.o += -I$(srctree)/tools/lib/bpf/
HOSTLOADLIBES_test_lru_dist += -lelf
HOSTLOADLIBES_sock_example += -lelf
HOSTLOADLIBES_fds_example += -lelf
HOSTLOADLIBES_sockex1 += -lelf
HOSTLOADLIBES_sockex2 += -lelf
@@ -176,6 +186,10 @@ HOSTLOADLIBES_tracex4 += -lelf -lrt
HOSTLOADLIBES_tracex5 += -lelf
HOSTLOADLIBES_tracex6 += -lelf
HOSTLOADLIBES_tracex7 += -lelf
HOSTLOADLIBES_test_cgrp2_array_pin += -lelf
HOSTLOADLIBES_test_cgrp2_attach += -lelf
HOSTLOADLIBES_test_cgrp2_attach2 += -lelf
HOSTLOADLIBES_test_cgrp2_sock += -lelf
HOSTLOADLIBES_test_cgrp2_sock2 += -lelf
HOSTLOADLIBES_load_sock_ops += -lelf
HOSTLOADLIBES_test_probe_write_user += -lelf
@@ -195,6 +209,7 @@ HOSTLOADLIBES_tc_l2_redirect += -l elf
HOSTLOADLIBES_lwt_len_hist += -l elf
HOSTLOADLIBES_xdp_tx_iptunnel += -lelf
HOSTLOADLIBES_test_map_in_map += -lelf
HOSTLOADLIBES_per_socket_stats_example += -lelf
HOSTLOADLIBES_xdp_redirect += -lelf
HOSTLOADLIBES_xdp_redirect_map += -lelf
HOSTLOADLIBES_xdp_redirect_cpu += -lelf
@@ -226,7 +241,7 @@ clean:
@rm -f *~
$(LIBBPF): FORCE
$(MAKE) -C $(dir $@) $(notdir $@)
$(MAKE) -C $(dir $@)
$(obj)/syscall_nrs.s: $(src)/syscall_nrs.c
$(call if_changed_dep,cc_s_c)
......
@@ -420,7 +420,7 @@ static int load_elf_maps_section(struct bpf_map_data *maps, int maps_shndx,
/* Keeping compatible with ELF maps section changes
* ------------------------------------------------
* The program size of struct bpf_map_def is known by loader
* The program size of struct bpf_load_map_def is known by loader
* code, but struct stored in ELF file can be different.
*
* Unfortunately sym[i].st_size is zero. To calculate the
@@ -429,7 +429,7 @@ static int load_elf_maps_section(struct bpf_map_data *maps, int maps_shndx,
* symbols.
*/
map_sz_elf = data_maps->d_size / nr_maps;
map_sz_copy = sizeof(struct bpf_map_def);
map_sz_copy = sizeof(struct bpf_load_map_def);
if (map_sz_elf < map_sz_copy) {
/*
* Backward compat, loading older ELF file with
@@ -448,8 +448,8 @@ static int load_elf_maps_section(struct bpf_map_data *maps, int maps_shndx,
/* Memcpy relevant part of ELF maps data to loader maps */
for (i = 0; i < nr_maps; i++) {
struct bpf_load_map_def *def;
unsigned char *addr, *end;
struct bpf_map_def *def;
const char *map_name;
size_t offset;
@@ -464,9 +464,9 @@ static int load_elf_maps_section(struct bpf_map_data *maps, int maps_shndx,
/* Symbol value is offset into ELF maps section data area */
offset = sym[i].st_value;
def = (struct bpf_map_def *)(data_maps->d_buf + offset);
def = (struct bpf_load_map_def *)(data_maps->d_buf + offset);
maps[i].elf_offset = offset;
memset(&maps[i].def, 0, sizeof(struct bpf_map_def));
memset(&maps[i].def, 0, sizeof(struct bpf_load_map_def));
memcpy(&maps[i].def, def, map_sz_copy);
/* Verify no newer features were requested */
......
@@ -7,7 +7,7 @@
#define MAX_MAPS 32
#define MAX_PROGS 32
struct bpf_map_def {
struct bpf_load_map_def {
unsigned int type;
unsigned int key_size;
unsigned int value_size;
@@ -21,7 +21,7 @@ struct bpf_map_data {
int fd;
char *name;
size_t elf_offset;
struct bpf_map_def def;
struct bpf_load_map_def def;
};
typedef void (*fixup_map_cb)(struct bpf_map_data *map, int idx);
......
@@ -48,7 +48,7 @@ static int print_bpf_output(void *data, int size)
if (e->cookie != 0x12345678) {
printf("BUG pid %llx cookie %llx sized %d\n",
e->pid, e->cookie, size);
return PERF_EVENT_ERROR;
return LIBBPF_PERF_EVENT_ERROR;
}
cnt++;
@@ -56,10 +56,10 @@ static int print_bpf_output(void *data, int size)
if (cnt == MAX_CNT) {
printf("recv %lld events per sec\n",
MAX_CNT * 1000000000ll / (time_get_ns() - start_time));
return PERF_EVENT_DONE;
return LIBBPF_PERF_EVENT_DONE;
}
return PERF_EVENT_CONT;
return LIBBPF_PERF_EVENT_CONT;
}
static void test_bpf_perf_event(void)
......
@@ -16,9 +16,9 @@
#include <libgen.h>
#include <sys/resource.h>
#include "bpf_load.h"
#include "bpf_util.h"
#include "libbpf.h"
#include "bpf/bpf.h"
#include "bpf/libbpf.h"
static int ifindex;
static __u32 xdp_flags;
@@ -31,7 +31,7 @@ static void int_exit(int sig)
/* simple per-protocol drop counter
*/
static void poll_stats(int interval)
static void poll_stats(int map_fd, int interval)
{
unsigned int nr_cpus = bpf_num_possible_cpus();
const unsigned int nr_keys = 256;
@@ -47,7 +47,7 @@ static void poll_stats(int interval)
for (key = 0; key < nr_keys; key++) {
__u64 sum = 0;
assert(bpf_map_lookup_elem(map_fd[0], &key, values) == 0);
assert(bpf_map_lookup_elem(map_fd, &key, values) == 0);
for (i = 0; i < nr_cpus; i++)
sum += (values[i] - prev[key][i]);
if (sum)
@@ -71,9 +71,14 @@ static void usage(const char *prog)
int main(int argc, char **argv)
{
struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
struct bpf_prog_load_attr prog_load_attr = {
.prog_type = BPF_PROG_TYPE_XDP,
};
const char *optstr = "SN";
int prog_fd, map_fd, opt;
struct bpf_object *obj;
struct bpf_map *map;
char filename[256];
int opt;
while ((opt = getopt(argc, argv, optstr)) != -1) {
switch (opt) {
@@ -102,13 +107,19 @@ int main(int argc, char **argv)
ifindex = strtoul(argv[optind], NULL, 0);
snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
prog_load_attr.file = filename;
if (load_bpf_file(filename)) {
printf("%s", bpf_log_buf);
if (bpf_prog_load_xattr(&prog_load_attr, &obj, &prog_fd))
return 1;
map = bpf_map__next(NULL, obj);
if (!map) {
printf("finding a map in obj file failed\n");
return 1;
}
map_fd = bpf_map__fd(map);
if (!prog_fd[0]) {
if (!prog_fd) {
printf("load_bpf_file: %s\n", strerror(errno));
return 1;
}
@@ -116,12 +127,12 @@ int main(int argc, char **argv)
signal(SIGINT, int_exit);
signal(SIGTERM, int_exit);
if (bpf_set_link_xdp_fd(ifindex, prog_fd[0], xdp_flags) < 0) {
if (bpf_set_link_xdp_fd(ifindex, prog_fd, xdp_flags) < 0) {
printf("link set xdp fd failed\n");
return 1;
}
poll_stats(2);
poll_stats(map_fd, 2);
return 0;
}
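
The loader conversion applied above (and repeated in the XDP samples that
follow) boils down to one pattern.  A condensed sketch assembled from the
hunks, with the error messages trimmed:

	struct bpf_prog_load_attr prog_load_attr = {
		.prog_type = BPF_PROG_TYPE_XDP,
	};
	struct bpf_object *obj;
	struct bpf_map *map;
	int prog_fd, map_fd;

	prog_load_attr.file = filename;		/* "<sample>_kern.o" */
	if (bpf_prog_load_xattr(&prog_load_attr, &obj, &prog_fd))
		return 1;

	map = bpf_map__next(NULL, obj);		/* first map in the object */
	if (!map)
		return 1;
	map_fd = bpf_map__fd(map);

	if (bpf_set_link_xdp_fd(ifindex, prog_fd, xdp_flags) < 0)
		return 1;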
@@ -18,9 +18,8 @@
#include <netinet/ether.h>
#include <unistd.h>
#include <time.h>
#include "bpf_load.h"
#include "libbpf.h"
#include "bpf_util.h"
#include "bpf/bpf.h"
#include "bpf/libbpf.h"
#define STATS_INTERVAL_S 2U
@@ -36,7 +35,7 @@ static void int_exit(int sig)
/* simple "icmp packet too big sent" counter
*/
static void poll_stats(unsigned int kill_after_s)
static void poll_stats(unsigned int map_fd, unsigned int kill_after_s)
{
time_t started_at = time(NULL);
__u64 value = 0;
@@ -46,7 +45,7 @@ static void poll_stats(unsigned int kill_after_s)
while (!kill_after_s || time(NULL) - started_at <= kill_after_s) {
sleep(STATS_INTERVAL_S);
assert(bpf_map_lookup_elem(map_fd[0], &key, &value) == 0);
assert(bpf_map_lookup_elem(map_fd, &key, &value) == 0);
printf("icmp \"packet too big\" sent: %10llu pkts\n", value);
}
@@ -66,14 +65,17 @@ static void usage(const char *cmd)
int main(int argc, char **argv)
{
struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
struct bpf_prog_load_attr prog_load_attr = {
.prog_type = BPF_PROG_TYPE_XDP,
};
unsigned char opt_flags[256] = {};
unsigned int kill_after_s = 0;
const char *optstr = "i:T:SNh";
struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
int i, prog_fd, map_fd, opt;
struct bpf_object *obj;
struct bpf_map *map;
char filename[256];
int opt;
int i;
for (i = 0; i < strlen(optstr); i++)
if (optstr[i] != 'h' && 'a' <= optstr[i] && optstr[i] <= 'z')
@@ -115,13 +117,19 @@ int main(int argc, char **argv)
}
snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
prog_load_attr.file = filename;
if (bpf_prog_load_xattr(&prog_load_attr, &obj, &prog_fd))
return 1;
if (load_bpf_file(filename)) {
printf("%s", bpf_log_buf);
map = bpf_map__next(NULL, obj);
if (!map) {
printf("finding a map in obj file failed\n");
return 1;
}
map_fd = bpf_map__fd(map);
if (!prog_fd[0]) {
if (!prog_fd) {
printf("load_bpf_file: %s\n", strerror(errno));
return 1;
}
@@ -129,12 +137,12 @@ int main(int argc, char **argv)
signal(SIGINT, int_exit);
signal(SIGTERM, int_exit);
if (bpf_set_link_xdp_fd(ifindex, prog_fd[0], xdp_flags) < 0) {
if (bpf_set_link_xdp_fd(ifindex, prog_fd, xdp_flags) < 0) {
printf("link set xdp fd failed\n");
return 1;
}
poll_stats(kill_after_s);
poll_stats(map_fd, kill_after_s);
bpf_set_link_xdp_fd(ifindex, -1, xdp_flags);
......
@@ -22,8 +22,8 @@ static const char *__doc__ = " XDP RX-queue info extract example\n\n"
#include <arpa/inet.h>
#include <linux/if_link.h>
#include "libbpf.h"
#include "bpf_load.h"
#include "bpf/bpf.h"
#include "bpf/libbpf.h"
#include "bpf_util.h"
static int ifindex = -1;
@@ -32,6 +32,9 @@ static char *ifname;
static __u32 xdp_flags;
static struct bpf_map *stats_global_map;
static struct bpf_map *rx_queue_index_map;
/* Exit return codes */
#define EXIT_OK 0
#define EXIT_FAIL 1
@@ -174,7 +177,7 @@ static struct datarec *alloc_record_per_cpu(void)
static struct record *alloc_record_per_rxq(void)
{
unsigned int nr_rxqs = map_data[2].def.max_entries;
unsigned int nr_rxqs = bpf_map__def(rx_queue_index_map)->max_entries;
struct record *array;
size_t size;
@@ -190,7 +193,7 @@ static struct record *alloc_record_per_rxq(void)
static struct stats_record *alloc_stats_record(void)
{
unsigned int nr_rxqs = map_data[2].def.max_entries;
unsigned int nr_rxqs = bpf_map__def(rx_queue_index_map)->max_entries;
struct stats_record *rec;
int i;
@@ -210,7 +213,7 @@ static struct stats_record *alloc_stats_record(void)
static void free_stats_record(struct stats_record *r)
{
unsigned int nr_rxqs = map_data[2].def.max_entries;
unsigned int nr_rxqs = bpf_map__def(rx_queue_index_map)->max_entries;
int i;
for (i = 0; i < nr_rxqs; i++)
@@ -254,11 +257,11 @@ static void stats_collect(struct stats_record *rec)
{
int fd, i, max_rxqs;
fd = map_data[1].fd; /* map: stats_global_map */
fd = bpf_map__fd(stats_global_map);
map_collect_percpu(fd, 0, &rec->stats);
fd = map_data[2].fd; /* map: rx_queue_index_map */
max_rxqs = map_data[2].def.max_entries;
fd = bpf_map__fd(rx_queue_index_map);
max_rxqs = bpf_map__def(rx_queue_index_map)->max_entries;
for (i = 0; i < max_rxqs; i++)
map_collect_percpu(fd, i, &rec->rxq[i]);
}
@@ -304,8 +307,8 @@ static void stats_print(struct stats_record *stats_rec,
struct stats_record *stats_prev,
int action)
{
unsigned int nr_rxqs = bpf_map__def(rx_queue_index_map)->max_entries;
unsigned int nr_cpus = bpf_num_possible_cpus();
unsigned int nr_rxqs = map_data[2].def.max_entries;
double pps = 0, err = 0;
struct record *rec, *prev;
double t;
@@ -419,31 +422,44 @@ static void stats_poll(int interval, int action)
int main(int argc, char **argv)
{
struct rlimit r = {10 * 1024 * 1024, RLIM_INFINITY};
struct bpf_prog_load_attr prog_load_attr = {
.prog_type = BPF_PROG_TYPE_XDP,
};
int prog_fd, map_fd, opt, err;
bool use_separators = true;
struct config cfg = { 0 };
struct bpf_object *obj;
struct bpf_map *map;
char filename[256];
int longindex = 0;
int interval = 2;
__u32 key = 0;
int opt, err;
char action_str_buf[XDP_ACTION_MAX_STRLEN + 1 /* for \0 */] = { 0 };
int action = XDP_PASS; /* Default action */
char *action_str = NULL;
snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
prog_load_attr.file = filename;
if (setrlimit(RLIMIT_MEMLOCK, &r)) {
perror("setrlimit(RLIMIT_MEMLOCK)");
return 1;
}
if (load_bpf_file(filename)) {
fprintf(stderr, "ERR in load_bpf_file(): %s", bpf_log_buf);
if (bpf_prog_load_xattr(&prog_load_attr, &obj, &prog_fd))
return EXIT_FAIL;
map = bpf_map__next(NULL, obj);
stats_global_map = bpf_map__next(map, obj);
rx_queue_index_map = bpf_map__next(stats_global_map, obj);
if (!map || !stats_global_map || !rx_queue_index_map) {
printf("finding a map in obj file failed\n");
return EXIT_FAIL;
}
map_fd = bpf_map__fd(map);
if (!prog_fd[0]) {
if (!prog_fd) {
fprintf(stderr, "ERR: load_bpf_file: %s\n", strerror(errno));
return EXIT_FAIL;
}
@@ -512,7 +528,7 @@ int main(int argc, char **argv)
setlocale(LC_NUMERIC, "en_US");
/* User-side setup ifindex in config_map */
err = bpf_map_update_elem(map_fd[0], &key, &cfg, 0);
err = bpf_map_update_elem(map_fd, &key, &cfg, 0);
if (err) {
fprintf(stderr, "Store config failed (err:%d)\n", err);
exit(EXIT_FAIL_BPF);
@@ -521,7 +537,7 @@ int main(int argc, char **argv)
/* Remove XDP program when program is interrupted */
signal(SIGINT, int_exit);
if (bpf_set_link_xdp_fd(ifindex, prog_fd[0], xdp_flags) < 0) {
if (bpf_set_link_xdp_fd(ifindex, prog_fd, xdp_flags) < 0) {
fprintf(stderr, "link set xdp fd failed\n");
return EXIT_FAIL_XDP;
}
......
@@ -39,6 +39,7 @@ struct event_ring_info {
struct perf_event_sample {
struct perf_event_header header;
u64 time;
__u32 size;
unsigned char data[];
};
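
The new 'time' member mirrors the in-band record layout once
PERF_SAMPLE_TIME is added to sample_type (see the perf_event_attr change
further down).  For reference, the kernel emits such sample records as:

	/*
	 * PERF_RECORD_SAMPLE layout for
	 * sample_type = PERF_SAMPLE_TIME | PERF_SAMPLE_RAW:
	 *
	 *   struct perf_event_header header;
	 *   u64  time;           PERF_SAMPLE_TIME
	 *   u32  size;           PERF_SAMPLE_RAW payload length
	 *   char data[size];     raw payload
	 */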
@@ -49,25 +50,18 @@ static void int_exit(int signo)
stop = true;
}
static void
print_bpf_output(struct event_ring_info *ring, struct perf_event_sample *e)
static enum bpf_perf_event_ret print_bpf_output(void *event, void *priv)
{
struct event_ring_info *ring = priv;
struct perf_event_sample *e = event;
struct {
struct perf_event_header header;
__u64 id;
__u64 lost;
} *lost = (void *)e;
struct timespec ts;
if (clock_gettime(CLOCK_MONOTONIC, &ts)) {
perror("Can't read clock for timestamp");
return;
}
} *lost = event;
if (json_output) {
jsonw_start_object(json_wtr);
jsonw_name(json_wtr, "timestamp");
jsonw_uint(json_wtr, ts.tv_sec * 1000000000ull + ts.tv_nsec);
jsonw_name(json_wtr, "type");
jsonw_uint(json_wtr, e->header.type);
jsonw_name(json_wtr, "cpu");
@@ -75,6 +69,8 @@ print_bpf_output(struct event_ring_info *ring, struct perf_event_sample *e)
jsonw_name(json_wtr, "index");
jsonw_uint(json_wtr, ring->key);
if (e->header.type == PERF_RECORD_SAMPLE) {
jsonw_name(json_wtr, "timestamp");
jsonw_uint(json_wtr, e->time);
jsonw_name(json_wtr, "data");
print_data_json(e->data, e->size);
} else if (e->header.type == PERF_RECORD_LOST) {
@@ -89,8 +85,8 @@ print_bpf_output(struct event_ring_info *ring, struct perf_event_sample *e)
jsonw_end_object(json_wtr);
} else {
if (e->header.type == PERF_RECORD_SAMPLE) {
printf("== @%ld.%ld CPU: %d index: %d =====\n",
(long)ts.tv_sec, ts.tv_nsec,
printf("== @%lld.%09lld CPU: %d index: %d =====\n",
e->time / 1000000000ULL, e->time % 1000000000ULL,
ring->cpu, ring->key);
fprint_hex(stdout, e->data, e->size, " ");
printf("\n");
@@ -101,60 +97,23 @@ print_bpf_output(struct event_ring_info *ring, struct perf_event_sample *e)
e->header.type, e->header.size);
}
}
return LIBBPF_PERF_EVENT_CONT;
}
static void
perf_event_read(struct event_ring_info *ring, void **buf, size_t *buf_len)
{
volatile struct perf_event_mmap_page *header = ring->mem;
__u64 buffer_size = MMAP_PAGE_CNT * get_page_size();
__u64 data_tail = header->data_tail;
__u64 data_head = header->data_head;
void *base, *begin, *end;
asm volatile("" ::: "memory"); /* in real code it should be smp_rmb() */
if (data_head == data_tail)
return;
base = ((char *)header) + get_page_size();
begin = base + data_tail % buffer_size;
end = base + data_head % buffer_size;
while (begin != end) {
struct perf_event_sample *e;
e = begin;
if (begin + e->header.size > base + buffer_size) {
long len = base + buffer_size - begin;
if (*buf_len < e->header.size) {
free(*buf);
*buf = malloc(e->header.size);
if (!*buf) {
fprintf(stderr,
"can't allocate memory");
enum bpf_perf_event_ret ret;
ret = bpf_perf_event_read_simple(ring->mem,
MMAP_PAGE_CNT * get_page_size(),
get_page_size(), buf, buf_len,
print_bpf_output, ring);
if (ret != LIBBPF_PERF_EVENT_CONT) {
fprintf(stderr, "perf read loop failed with %d\n", ret);
stop = true;
return;
}
*buf_len = e->header.size;
}
memcpy(*buf, begin, len);
memcpy(*buf + len, base, e->header.size - len);
e = (void *)*buf;
begin = base + e->header.size - len;
} else if (begin + e->header.size == base + buffer_size) {
begin = base;
} else {
begin += e->header.size;
}
print_bpf_output(ring, e);
}
__sync_synchronize(); /* smp_mb() */
header->data_tail = data_head;
}
static int perf_mmap_size(void)
@@ -185,7 +144,7 @@ static void perf_event_unmap(void *mem)
static int bpf_perf_event_open(int map_fd, int key, int cpu)
{
struct perf_event_attr attr = {
.sample_type = PERF_SAMPLE_RAW,
.sample_type = PERF_SAMPLE_RAW | PERF_SAMPLE_TIME,
.type = PERF_TYPE_SOFTWARE,
.config = PERF_COUNT_SW_BPF_OUTPUT,
};
......
@@ -69,7 +69,7 @@ FEATURE_USER = .libbpf
FEATURE_TESTS = libelf libelf-getphdrnum libelf-mmap bpf
FEATURE_DISPLAY = libelf bpf
INCLUDES = -I. -I$(srctree)/tools/include -I$(srctree)/tools/arch/$(ARCH)/include/uapi -I$(srctree)/tools/include/uapi
INCLUDES = -I. -I$(srctree)/tools/include -I$(srctree)/tools/arch/$(ARCH)/include/uapi -I$(srctree)/tools/include/uapi -I$(srctree)/tools/perf
FEATURE_CHECK_CFLAGS-bpf = $(INCLUDES)
check_feat := 1
......
@@ -31,6 +31,7 @@
#include <unistd.h>
#include <fcntl.h>
#include <errno.h>
#include <perf-sys.h>
#include <asm/unistd.h>
#include <linux/err.h>
#include <linux/kernel.h>
@@ -1437,9 +1438,37 @@ bpf_object__load_progs(struct bpf_object *obj)
return 0;
}
static int bpf_object__validate(struct bpf_object *obj)
static bool bpf_prog_type__needs_kver(enum bpf_prog_type type)
{
switch (type) {
case BPF_PROG_TYPE_SOCKET_FILTER:
case BPF_PROG_TYPE_SCHED_CLS:
case BPF_PROG_TYPE_SCHED_ACT:
case BPF_PROG_TYPE_XDP:
case BPF_PROG_TYPE_CGROUP_SKB:
case BPF_PROG_TYPE_CGROUP_SOCK:
case BPF_PROG_TYPE_LWT_IN:
case BPF_PROG_TYPE_LWT_OUT:
case BPF_PROG_TYPE_LWT_XMIT:
case BPF_PROG_TYPE_SOCK_OPS:
case BPF_PROG_TYPE_SK_SKB:
case BPF_PROG_TYPE_CGROUP_DEVICE:
case BPF_PROG_TYPE_SK_MSG:
case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
return false;
case BPF_PROG_TYPE_UNSPEC:
case BPF_PROG_TYPE_KPROBE:
case BPF_PROG_TYPE_TRACEPOINT:
case BPF_PROG_TYPE_PERF_EVENT:
case BPF_PROG_TYPE_RAW_TRACEPOINT:
default:
return true;
}
}
static int bpf_object__validate(struct bpf_object *obj, bool needs_kver)
{
if (obj->kern_version == 0) {
if (needs_kver && obj->kern_version == 0) {
pr_warning("%s doesn't provide kernel version\n",
obj->path);
return -LIBBPF_ERRNO__KVERSION;
@@ -1448,7 +1477,8 @@ static int bpf_object__validate(struct bpf_object *obj)
}
static struct bpf_object *
__bpf_object__open(const char *path, void *obj_buf, size_t obj_buf_sz)
__bpf_object__open(const char *path, void *obj_buf, size_t obj_buf_sz,
bool needs_kver)
{
struct bpf_object *obj;
int err;
@@ -1466,7 +1496,7 @@ __bpf_object__open(const char *path, void *obj_buf, size_t obj_buf_sz)
CHECK_ERR(bpf_object__check_endianness(obj), err, out);
CHECK_ERR(bpf_object__elf_collect(obj), err, out);
CHECK_ERR(bpf_object__collect_reloc(obj), err, out);
CHECK_ERR(bpf_object__validate(obj), err, out);
CHECK_ERR(bpf_object__validate(obj, needs_kver), err, out);
bpf_object__elf_finish(obj);
return obj;
@@ -1483,7 +1513,7 @@ struct bpf_object *bpf_object__open(const char *path)
pr_debug("loading %s\n", path);
return __bpf_object__open(path, NULL, 0);
return __bpf_object__open(path, NULL, 0, true);
}
struct bpf_object *bpf_object__open_buffer(void *obj_buf,
@@ -1506,7 +1536,7 @@ struct bpf_object *bpf_object__open_buffer(void *obj_buf,
pr_debug("loading object '%s' from buffer\n",
name);
return __bpf_object__open(name, obj_buf, obj_buf_sz);
return __bpf_object__open(name, obj_buf, obj_buf_sz, true);
}
int bpf_object__unload(struct bpf_object *obj)
@@ -2163,8 +2193,11 @@ int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr,
if (!attr)
return -EINVAL;
if (!attr->file)
return -EINVAL;
obj = bpf_object__open(attr->file);
obj = __bpf_object__open(attr->file, NULL, 0,
bpf_prog_type__needs_kver(attr->prog_type));
if (IS_ERR(obj))
return -ENOENT;
@@ -2210,3 +2243,63 @@ int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr,
*prog_fd = bpf_program__fd(first_prog);
return 0;
}
enum bpf_perf_event_ret
bpf_perf_event_read_simple(void *mem, unsigned long size,
unsigned long page_size, void **buf, size_t *buf_len,
bpf_perf_event_print_t fn, void *priv)
{
volatile struct perf_event_mmap_page *header = mem;
__u64 data_tail = header->data_tail;
__u64 data_head = header->data_head;
void *base, *begin, *end;
int ret;
asm volatile("" ::: "memory"); /* in real code it should be smp_rmb() */
if (data_head == data_tail)
return LIBBPF_PERF_EVENT_CONT;
base = ((char *)header) + page_size;
begin = base + data_tail % size;
end = base + data_head % size;
while (begin != end) {
struct perf_event_header *ehdr;
ehdr = begin;
if (begin + ehdr->size > base + size) {
long len = base + size - begin;
if (*buf_len < ehdr->size) {
free(*buf);
*buf = malloc(ehdr->size);
if (!*buf) {
ret = LIBBPF_PERF_EVENT_ERROR;
break;
}
*buf_len = ehdr->size;
}
memcpy(*buf, begin, len);
memcpy(*buf + len, base, ehdr->size - len);
ehdr = (void *)*buf;
begin = base + ehdr->size - len;
} else if (begin + ehdr->size == base + size) {
begin = base;
} else {
begin += ehdr->size;
}
ret = fn(ehdr, priv);
if (ret != LIBBPF_PERF_EVENT_CONT)
break;
data_tail += ehdr->size;
}
__sync_synchronize(); /* smp_mb() */
header->data_tail = data_tail;
return ret;
}
@@ -52,8 +52,8 @@ enum libbpf_errno {
int libbpf_strerror(int err, char *buf, size_t size);
/*
* In include/linux/compiler-gcc.h, __printf is defined. However
* it should be better if libbpf.h doesn't depend on Linux header file.
* __printf is defined in include/linux/compiler-gcc.h. However,
* it would be better if libbpf.h didn't depend on Linux header files.
* So instead of __printf, here we use gcc attribute directly.
*/
typedef int (*libbpf_print_fn_t)(const char *, ...)
@@ -92,7 +92,7 @@ int bpf_object__set_priv(struct bpf_object *obj, void *priv,
bpf_object_clear_priv_t clear_priv);
void *bpf_object__priv(struct bpf_object *prog);
/* Accessors of bpf_program. */
/* Accessors of bpf_program */
struct bpf_program;
struct bpf_program *bpf_program__next(struct bpf_program *prog,
struct bpf_object *obj);
@@ -121,28 +121,28 @@ struct bpf_insn;
/*
* Libbpf allows callers to adjust BPF programs before being loaded
* into kernel. One program in an object file can be transform into
* multiple variants to be attached to different code.
* into kernel. One program in an object file can be transformed into
* multiple variants to be attached to different hooks.
*
* bpf_program_prep_t, bpf_program__set_prep and bpf_program__nth_fd
* are APIs for this propose.
* form an API for this purpose.
*
* - bpf_program_prep_t:
* It defines 'preprocessor', which is a caller defined function
* Defines a 'preprocessor', which is a caller defined function
* passed to libbpf through bpf_program__set_prep(), and will be
* called before program is loaded. The processor should adjust
* the program one time for each instances according to the number
* the program one time for each instance according to the instance id
* passed to it.
*
* - bpf_program__set_prep:
* Attachs a preprocessor to a BPF program. The number of instances
* whould be created is also passed through this function.
* Attaches a preprocessor to a BPF program. The number of instances
* that should be created is also passed through this function.
*
* - bpf_program__nth_fd:
* After the program is loaded, get resuling fds from bpf program for
* each instances.
* After the program is loaded, get resulting FD of a given instance
* of the BPF program.
*
* If bpf_program__set_prep() is not used, the program whould be loaded
* If bpf_program__set_prep() is not used, the program would be loaded
* without adjustment during bpf_object__load(). The program has only
* one instance. In this case bpf_program__fd(prog) is equal to
* bpf_program__nth_fd(prog, 0).
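
To make the comment above concrete, a hypothetical pass-through
preprocessor; a sketch only, since a real one would rewrite the
instructions differently per instance:

	static int prep_fn(struct bpf_program *prog, int n,
			   struct bpf_insn *insns, int insns_cnt,
			   struct bpf_prog_prep_result *res)
	{
		/* instance n: rewrite insns here per attach point; this
		 * sketch just passes the program through unchanged */
		res->new_insn_ptr = insns;
		res->new_insn_cnt = insns_cnt;
		res->pfd = NULL;
		return 0;
	}

	int fd0, fd1;

	/* two instances of the same ELF program: */
	bpf_program__set_prep(prog, 2, prep_fn);
	/* ... after bpf_object__load(obj) ... */
	fd0 = bpf_program__nth_fd(prog, 0);
	fd1 = bpf_program__nth_fd(prog, 1);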
@@ -156,7 +156,7 @@ struct bpf_prog_prep_result {
struct bpf_insn *new_insn_ptr;
int new_insn_cnt;
/* If not NULL, result fd is set to it */
/* If not NULL, result FD is written to it. */
int *pfd;
};
@@ -170,7 +170,7 @@ struct bpf_prog_prep_result {
*
* Return value:
* - Zero: pre-processing success.
* - Non-zero: pre-processing, stop loading.
* - Non-zero: pre-processing error, stop loading.
*/
typedef int (*bpf_program_prep_t)(struct bpf_program *prog, int n,
struct bpf_insn *insns, int insns_cnt,
@@ -182,7 +182,7 @@ int bpf_program__set_prep(struct bpf_program *prog, int nr_instance,
int bpf_program__nth_fd(struct bpf_program *prog, int n);
/*
* Adjust type of bpf program. Default is kprobe.
* Adjust type of BPF program. Default is kprobe.
*/
int bpf_program__set_socket_filter(struct bpf_program *prog);
int bpf_program__set_tracepoint(struct bpf_program *prog);
@@ -206,10 +206,10 @@ bool bpf_program__is_xdp(struct bpf_program *prog);
bool bpf_program__is_perf_event(struct bpf_program *prog);
/*
* We don't need __attribute__((packed)) now since it is
* unnecessary for 'bpf_map_def' because they are all aligned.
* In addition, using it will trigger -Wpacked warning message,
* and will be treated as an error due to -Werror.
* No need for __attribute__((packed)), all members of 'bpf_map_def'
* are all aligned. In addition, using __attribute__((packed))
* would trigger a -Wpacked warning message, and lead to an error
* if -Werror is set.
*/
struct bpf_map_def {
unsigned int type;
@@ -220,8 +220,8 @@ struct bpf_map_def {
};
/*
* There is another 'struct bpf_map' in include/linux/map.h. However,
* it is not a uapi header so no need to consider name clash.
* The 'struct bpf_map' in include/linux/bpf.h is internal to the kernel,
* so no need to worry about a name clash.
*/
struct bpf_map;
struct bpf_map *
@@ -229,7 +229,7 @@ bpf_object__find_map_by_name(struct bpf_object *obj, const char *name);
/*
* Get bpf_map through the offset of corresponding struct bpf_map_def
* in the bpf object file.
* in the BPF object file.
*/
struct bpf_map *
bpf_object__find_map_by_offset(struct bpf_object *obj, size_t offset);
@@ -267,4 +267,17 @@ int bpf_prog_load(const char *file, enum bpf_prog_type type,
struct bpf_object **pobj, int *prog_fd);
int bpf_set_link_xdp_fd(int ifindex, int fd, __u32 flags);
enum bpf_perf_event_ret {
LIBBPF_PERF_EVENT_DONE = 0,
LIBBPF_PERF_EVENT_ERROR = -1,
LIBBPF_PERF_EVENT_CONT = -2,
};
typedef enum bpf_perf_event_ret (*bpf_perf_event_print_t)(void *event,
void *priv);
int bpf_perf_event_read_simple(void *mem, unsigned long size,
unsigned long page_size,
void **buf, size_t *buf_len,
bpf_perf_event_print_t fn, void *priv);
#endif
@@ -10,7 +10,7 @@ ifneq ($(wildcard $(GENHDR)),)
GENFLAGS := -DHAVE_GENHDR
endif
CFLAGS += -Wall -O2 -I$(APIDIR) -I$(LIBDIR) -I$(GENDIR) $(GENFLAGS) -I../../../include
CFLAGS += -Wall -O2 -I$(APIDIR) -I$(LIBDIR) -I$(BPFDIR) -I$(GENDIR) $(GENFLAGS) -I../../../include
LDLIBS += -lcap -lelf -lrt -lpthread
TEST_CUSTOM_PROGS = $(OUTPUT)/urandom_read
......
@@ -1337,12 +1337,12 @@ static int get_stack_print_output(void *data, int size)
good_user_stack = true;
}
if (!good_kern_stack || !good_user_stack)
return PERF_EVENT_ERROR;
return LIBBPF_PERF_EVENT_ERROR;
if (cnt == MAX_CNT_RAWTP)
return PERF_EVENT_DONE;
return LIBBPF_PERF_EVENT_DONE;
return PERF_EVENT_CONT;
return LIBBPF_PERF_EVENT_CONT;
}
static void test_get_stack_raw_tp(void)
......
@@ -74,7 +74,7 @@ struct ksym *ksym_search(long key)
static int page_size;
static int page_cnt = 8;
static volatile struct perf_event_mmap_page *header;
static struct perf_event_mmap_page *header;
int perf_event_mmap(int fd)
{
@@ -107,45 +107,15 @@ struct perf_event_sample {
char data[];
};
static int perf_event_read(perf_event_print_fn fn)
static enum bpf_perf_event_ret bpf_perf_event_print(void *event, void *priv)
{
__u64 data_tail = header->data_tail;
__u64 data_head = header->data_head;
__u64 buffer_size = page_cnt * page_size;
void *base, *begin, *end;
char buf[256];
struct perf_event_sample *e = event;
perf_event_print_fn fn = priv;
int ret;
asm volatile("" ::: "memory"); /* in real code it should be smp_rmb() */
if (data_head == data_tail)
return PERF_EVENT_CONT;
base = ((char *)header) + page_size;
begin = base + data_tail % buffer_size;
end = base + data_head % buffer_size;
while (begin != end) {
struct perf_event_sample *e;
e = begin;
if (begin + e->header.size > base + buffer_size) {
long len = base + buffer_size - begin;
assert(len < e->header.size);
memcpy(buf, begin, len);
memcpy(buf + len, base, e->header.size - len);
e = (void *) buf;
begin = base + e->header.size - len;
} else if (begin + e->header.size == base + buffer_size) {
begin = base;
} else {
begin += e->header.size;
}
if (e->header.type == PERF_RECORD_SAMPLE) {
ret = fn(e->data, e->size);
if (ret != PERF_EVENT_CONT)
if (ret != LIBBPF_PERF_EVENT_CONT)
return ret;
} else if (e->header.type == PERF_RECORD_LOST) {
struct {
@@ -158,23 +128,26 @@ static int perf_event_read(perf_event_print_fn fn)
printf("unknown event type=%d size=%d\n",
e->header.type, e->header.size);
}
}
__sync_synchronize(); /* smp_mb() */
header->data_tail = data_head;
return PERF_EVENT_CONT;
return LIBBPF_PERF_EVENT_CONT;
}
int perf_event_poller(int fd, perf_event_print_fn output_fn)
{
int ret;
enum bpf_perf_event_ret ret;
void *buf = NULL;
size_t len = 0;
for (;;) {
perf_event_poll(fd);
ret = perf_event_read(output_fn);
if (ret != PERF_EVENT_CONT)
return ret;
ret = bpf_perf_event_read_simple(header, page_cnt * page_size,
page_size, &buf, &len,
bpf_perf_event_print,
output_fn);
if (ret != LIBBPF_PERF_EVENT_CONT)
break;
}
free(buf);
return PERF_EVENT_DONE;
return ret;
}
@@ -2,6 +2,8 @@
#ifndef __TRACE_HELPER_H
#define __TRACE_HELPER_H
#include <libbpf.h>
struct ksym {
long addr;
char *name;
@@ -10,14 +12,9 @@ struct ksym {
int load_kallsyms(void);
struct ksym *ksym_search(long key);
typedef int (*perf_event_print_fn)(void *data, int size);
/* return code for perf_event_print_fn */
#define PERF_EVENT_DONE 0
#define PERF_EVENT_ERROR -1
#define PERF_EVENT_CONT -2
typedef enum bpf_perf_event_ret (*perf_event_print_fn)(void *data, int size);
int perf_event_mmap(int fd);
/* return PERF_EVENT_DONE or PERF_EVENT_ERROR */
/* return LIBBPF_PERF_EVENT_DONE or LIBBPF_PERF_EVENT_ERROR */
int perf_event_poller(int fd, perf_event_print_fn output_fn);
#endif
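
Under the new scheme a sample's output callback keeps its (data, size)
shape but returns the libbpf codes.  A minimal sketch, assuming pmu_fd is
a perf event fd already mmap'ed via perf_event_mmap():

	static enum bpf_perf_event_ret print_fn(void *data, int size)
	{
		/* consume 'size' bytes of raw sample payload */
		return LIBBPF_PERF_EVENT_CONT;	/* or _DONE / _ERROR */
	}

	/* returns LIBBPF_PERF_EVENT_DONE or LIBBPF_PERF_EVENT_ERROR */
	perf_event_poller(pmu_fd, print_fn);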