Commit 830382e4 authored by Andrii Nakryiko

Merge branch 'bpf: remove bpf_load loader completely'

"Daniel T. says:

====================

The long-running effort to rewrite the BPF sample programs that used
bpf_load so that they use the libbpf loader instead has finally been
completed. As a result, no BPF program in the kernel tree uses
bpf_load any longer.

This patchset refactors remaining bpf programs with libbpf and
completely removes bpf_load, an outdated bpf loader that is difficult
to keep up with the latest kernel BPF and causes confusion.

Changes in v2:
 - drop 'move tracing helpers to trace_helper' patch
 - add link pinning to prevent cleaning up on process exit
 - add static to global variables and remove unused variable
 - change to destroy link even after link__pin()
 - fix return error code on exit
 - merge commit with changing Makefile

Changes in v3:
 - cleanup bpf_link, bpf_object and cgroup fd both on success and error
====================
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
parents fb355812 ceb5dea5
...@@ -52,3 +52,6 @@ xdp_tx_iptunnel ...@@ -52,3 +52,6 @@ xdp_tx_iptunnel
xdpsock xdpsock
xsk_fwd xsk_fwd
testfile.img testfile.img
hbm_out.log
iperf.*
*.out
...@@ -73,16 +73,16 @@ tracex5-objs := tracex5_user.o $(TRACE_HELPERS) ...@@ -73,16 +73,16 @@ tracex5-objs := tracex5_user.o $(TRACE_HELPERS)
tracex6-objs := tracex6_user.o tracex6-objs := tracex6_user.o
tracex7-objs := tracex7_user.o tracex7-objs := tracex7_user.o
test_probe_write_user-objs := test_probe_write_user_user.o test_probe_write_user-objs := test_probe_write_user_user.o
trace_output-objs := trace_output_user.o $(TRACE_HELPERS) trace_output-objs := trace_output_user.o
lathist-objs := lathist_user.o lathist-objs := lathist_user.o
offwaketime-objs := offwaketime_user.o $(TRACE_HELPERS) offwaketime-objs := offwaketime_user.o $(TRACE_HELPERS)
spintest-objs := spintest_user.o $(TRACE_HELPERS) spintest-objs := spintest_user.o $(TRACE_HELPERS)
map_perf_test-objs := map_perf_test_user.o map_perf_test-objs := map_perf_test_user.o
test_overhead-objs := bpf_load.o test_overhead_user.o test_overhead-objs := test_overhead_user.o
test_cgrp2_array_pin-objs := test_cgrp2_array_pin.o test_cgrp2_array_pin-objs := test_cgrp2_array_pin.o
test_cgrp2_attach-objs := test_cgrp2_attach.o test_cgrp2_attach-objs := test_cgrp2_attach.o
test_cgrp2_sock-objs := test_cgrp2_sock.o test_cgrp2_sock-objs := test_cgrp2_sock.o
test_cgrp2_sock2-objs := bpf_load.o test_cgrp2_sock2.o test_cgrp2_sock2-objs := test_cgrp2_sock2.o
xdp1-objs := xdp1_user.o xdp1-objs := xdp1_user.o
# reuse xdp1 source intentionally # reuse xdp1 source intentionally
xdp2-objs := xdp1_user.o xdp2-objs := xdp1_user.o
...@@ -91,8 +91,8 @@ test_current_task_under_cgroup-objs := $(CGROUP_HELPERS) \ ...@@ -91,8 +91,8 @@ test_current_task_under_cgroup-objs := $(CGROUP_HELPERS) \
test_current_task_under_cgroup_user.o test_current_task_under_cgroup_user.o
trace_event-objs := trace_event_user.o $(TRACE_HELPERS) trace_event-objs := trace_event_user.o $(TRACE_HELPERS)
sampleip-objs := sampleip_user.o $(TRACE_HELPERS) sampleip-objs := sampleip_user.o $(TRACE_HELPERS)
tc_l2_redirect-objs := bpf_load.o tc_l2_redirect_user.o tc_l2_redirect-objs := tc_l2_redirect_user.o
lwt_len_hist-objs := bpf_load.o lwt_len_hist_user.o lwt_len_hist-objs := lwt_len_hist_user.o
xdp_tx_iptunnel-objs := xdp_tx_iptunnel_user.o xdp_tx_iptunnel-objs := xdp_tx_iptunnel_user.o
test_map_in_map-objs := test_map_in_map_user.o test_map_in_map-objs := test_map_in_map_user.o
per_socket_stats_example-objs := cookie_uid_helper_example.o per_socket_stats_example-objs := cookie_uid_helper_example.o
...@@ -107,10 +107,10 @@ xdp_adjust_tail-objs := xdp_adjust_tail_user.o ...@@ -107,10 +107,10 @@ xdp_adjust_tail-objs := xdp_adjust_tail_user.o
xdpsock-objs := xdpsock_user.o xdpsock-objs := xdpsock_user.o
xsk_fwd-objs := xsk_fwd.o xsk_fwd-objs := xsk_fwd.o
xdp_fwd-objs := xdp_fwd_user.o xdp_fwd-objs := xdp_fwd_user.o
task_fd_query-objs := bpf_load.o task_fd_query_user.o $(TRACE_HELPERS) task_fd_query-objs := task_fd_query_user.o $(TRACE_HELPERS)
xdp_sample_pkts-objs := xdp_sample_pkts_user.o $(TRACE_HELPERS) xdp_sample_pkts-objs := xdp_sample_pkts_user.o
ibumad-objs := bpf_load.o ibumad_user.o $(TRACE_HELPERS) ibumad-objs := ibumad_user.o
hbm-objs := bpf_load.o hbm.o $(CGROUP_HELPERS) hbm-objs := hbm.o $(CGROUP_HELPERS)
# Tell kbuild to always build the programs # Tell kbuild to always build the programs
always-y := $(tprogs-y) always-y := $(tprogs-y)
...@@ -197,8 +197,6 @@ TPROGS_CFLAGS += --sysroot=$(SYSROOT) ...@@ -197,8 +197,6 @@ TPROGS_CFLAGS += --sysroot=$(SYSROOT)
TPROGS_LDFLAGS := -L$(SYSROOT)/usr/lib TPROGS_LDFLAGS := -L$(SYSROOT)/usr/lib
endif endif
TPROGCFLAGS_bpf_load.o += -Wno-unused-variable
TPROGS_LDLIBS += $(LIBBPF) -lelf -lz TPROGS_LDLIBS += $(LIBBPF) -lelf -lz
TPROGLDLIBS_tracex4 += -lrt TPROGLDLIBS_tracex4 += -lrt
TPROGLDLIBS_trace_output += -lrt TPROGLDLIBS_trace_output += -lrt
......
// SPDX-License-Identifier: GPL-2.0
#include <stdio.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <libelf.h>
#include <gelf.h>
#include <errno.h>
#include <unistd.h>
#include <string.h>
#include <stdbool.h>
#include <stdlib.h>
#include <linux/bpf.h>
#include <linux/filter.h>
#include <linux/perf_event.h>
#include <linux/netlink.h>
#include <linux/rtnetlink.h>
#include <linux/types.h>
#include <sys/socket.h>
#include <sys/syscall.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <poll.h>
#include <ctype.h>
#include <assert.h>
#include <bpf/bpf.h>
#include "bpf_load.h"
#include "perf-sys.h"
/* Root of the tracing directory used for kprobe_events and event ids. */
#define DEBUGFS "/sys/kernel/debug/tracing/"
/* Contents of the ELF "license" section; passed to bpf_load_program(). */
static char license[128];
/* Contents of the ELF "version" section; passed to bpf_load_program(). */
static int kern_version;
/* processed_sec[i] is true once ELF section i has been consumed. */
static bool processed_sec[128];
/* Log buffer handed to bpf_load_program() and printed when loading fails. */
char bpf_log_buf[BPF_LOG_BUF_SIZE];
/* FD of each map, in the same order as map_data[]. */
int map_fd[MAX_MAPS];
/* FD of each loaded program, in load order. */
int prog_fd[MAX_PROGS];
/* Perf-event or raw-tracepoint FD for the program with the same index. */
int event_fd[MAX_PROGS];
/* Number of valid entries in prog_fd[]. */
int prog_cnt;
/* FD of the most recently created BPF_MAP_TYPE_PROG_ARRAY map, else -1. */
int prog_array_fd = -1;
/* Per-map details (name, ELF offset, definition) parsed from the object. */
struct bpf_map_data map_data[MAX_MAPS];
/* Number of valid entries in map_data[] after a successful load. */
int map_data_count;
/*
 * Store a loaded program's FD in the program-array map.
 *
 * @event:   string whose leading decimal digits give the slot index
 *           (parsed with atoi()).
 * @prog_fd: FD of the program to store in that slot.
 *
 * The update targets the global prog_array_fd.
 *
 * Returns 0 on success, -1 if the map update fails.
 */
static int populate_prog_array(const char *event, int prog_fd)
{
	int slot = atoi(event);

	if (bpf_map_update_elem(prog_array_fd, &slot, &prog_fd, BPF_ANY) >= 0)
		return 0;

	printf("failed to store prog_fd in prog_array\n");
	return -1;
}
/*
 * Write a command string to the tracing "kprobe_events" file.
 *
 * @val: command to write. An empty string opens the file with O_TRUNC,
 *       which is how the caller clears all existing kprobes; any other
 *       string is appended.
 *
 * Returns the number of bytes written on success, or -1 if @val is NULL,
 * the file cannot be opened, or the write fails. On failure errno holds
 * the error of the call that actually failed, so callers can report it
 * with strerror(errno).
 */
static int write_kprobe_events(const char *val)
{
	int fd, flags, saved_errno;
	ssize_t ret;

	if (val == NULL)
		return -1;

	flags = O_WRONLY | (val[0] == '\0' ? O_TRUNC : O_APPEND);

	fd = open(DEBUGFS "kprobe_events", flags);
	if (fd < 0)
		return -1;	/* keep errno from open(), not EBADF from write() */

	ret = write(fd, val, strlen(val));

	/* close() may overwrite errno; preserve the write() result for callers */
	saved_errno = errno;
	close(fd);
	errno = saved_errno;

	return ret < 0 ? -1 : (int)ret;
}
static int load_and_attach(const char *event, struct bpf_insn *prog, int size)
{
bool is_socket = strncmp(event, "socket", 6) == 0;
bool is_kprobe = strncmp(event, "kprobe/", 7) == 0;
bool is_kretprobe = strncmp(event, "kretprobe/", 10) == 0;
bool is_tracepoint = strncmp(event, "tracepoint/", 11) == 0;
bool is_raw_tracepoint = strncmp(event, "raw_tracepoint/", 15) == 0;
bool is_xdp = strncmp(event, "xdp", 3) == 0;
bool is_perf_event = strncmp(event, "perf_event", 10) == 0;
bool is_cgroup_skb = strncmp(event, "cgroup/skb", 10) == 0;
bool is_cgroup_sk = strncmp(event, "cgroup/sock", 11) == 0;
bool is_sockops = strncmp(event, "sockops", 7) == 0;
bool is_sk_skb = strncmp(event, "sk_skb", 6) == 0;
bool is_sk_msg = strncmp(event, "sk_msg", 6) == 0;
size_t insns_cnt = size / sizeof(struct bpf_insn);
enum bpf_prog_type prog_type;
char buf[256];
int fd, efd, err, id;
struct perf_event_attr attr = {};
attr.type = PERF_TYPE_TRACEPOINT;
attr.sample_type = PERF_SAMPLE_RAW;
attr.sample_period = 1;
attr.wakeup_events = 1;
if (is_socket) {
prog_type = BPF_PROG_TYPE_SOCKET_FILTER;
} else if (is_kprobe || is_kretprobe) {
prog_type = BPF_PROG_TYPE_KPROBE;
} else if (is_tracepoint) {
prog_type = BPF_PROG_TYPE_TRACEPOINT;
} else if (is_raw_tracepoint) {
prog_type = BPF_PROG_TYPE_RAW_TRACEPOINT;
} else if (is_xdp) {
prog_type = BPF_PROG_TYPE_XDP;
} else if (is_perf_event) {
prog_type = BPF_PROG_TYPE_PERF_EVENT;
} else if (is_cgroup_skb) {
prog_type = BPF_PROG_TYPE_CGROUP_SKB;
} else if (is_cgroup_sk) {
prog_type = BPF_PROG_TYPE_CGROUP_SOCK;
} else if (is_sockops) {
prog_type = BPF_PROG_TYPE_SOCK_OPS;
} else if (is_sk_skb) {
prog_type = BPF_PROG_TYPE_SK_SKB;
} else if (is_sk_msg) {
prog_type = BPF_PROG_TYPE_SK_MSG;
} else {
printf("Unknown event '%s'\n", event);
return -1;
}
if (prog_cnt == MAX_PROGS)
return -1;
fd = bpf_load_program(prog_type, prog, insns_cnt, license, kern_version,
bpf_log_buf, BPF_LOG_BUF_SIZE);
if (fd < 0) {
printf("bpf_load_program() err=%d\n%s", errno, bpf_log_buf);
return -1;
}
prog_fd[prog_cnt++] = fd;
if (is_xdp || is_perf_event || is_cgroup_skb || is_cgroup_sk)
return 0;
if (is_socket || is_sockops || is_sk_skb || is_sk_msg) {
if (is_socket)
event += 6;
else
event += 7;
if (*event != '/')
return 0;
event++;
if (!isdigit(*event)) {
printf("invalid prog number\n");
return -1;
}
return populate_prog_array(event, fd);
}
if (is_raw_tracepoint) {
efd = bpf_raw_tracepoint_open(event + 15, fd);
if (efd < 0) {
printf("tracepoint %s %s\n", event + 15, strerror(errno));
return -1;
}
event_fd[prog_cnt - 1] = efd;
return 0;
}
if (is_kprobe || is_kretprobe) {
bool need_normal_check = true;
const char *event_prefix = "";
if (is_kprobe)
event += 7;
else
event += 10;
if (*event == 0) {
printf("event name cannot be empty\n");
return -1;
}
if (isdigit(*event))
return populate_prog_array(event, fd);
#ifdef __x86_64__
if (strncmp(event, "sys_", 4) == 0) {
snprintf(buf, sizeof(buf), "%c:__x64_%s __x64_%s",
is_kprobe ? 'p' : 'r', event, event);
err = write_kprobe_events(buf);
if (err >= 0) {
need_normal_check = false;
event_prefix = "__x64_";
}
}
#endif
if (need_normal_check) {
snprintf(buf, sizeof(buf), "%c:%s %s",
is_kprobe ? 'p' : 'r', event, event);
err = write_kprobe_events(buf);
if (err < 0) {
printf("failed to create kprobe '%s' error '%s'\n",
event, strerror(errno));
return -1;
}
}
strcpy(buf, DEBUGFS);
strcat(buf, "events/kprobes/");
strcat(buf, event_prefix);
strcat(buf, event);
strcat(buf, "/id");
} else if (is_tracepoint) {
event += 11;
if (*event == 0) {
printf("event name cannot be empty\n");
return -1;
}
strcpy(buf, DEBUGFS);
strcat(buf, "events/");
strcat(buf, event);
strcat(buf, "/id");
}
efd = open(buf, O_RDONLY, 0);
if (efd < 0) {
printf("failed to open event %s\n", event);
return -1;
}
err = read(efd, buf, sizeof(buf));
if (err < 0 || err >= sizeof(buf)) {
printf("read from '%s' failed '%s'\n", event, strerror(errno));
return -1;
}
close(efd);
buf[err] = 0;
id = atoi(buf);
attr.config = id;
efd = sys_perf_event_open(&attr, -1/*pid*/, 0/*cpu*/, -1/*group_fd*/, 0);
if (efd < 0) {
printf("event %d fd %d err %s\n", id, efd, strerror(errno));
return -1;
}
event_fd[prog_cnt - 1] = efd;
err = ioctl(efd, PERF_EVENT_IOC_ENABLE, 0);
if (err < 0) {
printf("ioctl PERF_EVENT_IOC_ENABLE failed err %s\n",
strerror(errno));
return -1;
}
err = ioctl(efd, PERF_EVENT_IOC_SET_BPF, fd);
if (err < 0) {
printf("ioctl PERF_EVENT_IOC_SET_BPF failed err %s\n",
strerror(errno));
return -1;
}
return 0;
}
/*
 * Create every map described in @maps and record its FD.
 *
 * @maps:      map descriptions parsed from the ELF object.
 * @nr_maps:   number of entries in @maps.
 * @fixup_map: optional callback invoked on each entry before creation. If
 *             it leaves maps[i].fd set to something other than -1, that FD
 *             is used as-is and the map is not created here.
 *
 * For each created map the FD is stored in both map_fd[i] and maps[i].fd.
 * A created map of type BPF_MAP_TYPE_PROG_ARRAY also becomes prog_array_fd.
 *
 * Returns 0 on success, 1 on failure.
 */
static int load_maps(struct bpf_map_data *maps, int nr_maps,
		     fixup_map_cb fixup_map)
{
	int i, numa_node;

	for (i = 0; i < nr_maps; i++) {
		if (fixup_map) {
			fixup_map(&maps[i], i);
			/* Allow userspace to assign map FD prior to creation */
			if (maps[i].fd != -1) {
				map_fd[i] = maps[i].fd;
				continue;
			}
		}

		numa_node = maps[i].def.map_flags & BPF_F_NUMA_NODE ?
			maps[i].def.numa_node : -1;

		if (maps[i].def.type == BPF_MAP_TYPE_ARRAY_OF_MAPS ||
		    maps[i].def.type == BPF_MAP_TYPE_HASH_OF_MAPS) {
			unsigned int inner_idx = maps[i].def.inner_map_idx;
			int inner_map_fd;

			/* inner_map_idx comes from the ELF file: bound it */
			if (inner_idx >= MAX_MAPS) {
				printf("failed to create map %d (%s): invalid inner_map_idx %u\n",
				       i, maps[i].name, inner_idx);
				return 1;
			}
			inner_map_fd = map_fd[inner_idx];

			map_fd[i] = bpf_create_map_in_map_node(maps[i].def.type,
							maps[i].name,
							maps[i].def.key_size,
							inner_map_fd,
							maps[i].def.max_entries,
							maps[i].def.map_flags,
							numa_node);
		} else {
			map_fd[i] = bpf_create_map_node(maps[i].def.type,
							maps[i].name,
							maps[i].def.key_size,
							maps[i].def.value_size,
							maps[i].def.max_entries,
							maps[i].def.map_flags,
							numa_node);
		}
		if (map_fd[i] < 0) {
			printf("failed to create map %d (%s): %d %s\n",
			       i, maps[i].name, errno, strerror(errno));
			return 1;
		}
		maps[i].fd = map_fd[i];

		if (maps[i].def.type == BPF_MAP_TYPE_PROG_ARRAY)
			prog_array_fd = map_fd[i];
	}
	return 0;
}
/*
 * Fetch the header, name and data of ELF section @i.
 *
 * On success *shname, *shdr and *data describe the section.
 *
 * Returns 0 on success, or a non-zero code identifying the failed step:
 *   1 - the section does not exist,
 *   2 - its header could not be read,
 *   3 - it has no name or its size is zero,
 *   4 - it has no data, or has more than one data descriptor.
 */
static int get_sec(Elf *elf, int i, GElf_Ehdr *ehdr, char **shname,
		   GElf_Shdr *shdr, Elf_Data **data)
{
	Elf_Scn *section = elf_getscn(elf, i);
	Elf_Data *first;
	char *name;

	if (section == NULL)
		return 1;

	if (gelf_getshdr(section, shdr) != shdr)
		return 2;

	name = elf_strptr(elf, ehdr->e_shstrndx, shdr->sh_name);
	*shname = name;
	if (name == NULL || shdr->sh_size == 0)
		return 3;

	first = elf_getdata(section, NULL);
	*data = first;
	if (first == NULL)
		return 4;
	/* a second data descriptor is not supported */
	if (elf_getdata(section, first) != NULL)
		return 4;

	return 0;
}
/*
 * Apply the map relocations of one SHT_REL section to a program.
 *
 * @data:    data of the relocation section.
 * @symbols: data of the symbol table.
 * @shdr:    header of the relocation section.
 * @insn:    instructions of the program being relocated (patched in place).
 * @maps:    loaded maps, with valid fd and elf_offset fields.
 * @nr_maps: number of entries in @maps.
 *
 * Every relocation must target a BPF_LD | BPF_IMM | BPF_DW instruction. The
 * instruction is marked BPF_PSEUDO_MAP_FD and its imm field receives the FD
 * of the map whose elf_offset equals the relocation symbol's value.
 *
 * Returns 0 on success, 1 on the first invalid or unreadable relocation.
 * Instructions patched before the failure are left modified.
 */
static int parse_relo_and_apply(Elf_Data *data, Elf_Data *symbols,
				GElf_Shdr *shdr, struct bpf_insn *insn,
				struct bpf_map_data *maps, int nr_maps)
{
	int i, nrels;

	/* a zero entry size would divide by zero below */
	if (shdr->sh_entsize == 0) {
		printf("invalid relo section: zero entry size\n");
		return 1;
	}

	nrels = shdr->sh_size / shdr->sh_entsize;

	for (i = 0; i < nrels; i++) {
		GElf_Sym sym;
		GElf_Rel rel;
		unsigned int insn_idx;
		bool match = false;
		int map_idx;

		if (!gelf_getrel(data, i, &rel)) {
			printf("failed to read relo %d\n", i);
			return 1;
		}

		insn_idx = rel.r_offset / sizeof(struct bpf_insn);

		if (!gelf_getsym(symbols, GELF_R_SYM(rel.r_info), &sym)) {
			printf("failed to read symbol for relo %d\n", i);
			return 1;
		}

		if (insn[insn_idx].code != (BPF_LD | BPF_IMM | BPF_DW)) {
			printf("invalid relo for insn[%d].code 0x%x\n",
			       insn_idx, insn[insn_idx].code);
			return 1;
		}
		insn[insn_idx].src_reg = BPF_PSEUDO_MAP_FD;

		/* Match FD relocation against recorded map_data[] offset */
		for (map_idx = 0; map_idx < nr_maps; map_idx++) {
			if (maps[map_idx].elf_offset == sym.st_value) {
				match = true;
				break;
			}
		}
		if (match) {
			insn[insn_idx].imm = maps[map_idx].fd;
		} else {
			printf("invalid relo for insn[%d] no map_data match\n",
			       insn_idx);
			return 1;
		}
	}

	return 0;
}
/*
 * qsort() comparator ordering GElf_Sym entries by ascending st_value.
 *
 * Returns -1, 0 or 1 when the left symbol's value is respectively less
 * than, equal to, or greater than the right one's.
 */
static int cmp_symbols(const void *l, const void *r)
{
	const GElf_Sym *a = l;
	const GElf_Sym *b = r;

	return (a->st_value > b->st_value) - (a->st_value < b->st_value);
}
/*
 * Parse the ELF "maps" section into @maps.
 *
 * @maps:       output array; must have room for MAX_MAPS entries.
 * @maps_shndx: index of the maps section.
 * @elf:        ELF descriptor.
 * @symbols:    data of the symbol table.
 * @strtabidx:  index of the string table holding the symbol names.
 *
 * Every symbol that lives in the maps section is treated as one map. The
 * symbols are sorted by offset so that maps[] follows section order. For
 * each map the name is strdup()'d, elf_offset is recorded and the leading
 * part of the definition is copied into maps[i].def.
 *
 * Returns the number of maps found, or a negative errno value:
 *   -EINVAL  bad arguments, unreadable section, no map symbols, a symbol
 *            without a name, or a symbol offset outside the section,
 *   -ENOMEM  allocation failure,
 *   -E2BIG   more than MAX_MAPS maps,
 *   -EFBIG   the ELF definition is larger than struct bpf_load_map_def and
 *            the extra bytes are not all zero.
 * On failure, names already stored in @maps are not released.
 */
static int load_elf_maps_section(struct bpf_map_data *maps, int maps_shndx,
				 Elf *elf, Elf_Data *symbols, int strtabidx)
{
	int map_sz_elf, map_sz_copy;
	bool validate_zero = false;
	Elf_Data *data_maps = NULL;
	size_t s, nr_syms;
	int i, nr_maps;
	int err = 0;
	GElf_Sym *sym;
	Elf_Scn *scn;

	if (maps_shndx < 0)
		return -EINVAL;
	if (!symbols)
		return -EINVAL;

	/* Get data for maps section via elf index */
	scn = elf_getscn(elf, maps_shndx);
	if (scn)
		data_maps = elf_getdata(scn, NULL);
	if (!scn || !data_maps) {
		printf("Failed to get Elf_Data from maps section %d\n",
		       maps_shndx);
		return -EINVAL;
	}

	/*
	 * For each map get corresponding symbol table entry. The array has
	 * one extra slot: sym[nr_maps] is scratch space for the symbol being
	 * examined, so it must exist even when MAX_MAPS maps were found.
	 */
	sym = calloc(MAX_MAPS + 1, sizeof(GElf_Sym));
	if (!sym)
		return -ENOMEM;

	nr_syms = symbols->d_size / sizeof(GElf_Sym);
	for (s = 0, nr_maps = 0; s < nr_syms; s++) {
		if (!gelf_getsym(symbols, s, &sym[nr_maps]))
			continue;
		if (sym[nr_maps].st_shndx != maps_shndx)
			continue;
		/* a match in the scratch slot would overflow maps[] */
		if (nr_maps == MAX_MAPS) {
			printf("too many maps in ELF file (max %d)\n",
			       MAX_MAPS);
			err = -E2BIG;
			goto out_free;
		}
		/* Only increment if the symbol is in the maps section */
		nr_maps++;
	}

	/* Without symbols the per-map size below cannot be computed. */
	if (nr_maps == 0) {
		printf("no map symbols found in maps section %d\n",
		       maps_shndx);
		err = -EINVAL;
		goto out_free;
	}

	/* Align to map_fd[] order, via sort on offset in sym.st_value */
	qsort(sym, nr_maps, sizeof(GElf_Sym), cmp_symbols);

	/* Keeping compatible with ELF maps section changes
	 * ------------------------------------------------
	 * The program size of struct bpf_load_map_def is known by loader
	 * code, but struct stored in ELF file can be different.
	 *
	 * Unfortunately sym[i].st_size is zero.  To calculate the
	 * struct size stored in the ELF file, assume all struct have
	 * the same size, and simply divide with number of map
	 * symbols.
	 */
	map_sz_elf = data_maps->d_size / nr_maps;
	map_sz_copy = sizeof(struct bpf_load_map_def);
	if (map_sz_elf < map_sz_copy) {
		/*
		 * Backward compat, loading older ELF file with
		 * smaller struct, keeping remaining bytes zero.
		 */
		map_sz_copy = map_sz_elf;
	} else if (map_sz_elf > map_sz_copy) {
		/*
		 * Forward compat, loading newer ELF file with larger
		 * struct with unknown features. Assume zero means
		 * feature not used.  Thus, validate rest of struct
		 * data is zero.
		 */
		validate_zero = true;
	}

	/* Memcpy relevant part of ELF maps data to loader maps */
	for (i = 0; i < nr_maps; i++) {
		unsigned char *def, *addr, *end;
		const char *map_name;
		size_t offset;

		/* Symbol value is offset into ELF maps section data area */
		offset = sym[i].st_value;
		if (offset > data_maps->d_size ||
		    data_maps->d_size - offset < (size_t)map_sz_elf) {
			printf("map symbol offset %zu is outside maps section\n",
			       offset);
			err = -EINVAL;
			goto out_free;
		}

		map_name = elf_strptr(elf, strtabidx, sym[i].st_name);
		if (!map_name) {
			printf("failed to get name of map %d\n", i);
			err = -EINVAL;
			goto out_free;
		}
		maps[i].name = strdup(map_name);
		if (!maps[i].name) {
			/* save errno before printf()/free() can change it */
			err = errno ? -errno : -ENOMEM;
			printf("strdup(%s): %s(%d)\n", map_name,
			       strerror(-err), -err);
			goto out_free;
		}

		def = (unsigned char *)data_maps->d_buf + offset;
		maps[i].elf_offset = offset;
		memset(&maps[i].def, 0, sizeof(struct bpf_load_map_def));
		memcpy(&maps[i].def, def, map_sz_copy);

		/* Verify no newer features were requested */
		if (validate_zero) {
			addr = def + map_sz_copy;
			end = def + map_sz_elf;
			for (; addr < end; addr++) {
				if (*addr != 0) {
					err = -EFBIG;
					goto out_free;
				}
			}
		}
	}

out_free:
	free(sym);
	return err ? err : nr_maps;
}
/*
 * Load all maps and programs from the BPF ELF object at @path.
 *
 * @path:      path of the object file.
 * @fixup_map: optional callback passed on to load_maps(); may be NULL.
 *
 * Steps:
 *  1. clear all existing kprobes (best effort),
 *  2. scan the sections for "license", "version", "maps" and the symbol
 *     table,
 *  3. create the maps,
 *  4. apply map relocations to the executable program sections,
 *  5. load and attach every section whose name has a known program prefix.
 *
 * Returns 0 when the last program section was loaded and attached
 * successfully, non-zero otherwise. An object without any program section
 * therefore also yields a non-zero result.
 */
static int do_load_bpf_file(const char *path, fixup_map_cb fixup_map)
{
	int fd, i, ret = 1, maps_shndx = -1, strtabidx = -1;
	Elf *elf = NULL;
	GElf_Ehdr ehdr;
	GElf_Shdr shdr, shdr_prog;
	Elf_Data *data, *data_prog, *data_maps = NULL, *symbols = NULL;
	char *shname, *shname_prog;
	int nr_maps = 0;

	/* reset global variables */
	kern_version = 0;
	memset(license, 0, sizeof(license));
	memset(processed_sec, 0, sizeof(processed_sec));

	if (elf_version(EV_CURRENT) == EV_NONE)
		return 1;

	fd = open(path, O_RDONLY, 0);
	if (fd < 0)
		return 1;

	elf = elf_begin(fd, ELF_C_READ, NULL);
	if (!elf)
		goto done;

	if (gelf_getehdr(elf, &ehdr) != &ehdr)
		goto done;

	/* processed_sec[] is indexed by section number below */
	if (ehdr.e_shnum > sizeof(processed_sec) / sizeof(processed_sec[0])) {
		printf("too many ELF sections (%d)\n", ehdr.e_shnum);
		goto done;
	}

	/* clear all kprobes; failure here is deliberately ignored */
	(void)write_kprobe_events("");

	/* scan over all elf sections to get license and map info */
	for (i = 1; i < ehdr.e_shnum; i++) {

		if (get_sec(elf, i, &ehdr, &shname, &shdr, &data))
			continue;

		if (0) /* helpful for llvm debugging */
			printf("section %d:%s data %p size %zd link %d flags %d\n",
			       i, shname, data->d_buf, data->d_size,
			       shdr.sh_link, (int) shdr.sh_flags);

		if (strcmp(shname, "license") == 0) {
			processed_sec[i] = true;
			/* keep the terminating NUL left by the memset above */
			if (data->d_size >= sizeof(license)) {
				printf("invalid size of license section %zd\n",
				       data->d_size);
				goto done;
			}
			memcpy(license, data->d_buf, data->d_size);
		} else if (strcmp(shname, "version") == 0) {
			processed_sec[i] = true;
			if (data->d_size != sizeof(int)) {
				printf("invalid size of version section %zd\n",
				       data->d_size);
				goto done;
			}
			memcpy(&kern_version, data->d_buf, sizeof(int));
		} else if (strcmp(shname, "maps") == 0) {
			int j;

			maps_shndx = i;
			data_maps = data;
			/* -1 marks "not created yet" for load_maps() */
			for (j = 0; j < MAX_MAPS; j++)
				map_data[j].fd = -1;
		} else if (shdr.sh_type == SHT_SYMTAB) {
			strtabidx = shdr.sh_link;
			symbols = data;
		}
	}

	if (!symbols) {
		printf("missing SHT_SYMTAB section\n");
		goto done;
	}

	if (data_maps) {
		nr_maps = load_elf_maps_section(map_data, maps_shndx,
						elf, symbols, strtabidx);
		if (nr_maps < 0) {
			printf("Error: Failed loading ELF maps (errno:%d):%s\n",
			       nr_maps, strerror(-nr_maps));
			goto done;
		}
		if (load_maps(map_data, nr_maps, fixup_map))
			goto done;
		map_data_count = nr_maps;

		processed_sec[maps_shndx] = true;
	}

	/* process all relo sections, and rewrite bpf insns for maps */
	for (i = 1; i < ehdr.e_shnum; i++) {
		if (processed_sec[i])
			continue;

		if (get_sec(elf, i, &ehdr, &shname, &shdr, &data))
			continue;

		if (shdr.sh_type == SHT_REL) {
			struct bpf_insn *insns;

			/* locate prog sec that need map fixup (relocations) */
			if (get_sec(elf, shdr.sh_info, &ehdr, &shname_prog,
				    &shdr_prog, &data_prog))
				continue;

			if (shdr_prog.sh_type != SHT_PROGBITS ||
			    !(shdr_prog.sh_flags & SHF_EXECINSTR))
				continue;

			insns = (struct bpf_insn *) data_prog->d_buf;
			processed_sec[i] = true; /* relo section */

			/*
			 * A relocation failure is reported by the callee but
			 * does not abort the load; the next section is
			 * processed either way.
			 */
			if (parse_relo_and_apply(data, symbols, &shdr, insns,
						 map_data, nr_maps))
				continue;
		}
	}

	/* load programs */
	for (i = 1; i < ehdr.e_shnum; i++) {

		if (processed_sec[i])
			continue;

		if (get_sec(elf, i, &ehdr, &shname, &shdr, &data))
			continue;

		/* strncmp() stops at the NUL of short names, memcmp() does not */
		if (strncmp(shname, "kprobe/", 7) == 0 ||
		    strncmp(shname, "kretprobe/", 10) == 0 ||
		    strncmp(shname, "tracepoint/", 11) == 0 ||
		    strncmp(shname, "raw_tracepoint/", 15) == 0 ||
		    strncmp(shname, "xdp", 3) == 0 ||
		    strncmp(shname, "perf_event", 10) == 0 ||
		    strncmp(shname, "socket", 6) == 0 ||
		    strncmp(shname, "cgroup/", 7) == 0 ||
		    strncmp(shname, "sockops", 7) == 0 ||
		    strncmp(shname, "sk_skb", 6) == 0 ||
		    strncmp(shname, "sk_msg", 6) == 0) {
			ret = load_and_attach(shname, data->d_buf,
					      data->d_size);
			if (ret != 0)
				goto done;
		}
	}

done:
	/* release the ELF descriptor before the FD it was opened on */
	if (elf)
		elf_end(elf);
	close(fd);
	return ret;
}
int load_bpf_file(char *path)
{
return do_load_bpf_file(path, NULL);
}
/*
 * Load and attach all BPF programs found in the ELF object at @path.
 * @fixup_map is invoked on each map description before the map is
 * created.
 *
 * Returns zero on success.
 */
int load_bpf_file_fixup_map(const char *path, fixup_map_cb fixup_map)
{
	int status = do_load_bpf_file(path, fixup_map);

	return status;
}
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __BPF_LOAD_H
#define __BPF_LOAD_H
#include <bpf/bpf.h>
#define MAX_MAPS 32
#define MAX_PROGS 32
/*
 * Map definition as stored in the "maps" section of the ELF object.
 *
 * The loader copies this structure byte-for-byte out of the section data,
 * so the order and size of the fields must match what the BPF program was
 * compiled with.
 */
struct bpf_load_map_def {
unsigned int type;		/* map type passed to map creation */
unsigned int key_size;		/* key size passed to map creation */
unsigned int value_size;	/* value size; not used for map-in-map types */
unsigned int max_entries;	/* max_entries passed to map creation */
unsigned int map_flags;		/* creation flags; BPF_F_NUMA_NODE enables numa_node */
unsigned int inner_map_idx;	/* map_fd[] index of the inner map, for map-in-map types */
unsigned int numa_node;		/* used only when map_flags has BPF_F_NUMA_NODE */
};
/* Loader-side description of one map found in the ELF object. */
struct bpf_map_data {
int fd;				/* map FD; -1 until created or assigned by the fixup callback */
char *name;			/* strdup()'d copy of the map's symbol name */
size_t elf_offset;		/* offset of the definition in the maps section; matched against relocations */
struct bpf_load_map_def def;	/* definition copied from the ELF file */
};
typedef void (*fixup_map_cb)(struct bpf_map_data *map, int idx);
extern int prog_fd[MAX_PROGS];
extern int event_fd[MAX_PROGS];
extern char bpf_log_buf[BPF_LOG_BUF_SIZE];
extern int prog_cnt;
/* There is a one-to-one mapping between map_fd[] and map_data[].
* The map_data[] just contains more rich info on the given map.
*/
extern int map_fd[MAX_MAPS];
extern struct bpf_map_data map_data[MAX_MAPS];
extern int map_data_count;
/* parses elf file compiled by llvm .c->.o
* . parses 'maps' section and creates maps via BPF syscall
* . parses 'license' section and passes it to syscall
* . parses elf relocations for BPF maps and adjusts BPF_LD_IMM64 insns by
* storing map_fd into insn->imm and marking such insns as BPF_PSEUDO_MAP_FD
* . loads eBPF programs via BPF syscall
*
* One ELF file can contain multiple BPF programs which will be loaded
* and their FDs stored in prog_fd array
*
* returns zero on success
*/
int load_bpf_file(char *path);
int load_bpf_file_fixup_map(const char *path, fixup_map_cb fixup_map);
int bpf_set_link_xdp_fd(int ifindex, int fd, __u32 flags);
#endif
...@@ -91,6 +91,16 @@ qdisc="" ...@@ -91,6 +91,16 @@ qdisc=""
flags="" flags=""
do_stats=0 do_stats=0
BPFFS=/sys/fs/bpf
function config_bpffs () {
if mount | grep $BPFFS > /dev/null; then
echo "bpffs already mounted"
else
echo "bpffs not mounted. Mounting..."
mount -t bpf none $BPFFS
fi
}
function start_hbm () { function start_hbm () {
rm -f hbm.out rm -f hbm.out
echo "./hbm $dir -n $id -r $rate -t $dur $flags $dbg $prog" > hbm.out echo "./hbm $dir -n $id -r $rate -t $dur $flags $dbg $prog" > hbm.out
...@@ -192,6 +202,7 @@ processArgs () { ...@@ -192,6 +202,7 @@ processArgs () {
} }
processArgs processArgs
config_bpffs
if [ $debug_flag -eq 1 ] ; then if [ $debug_flag -eq 1 ] ; then
rm -f hbm_out.log rm -f hbm_out.log
...@@ -201,7 +212,7 @@ hbm_pid=$(start_hbm) ...@@ -201,7 +212,7 @@ hbm_pid=$(start_hbm)
usleep 100000 usleep 100000
host=`hostname` host=`hostname`
cg_base_dir=/sys/fs/cgroup cg_base_dir=/sys/fs/cgroup/unified
cg_dir="$cg_base_dir/cgroup-test-work-dir/hbm$id" cg_dir="$cg_base_dir/cgroup-test-work-dir/hbm$id"
echo $$ >> $cg_dir/cgroup.procs echo $$ >> $cg_dir/cgroup.procs
...@@ -411,23 +422,8 @@ fi ...@@ -411,23 +422,8 @@ fi
sleep 1 sleep 1
# Detach any BPF programs that may have lingered # Detach any pinned BPF programs that may have lingered
ttx=`bpftool cgroup tree | grep hbm` rm -rf $BPFFS/hbm*
v=2
for x in $ttx ; do
if [ "${x:0:36}" == "/sys/fs/cgroup/cgroup-test-work-dir/" ] ; then
cg=$x ; v=0
else
if [ $v -eq 0 ] ; then
id=$x ; v=1
else
if [ $v -eq 1 ] ; then
type=$x ; bpftool cgroup detach $cg $type id $id
v=0
fi
fi
fi
done
if [ $use_netperf -ne 0 ] ; then if [ $use_netperf -ne 0 ] ; then
if [ "$server" == "" ] ; then if [ "$server" == "" ] ; then
......
...@@ -46,7 +46,6 @@ ...@@ -46,7 +46,6 @@
#include <bpf/bpf.h> #include <bpf/bpf.h>
#include <getopt.h> #include <getopt.h>
#include "bpf_load.h"
#include "bpf_rlimit.h" #include "bpf_rlimit.h"
#include "cgroup_helpers.h" #include "cgroup_helpers.h"
#include "hbm.h" #include "hbm.h"
...@@ -70,9 +69,9 @@ static void do_error(char *msg, bool errno_flag); ...@@ -70,9 +69,9 @@ static void do_error(char *msg, bool errno_flag);
#define DEBUGFS "/sys/kernel/debug/tracing/" #define DEBUGFS "/sys/kernel/debug/tracing/"
struct bpf_object *obj; static struct bpf_program *bpf_prog;
int bpfprog_fd; static struct bpf_object *obj;
int cgroup_storage_fd; static int queue_stats_fd;
static void read_trace_pipe2(void) static void read_trace_pipe2(void)
{ {
...@@ -121,56 +120,50 @@ static void do_error(char *msg, bool errno_flag) ...@@ -121,56 +120,50 @@ static void do_error(char *msg, bool errno_flag)
static int prog_load(char *prog) static int prog_load(char *prog)
{ {
struct bpf_prog_load_attr prog_load_attr = { obj = bpf_object__open_file(prog, NULL);
.prog_type = BPF_PROG_TYPE_CGROUP_SKB, if (libbpf_get_error(obj)) {
.file = prog, printf("ERROR: opening BPF object file failed\n");
.expected_attach_type = BPF_CGROUP_INET_EGRESS,
};
int map_fd;
struct bpf_map *map;
int ret = 0;
if (access(prog, O_RDONLY) < 0) {
printf("Error accessing file %s: %s\n", prog, strerror(errno));
return 1; return 1;
} }
if (bpf_prog_load_xattr(&prog_load_attr, &obj, &bpfprog_fd))
ret = 1; /* load BPF program */
if (!ret) { if (bpf_object__load(obj)) {
map = bpf_object__find_map_by_name(obj, "queue_stats"); printf("ERROR: loading BPF object file failed\n");
map_fd = bpf_map__fd(map); goto err;
if (map_fd < 0) {
printf("Map not found: %s\n", strerror(map_fd));
ret = 1;
}
} }
if (ret) { bpf_prog = bpf_object__find_program_by_title(obj, "cgroup_skb/egress");
printf("ERROR: bpf_prog_load_xattr failed for: %s\n", prog); if (!bpf_prog) {
printf(" Output from verifier:\n%s\n------\n", bpf_log_buf); printf("ERROR: finding a prog in obj file failed\n");
ret = -1; goto err;
} else { }
ret = map_fd;
queue_stats_fd = bpf_object__find_map_fd_by_name(obj, "queue_stats");
if (queue_stats_fd < 0) {
printf("ERROR: finding a map in obj file failed\n");
goto err;
} }
return ret; return 0;
err:
bpf_object__close(obj);
return 1;
} }
static int run_bpf_prog(char *prog, int cg_id) static int run_bpf_prog(char *prog, int cg_id)
{ {
int map_fd; struct hbm_queue_stats qstats = {0};
int rc = 0; char cg_dir[100], cg_pin_path[100];
struct bpf_link *link = NULL;
int key = 0; int key = 0;
int cg1 = 0; int cg1 = 0;
int type = BPF_CGROUP_INET_EGRESS; int rc = 0;
char cg_dir[100];
struct hbm_queue_stats qstats = {0};
sprintf(cg_dir, "/hbm%d", cg_id); sprintf(cg_dir, "/hbm%d", cg_id);
map_fd = prog_load(prog); rc = prog_load(prog);
if (map_fd == -1) if (rc != 0)
return 1; return rc;
if (setup_cgroup_environment()) { if (setup_cgroup_environment()) {
printf("ERROR: setting cgroup environment\n"); printf("ERROR: setting cgroup environment\n");
...@@ -190,16 +183,24 @@ static int run_bpf_prog(char *prog, int cg_id) ...@@ -190,16 +183,24 @@ static int run_bpf_prog(char *prog, int cg_id)
qstats.stats = stats_flag ? 1 : 0; qstats.stats = stats_flag ? 1 : 0;
qstats.loopback = loopback_flag ? 1 : 0; qstats.loopback = loopback_flag ? 1 : 0;
qstats.no_cn = no_cn_flag ? 1 : 0; qstats.no_cn = no_cn_flag ? 1 : 0;
if (bpf_map_update_elem(map_fd, &key, &qstats, BPF_ANY)) { if (bpf_map_update_elem(queue_stats_fd, &key, &qstats, BPF_ANY)) {
printf("ERROR: Could not update map element\n"); printf("ERROR: Could not update map element\n");
goto err; goto err;
} }
if (!outFlag) if (!outFlag)
type = BPF_CGROUP_INET_INGRESS; bpf_program__set_expected_attach_type(bpf_prog, BPF_CGROUP_INET_INGRESS);
if (bpf_prog_attach(bpfprog_fd, cg1, type, 0)) {
printf("ERROR: bpf_prog_attach fails!\n"); link = bpf_program__attach_cgroup(bpf_prog, cg1);
log_err("Attaching prog"); if (libbpf_get_error(link)) {
fprintf(stderr, "ERROR: bpf_program__attach_cgroup failed\n");
goto err;
}
sprintf(cg_pin_path, "/sys/fs/bpf/hbm%d", cg_id);
rc = bpf_link__pin(link, cg_pin_path);
if (rc < 0) {
printf("ERROR: bpf_link__pin failed: %d\n", rc);
goto err; goto err;
} }
...@@ -213,7 +214,7 @@ static int run_bpf_prog(char *prog, int cg_id) ...@@ -213,7 +214,7 @@ static int run_bpf_prog(char *prog, int cg_id)
#define DELTA_RATE_CHECK 10000 /* in us */ #define DELTA_RATE_CHECK 10000 /* in us */
#define RATE_THRESHOLD 9500000000 /* 9.5 Gbps */ #define RATE_THRESHOLD 9500000000 /* 9.5 Gbps */
bpf_map_lookup_elem(map_fd, &key, &qstats); bpf_map_lookup_elem(queue_stats_fd, &key, &qstats);
if (gettimeofday(&t0, NULL) < 0) if (gettimeofday(&t0, NULL) < 0)
do_error("gettimeofday failed", true); do_error("gettimeofday failed", true);
t_last = t0; t_last = t0;
...@@ -242,7 +243,7 @@ static int run_bpf_prog(char *prog, int cg_id) ...@@ -242,7 +243,7 @@ static int run_bpf_prog(char *prog, int cg_id)
fclose(fin); fclose(fin);
printf(" new_eth_tx_bytes:%llu\n", printf(" new_eth_tx_bytes:%llu\n",
new_eth_tx_bytes); new_eth_tx_bytes);
bpf_map_lookup_elem(map_fd, &key, &qstats); bpf_map_lookup_elem(queue_stats_fd, &key, &qstats);
new_cg_tx_bytes = qstats.bytes_total; new_cg_tx_bytes = qstats.bytes_total;
delta_bytes = new_eth_tx_bytes - last_eth_tx_bytes; delta_bytes = new_eth_tx_bytes - last_eth_tx_bytes;
last_eth_tx_bytes = new_eth_tx_bytes; last_eth_tx_bytes = new_eth_tx_bytes;
...@@ -289,14 +290,14 @@ static int run_bpf_prog(char *prog, int cg_id) ...@@ -289,14 +290,14 @@ static int run_bpf_prog(char *prog, int cg_id)
rate = minRate; rate = minRate;
qstats.rate = rate; qstats.rate = rate;
} }
if (bpf_map_update_elem(map_fd, &key, &qstats, BPF_ANY)) if (bpf_map_update_elem(queue_stats_fd, &key, &qstats, BPF_ANY))
do_error("update map element fails", false); do_error("update map element fails", false);
} }
} else { } else {
sleep(dur); sleep(dur);
} }
// Get stats! // Get stats!
if (stats_flag && bpf_map_lookup_elem(map_fd, &key, &qstats)) { if (stats_flag && bpf_map_lookup_elem(queue_stats_fd, &key, &qstats)) {
char fname[100]; char fname[100];
FILE *fout; FILE *fout;
...@@ -394,14 +395,20 @@ static int run_bpf_prog(char *prog, int cg_id) ...@@ -394,14 +395,20 @@ static int run_bpf_prog(char *prog, int cg_id)
if (debugFlag) if (debugFlag)
read_trace_pipe2(); read_trace_pipe2();
return rc; goto cleanup;
err: err:
rc = 1; rc = 1;
if (cg1) cleanup:
bpf_link__destroy(link);
bpf_object__close(obj);
if (cg1 != -1)
close(cg1); close(cg1);
cleanup_cgroup_environment();
if (rc != 0)
cleanup_cgroup_environment();
return rc; return rc;
} }
......
...@@ -69,7 +69,7 @@ struct { ...@@ -69,7 +69,7 @@ struct {
__uint(type, BPF_MAP_TYPE_ARRAY); __uint(type, BPF_MAP_TYPE_ARRAY);
__uint(max_entries, 1); __uint(max_entries, 1);
__type(key, u32); __type(key, u32);
__type(value, struct hvm_queue_stats); __type(value, struct hbm_queue_stats);
} queue_stats SEC(".maps"); } queue_stats SEC(".maps");
struct hbm_pkt_info { struct hbm_pkt_info {
......
...@@ -16,19 +16,19 @@ ...@@ -16,19 +16,19 @@
#include <bpf/bpf_helpers.h> #include <bpf/bpf_helpers.h>
struct bpf_map_def SEC("maps") read_count = { struct {
.type = BPF_MAP_TYPE_ARRAY, __uint(type, BPF_MAP_TYPE_ARRAY);
.key_size = sizeof(u32), /* class; u32 required */ __type(key, u32); /* class; u32 required */
.value_size = sizeof(u64), /* count of mads read */ __type(value, u64); /* count of mads read */
.max_entries = 256, /* Room for all Classes */ __uint(max_entries, 256); /* Room for all Classes */
}; } read_count SEC(".maps");
struct bpf_map_def SEC("maps") write_count = { struct {
.type = BPF_MAP_TYPE_ARRAY, __uint(type, BPF_MAP_TYPE_ARRAY);
.key_size = sizeof(u32), /* class; u32 required */ __type(key, u32); /* class; u32 required */
.value_size = sizeof(u64), /* count of mads written */ __type(value, u64); /* count of mads written */
.max_entries = 256, /* Room for all Classes */ __uint(max_entries, 256); /* Room for all Classes */
}; } write_count SEC(".maps");
#undef DEBUG #undef DEBUG
#ifndef DEBUG #ifndef DEBUG
......
...@@ -23,10 +23,15 @@ ...@@ -23,10 +23,15 @@
#include <getopt.h> #include <getopt.h>
#include <net/if.h> #include <net/if.h>
#include "bpf_load.h" #include <bpf/bpf.h>
#include "bpf_util.h" #include "bpf_util.h"
#include <bpf/libbpf.h> #include <bpf/libbpf.h>
static struct bpf_link *tp_links[3];
static struct bpf_object *obj;
static int map_fd[2];
static int tp_cnt;
static void dump_counts(int fd) static void dump_counts(int fd)
{ {
__u32 key; __u32 key;
...@@ -53,6 +58,11 @@ static void dump_all_counts(void) ...@@ -53,6 +58,11 @@ static void dump_all_counts(void)
static void dump_exit(int sig) static void dump_exit(int sig)
{ {
dump_all_counts(); dump_all_counts();
/* Detach tracepoints */
while (tp_cnt)
bpf_link__destroy(tp_links[--tp_cnt]);
bpf_object__close(obj);
exit(0); exit(0);
} }
...@@ -73,19 +83,11 @@ static void usage(char *cmd) ...@@ -73,19 +83,11 @@ static void usage(char *cmd)
int main(int argc, char **argv) int main(int argc, char **argv)
{ {
struct bpf_program *prog;
unsigned long delay = 5; unsigned long delay = 5;
char filename[256];
int longindex = 0; int longindex = 0;
int opt; int opt, err = -1;
char bpf_file[256];
/* Create the eBPF kernel code path name.
* This follows the pattern of all of the other bpf samples
*/
snprintf(bpf_file, sizeof(bpf_file), "%s_kern.o", argv[0]);
/* Do one final dump when exiting */
signal(SIGINT, dump_exit);
signal(SIGTERM, dump_exit);
while ((opt = getopt_long(argc, argv, "hd:rSw", while ((opt = getopt_long(argc, argv, "hd:rSw",
long_options, &longindex)) != -1) { long_options, &longindex)) != -1) {
...@@ -107,16 +109,51 @@ int main(int argc, char **argv) ...@@ -107,16 +109,51 @@ int main(int argc, char **argv)
} }
} }
if (load_bpf_file(bpf_file)) { /* Do one final dump when exiting */
fprintf(stderr, "ERROR: failed to load eBPF from file : %s\n", signal(SIGINT, dump_exit);
bpf_file); signal(SIGTERM, dump_exit);
return 1;
snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
obj = bpf_object__open_file(filename, NULL);
if (libbpf_get_error(obj)) {
fprintf(stderr, "ERROR: opening BPF object file failed\n");
return err;
}
/* load BPF program */
if (bpf_object__load(obj)) {
fprintf(stderr, "ERROR: loading BPF object file failed\n");
goto cleanup;
}
map_fd[0] = bpf_object__find_map_fd_by_name(obj, "read_count");
map_fd[1] = bpf_object__find_map_fd_by_name(obj, "write_count");
if (map_fd[0] < 0 || map_fd[1] < 0) {
fprintf(stderr, "ERROR: finding a map in obj file failed\n");
goto cleanup;
}
bpf_object__for_each_program(prog, obj) {
tp_links[tp_cnt] = bpf_program__attach(prog);
if (libbpf_get_error(tp_links[tp_cnt])) {
fprintf(stderr, "ERROR: bpf_program__attach failed\n");
tp_links[tp_cnt] = NULL;
goto cleanup;
}
tp_cnt++;
} }
while (1) { while (1) {
sleep(delay); sleep(delay);
dump_all_counts(); dump_all_counts();
} }
err = 0;
cleanup:
/* Detach tracepoints */
while (tp_cnt)
bpf_link__destroy(tp_links[--tp_cnt]);
return 0; bpf_object__close(obj);
return err;
} }
...@@ -8,6 +8,8 @@ VETH1=tst_lwt1b ...@@ -8,6 +8,8 @@ VETH1=tst_lwt1b
TRACE_ROOT=/sys/kernel/debug/tracing TRACE_ROOT=/sys/kernel/debug/tracing
function cleanup { function cleanup {
# To reset saved histogram, remove pinned map
rm /sys/fs/bpf/tc/globals/lwt_len_hist_map
ip route del 192.168.253.2/32 dev $VETH0 2> /dev/null ip route del 192.168.253.2/32 dev $VETH0 2> /dev/null
ip link del $VETH0 2> /dev/null ip link del $VETH0 2> /dev/null
ip link del $VETH1 2> /dev/null ip link del $VETH1 2> /dev/null
......
...@@ -15,12 +15,15 @@ ...@@ -15,12 +15,15 @@
#include <sys/stat.h> #include <sys/stat.h>
#include <linux/perf_event.h> #include <linux/perf_event.h>
#include <bpf/bpf.h>
#include <bpf/libbpf.h> #include <bpf/libbpf.h>
#include "bpf_load.h"
#include "bpf_util.h" #include "bpf_util.h"
#include "perf-sys.h" #include "perf-sys.h"
#include "trace_helpers.h" #include "trace_helpers.h"
static struct bpf_program *progs[2];
static struct bpf_link *links[2];
#define CHECK_PERROR_RET(condition) ({ \ #define CHECK_PERROR_RET(condition) ({ \
int __ret = !!(condition); \ int __ret = !!(condition); \
if (__ret) { \ if (__ret) { \
...@@ -86,21 +89,22 @@ static int bpf_get_retprobe_bit(const char *event_type) ...@@ -86,21 +89,22 @@ static int bpf_get_retprobe_bit(const char *event_type)
return ret; return ret;
} }
static int test_debug_fs_kprobe(int prog_fd_idx, const char *fn_name, static int test_debug_fs_kprobe(int link_idx, const char *fn_name,
__u32 expected_fd_type) __u32 expected_fd_type)
{ {
__u64 probe_offset, probe_addr; __u64 probe_offset, probe_addr;
__u32 len, prog_id, fd_type; __u32 len, prog_id, fd_type;
int err, event_fd;
char buf[256]; char buf[256];
int err;
len = sizeof(buf); len = sizeof(buf);
err = bpf_task_fd_query(getpid(), event_fd[prog_fd_idx], 0, buf, &len, event_fd = bpf_link__fd(links[link_idx]);
err = bpf_task_fd_query(getpid(), event_fd, 0, buf, &len,
&prog_id, &fd_type, &probe_offset, &prog_id, &fd_type, &probe_offset,
&probe_addr); &probe_addr);
if (err < 0) { if (err < 0) {
printf("FAIL: %s, for event_fd idx %d, fn_name %s\n", printf("FAIL: %s, for event_fd idx %d, fn_name %s\n",
__func__, prog_fd_idx, fn_name); __func__, link_idx, fn_name);
perror(" :"); perror(" :");
return -1; return -1;
} }
...@@ -108,7 +112,7 @@ static int test_debug_fs_kprobe(int prog_fd_idx, const char *fn_name, ...@@ -108,7 +112,7 @@ static int test_debug_fs_kprobe(int prog_fd_idx, const char *fn_name,
fd_type != expected_fd_type || fd_type != expected_fd_type ||
probe_offset != 0x0 || probe_addr != 0x0) { probe_offset != 0x0 || probe_addr != 0x0) {
printf("FAIL: bpf_trace_event_query(event_fd[%d]):\n", printf("FAIL: bpf_trace_event_query(event_fd[%d]):\n",
prog_fd_idx); link_idx);
printf("buf: %s, fd_type: %u, probe_offset: 0x%llx," printf("buf: %s, fd_type: %u, probe_offset: 0x%llx,"
" probe_addr: 0x%llx\n", " probe_addr: 0x%llx\n",
buf, fd_type, probe_offset, probe_addr); buf, fd_type, probe_offset, probe_addr);
...@@ -125,12 +129,13 @@ static int test_nondebug_fs_kuprobe_common(const char *event_type, ...@@ -125,12 +129,13 @@ static int test_nondebug_fs_kuprobe_common(const char *event_type,
int is_return_bit = bpf_get_retprobe_bit(event_type); int is_return_bit = bpf_get_retprobe_bit(event_type);
int type = bpf_find_probe_type(event_type); int type = bpf_find_probe_type(event_type);
struct perf_event_attr attr = {}; struct perf_event_attr attr = {};
int fd; struct bpf_link *link;
int fd, err = -1;
if (type < 0 || is_return_bit < 0) { if (type < 0 || is_return_bit < 0) {
printf("FAIL: %s incorrect type (%d) or is_return_bit (%d)\n", printf("FAIL: %s incorrect type (%d) or is_return_bit (%d)\n",
__func__, type, is_return_bit); __func__, type, is_return_bit);
return -1; return err;
} }
attr.sample_period = 1; attr.sample_period = 1;
...@@ -149,14 +154,21 @@ static int test_nondebug_fs_kuprobe_common(const char *event_type, ...@@ -149,14 +154,21 @@ static int test_nondebug_fs_kuprobe_common(const char *event_type,
attr.type = type; attr.type = type;
fd = sys_perf_event_open(&attr, -1, 0, -1, 0); fd = sys_perf_event_open(&attr, -1, 0, -1, 0);
CHECK_PERROR_RET(fd < 0); link = bpf_program__attach_perf_event(progs[0], fd);
if (libbpf_get_error(link)) {
printf("ERROR: bpf_program__attach_perf_event failed\n");
link = NULL;
close(fd);
goto cleanup;
}
CHECK_PERROR_RET(ioctl(fd, PERF_EVENT_IOC_ENABLE, 0) < 0);
CHECK_PERROR_RET(ioctl(fd, PERF_EVENT_IOC_SET_BPF, prog_fd[0]) < 0);
CHECK_PERROR_RET(bpf_task_fd_query(getpid(), fd, 0, buf, buf_len, CHECK_PERROR_RET(bpf_task_fd_query(getpid(), fd, 0, buf, buf_len,
prog_id, fd_type, probe_offset, probe_addr) < 0); prog_id, fd_type, probe_offset, probe_addr) < 0);
err = 0;
return 0; cleanup:
bpf_link__destroy(link);
return err;
} }
static int test_nondebug_fs_probe(const char *event_type, const char *name, static int test_nondebug_fs_probe(const char *event_type, const char *name,
...@@ -215,17 +227,18 @@ static int test_nondebug_fs_probe(const char *event_type, const char *name, ...@@ -215,17 +227,18 @@ static int test_nondebug_fs_probe(const char *event_type, const char *name,
static int test_debug_fs_uprobe(char *binary_path, long offset, bool is_return) static int test_debug_fs_uprobe(char *binary_path, long offset, bool is_return)
{ {
char buf[256], event_alias[sizeof("test_1234567890")];
const char *event_type = "uprobe"; const char *event_type = "uprobe";
struct perf_event_attr attr = {}; struct perf_event_attr attr = {};
char buf[256], event_alias[sizeof("test_1234567890")];
__u64 probe_offset, probe_addr; __u64 probe_offset, probe_addr;
__u32 len, prog_id, fd_type; __u32 len, prog_id, fd_type;
int err, res, kfd, efd; int err = -1, res, kfd, efd;
struct bpf_link *link;
ssize_t bytes; ssize_t bytes;
snprintf(buf, sizeof(buf), "/sys/kernel/debug/tracing/%s_events", snprintf(buf, sizeof(buf), "/sys/kernel/debug/tracing/%s_events",
event_type); event_type);
kfd = open(buf, O_WRONLY | O_APPEND, 0); kfd = open(buf, O_WRONLY | O_TRUNC, 0);
CHECK_PERROR_RET(kfd < 0); CHECK_PERROR_RET(kfd < 0);
res = snprintf(event_alias, sizeof(event_alias), "test_%d", getpid()); res = snprintf(event_alias, sizeof(event_alias), "test_%d", getpid());
...@@ -254,10 +267,15 @@ static int test_debug_fs_uprobe(char *binary_path, long offset, bool is_return) ...@@ -254,10 +267,15 @@ static int test_debug_fs_uprobe(char *binary_path, long offset, bool is_return)
attr.type = PERF_TYPE_TRACEPOINT; attr.type = PERF_TYPE_TRACEPOINT;
attr.sample_period = 1; attr.sample_period = 1;
attr.wakeup_events = 1; attr.wakeup_events = 1;
kfd = sys_perf_event_open(&attr, -1, 0, -1, PERF_FLAG_FD_CLOEXEC); kfd = sys_perf_event_open(&attr, -1, 0, -1, PERF_FLAG_FD_CLOEXEC);
CHECK_PERROR_RET(kfd < 0); link = bpf_program__attach_perf_event(progs[0], kfd);
CHECK_PERROR_RET(ioctl(kfd, PERF_EVENT_IOC_SET_BPF, prog_fd[0]) < 0); if (libbpf_get_error(link)) {
CHECK_PERROR_RET(ioctl(kfd, PERF_EVENT_IOC_ENABLE, 0) < 0); printf("ERROR: bpf_program__attach_perf_event failed\n");
link = NULL;
close(kfd);
goto cleanup;
}
len = sizeof(buf); len = sizeof(buf);
err = bpf_task_fd_query(getpid(), kfd, 0, buf, &len, err = bpf_task_fd_query(getpid(), kfd, 0, buf, &len,
...@@ -283,9 +301,11 @@ static int test_debug_fs_uprobe(char *binary_path, long offset, bool is_return) ...@@ -283,9 +301,11 @@ static int test_debug_fs_uprobe(char *binary_path, long offset, bool is_return)
probe_offset); probe_offset);
return -1; return -1;
} }
err = 0;
close(kfd); cleanup:
return 0; bpf_link__destroy(link);
return err;
} }
int main(int argc, char **argv) int main(int argc, char **argv)
...@@ -294,21 +314,42 @@ int main(int argc, char **argv) ...@@ -294,21 +314,42 @@ int main(int argc, char **argv)
extern char __executable_start; extern char __executable_start;
char filename[256], buf[256]; char filename[256], buf[256];
__u64 uprobe_file_offset; __u64 uprobe_file_offset;
struct bpf_program *prog;
struct bpf_object *obj;
int i = 0, err = -1;
snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
if (setrlimit(RLIMIT_MEMLOCK, &r)) { if (setrlimit(RLIMIT_MEMLOCK, &r)) {
perror("setrlimit(RLIMIT_MEMLOCK)"); perror("setrlimit(RLIMIT_MEMLOCK)");
return 1; return err;
} }
if (load_kallsyms()) { if (load_kallsyms()) {
printf("failed to process /proc/kallsyms\n"); printf("failed to process /proc/kallsyms\n");
return 1; return err;
}
snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
obj = bpf_object__open_file(filename, NULL);
if (libbpf_get_error(obj)) {
fprintf(stderr, "ERROR: opening BPF object file failed\n");
return err;
} }
if (load_bpf_file(filename)) { /* load BPF program */
printf("%s", bpf_log_buf); if (bpf_object__load(obj)) {
return 1; fprintf(stderr, "ERROR: loading BPF object file failed\n");
goto cleanup;
}
bpf_object__for_each_program(prog, obj) {
progs[i] = prog;
links[i] = bpf_program__attach(progs[i]);
if (libbpf_get_error(links[i])) {
fprintf(stderr, "ERROR: bpf_program__attach failed\n");
links[i] = NULL;
goto cleanup;
}
i++;
} }
/* test two functions in the corresponding *_kern.c file */ /* test two functions in the corresponding *_kern.c file */
...@@ -378,6 +419,12 @@ int main(int argc, char **argv) ...@@ -378,6 +419,12 @@ int main(int argc, char **argv)
false)); false));
CHECK_AND_RET(test_debug_fs_uprobe((char *)argv[0], uprobe_file_offset, CHECK_AND_RET(test_debug_fs_uprobe((char *)argv[0], uprobe_file_offset,
true)); true));
err = 0;
return 0; cleanup:
for (i--; i >= 0; i--)
bpf_link__destroy(links[i]);
bpf_object__close(obj);
return err;
} }
...@@ -20,9 +20,9 @@ ...@@ -20,9 +20,9 @@
#include <net/if.h> #include <net/if.h>
#include <linux/bpf.h> #include <linux/bpf.h>
#include <bpf/bpf.h> #include <bpf/bpf.h>
#include <bpf/libbpf.h>
#include "bpf_insn.h" #include "bpf_insn.h"
#include "bpf_load.h"
static int usage(const char *argv0) static int usage(const char *argv0)
{ {
...@@ -32,37 +32,64 @@ static int usage(const char *argv0) ...@@ -32,37 +32,64 @@ static int usage(const char *argv0)
int main(int argc, char **argv) int main(int argc, char **argv)
{ {
int cg_fd, ret, filter_id = 0; int cg_fd, err, ret = EXIT_FAILURE, filter_id = 0, prog_cnt = 0;
const char *link_pin_path = "/sys/fs/bpf/test_cgrp2_sock2";
struct bpf_link *link = NULL;
struct bpf_program *progs[2];
struct bpf_program *prog;
struct bpf_object *obj;
if (argc < 3) if (argc < 3)
return usage(argv[0]); return usage(argv[0]);
if (argc > 3)
filter_id = atoi(argv[3]);
cg_fd = open(argv[1], O_DIRECTORY | O_RDONLY); cg_fd = open(argv[1], O_DIRECTORY | O_RDONLY);
if (cg_fd < 0) { if (cg_fd < 0) {
printf("Failed to open cgroup path: '%s'\n", strerror(errno)); printf("Failed to open cgroup path: '%s'\n", strerror(errno));
return EXIT_FAILURE; return ret;
} }
if (load_bpf_file(argv[2])) obj = bpf_object__open_file(argv[2], NULL);
return EXIT_FAILURE; if (libbpf_get_error(obj)) {
printf("ERROR: opening BPF object file failed\n");
printf("Output from kernel verifier:\n%s\n-------\n", bpf_log_buf); return ret;
}
if (argc > 3) bpf_object__for_each_program(prog, obj) {
filter_id = atoi(argv[3]); progs[prog_cnt] = prog;
prog_cnt++;
}
if (filter_id >= prog_cnt) { if (filter_id >= prog_cnt) {
printf("Invalid program id; program not found in file\n"); printf("Invalid program id; program not found in file\n");
return EXIT_FAILURE; goto cleanup;
}
/* load BPF program */
if (bpf_object__load(obj)) {
printf("ERROR: loading BPF object file failed\n");
goto cleanup;
} }
ret = bpf_prog_attach(prog_fd[filter_id], cg_fd, link = bpf_program__attach_cgroup(progs[filter_id], cg_fd);
BPF_CGROUP_INET_SOCK_CREATE, 0); if (libbpf_get_error(link)) {
if (ret < 0) { printf("ERROR: bpf_program__attach failed\n");
printf("Failed to attach prog to cgroup: '%s'\n", link = NULL;
strerror(errno)); goto cleanup;
return EXIT_FAILURE;
} }
return EXIT_SUCCESS; err = bpf_link__pin(link, link_pin_path);
if (err < 0) {
printf("ERROR: bpf_link__pin failed: %d\n", err);
goto cleanup;
}
ret = EXIT_SUCCESS;
cleanup:
bpf_link__destroy(link);
bpf_object__close(obj);
return ret;
} }
#!/bin/bash #!/bin/bash
# SPDX-License-Identifier: GPL-2.0 # SPDX-License-Identifier: GPL-2.0
BPFFS=/sys/fs/bpf
LINK_PIN=$BPFFS/test_cgrp2_sock2
function config_device { function config_device {
ip netns add at_ns0 ip netns add at_ns0
ip link add veth0 type veth peer name veth0b ip link add veth0 type veth peer name veth0b
...@@ -21,16 +24,22 @@ function config_cgroup { ...@@ -21,16 +24,22 @@ function config_cgroup {
echo $$ >> /tmp/cgroupv2/foo/cgroup.procs echo $$ >> /tmp/cgroupv2/foo/cgroup.procs
} }
function config_bpffs {
if mount | grep $BPFFS > /dev/null; then
echo "bpffs already mounted"
else
echo "bpffs not mounted. Mounting..."
mount -t bpf none $BPFFS
fi
}
function attach_bpf { function attach_bpf {
test_cgrp2_sock2 /tmp/cgroupv2/foo sock_flags_kern.o $1 ./test_cgrp2_sock2 /tmp/cgroupv2/foo sock_flags_kern.o $1
[ $? -ne 0 ] && exit 1 [ $? -ne 0 ] && exit 1
} }
function cleanup { function cleanup {
if [ -d /tmp/cgroupv2/foo ]; then rm -rf $LINK_PIN
test_cgrp2_sock -d /tmp/cgroupv2/foo
fi
ip link del veth0b ip link del veth0b
ip netns delete at_ns0 ip netns delete at_ns0
umount /tmp/cgroupv2 umount /tmp/cgroupv2
...@@ -42,6 +51,7 @@ cleanup 2>/dev/null ...@@ -42,6 +51,7 @@ cleanup 2>/dev/null
set -e set -e
config_device config_device
config_cgroup config_cgroup
config_bpffs
set +e set +e
# #
...@@ -62,6 +72,9 @@ if [ $? -eq 0 ]; then ...@@ -62,6 +72,9 @@ if [ $? -eq 0 ]; then
exit 1 exit 1
fi fi
rm -rf $LINK_PIN
sleep 1 # Wait for link detach
# #
# Test 2 - fail ping # Test 2 - fail ping
# #
......
File mode changed from 100644 to 100755
...@@ -18,10 +18,14 @@ ...@@ -18,10 +18,14 @@
#include <time.h> #include <time.h>
#include <sys/resource.h> #include <sys/resource.h>
#include <bpf/bpf.h> #include <bpf/bpf.h>
#include "bpf_load.h" #include <bpf/libbpf.h>
#define MAX_CNT 1000000 #define MAX_CNT 1000000
static struct bpf_link *links[2];
static struct bpf_object *obj;
static int cnt;
static __u64 time_get_ns(void) static __u64 time_get_ns(void)
{ {
struct timespec ts; struct timespec ts;
...@@ -115,20 +119,54 @@ static void run_perf_test(int tasks, int flags) ...@@ -115,20 +119,54 @@ static void run_perf_test(int tasks, int flags)
} }
} }
static int load_progs(char *filename)
{
struct bpf_program *prog;
int err = 0;
obj = bpf_object__open_file(filename, NULL);
err = libbpf_get_error(obj);
if (err < 0) {
fprintf(stderr, "ERROR: opening BPF object file failed\n");
return err;
}
/* load BPF program */
err = bpf_object__load(obj);
if (err < 0) {
fprintf(stderr, "ERROR: loading BPF object file failed\n");
return err;
}
bpf_object__for_each_program(prog, obj) {
links[cnt] = bpf_program__attach(prog);
err = libbpf_get_error(links[cnt]);
if (err < 0) {
fprintf(stderr, "ERROR: bpf_program__attach failed\n");
links[cnt] = NULL;
return err;
}
cnt++;
}
return err;
}
static void unload_progs(void) static void unload_progs(void)
{ {
close(prog_fd[0]); while (cnt)
close(prog_fd[1]); bpf_link__destroy(links[--cnt]);
close(event_fd[0]);
close(event_fd[1]); bpf_object__close(obj);
} }
int main(int argc, char **argv) int main(int argc, char **argv)
{ {
struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY}; struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
char filename[256]; int num_cpu = sysconf(_SC_NPROCESSORS_ONLN);
int num_cpu = 8;
int test_flags = ~0; int test_flags = ~0;
char filename[256];
int err = 0;
setrlimit(RLIMIT_MEMLOCK, &r); setrlimit(RLIMIT_MEMLOCK, &r);
...@@ -145,38 +183,36 @@ int main(int argc, char **argv) ...@@ -145,38 +183,36 @@ int main(int argc, char **argv)
if (test_flags & 0xC) { if (test_flags & 0xC) {
snprintf(filename, sizeof(filename), snprintf(filename, sizeof(filename),
"%s_kprobe_kern.o", argv[0]); "%s_kprobe_kern.o", argv[0]);
if (load_bpf_file(filename)) {
printf("%s", bpf_log_buf);
return 1;
}
printf("w/KPROBE\n"); printf("w/KPROBE\n");
run_perf_test(num_cpu, test_flags >> 2); err = load_progs(filename);
if (!err)
run_perf_test(num_cpu, test_flags >> 2);
unload_progs(); unload_progs();
} }
if (test_flags & 0x30) { if (test_flags & 0x30) {
snprintf(filename, sizeof(filename), snprintf(filename, sizeof(filename),
"%s_tp_kern.o", argv[0]); "%s_tp_kern.o", argv[0]);
if (load_bpf_file(filename)) {
printf("%s", bpf_log_buf);
return 1;
}
printf("w/TRACEPOINT\n"); printf("w/TRACEPOINT\n");
run_perf_test(num_cpu, test_flags >> 4); err = load_progs(filename);
if (!err)
run_perf_test(num_cpu, test_flags >> 4);
unload_progs(); unload_progs();
} }
if (test_flags & 0xC0) { if (test_flags & 0xC0) {
snprintf(filename, sizeof(filename), snprintf(filename, sizeof(filename),
"%s_raw_tp_kern.o", argv[0]); "%s_raw_tp_kern.o", argv[0]);
if (load_bpf_file(filename)) {
printf("%s", bpf_log_buf);
return 1;
}
printf("w/RAW_TRACEPOINT\n"); printf("w/RAW_TRACEPOINT\n");
run_perf_test(num_cpu, test_flags >> 6); err = load_progs(filename);
if (!err)
run_perf_test(num_cpu, test_flags >> 6);
unload_progs(); unload_progs();
} }
return 0; return err;
} }
...@@ -6,7 +6,7 @@ ...@@ -6,7 +6,7 @@
* This uses the XDP data_meta infrastructure, and is a cooperation * This uses the XDP data_meta infrastructure, and is a cooperation
* between two bpf-programs (1) XDP and (2) clsact at TC-ingress hook. * between two bpf-programs (1) XDP and (2) clsact at TC-ingress hook.
* *
* Notice: This example does not use the BPF C-loader (bpf_load.c), * Notice: This example does not use the BPF C-loader,
* but instead rely on the iproute2 TC tool for loading BPF-objects. * but instead rely on the iproute2 TC tool for loading BPF-objects.
*/ */
#include <uapi/linux/bpf.h> #include <uapi/linux/bpf.h>
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment