Commit 73376328 authored by Alexei Starovoitov

Merge branch 'bpf-token-support-in-libbpf-s-bpf-object'

Andrii Nakryiko says:

====================
BPF token support in libbpf's BPF object

Add fuller support for BPF token in high-level BPF object APIs. This is the
most frequently used way to work with BPF using libbpf, so supporting BPF
token there is critical.

Patch #1 improves kernel-side BPF_TOKEN_CREATE behavior by refusing to
create "empty" BPF token with no delegation. This seems like saner behavior
which also makes libbpf's caching better overall. If we ever want to create
BPF token with no delegate_xxx options set on BPF FS, we can use a new flag to
enable that.

Patches #2-#5 refactor libbpf internals, mostly feature detection code, to
prepare it for BPF token FD.

Patch #6 adds options to pass BPF token into BPF object open options. It also
adds implicit BPF token creation logic to BPF object load step, even without
any explicit involvement of the user. If the environment is set up properly,
BPF token will be created transparently and used implicitly. This allows for
all existing applications to gain BPF token support by just linking with
latest version of libbpf library. No source code modifications are required.
All that is under the assumption that a privileged container management agent
has properly set up the default BPF FS instance at /sys/fs/bpf to allow BPF
token creation.

Patches #7-#8 add more selftests, validating BPF object APIs work as expected
under unprivileged user namespaced conditions in the presence of BPF token.

Patch #9 extends libbpf with LIBBPF_BPF_TOKEN_PATH envvar knowledge, which can
be used to override the default BPF FS location used for implicit BPF token
creation logic without needing to adjust application code. This allows admins
or container managers to mount BPF token-enabled BPF FS at non-standard
location without the need to coordinate with applications.
LIBBPF_BPF_TOKEN_PATH can also be used to disable BPF token implicit creation
by setting it to an empty value. Patch #10 tests this new envvar functionality.

v2->v3:
  - move some stray feature cache refactorings into patch #4 (Alexei);
  - add LIBBPF_BPF_TOKEN_PATH envvar support (Alexei);
v1->v2:
  - remove minor code redundancies (Eduard, John);
  - add acks and rebase.
====================

Link: https://lore.kernel.org/r/20231213190842.3844987-1-andrii@kernel.org
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
parents f04f2ce6 322122bf
...@@ -152,6 +152,15 @@ int bpf_token_create(union bpf_attr *attr) ...@@ -152,6 +152,15 @@ int bpf_token_create(union bpf_attr *attr)
goto out_path; goto out_path;
} }
mnt_opts = path.dentry->d_sb->s_fs_info;
if (mnt_opts->delegate_cmds == 0 &&
mnt_opts->delegate_maps == 0 &&
mnt_opts->delegate_progs == 0 &&
mnt_opts->delegate_attachs == 0) {
err = -ENOENT; /* no BPF token delegation is set up */
goto out_path;
}
mode = S_IFREG | ((S_IRUSR | S_IWUSR) & ~current_umask()); mode = S_IFREG | ((S_IRUSR | S_IWUSR) & ~current_umask());
inode = bpf_get_inode(path.mnt->mnt_sb, NULL, mode); inode = bpf_get_inode(path.mnt->mnt_sb, NULL, mode);
if (IS_ERR(inode)) { if (IS_ERR(inode)) {
...@@ -181,7 +190,6 @@ int bpf_token_create(union bpf_attr *attr) ...@@ -181,7 +190,6 @@ int bpf_token_create(union bpf_attr *attr)
/* remember bpffs owning userns for future ns_capable() checks */ /* remember bpffs owning userns for future ns_capable() checks */
token->userns = get_user_ns(userns); token->userns = get_user_ns(userns);
mnt_opts = path.dentry->d_sb->s_fs_info;
token->allowed_cmds = mnt_opts->delegate_cmds; token->allowed_cmds = mnt_opts->delegate_cmds;
token->allowed_maps = mnt_opts->delegate_maps; token->allowed_maps = mnt_opts->delegate_maps;
token->allowed_progs = mnt_opts->delegate_progs; token->allowed_progs = mnt_opts->delegate_progs;
......
libbpf-y := libbpf.o bpf.o nlattr.o btf.o libbpf_errno.o str_error.o \ libbpf-y := libbpf.o bpf.o nlattr.o btf.o libbpf_errno.o str_error.o \
netlink.o bpf_prog_linfo.o libbpf_probes.o hashmap.o \ netlink.o bpf_prog_linfo.o libbpf_probes.o hashmap.o \
btf_dump.o ringbuf.o strset.o linker.o gen_loader.o relo_core.o \ btf_dump.o ringbuf.o strset.o linker.o gen_loader.o relo_core.o \
usdt.o zip.o elf.o usdt.o zip.o elf.o features.o
...@@ -103,7 +103,7 @@ int sys_bpf_prog_load(union bpf_attr *attr, unsigned int size, int attempts) ...@@ -103,7 +103,7 @@ int sys_bpf_prog_load(union bpf_attr *attr, unsigned int size, int attempts)
* [0] https://lore.kernel.org/bpf/20201201215900.3569844-1-guro@fb.com/ * [0] https://lore.kernel.org/bpf/20201201215900.3569844-1-guro@fb.com/
* [1] d05512618056 ("bpf: Add bpf_ktime_get_coarse_ns helper") * [1] d05512618056 ("bpf: Add bpf_ktime_get_coarse_ns helper")
*/ */
int probe_memcg_account(void) int probe_memcg_account(int token_fd)
{ {
const size_t attr_sz = offsetofend(union bpf_attr, attach_btf_obj_fd); const size_t attr_sz = offsetofend(union bpf_attr, attach_btf_obj_fd);
struct bpf_insn insns[] = { struct bpf_insn insns[] = {
...@@ -120,6 +120,7 @@ int probe_memcg_account(void) ...@@ -120,6 +120,7 @@ int probe_memcg_account(void)
attr.insns = ptr_to_u64(insns); attr.insns = ptr_to_u64(insns);
attr.insn_cnt = insn_cnt; attr.insn_cnt = insn_cnt;
attr.license = ptr_to_u64("GPL"); attr.license = ptr_to_u64("GPL");
attr.prog_token_fd = token_fd;
prog_fd = sys_bpf_fd(BPF_PROG_LOAD, &attr, attr_sz); prog_fd = sys_bpf_fd(BPF_PROG_LOAD, &attr, attr_sz);
if (prog_fd >= 0) { if (prog_fd >= 0) {
...@@ -146,7 +147,7 @@ int bump_rlimit_memlock(void) ...@@ -146,7 +147,7 @@ int bump_rlimit_memlock(void)
struct rlimit rlim; struct rlimit rlim;
/* if kernel supports memcg-based accounting, skip bumping RLIMIT_MEMLOCK */ /* if kernel supports memcg-based accounting, skip bumping RLIMIT_MEMLOCK */
if (memlock_bumped || kernel_supports(NULL, FEAT_MEMCG_ACCOUNT)) if (memlock_bumped || feat_supported(NULL, FEAT_MEMCG_ACCOUNT))
return 0; return 0;
memlock_bumped = true; memlock_bumped = true;
...@@ -181,7 +182,7 @@ int bpf_map_create(enum bpf_map_type map_type, ...@@ -181,7 +182,7 @@ int bpf_map_create(enum bpf_map_type map_type,
return libbpf_err(-EINVAL); return libbpf_err(-EINVAL);
attr.map_type = map_type; attr.map_type = map_type;
if (map_name && kernel_supports(NULL, FEAT_PROG_NAME)) if (map_name && feat_supported(NULL, FEAT_PROG_NAME))
libbpf_strlcpy(attr.map_name, map_name, sizeof(attr.map_name)); libbpf_strlcpy(attr.map_name, map_name, sizeof(attr.map_name));
attr.key_size = key_size; attr.key_size = key_size;
attr.value_size = value_size; attr.value_size = value_size;
...@@ -265,7 +266,7 @@ int bpf_prog_load(enum bpf_prog_type prog_type, ...@@ -265,7 +266,7 @@ int bpf_prog_load(enum bpf_prog_type prog_type,
attr.kern_version = OPTS_GET(opts, kern_version, 0); attr.kern_version = OPTS_GET(opts, kern_version, 0);
attr.prog_token_fd = OPTS_GET(opts, token_fd, 0); attr.prog_token_fd = OPTS_GET(opts, token_fd, 0);
if (prog_name && kernel_supports(NULL, FEAT_PROG_NAME)) if (prog_name && feat_supported(NULL, FEAT_PROG_NAME))
libbpf_strlcpy(attr.prog_name, prog_name, sizeof(attr.prog_name)); libbpf_strlcpy(attr.prog_name, prog_name, sizeof(attr.prog_name));
attr.license = ptr_to_u64(license); attr.license = ptr_to_u64(license);
......
...@@ -1317,7 +1317,9 @@ struct btf *btf__parse_split(const char *path, struct btf *base_btf) ...@@ -1317,7 +1317,9 @@ struct btf *btf__parse_split(const char *path, struct btf *base_btf)
static void *btf_get_raw_data(const struct btf *btf, __u32 *size, bool swap_endian); static void *btf_get_raw_data(const struct btf *btf, __u32 *size, bool swap_endian);
int btf_load_into_kernel(struct btf *btf, char *log_buf, size_t log_sz, __u32 log_level) int btf_load_into_kernel(struct btf *btf,
char *log_buf, size_t log_sz, __u32 log_level,
int token_fd)
{ {
LIBBPF_OPTS(bpf_btf_load_opts, opts); LIBBPF_OPTS(bpf_btf_load_opts, opts);
__u32 buf_sz = 0, raw_size; __u32 buf_sz = 0, raw_size;
...@@ -1367,6 +1369,7 @@ int btf_load_into_kernel(struct btf *btf, char *log_buf, size_t log_sz, __u32 lo ...@@ -1367,6 +1369,7 @@ int btf_load_into_kernel(struct btf *btf, char *log_buf, size_t log_sz, __u32 lo
opts.log_level = log_level; opts.log_level = log_level;
} }
opts.token_fd = token_fd;
btf->fd = bpf_btf_load(raw_data, raw_size, &opts); btf->fd = bpf_btf_load(raw_data, raw_size, &opts);
if (btf->fd < 0) { if (btf->fd < 0) {
/* time to turn on verbose mode and try again */ /* time to turn on verbose mode and try again */
...@@ -1394,7 +1397,7 @@ int btf_load_into_kernel(struct btf *btf, char *log_buf, size_t log_sz, __u32 lo ...@@ -1394,7 +1397,7 @@ int btf_load_into_kernel(struct btf *btf, char *log_buf, size_t log_sz, __u32 lo
int btf__load_into_kernel(struct btf *btf) int btf__load_into_kernel(struct btf *btf)
{ {
return btf_load_into_kernel(btf, NULL, 0, 0); return btf_load_into_kernel(btf, NULL, 0, 0, 0);
} }
int btf__fd(const struct btf *btf) int btf__fd(const struct btf *btf)
......
...@@ -11,8 +11,6 @@ ...@@ -11,8 +11,6 @@
#include "libbpf_internal.h" #include "libbpf_internal.h"
#include "str_error.h" #include "str_error.h"
#define STRERR_BUFSIZE 128
/* A SHT_GNU_versym section holds 16-bit words. This bit is set if /* A SHT_GNU_versym section holds 16-bit words. This bit is set if
* the symbol is hidden and can only be seen when referenced using an * the symbol is hidden and can only be seen when referenced using an
* explicit version number. This is a GNU extension. * explicit version number. This is a GNU extension.
......
This diff is collapsed.
This diff is collapsed.
...@@ -177,10 +177,45 @@ struct bpf_object_open_opts { ...@@ -177,10 +177,45 @@ struct bpf_object_open_opts {
* logs through its print callback. * logs through its print callback.
*/ */
__u32 kernel_log_level; __u32 kernel_log_level;
/* FD of a BPF token instantiated by user through bpf_token_create()
* API. BPF object will keep dup()'ed FD internally, so passed token
* FD can be closed after BPF object/skeleton open step.
*
* Setting bpf_token_fd to negative value disables libbpf's automatic
* attempt to create BPF token from default BPF FS mount point
* (/sys/fs/bpf), in case this default behavior is undesirable.
*
* If bpf_token_path and bpf_token_fd are not specified, libbpf will
* consult LIBBPF_BPF_TOKEN_PATH environment variable. If set, it will
* be taken as a value of bpf_token_path option and will force libbpf
* to either create BPF token from provided custom BPF FS path, or
* will disable implicit BPF token creation, if envvar value is an
* empty string.
*
* bpf_token_path and bpf_token_fd are mutually exclusive and only one
* of those options should be set. Either of them overrides
* LIBBPF_BPF_TOKEN_PATH envvar.
*/
int bpf_token_fd;
/* Path to BPF FS mount point to derive BPF token from.
*
* Created BPF token will be used for all bpf() syscall operations
* that accept BPF token (e.g., map creation, BTF and program loads,
* etc) automatically within instantiated BPF object.
*
* Setting bpf_token_path option to empty string disables libbpf's
* automatic attempt to create BPF token from default BPF FS mount
* point (/sys/fs/bpf), in case this default behavior is undesirable.
*
* bpf_token_path and bpf_token_fd are mutually exclusive and only one
* of those options should be set. Either of them overrides
* LIBBPF_BPF_TOKEN_PATH envvar.
*/
const char *bpf_token_path;
size_t :0; size_t :0;
}; };
#define bpf_object_open_opts__last_field kernel_log_level #define bpf_object_open_opts__last_field bpf_token_path
/** /**
* @brief **bpf_object__open()** creates a bpf_object by opening * @brief **bpf_object__open()** creates a bpf_object by opening
......
...@@ -360,15 +360,32 @@ enum kern_feature_id { ...@@ -360,15 +360,32 @@ enum kern_feature_id {
__FEAT_CNT, __FEAT_CNT,
}; };
int probe_memcg_account(void); enum kern_feature_result {
FEAT_UNKNOWN = 0,
FEAT_SUPPORTED = 1,
FEAT_MISSING = 2,
};
/* Cache of kernel feature detection results, bundled with a BPF token FD.
 * NOTE(review): token_fd is presumably the token FD used while probing the
 * features recorded in res[]; the probing code is not visible here, confirm.
 */
struct kern_feature_cache {
/* One result slot per kern_feature_id value. FEAT_UNKNOWN is 0, so a
 * zero-initialized cache starts with every feature marked unknown.
 */
enum kern_feature_result res[__FEAT_CNT];
int token_fd;
};
bool feat_supported(struct kern_feature_cache *cache, enum kern_feature_id feat_id);
bool kernel_supports(const struct bpf_object *obj, enum kern_feature_id feat_id); bool kernel_supports(const struct bpf_object *obj, enum kern_feature_id feat_id);
int probe_kern_syscall_wrapper(int token_fd);
int probe_memcg_account(int token_fd);
int bump_rlimit_memlock(void); int bump_rlimit_memlock(void);
int parse_cpu_mask_str(const char *s, bool **mask, int *mask_sz); int parse_cpu_mask_str(const char *s, bool **mask, int *mask_sz);
int parse_cpu_mask_file(const char *fcpu, bool **mask, int *mask_sz); int parse_cpu_mask_file(const char *fcpu, bool **mask, int *mask_sz);
int libbpf__load_raw_btf(const char *raw_types, size_t types_len, int libbpf__load_raw_btf(const char *raw_types, size_t types_len,
const char *str_sec, size_t str_len); const char *str_sec, size_t str_len,
int btf_load_into_kernel(struct btf *btf, char *log_buf, size_t log_sz, __u32 log_level); int token_fd);
int btf_load_into_kernel(struct btf *btf,
char *log_buf, size_t log_sz, __u32 log_level,
int token_fd);
struct btf *btf_get_from_fd(int btf_fd, struct btf *base_btf); struct btf *btf_get_from_fd(int btf_fd, struct btf *base_btf);
void btf_get_kernel_prefix_kind(enum bpf_attach_type attach_type, void btf_get_kernel_prefix_kind(enum bpf_attach_type attach_type,
...@@ -532,6 +549,17 @@ static inline bool is_ldimm64_insn(struct bpf_insn *insn) ...@@ -532,6 +549,17 @@ static inline bool is_ldimm64_insn(struct bpf_insn *insn)
return insn->code == (BPF_LD | BPF_IMM | BPF_DW); return insn->code == (BPF_LD | BPF_IMM | BPF_DW);
} }
/* Duplicate a valid FD onto the lowest free descriptor number that is >= 3,
 * so the copy never lands in the stdin/stdout/stderr range; the copy has
 * close-on-exec set. The FD passed in stays open and is not modified.
 * A negative (invalid) FD is handed back as is, without attempting a dup.
 */
static inline int dup_good_fd(int fd)
{
	return fd < 0 ? fd : fcntl(fd, F_DUPFD_CLOEXEC, 3);
}
/* if fd is stdin, stdout, or stderr, dup to a fd greater than 2 /* if fd is stdin, stdout, or stderr, dup to a fd greater than 2
* Takes ownership of the fd passed in, and closes it if calling * Takes ownership of the fd passed in, and closes it if calling
* fcntl(fd, F_DUPFD_CLOEXEC, 3). * fcntl(fd, F_DUPFD_CLOEXEC, 3).
...@@ -543,7 +571,7 @@ static inline int ensure_good_fd(int fd) ...@@ -543,7 +571,7 @@ static inline int ensure_good_fd(int fd)
if (fd < 0) if (fd < 0)
return fd; return fd;
if (fd < 3) { if (fd < 3) {
fd = fcntl(fd, F_DUPFD_CLOEXEC, 3); fd = dup_good_fd(fd);
saved_errno = errno; saved_errno = errno;
close(old_fd); close(old_fd);
errno = saved_errno; errno = saved_errno;
......
...@@ -219,7 +219,8 @@ int libbpf_probe_bpf_prog_type(enum bpf_prog_type prog_type, const void *opts) ...@@ -219,7 +219,8 @@ int libbpf_probe_bpf_prog_type(enum bpf_prog_type prog_type, const void *opts)
} }
int libbpf__load_raw_btf(const char *raw_types, size_t types_len, int libbpf__load_raw_btf(const char *raw_types, size_t types_len,
const char *str_sec, size_t str_len) const char *str_sec, size_t str_len,
int token_fd)
{ {
struct btf_header hdr = { struct btf_header hdr = {
.magic = BTF_MAGIC, .magic = BTF_MAGIC,
...@@ -229,6 +230,7 @@ int libbpf__load_raw_btf(const char *raw_types, size_t types_len, ...@@ -229,6 +230,7 @@ int libbpf__load_raw_btf(const char *raw_types, size_t types_len,
.str_off = types_len, .str_off = types_len,
.str_len = str_len, .str_len = str_len,
}; };
LIBBPF_OPTS(bpf_btf_load_opts, opts, .token_fd = token_fd);
int btf_fd, btf_len; int btf_fd, btf_len;
__u8 *raw_btf; __u8 *raw_btf;
...@@ -241,7 +243,7 @@ int libbpf__load_raw_btf(const char *raw_types, size_t types_len, ...@@ -241,7 +243,7 @@ int libbpf__load_raw_btf(const char *raw_types, size_t types_len,
memcpy(raw_btf + hdr.hdr_len, raw_types, hdr.type_len); memcpy(raw_btf + hdr.hdr_len, raw_types, hdr.type_len);
memcpy(raw_btf + hdr.hdr_len + hdr.type_len, str_sec, hdr.str_len); memcpy(raw_btf + hdr.hdr_len + hdr.type_len, str_sec, hdr.str_len);
btf_fd = bpf_btf_load(raw_btf, btf_len, NULL); btf_fd = bpf_btf_load(raw_btf, btf_len, &opts);
free(raw_btf); free(raw_btf);
return btf_fd; return btf_fd;
...@@ -271,7 +273,7 @@ static int load_local_storage_btf(void) ...@@ -271,7 +273,7 @@ static int load_local_storage_btf(void)
}; };
return libbpf__load_raw_btf((char *)types, sizeof(types), return libbpf__load_raw_btf((char *)types, sizeof(types),
strs, sizeof(strs)); strs, sizeof(strs), 0);
} }
static int probe_map_create(enum bpf_map_type map_type) static int probe_map_create(enum bpf_map_type map_type)
......
...@@ -2,5 +2,8 @@ ...@@ -2,5 +2,8 @@
#ifndef __LIBBPF_STR_ERROR_H #ifndef __LIBBPF_STR_ERROR_H
#define __LIBBPF_STR_ERROR_H #define __LIBBPF_STR_ERROR_H
#define STRERR_BUFSIZE 128
char *libbpf_strerror_r(int err, char *dst, int len); char *libbpf_strerror_r(int err, char *dst, int len);
#endif /* __LIBBPF_STR_ERROR_H */ #endif /* __LIBBPF_STR_ERROR_H */
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */
#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
char _license[] SEC("license") = "GPL";
/* Single-entry BPF queue map holding __u32 values.
 * NOTE(review): presumably BPF_MAP_TYPE_QUEUE was picked because creating it
 * needs privileges (or a BPF token); confirm against the selftest that
 * loads this object.
 */
struct {
__uint(type, BPF_MAP_TYPE_QUEUE);
__uint(max_entries, 1);
__type(value, __u32);
} priv_map SEC(".maps");
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */
#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
char _license[] SEC("license") = "GPL";
/* Minimal kprobe program: never reads its context and always returns 1.
 * NOTE(review): presumably exists only so selftests have a kprobe-type
 * program to load; confirm against the test that uses it.
 */
SEC("kprobe")
int kprobe_prog(void *ctx)
{
/* ctx is intentionally unused */
return 1;
}
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment