Commit 3dbb5b50 authored by Alexei Starovoitov

Merge branch 'bpf_enable_stats'

Song Liu says:

====================
run_time_ns is a useful stat for BPF programs. However, it is gated by
sysctl kernel.bpf_stats_enabled. When multiple user space tools are
toggling kernel.bpf_stats_enabled at the same time, they may confuse each
other.

Solve this problem with a new BPF command BPF_ENABLE_STATS.

Changes v8 => v9:
  1. Clean up in selftest (Andrii).
  2. Not using static variable in test program (Andrii).

Changes v7 => v8:
  1. Change name BPF_STATS_RUNTIME_CNT => BPF_STATS_RUN_TIME (Alexei).
  2. Add CHECK_ATTR to bpf_enable_stats() (Alexei).
  3. Rebase (Andrii).
  4. Simplify the selftest (Alexei).

Changes v6 => v7:
  1. Add test to verify run_cnt matches count measured by the program.

Changes v5 => v6:
  1. Simplify test program (Yonghong).
  2. Rebase (with some conflicts).

Changes v4 => v5:
  1. Use memset to zero bpf_attr in bpf_enable_stats() (Andrii).

Changes v3 => v4:
  1. Add libbpf support and selftest;
  2. Avoid cleaning trailing space.

Changes v2 => v3:
  1. Rename the command to BPF_ENABLE_STATS, and make it extendible.
  2. fix commit log;
  3. remove unnecessary headers.
====================
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
parents c3210222 31a9f7fe
......@@ -987,6 +987,7 @@ _out: \
#ifdef CONFIG_BPF_SYSCALL
DECLARE_PER_CPU(int, bpf_prog_active);
extern struct mutex bpf_stats_enabled_mutex;
/*
* Block execution of BPF programs attached to instrumentation (perf,
......
......@@ -115,6 +115,7 @@ enum bpf_cmd {
BPF_LINK_UPDATE,
BPF_LINK_GET_FD_BY_ID,
BPF_LINK_GET_NEXT_ID,
BPF_ENABLE_STATS,
};
enum bpf_map_type {
......@@ -390,6 +391,12 @@ enum {
*/
#define BPF_F_QUERY_EFFECTIVE (1U << 0)
/* Stats type requested through the enable_stats.type field of the
 * BPF_ENABLE_STATS command.
 */
enum bpf_stats_type {
/* enable collection of run_time_ns and run_cnt */
BPF_STATS_RUN_TIME = 0,
};
enum bpf_stack_build_id_status {
/* user space need an empty entry to identify end of a trace */
BPF_STACK_BUILD_ID_EMPTY = 0,
......@@ -601,6 +608,10 @@ union bpf_attr {
__u32 old_prog_fd;
} link_update;
struct { /* struct used by BPF_ENABLE_STATS command */
__u32 type;
} enable_stats;
} __attribute__((aligned(8)));
/* The description below is an attempt at providing documentation to eBPF
......
......@@ -3872,6 +3872,60 @@ static int bpf_link_get_fd_by_id(const union bpf_attr *attr)
return fd;
}
/*
 * Held around every static_key_slow_inc()/static_key_slow_dec() on
 * bpf_stats_enabled_key performed by the BPF_ENABLE_STATS fd paths and by
 * the sysctl handler, so those updates do not interleave.
 */
DEFINE_MUTEX(bpf_stats_enabled_mutex);
/*
 * ->release() for the "bpf-stats" anonymous file.
 *
 * Drops one reference on bpf_stats_enabled_key, undoing the increment done
 * by bpf_enable_runtime_stats() when the fd was created.  Neither @inode
 * nor @file is inspected.  Always returns 0.
 */
static int bpf_stats_release(struct inode *inode, struct file *file)
{
	struct static_key *stats_key = &bpf_stats_enabled_key.key;

	/* Same mutex as the enable path, so inc and dec never interleave. */
	mutex_lock(&bpf_stats_enabled_mutex);
	static_key_slow_dec(stats_key);
	mutex_unlock(&bpf_stats_enabled_mutex);

	return 0;
}
/*
 * File operations for the "bpf-stats" anonymous inode handed out by
 * bpf_enable_runtime_stats().  Only ->release is provided; it drops the
 * reference on bpf_stats_enabled_key.
 */
static const struct file_operations bpf_stats_fops = {
.release = bpf_stats_release,
};
/*
 * Enable run-time stats and tie their lifetime to a new file descriptor.
 *
 * On success an O_CLOEXEC "bpf-stats" anonymous-inode fd is returned and
 * one reference is taken on bpf_stats_enabled_key; bpf_stats_release()
 * drops that reference again.
 *
 * Returns the new fd (>= 0), -EBUSY when the key's count is already above
 * INT_MAX / 2, or the negative error from anon_inode_getfd().
 */
static int bpf_enable_runtime_stats(void)
{
	struct static_key *stats_key = &bpf_stats_enabled_key.key;
	int fd = -EBUSY;

	mutex_lock(&bpf_stats_enabled_mutex);

	/* Set a very high limit to avoid overflow */
	if (static_key_count(stats_key) <= INT_MAX / 2) {
		fd = anon_inode_getfd("bpf-stats", &bpf_stats_fops, NULL,
				      O_CLOEXEC);
		/* Only take the key reference once an fd exists to drop it. */
		if (fd >= 0)
			static_key_slow_inc(stats_key);
	}

	mutex_unlock(&bpf_stats_enabled_mutex);
	return fd;
}
#define BPF_ENABLE_STATS_LAST_FIELD enable_stats.type

/*
 * BPF_ENABLE_STATS command handler.
 *
 * @attr: syscall attributes; only enable_stats.type is consumed here.
 *
 * Returns -EINVAL when CHECK_ATTR() rejects @attr or the requested stats
 * type is unknown, -EPERM when the caller lacks CAP_SYS_ADMIN, otherwise
 * the result of the per-type enable routine.
 */
static int bpf_enable_stats(union bpf_attr *attr)
{
	if (CHECK_ATTR(BPF_ENABLE_STATS))
		return -EINVAL;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	/* One case per enum bpf_stats_type value; anything else is invalid. */
	switch (attr->enable_stats.type) {
	case BPF_STATS_RUN_TIME:
		return bpf_enable_runtime_stats();
	default:
		return -EINVAL;
	}
}
SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, size)
{
union bpf_attr attr;
......@@ -3996,6 +4050,9 @@ SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, siz
err = bpf_obj_get_next_id(&attr, uattr,
&link_idr, &link_idr_lock);
break;
case BPF_ENABLE_STATS:
err = bpf_enable_stats(&attr);
break;
default:
err = -EINVAL;
break;
......
......@@ -201,6 +201,40 @@ static int max_extfrag_threshold = 1000;
#endif /* CONFIG_SYSCTL */
#ifdef CONFIG_BPF_SYSCALL
/*
 * proc handler for the sysctl entry whose ->data is bpf_stats_enabled_key.
 *
 * The value exposed to user space is the one last written through this
 * handler (kept in a function-local static), not the static key's count.
 * A successful write that changes the value takes one reference on the
 * key (non-zero) or drops one (zero).
 *
 * Writes require CAP_SYS_ADMIN and fail with -EPERM otherwise.  In all
 * other cases the return value is that of proc_dointvec_minmax().
 */
static int bpf_stats_handler(struct ctl_table *table, int write,
			     void __user *buffer, size_t *lenp,
			     loff_t *ppos)
{
	static int sysctl_val;	/* last value accepted from a write */
	struct static_key *stats_key = (struct static_key *)table->data;
	int requested, ret;
	/* Shadow table so the generic int parser works on a plain int. */
	struct ctl_table shadow = {
		.data   = &requested,
		.maxlen = sizeof(requested),
		.mode   = table->mode,
		.extra1 = SYSCTL_ZERO,
		.extra2 = SYSCTL_ONE,
	};

	if (write && !capable(CAP_SYS_ADMIN))
		return -EPERM;

	mutex_lock(&bpf_stats_enabled_mutex);

	requested = sysctl_val;
	ret = proc_dointvec_minmax(&shadow, write, buffer, lenp, ppos);

	/* Nothing to do for reads, failed parses, or unchanged values. */
	if (!write || ret || requested == sysctl_val)
		goto unlock;

	if (requested)
		static_key_slow_inc(stats_key);
	else
		static_key_slow_dec(stats_key);
	sysctl_val = requested;

unlock:
	mutex_unlock(&bpf_stats_enabled_mutex);
	return ret;
}
#endif
/*
* /proc/sys support
*/
......@@ -2549,7 +2583,7 @@ static struct ctl_table kern_table[] = {
.data = &bpf_stats_enabled_key.key,
.maxlen = sizeof(bpf_stats_enabled_key),
.mode = 0644,
.proc_handler = proc_do_static_key,
.proc_handler = bpf_stats_handler,
},
#endif
#if defined(CONFIG_TREE_RCU)
......
......@@ -115,6 +115,7 @@ enum bpf_cmd {
BPF_LINK_UPDATE,
BPF_LINK_GET_FD_BY_ID,
BPF_LINK_GET_NEXT_ID,
BPF_ENABLE_STATS,
};
enum bpf_map_type {
......@@ -390,6 +391,12 @@ enum {
*/
#define BPF_F_QUERY_EFFECTIVE (1U << 0)
/* Stats type requested through the enable_stats.type field of the
 * BPF_ENABLE_STATS command.
 */
enum bpf_stats_type {
/* enable collection of run_time_ns and run_cnt */
BPF_STATS_RUN_TIME = 0,
};
enum bpf_stack_build_id_status {
/* user space need an empty entry to identify end of a trace */
BPF_STACK_BUILD_ID_EMPTY = 0,
......@@ -601,6 +608,10 @@ union bpf_attr {
__u32 old_prog_fd;
} link_update;
struct { /* struct used by BPF_ENABLE_STATS command */
__u32 type;
} enable_stats;
} __attribute__((aligned(8)));
/* The description below is an attempt at providing documentation to eBPF
......
......@@ -841,3 +841,13 @@ int bpf_task_fd_query(int pid, int fd, __u32 flags, char *buf, __u32 *buf_len,
return err;
}
int bpf_enable_stats(enum bpf_stats_type type)
{
union bpf_attr attr;
memset(&attr, 0, sizeof(attr));
attr.enable_stats.type = type;
return sys_bpf(BPF_ENABLE_STATS, &attr, sizeof(attr));
}
......@@ -231,6 +231,7 @@ LIBBPF_API int bpf_load_btf(void *btf, __u32 btf_size, char *log_buf,
LIBBPF_API int bpf_task_fd_query(int pid, int fd, __u32 flags, char *buf,
__u32 *buf_len, __u32 *prog_id, __u32 *fd_type,
__u64 *probe_offset, __u64 *probe_addr);
LIBBPF_API int bpf_enable_stats(enum bpf_stats_type type);
#ifdef __cplusplus
} /* extern "C" */
......
......@@ -257,6 +257,7 @@ LIBBPF_0.0.8 {
LIBBPF_0.0.9 {
global:
bpf_enable_stats;
bpf_link_get_fd_by_id;
bpf_link_get_next_id;
} LIBBPF_0.0.8;
// SPDX-License-Identifier: GPL-2.0
#include <test_progs.h>
#include "test_enable_stats.skel.h"
/*
 * Selftest for bpf_enable_stats(BPF_STATS_RUN_TIME).
 *
 * Loads the test_enable_stats skeleton, obtains a stats fd, attaches and
 * then detaches the program, and finally checks through bpf_prog_info that
 * run_time_ns is non-zero and that run_cnt equals the counter kept by the
 * BPF program itself.
 */
void test_enable_stats(void)
{
	struct bpf_prog_info info;
	__u32 info_len = sizeof(info);
	struct test_enable_stats *skel;
	int stats_fd, prog_fd, err;
	/* Not used directly below; presumably consumed by CHECK(). */
	int duration = 0;

	skel = test_enable_stats__open_and_load();
	if (CHECK(!skel, "skel_open_and_load", "skeleton open/load failed\n"))
		return;

	stats_fd = bpf_enable_stats(BPF_STATS_RUN_TIME);
	if (CHECK(stats_fd < 0, "get_stats_fd", "failed %d\n", errno))
		goto cleanup;

	err = test_enable_stats__attach(skel);
	if (CHECK(err, "attach_raw_tp", "err %d\n", err))
		goto cleanup;

	test_enable_stats__detach(skel);

	prog_fd = bpf_program__fd(skel->progs.test_enable_stats);
	memset(&info, 0, sizeof(info));
	err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len);
	if (CHECK(err, "get_prog_info",
		  "failed to get bpf_prog_info for fd %d\n", prog_fd))
		goto cleanup;

	if (CHECK(info.run_time_ns == 0, "check_stats_enabled",
		  "failed to enable run_time_ns stats\n"))
		goto cleanup;

	CHECK(info.run_cnt != skel->bss->count, "check_run_cnt_valid",
	      "invalid run_cnt stats\n");

cleanup:
	/* Skeleton first, stats fd second; skip close() if no fd was obtained. */
	test_enable_stats__destroy(skel);
	if (stats_fd >= 0)
		close(stats_fd);
}
// SPDX-License-Identifier: GPL-2.0
// Copyright (c) 2020 Facebook
#include <linux/bpf.h>
#include <stdint.h>
#include <linux/types.h>
#include <bpf/bpf_helpers.h>
char _license[] SEC("license") = "GPL";
__u64 count = 0;
/*
 * Count every invocation of this program in the global 'count'.
 *
 * The user-space side of the test compares 'count' with the run_cnt the
 * kernel reports for this program.  raw_tracepoint/sys_enter can fire on
 * several CPUs at once, so the increment must be atomic: a plain
 * 'count += 1' is a non-atomic read-modify-write that can lose updates
 * and make the two numbers disagree.
 */
SEC("raw_tracepoint/sys_enter")
int test_enable_stats(void *ctx)
{
	__sync_fetch_and_add(&count, 1);
	return 0;
}
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment