Merge branch 'bpf_enable_stats'

Song Liu says: ==================== run_time_ns is a useful stats for BPF programs. However, it is gated by sysctl kernel.bpf_stats_enabled. When multiple user space tools are toggling kernl.bpf_stats_enabled at the same time, they may confuse each other. Solve this problem with a new BPF command BPF_ENABLE_STATS. Changes v8 => v9: 1. Clean up in selftest (Andrii). 2. Not using static variable in test program (Andrii). Changes v7 => v8: 1. Change name BPF_STATS_RUNTIME_CNT => BPF_STATS_RUN_TIME (Alexei). 2. Add CHECK_ATTR to bpf_enable_stats() (Alexei). 3. Rebase (Andrii). 4. Simplfy the selftest (Alexei). Changes v6 => v7: 1. Add test to verify run_cnt matches count measured by the program. Changes v5 => v6: 1. Simplify test program (Yonghong). 2. Rebase (with some conflicts). Changes v4 => v5: 1. Use memset to zero bpf_attr in bpf_enable_stats() (Andrii). Changes v3 => v4: 1. Add libbpf support and selftest; 2. Avoid cleaning trailing space. Changes v2 => v3: 1. Rename the command to BPF_ENABLE_STATS, and make it extendible. 2. fix commit log; 3. remove unnecessary headers. ==================== Signed-off-by: Alexei Starovoitov <ast@kernel.org>

Merge branch 'bpf_enable_stats'
Song Liu says: ==================== run_time_ns is a useful stats for BPF programs. However, it is gated by sysctl kernel.bpf_stats_enabled. When multiple user space tools are toggling kernl.bpf_stats_enabled at the same time, they may confuse each other. Solve this problem with a new BPF command BPF_ENABLE_STATS. Changes v8 => v9: 1. Clean up in selftest (Andrii). 2. Not using static variable in test program (Andrii). Changes v7 => v8: 1. Change name BPF_STATS_RUNTIME_CNT => BPF_STATS_RUN_TIME (Alexei). 2. Add CHECK_ATTR to bpf_enable_stats() (Alexei). 3. Rebase (Andrii). 4. Simplfy the selftest (Alexei). Changes v6 => v7: 1. Add test to verify run_cnt matches count measured by the program. Changes v5 => v6: 1. Simplify test program (Yonghong). 2. Rebase (with some conflicts). Changes v4 => v5: 1. Use memset to zero bpf_attr in bpf_enable_stats() (Andrii). Changes v3 => v4: 1. Add libbpf support and selftest; 2. Avoid cleaning trailing space. Changes v2 => v3: 1. Rename the command to BPF_ENABLE_STATS, and make it extendible. 2. fix commit log; 3. remove unnecessary headers. ==================== Signed-off-by: Alexei Starovoitov <ast@kernel.org>
3dbb5b50 · Alexei Starovoitov · c3210222 · 31a9f7fe · 3dbb5b50 · 3dbb5b50
Commit 3dbb5b50 authored May 01, 2020 by Alexei Starovoitov
10 changed files
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -987,6 +987,7 @@ _out:							\

 #ifdef CONFIG_BPF_SYSCALL
 DECLARE_PER_CPU(int, bpf_prog_active);
+extern struct mutex bpf_stats_enabled_mutex;

 /*
 * Block execution of BPF programs attached to instrumentation (perf,

--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -115,6 +115,7 @@ enum bpf_cmd {
 	BPF_LINK_UPDATE,
 	BPF_LINK_GET_FD_BY_ID,
 	BPF_LINK_GET_NEXT_ID,
+	BPF_ENABLE_STATS,
 };

 enum bpf_map_type {
@@ -390,6 +391,12 @@ enum {
 */
 #define BPF_F_QUERY_EFFECTIVE	(1U << 0)

+/* type for BPF_ENABLE_STATS */
+enum bpf_stats_type {
+	/* enabled run_time_ns and run_cnt */
+	BPF_STATS_RUN_TIME = 0,
+};
+
 enum bpf_stack_build_id_status {
 	/* user space need an empty entry to identify end of a trace */
 	BPF_STACK_BUILD_ID_EMPTY = 0,
@@ -601,6 +608,10 @@ union bpf_attr {
 		__u32		old_prog_fd;
 	} link_update;

+	struct { /* struct used by BPF_ENABLE_STATS command */
+		__u32		type;
+	} enable_stats;
+
 } __attribute__((aligned(8)));

 /* The description below is an attempt at providing documentation to eBPF

--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -3872,6 +3872,60 @@ static int bpf_link_get_fd_by_id(const union bpf_attr *attr)
 	return fd;
 }

+DEFINE_MUTEX(bpf_stats_enabled_mutex);
+
+static int bpf_stats_release(struct inode *inode, struct file *file)
+{
+	mutex_lock(&bpf_stats_enabled_mutex);
+	static_key_slow_dec(&bpf_stats_enabled_key.key);
+	mutex_unlock(&bpf_stats_enabled_mutex);
+	return 0;
+}
+
+static const struct file_operations bpf_stats_fops = {
+	.release = bpf_stats_release,
+};
+
+static int bpf_enable_runtime_stats(void)
+{
+	int fd;
+
+	mutex_lock(&bpf_stats_enabled_mutex);
+
+	/* Set a very high limit to avoid overflow */
+	if (static_key_count(&bpf_stats_enabled_key.key) > INT_MAX / 2) {
+		mutex_unlock(&bpf_stats_enabled_mutex);
+		return -EBUSY;
+	}
+
+	fd = anon_inode_getfd("bpf-stats", &bpf_stats_fops, NULL, O_CLOEXEC);
+	if (fd >= 0)
+		static_key_slow_inc(&bpf_stats_enabled_key.key);
+
+	mutex_unlock(&bpf_stats_enabled_mutex);
+	return fd;
+}
+
+#define BPF_ENABLE_STATS_LAST_FIELD enable_stats.type
+
+static int bpf_enable_stats(union bpf_attr *attr)
+{
+
+	if (CHECK_ATTR(BPF_ENABLE_STATS))
+		return -EINVAL;
+
+	if (!capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
+	switch (attr->enable_stats.type) {
+	case BPF_STATS_RUN_TIME:
+		return bpf_enable_runtime_stats();
+	default:
+		break;
+	}
+	return -EINVAL;
+}
+
 SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, size)
 {
 	union bpf_attr attr;
@@ -3996,6 +4050,9 @@ SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, siz
 		err = bpf_obj_get_next_id(&attr, uattr,
 					  &link_idr, &link_idr_lock);
 		break;
+	case BPF_ENABLE_STATS:
+		err = bpf_enable_stats(&attr);
+		break;
 	default:
 		err = -EINVAL;
 		break;

--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -201,6 +201,40 @@ static int max_extfrag_threshold = 1000;

 #endif /* CONFIG_SYSCTL */

+#ifdef CONFIG_BPF_SYSCALL
+static int bpf_stats_handler(struct ctl_table *table, int write,
+			     void __user *buffer, size_t *lenp,
+			     loff_t *ppos)
+{
+	struct static_key *key = (struct static_key *)table->data;
+	static int saved_val;
+	int val, ret;
+	struct ctl_table tmp = {
+		.data   = &val,
+		.maxlen = sizeof(val),
+		.mode   = table->mode,
+		.extra1 = SYSCTL_ZERO,
+		.extra2 = SYSCTL_ONE,
+	};
+
+	if (write && !capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
+	mutex_lock(&bpf_stats_enabled_mutex);
+	val = saved_val;
+	ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
+	if (write && !ret && val != saved_val) {
+		if (val)
+			static_key_slow_inc(key);
+		else
+			static_key_slow_dec(key);
+		saved_val = val;
+	}
+	mutex_unlock(&bpf_stats_enabled_mutex);
+	return ret;
+}
+#endif
+
 /*
 * /proc/sys support
 */
@@ -2549,7 +2583,7 @@ static struct ctl_table kern_table[] = {
 		.data		= &bpf_stats_enabled_key.key,
 		.maxlen		= sizeof(bpf_stats_enabled_key),
 		.mode		= 0644,
-		.proc_handler	= proc_do_static_key,
+		.proc_handler	= bpf_stats_handler,
 	},
 #endif
 #if defined(CONFIG_TREE_RCU)

--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -115,6 +115,7 @@ enum bpf_cmd {
 	BPF_LINK_UPDATE,
 	BPF_LINK_GET_FD_BY_ID,
 	BPF_LINK_GET_NEXT_ID,
+	BPF_ENABLE_STATS,
 };

 enum bpf_map_type {
@@ -390,6 +391,12 @@ enum {
 */
 #define BPF_F_QUERY_EFFECTIVE	(1U << 0)

+/* type for BPF_ENABLE_STATS */
+enum bpf_stats_type {
+	/* enabled run_time_ns and run_cnt */
+	BPF_STATS_RUN_TIME = 0,
+};
+
 enum bpf_stack_build_id_status {
 	/* user space need an empty entry to identify end of a trace */
 	BPF_STACK_BUILD_ID_EMPTY = 0,
@@ -601,6 +608,10 @@ union bpf_attr {
 		__u32		old_prog_fd;
 	} link_update;

+	struct { /* struct used by BPF_ENABLE_STATS command */
+		__u32		type;
+	} enable_stats;
+
 } __attribute__((aligned(8)));

 /* The description below is an attempt at providing documentation to eBPF

--- a/tools/lib/bpf/bpf.c
+++ b/tools/lib/bpf/bpf.c
@@ -841,3 +841,13 @@ int bpf_task_fd_query(int pid, int fd, __u32 flags, char *buf, __u32 *buf_len,

 	return err;
 }
+
+int bpf_enable_stats(enum bpf_stats_type type)
+{
+	union bpf_attr attr;
+
+	memset(&attr, 0, sizeof(attr));
+	attr.enable_stats.type = type;
+
+	return sys_bpf(BPF_ENABLE_STATS, &attr, sizeof(attr));
+}
--- a/tools/lib/bpf/bpf.h
+++ b/tools/lib/bpf/bpf.h
@@ -231,6 +231,7 @@ LIBBPF_API int bpf_load_btf(void *btf, __u32 btf_size, char *log_buf,
 LIBBPF_API int bpf_task_fd_query(int pid, int fd, __u32 flags, char *buf,
 				 __u32 *buf_len, __u32 *prog_id, __u32 *fd_type,
 				 __u64 *probe_offset, __u64 *probe_addr);
+LIBBPF_API int bpf_enable_stats(enum bpf_stats_type type);

 #ifdef __cplusplus
 } /* extern "C" */

--- a/tools/lib/bpf/libbpf.map
+++ b/tools/lib/bpf/libbpf.map
@@ -257,6 +257,7 @@ LIBBPF_0.0.8 {

 LIBBPF_0.0.9 {
 	global:
+		bpf_enable_stats;
 		bpf_link_get_fd_by_id;
 		bpf_link_get_next_id;
 } LIBBPF_0.0.8;
--- a/tools/testing/selftests/bpf/prog_tests/enable_stats.c
+++ b/tools/testing/selftests/bpf/prog_tests/enable_stats.c
+// SPDX-License-Identifier: GPL-2.0
+#include <test_progs.h>
+#include "test_enable_stats.skel.h"
+
+void test_enable_stats(void)
+{
+	struct test_enable_stats *skel;
+	int stats_fd, err, prog_fd;
+	struct bpf_prog_info info;
+	__u32 info_len = sizeof(info);
+	int duration = 0;
+
+	skel = test_enable_stats__open_and_load();
+	if (CHECK(!skel, "skel_open_and_load", "skeleton open/load failed\n"))
+		return;
+
+	stats_fd = bpf_enable_stats(BPF_STATS_RUN_TIME);
+	if (CHECK(stats_fd < 0, "get_stats_fd", "failed %d\n", errno)) {
+		test_enable_stats__destroy(skel);
+		return;
+	}
+
+	err = test_enable_stats__attach(skel);
+	if (CHECK(err, "attach_raw_tp", "err %d\n", err))
+		goto cleanup;
+
+	test_enable_stats__detach(skel);
+
+	prog_fd = bpf_program__fd(skel->progs.test_enable_stats);
+	memset(&info, 0, info_len);
+	err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len);
+	if (CHECK(err, "get_prog_info",
+		  "failed to get bpf_prog_info for fd %d\n", prog_fd))
+		goto cleanup;
+	if (CHECK(info.run_time_ns == 0, "check_stats_enabled",
+		  "failed to enable run_time_ns stats\n"))
+		goto cleanup;
+
+	CHECK(info.run_cnt != skel->bss->count, "check_run_cnt_valid",
+	      "invalid run_cnt stats\n");
+
+cleanup:
+	test_enable_stats__destroy(skel);
+	close(stats_fd);
+}
--- a/tools/testing/selftests/bpf/progs/test_enable_stats.c
+++ b/tools/testing/selftests/bpf/progs/test_enable_stats.c
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2020 Facebook
+
+#include <linux/bpf.h>
+#include <stdint.h>
+#include <linux/types.h>
+#include <bpf/bpf_helpers.h>
+
+char _license[] SEC("license") = "GPL";
+
+__u64 count = 0;
+
+SEC("raw_tracepoint/sys_enter")
+int test_enable_stats(void *ctx)
+{
+	count += 1;
+	return 0;
+}