Merge branch 'bench-fast-in-kernel-triggering-benchmarks'

Andrii Nakryiko says: ==================== bench: fast in-kernel triggering benchmarks Remove "legacy" triggering benchmarks which rely on syscalls (and thus syscall overhead is a noticeable part of benchmark, unfortunately). Replace them with faster versions that rely on triggering BPF programs in-kernel through another simple "driver" BPF program. See patch #2 with comparison results. raw_tp/tp/fmodret benchmarks required adding a simple kfunc in kernel to be able to trigger a simple tracepoint from BPF program (plus it is also allowed to be replaced by fmod_ret programs). This limits raw_tp/tp/fmodret benchmarks to new kernels only, but it keeps bench tool itself very portable and most of other benchmarks will still work on wide variety of kernels without the need to worry about building and deploying custom kernel module. See patches #5 and #6 for details. v1->v2: - move new TP closer to BPF test run code; - rename/move kfunc and register it for fmod_rets (Alexei); - limit --trig-batch-iters param to [1, 1000] (Alexei). ==================== Link: https://lore.kernel.org/r/20240326162151.3981687-1-andrii@kernel.orgSigned-off-by: Alexei Starovoitov <ast@kernel.org>

Merge branch 'bench-fast-in-kernel-triggering-benchmarks'
Andrii Nakryiko says: ==================== bench: fast in-kernel triggering benchmarks Remove "legacy" triggering benchmarks which rely on syscalls (and thus syscall overhead is a noticeable part of benchmark, unfortunately). Replace them with faster versions that rely on triggering BPF programs in-kernel through another simple "driver" BPF program. See patch #2 with comparison results. raw_tp/tp/fmodret benchmarks required adding a simple kfunc in kernel to be able to trigger a simple tracepoint from BPF program (plus it is also allowed to be replaced by fmod_ret programs). This limits raw_tp/tp/fmodret benchmarks to new kernels only, but it keeps bench tool itself very portable and most of other benchmarks will still work on wide variety of kernels without the need to worry about building and deploying custom kernel module. See patches #5 and #6 for details. v1->v2: - move new TP closer to BPF test run code; - rename/move kfunc and register it for fmod_rets (Alexei); - limit --trig-batch-iters param to [1, 1000] (Alexei). ==================== Link: https://lore.kernel.org/r/20240326162151.3981687-1-andrii@kernel.orgSigned-off-by: Alexei Starovoitov <ast@kernel.org>
a461a51e · Alexei Starovoitov · 55fc888d · 985d0681 · a461a51e · a461a51e
Commit a461a51e authored Mar 27, 2024 by Alexei Starovoitov
8 changed files
--- a/include/trace/events/bpf_test_run.h
+++ b/include/trace/events/bpf_test_run.h
@@ -7,6 +7,23 @@

 #include <linux/tracepoint.h>

+TRACE_EVENT(bpf_trigger_tp,
+
+	TP_PROTO(int nonce),
+
+	TP_ARGS(nonce),
+
+	TP_STRUCT__entry(
+		__field(int, nonce)
+	),
+
+	TP_fast_assign(
+		__entry->nonce = nonce;
+	),
+
+	TP_printk("nonce %d", __entry->nonce)
+);
+
 DECLARE_EVENT_CLASS(bpf_test_finish,

 	TP_PROTO(int *err),

--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -2625,6 +2625,7 @@ BTF_ID_FLAGS(func, bpf_dynptr_is_null)
 BTF_ID_FLAGS(func, bpf_dynptr_is_rdonly)
 BTF_ID_FLAGS(func, bpf_dynptr_size)
 BTF_ID_FLAGS(func, bpf_dynptr_clone)
+BTF_ID_FLAGS(func, bpf_modify_return_test_tp)
 BTF_KFUNCS_END(common_btf_ids)

 static const struct btf_kfunc_id_set common_kfunc_set = {

--- a/net/bpf/test_run.c
+++ b/net/bpf/test_run.c
@@ -575,6 +575,13 @@ __bpf_kfunc int bpf_modify_return_test2(int a, int *b, short c, int d,
 	return a + *b + c + d + (long)e + f + g;
 }

+__bpf_kfunc int bpf_modify_return_test_tp(int nonce)
+{
+	trace_bpf_trigger_tp(nonce);
+
+	return nonce;
+}
+
 int noinline bpf_fentry_shadow_test(int a)
 {
 	return a + 1;
@@ -622,6 +629,7 @@ __bpf_kfunc_end_defs();
 BTF_KFUNCS_START(bpf_test_modify_return_ids)
 BTF_ID_FLAGS(func, bpf_modify_return_test)
 BTF_ID_FLAGS(func, bpf_modify_return_test2)
+BTF_ID_FLAGS(func, bpf_modify_return_test_tp)
 BTF_ID_FLAGS(func, bpf_fentry_test1, KF_SLEEPABLE)
 BTF_KFUNCS_END(bpf_test_modify_return_ids)


--- a/tools/testing/selftests/bpf/bench.c
+++ b/tools/testing/selftests/bpf/bench.c
@@ -280,6 +280,7 @@ extern struct argp bench_strncmp_argp;
 extern struct argp bench_hashmap_lookup_argp;
 extern struct argp bench_local_storage_create_argp;
 extern struct argp bench_htab_mem_argp;
+extern struct argp bench_trigger_batch_argp;

 static const struct argp_child bench_parsers[] = {
 	{ &bench_ringbufs_argp, 0, "Ring buffers benchmark", 0 },
@@ -292,6 +293,7 @@ static const struct argp_child bench_parsers[] = {
 	{ &bench_hashmap_lookup_argp, 0, "Hashmap lookup benchmark", 0 },
 	{ &bench_local_storage_create_argp, 0, "local-storage-create benchmark", 0 },
 	{ &bench_htab_mem_argp, 0, "hash map memory benchmark", 0 },
+	{ &bench_trigger_batch_argp, 0, "BPF triggering benchmark", 0 },
 	{},
 };

@@ -491,24 +493,31 @@ extern const struct bench bench_rename_kretprobe;
 extern const struct bench bench_rename_rawtp;
 extern const struct bench bench_rename_fentry;
 extern const struct bench bench_rename_fexit;
-extern const struct bench bench_trig_base;
-extern const struct bench bench_trig_tp;
-extern const struct bench bench_trig_rawtp;
+
+/* pure counting benchmarks to establish theoretical lmits */
+extern const struct bench bench_trig_usermode_count;
+extern const struct bench bench_trig_syscall_count;
+extern const struct bench bench_trig_kernel_count;
+
+/* batched, staying mostly in-kernel benchmarks */
 extern const struct bench bench_trig_kprobe;
 extern const struct bench bench_trig_kretprobe;
 extern const struct bench bench_trig_kprobe_multi;
 extern const struct bench bench_trig_kretprobe_multi;
 extern const struct bench bench_trig_fentry;
 extern const struct bench bench_trig_fexit;
-extern const struct bench bench_trig_fentry_sleep;
 extern const struct bench bench_trig_fmodret;
-extern const struct bench bench_trig_uprobe_base;
+extern const struct bench bench_trig_tp;
+extern const struct bench bench_trig_rawtp;
+
+/* uprobe/uretprobe benchmarks */
 extern const struct bench bench_trig_uprobe_nop;
 extern const struct bench bench_trig_uretprobe_nop;
 extern const struct bench bench_trig_uprobe_push;
 extern const struct bench bench_trig_uretprobe_push;
 extern const struct bench bench_trig_uprobe_ret;
 extern const struct bench bench_trig_uretprobe_ret;
+
 extern const struct bench bench_rb_libbpf;
 extern const struct bench bench_rb_custom;
 extern const struct bench bench_pb_libbpf;
@@ -539,24 +548,28 @@ static const struct bench *benchs[] = {
 	&bench_rename_rawtp,
 	&bench_rename_fentry,
 	&bench_rename_fexit,
-	&bench_trig_base,
-	&bench_trig_tp,
-	&bench_trig_rawtp,
+	/* pure counting benchmarks for establishing theoretical limits */
+	&bench_trig_usermode_count,
+	&bench_trig_kernel_count,
+	&bench_trig_syscall_count,
+	/* batched, staying mostly in-kernel triggers */
 	&bench_trig_kprobe,
 	&bench_trig_kretprobe,
 	&bench_trig_kprobe_multi,
 	&bench_trig_kretprobe_multi,
 	&bench_trig_fentry,
 	&bench_trig_fexit,
-	&bench_trig_fentry_sleep,
 	&bench_trig_fmodret,
-	&bench_trig_uprobe_base,
+	&bench_trig_tp,
+	&bench_trig_rawtp,
+	/* uprobes */
 	&bench_trig_uprobe_nop,
 	&bench_trig_uretprobe_nop,
 	&bench_trig_uprobe_push,
 	&bench_trig_uretprobe_push,
 	&bench_trig_uprobe_ret,
 	&bench_trig_uretprobe_ret,
+	/* ringbuf/perfbuf benchmarks */
 	&bench_rb_libbpf,
 	&bench_rb_custom,
 	&bench_pb_libbpf,

--- a/tools/testing/selftests/bpf/benchs/bench_trigger.c
+++ b/tools/testing/selftests/bpf/benchs/bench_trigger.c
--- a/tools/testing/selftests/bpf/benchs/run_bench_trigger.sh
+++ b/tools/testing/selftests/bpf/benchs/run_bench_trigger.sh
@@ -2,8 +2,22 @@

 set -eufo pipefail

-for i in base tp rawtp kprobe fentry fmodret
-do
-	summary=$(sudo ./bench -w2 -d5 -a trig-$i | tail -n1 | cut -d'(' -f1 | cut -d' ' -f3-)
-	printf "%-10s: %s\n" $i "$summary"
+def_tests=( \
+	usermode-count kernel-count syscall-count \
+	fentry fexit fmodret \
+	rawtp tp \
+	kprobe kprobe-multi \
+	kretprobe kretprobe-multi \
+)
+
+tests=("$@")
+if [ ${#tests[@]} -eq 0 ]; then
+	tests=("${def_tests[@]}")
+fi
+
+p=${PROD_CNT:-1}
+
+for t in "${tests[@]}"; do
+	summary=$(sudo ./bench -w2 -d5 -a -p$p trig-$t | tail -n1 | cut -d'(' -f1 | cut -d' ' -f3-)
+	printf "%-15s: %s\n" $t "$summary"
 done
--- a/tools/testing/selftests/bpf/benchs/run_bench_uprobes.sh
+++ b/tools/testing/selftests/bpf/benchs/run_bench_uprobes.sh
@@ -2,7 +2,7 @@

 set -eufo pipefail

-for i in base {uprobe,uretprobe}-{nop,push,ret}
+for i in usermode-count syscall-count {uprobe,uretprobe}-{nop,push,ret}
 do
 	summary=$(sudo ./bench -w2 -d5 -a trig-$i | tail -n1 | cut -d'(' -f1 | cut -d' ' -f3-)
 	printf "%-15s: %s\n" $i "$summary"

--- a/tools/testing/selftests/bpf/progs/trigger_bench.c
+++ b/tools/testing/selftests/bpf/progs/trigger_bench.c
 // SPDX-License-Identifier: GPL-2.0
 // Copyright (c) 2020 Facebook
-
 #include <linux/bpf.h>
 #include <asm/unistd.h>
 #include <bpf/bpf_helpers.h>
@@ -26,79 +25,108 @@ static __always_inline void inc_counter(void)
 	__sync_add_and_fetch(&hits[cpu & CPU_MASK].value, 1);
 }

-SEC("tp/syscalls/sys_enter_getpgid")
-int bench_trigger_tp(void *ctx)
+SEC("?uprobe")
+int bench_trigger_uprobe(void *ctx)
 {
 	inc_counter();
 	return 0;
 }

-SEC("raw_tp/sys_enter")
-int BPF_PROG(bench_trigger_raw_tp, struct pt_regs *regs, long id)
+const volatile int batch_iters = 0;
+
+SEC("?raw_tp")
+int trigger_count(void *ctx)
 {
-	if (id == __NR_getpgid)
+	int i;
+
+	for (i = 0; i < batch_iters; i++)
 		inc_counter();
+
+	return 0;
+}
+
+SEC("?raw_tp")
+int trigger_driver(void *ctx)
+{
+	int i;
+
+	for (i = 0; i < batch_iters; i++)
+		(void)bpf_get_numa_node_id(); /* attach point for benchmarking */
+
 	return 0;
 }

-SEC("kprobe/" SYS_PREFIX "sys_getpgid")
+extern int bpf_modify_return_test_tp(int nonce) __ksym __weak;
+
+SEC("?raw_tp")
+int trigger_driver_kfunc(void *ctx)
+{
+	int i;
+
+	for (i = 0; i < batch_iters; i++)
+		(void)bpf_modify_return_test_tp(0); /* attach point for benchmarking */
+
+	return 0;
+}
+
+SEC("?kprobe/bpf_get_numa_node_id")
 int bench_trigger_kprobe(void *ctx)
 {
 	inc_counter();
 	return 0;
 }

-SEC("kretprobe/" SYS_PREFIX "sys_getpgid")
+SEC("?kretprobe/bpf_get_numa_node_id")
 int bench_trigger_kretprobe(void *ctx)
 {
 	inc_counter();
 	return 0;
 }

-SEC("kprobe.multi/" SYS_PREFIX "sys_getpgid")
+SEC("?kprobe.multi/bpf_get_numa_node_id")
 int bench_trigger_kprobe_multi(void *ctx)
 {
 	inc_counter();
 	return 0;
 }

-SEC("kretprobe.multi/" SYS_PREFIX "sys_getpgid")
+SEC("?kretprobe.multi/bpf_get_numa_node_id")
 int bench_trigger_kretprobe_multi(void *ctx)
 {
 	inc_counter();
 	return 0;
 }

-SEC("fentry/" SYS_PREFIX "sys_getpgid")
+SEC("?fentry/bpf_get_numa_node_id")
 int bench_trigger_fentry(void *ctx)
 {
 	inc_counter();
 	return 0;
 }

-SEC("fexit/" SYS_PREFIX "sys_getpgid")
+SEC("?fexit/bpf_get_numa_node_id")
 int bench_trigger_fexit(void *ctx)
 {
 	inc_counter();
 	return 0;
 }

-SEC("fentry.s/" SYS_PREFIX "sys_getpgid")
-int bench_trigger_fentry_sleep(void *ctx)
+SEC("?fmod_ret/bpf_modify_return_test_tp")
+int bench_trigger_fmodret(void *ctx)
 {
 	inc_counter();
-	return 0;
+	return -22;
 }

-SEC("fmod_ret/" SYS_PREFIX "sys_getpgid")
-int bench_trigger_fmodret(void *ctx)
+SEC("?tp/bpf_test_run/bpf_trigger_tp")
+int bench_trigger_tp(void *ctx)
 {
 	inc_counter();
-	return -22;
+	return 0;
 }

-SEC("uprobe")
-int bench_trigger_uprobe(void *ctx)
+SEC("?raw_tp/bpf_trigger_tp")
+int bench_trigger_rawtp(void *ctx)
 {
 	inc_counter();
 	return 0;