Merge branch 'libbpf-probing-improvements'

Andrii Nakryiko says: ==================== This patch set refactors libbpf feature probing to be done lazily on as-needed basis, instead of proactively testing all possible features libbpf knows about. This allows to scale such detections and mitigations better, without issuing unnecessary syscalls on each bpf_object__load() call. It's also now memoized globally, instead of per-bpf_object. Building on that, libbpf will now detect availability of bpf_probe_read_kernel() helper (which means also -user and -str variants), and will sanitize BPF program code by replacing such references to generic variants (bpf_probe_read[_str]()). This allows to migrate all BPF programs into proper -kernel/-user probing helpers, without the fear of breaking them for old kernels. With that, update BPF_CORE_READ() and related macros to use bpf_probe_read_kernel(), as it doesn't make much sense to do CO-RE relocations against user-space types. And the only class of cases in which BPF program might read kernel type from user-space are UAPI data structures which by definition are fixed in their memory layout and don't need relocating. This is exemplified by test_vmlinux test, which is fixed as part of this patch set as well. BPF_CORE_READ() is useful for chainingg bpf_probe_read_{kernel,user}() calls together even without relocation, so we might add user-space variants, if there is a need. While at making libbpf more useful for older kernels, also improve handling of a complete lack of BTF support in kernel by not even attempting to load BTF info into kernel. This eliminates annoying warning about lack of BTF support in the kernel and map creation retry without BTF. If user is using features that require kernel BTF support, it will still fail, of course. ==================== Signed-off-by: Alexei Starovoitov <ast@kernel.org>

Merge branch 'libbpf-probing-improvements'
Andrii Nakryiko says: ==================== This patch set refactors libbpf feature probing to be done lazily on as-needed basis, instead of proactively testing all possible features libbpf knows about. This allows to scale such detections and mitigations better, without issuing unnecessary syscalls on each bpf_object__load() call. It's also now memoized globally, instead of per-bpf_object. Building on that, libbpf will now detect availability of bpf_probe_read_kernel() helper (which means also -user and -str variants), and will sanitize BPF program code by replacing such references to generic variants (bpf_probe_read[_str]()). This allows to migrate all BPF programs into proper -kernel/-user probing helpers, without the fear of breaking them for old kernels. With that, update BPF_CORE_READ() and related macros to use bpf_probe_read_kernel(), as it doesn't make much sense to do CO-RE relocations against user-space types. And the only class of cases in which BPF program might read kernel type from user-space are UAPI data structures which by definition are fixed in their memory layout and don't need relocating. This is exemplified by test_vmlinux test, which is fixed as part of this patch set as well. BPF_CORE_READ() is useful for chainingg bpf_probe_read_{kernel,user}() calls together even without relocation, so we might add user-space variants, if there is a need. While at making libbpf more useful for older kernels, also improve handling of a complete lack of BTF support in kernel by not even attempting to load BTF info into kernel. This eliminates annoying warning about lack of BTF support in the kernel and map creation retry without BTF. If user is using features that require kernel BTF support, it will still fail, of course. ==================== Signed-off-by: Alexei Starovoitov <ast@kernel.org>
a12a625c · Alexei Starovoitov · 65bb2e0f · 68b08647 · a12a625c · a12a625c
Commit a12a625c authored Aug 18, 2020 by Alexei Starovoitov
5 changed files
--- a/tools/lib/bpf/Makefile
+++ b/tools/lib/bpf/Makefile
@@ -107,7 +107,7 @@ ifeq ($(feature-reallocarray), 0)
 endif

 # Append required CFLAGS
-override CFLAGS += $(EXTRA_WARNINGS)
+override CFLAGS += $(EXTRA_WARNINGS) -Wno-switch-enum
 override CFLAGS += -Werror -Wall
 override CFLAGS += -fPIC
 override CFLAGS += $(INCLUDES)

--- a/tools/lib/bpf/bpf_core_read.h
+++ b/tools/lib/bpf/bpf_core_read.h
@@ -24,27 +24,29 @@ enum bpf_field_info_kind {

 #if __BYTE_ORDER == __LITTLE_ENDIAN
 #define __CORE_BITFIELD_PROBE_READ(dst, src, fld)			      \
-	bpf_probe_read((void *)dst,					      \
-		       __CORE_RELO(src, fld, BYTE_SIZE),		      \
-		       (const void *)src + __CORE_RELO(src, fld, BYTE_OFFSET))
+	bpf_probe_read_kernel(						      \
+			(void *)dst,				      \
+			__CORE_RELO(src, fld, BYTE_SIZE),		      \
+			(const void *)src + __CORE_RELO(src, fld, BYTE_OFFSET))
 #else
 /* semantics of LSHIFT_64 assumes loading values into low-ordered bytes, so
 * for big-endian we need to adjust destination pointer accordingly, based on
 * field byte size
 */
 #define __CORE_BITFIELD_PROBE_READ(dst, src, fld)			      \
-	bpf_probe_read((void *)dst + (8 - __CORE_RELO(src, fld, BYTE_SIZE)),  \
-		       __CORE_RELO(src, fld, BYTE_SIZE),		      \
-		       (const void *)src + __CORE_RELO(src, fld, BYTE_OFFSET))
+	bpf_probe_read_kernel(						      \
+			(void *)dst + (8 - __CORE_RELO(src, fld, BYTE_SIZE)), \
+			__CORE_RELO(src, fld, BYTE_SIZE),		      \
+			(const void *)src + __CORE_RELO(src, fld, BYTE_OFFSET))
 #endif

 /*
 * Extract bitfield, identified by s->field, and return its value as u64.
 * All this is done in relocatable manner, so bitfield changes such as
 * signedness, bit size, offset changes, this will be handled automatically.
- * This version of macro is using bpf_probe_read() to read underlying integer
- * storage. Macro functions as an expression and its return type is
- * bpf_probe_read()'s return value: 0, on success, <0 on error.
+ * This version of macro is using bpf_probe_read_kernel() to read underlying
+ * integer storage. Macro functions as an expression and its return type is
+ * bpf_probe_read_kernel()'s return value: 0, on success, <0 on error.
 */
 #define BPF_CORE_READ_BITFIELD_PROBED(s, field) ({			      \
 	unsigned long long val = 0;					      \
@@ -99,8 +101,8 @@ enum bpf_field_info_kind {
 	__builtin_preserve_field_info(field, BPF_FIELD_BYTE_SIZE)

 /*
- * bpf_core_read() abstracts away bpf_probe_read() call and captures offset
- * relocation for source address using __builtin_preserve_access_index()
+ * bpf_core_read() abstracts away bpf_probe_read_kernel() call and captures
+ * offset relocation for source address using __builtin_preserve_access_index()
 * built-in, provided by Clang.
 *
 * __builtin_preserve_access_index() takes as an argument an expression of
@@ -115,8 +117,8 @@ enum bpf_field_info_kind {
 * (local) BTF, used to record relocation.
 */
 #define bpf_core_read(dst, sz, src)					    \
-	bpf_probe_read(dst, sz,						    \
-		       (const void *)__builtin_preserve_access_index(src))
+	bpf_probe_read_kernel(dst, sz,					    \
+			      (const void *)__builtin_preserve_access_index(src))

 /*
 * bpf_core_read_str() is a thin wrapper around bpf_probe_read_str()
@@ -124,8 +126,8 @@ enum bpf_field_info_kind {
 * argument.
 */
 #define bpf_core_read_str(dst, sz, src)					    \
-	bpf_probe_read_str(dst, sz,					    \
-			   (const void *)__builtin_preserve_access_index(src))
+	bpf_probe_read_kernel_str(dst, sz,				    \
+				  (const void *)__builtin_preserve_access_index(src))

 #define ___concat(a, b) a ## b
 #define ___apply(fn, n) ___concat(fn, n)
@@ -239,15 +241,17 @@ enum bpf_field_info_kind {
 *	int x = BPF_CORE_READ(s, a.b.c, d.e, f, g);
 *
 * BPF_CORE_READ will decompose above statement into 4 bpf_core_read (BPF
- * CO-RE relocatable bpf_probe_read() wrapper) calls, logically equivalent to:
+ * CO-RE relocatable bpf_probe_read_kernel() wrapper) calls, logically
+ * equivalent to:
 * 1. const void *__t = s->a.b.c;
 * 2. __t = __t->d.e;
 * 3. __t = __t->f;
 * 4. return __t->g;
 *
 * Equivalence is logical, because there is a heavy type casting/preservation
- * involved, as well as all the reads are happening through bpf_probe_read()
- * calls using __builtin_preserve_access_index() to emit CO-RE relocations.
+ * involved, as well as all the reads are happening through
+ * bpf_probe_read_kernel() calls using __builtin_preserve_access_index() to
+ * emit CO-RE relocations.
 *
 * N.B. Only up to 9 "field accessors" are supported, which should be more
 * than enough for any practical purpose.

--- a/tools/lib/bpf/bpf_tracing.h
+++ b/tools/lib/bpf/bpf_tracing.h
@@ -289,9 +289,9 @@ struct pt_regs;
 #define BPF_KRETPROBE_READ_RET_IP		BPF_KPROBE_READ_RET_IP
 #else
 #define BPF_KPROBE_READ_RET_IP(ip, ctx)					    \
-	({ bpf_probe_read(&(ip), sizeof(ip), (void *)PT_REGS_RET(ctx)); })
+	({ bpf_probe_read_kernel(&(ip), sizeof(ip), (void *)PT_REGS_RET(ctx)); })
 #define BPF_KRETPROBE_READ_RET_IP(ip, ctx)				    \
-	({ bpf_probe_read(&(ip), sizeof(ip),				    \
+	({ bpf_probe_read_kernel(&(ip), sizeof(ip),			    \
 			  (void *)(PT_REGS_FP(ctx) + sizeof(ip))); })
 #endif


--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
--- a/tools/testing/selftests/bpf/progs/test_vmlinux.c
+++ b/tools/testing/selftests/bpf/progs/test_vmlinux.c
@@ -19,12 +19,14 @@ SEC("tp/syscalls/sys_enter_nanosleep")
 int handle__tp(struct trace_event_raw_sys_enter *args)
 {
 	struct __kernel_timespec *ts;
+	long tv_nsec;

 	if (args->id != __NR_nanosleep)
 		return 0;

 	ts = (void *)args->args[0];
-	if (BPF_CORE_READ(ts, tv_nsec) != MY_TV_NSEC)
+	if (bpf_probe_read_user(&tv_nsec, sizeof(ts->tv_nsec), &ts->tv_nsec) ||
+	    tv_nsec != MY_TV_NSEC)
 		return 0;

 	tp_called = true;
@@ -35,12 +37,14 @@ SEC("raw_tp/sys_enter")
 int BPF_PROG(handle__raw_tp, struct pt_regs *regs, long id)
 {
 	struct __kernel_timespec *ts;
+	long tv_nsec;

 	if (id != __NR_nanosleep)
 		return 0;

 	ts = (void *)PT_REGS_PARM1_CORE(regs);
-	if (BPF_CORE_READ(ts, tv_nsec) != MY_TV_NSEC)
+	if (bpf_probe_read_user(&tv_nsec, sizeof(ts->tv_nsec), &ts->tv_nsec) ||
+	    tv_nsec != MY_TV_NSEC)
 		return 0;

 	raw_tp_called = true;
@@ -51,12 +55,14 @@ SEC("tp_btf/sys_enter")
 int BPF_PROG(handle__tp_btf, struct pt_regs *regs, long id)
 {
 	struct __kernel_timespec *ts;
+	long tv_nsec;

 	if (id != __NR_nanosleep)
 		return 0;

 	ts = (void *)PT_REGS_PARM1_CORE(regs);
-	if (BPF_CORE_READ(ts, tv_nsec) != MY_TV_NSEC)
+	if (bpf_probe_read_user(&tv_nsec, sizeof(ts->tv_nsec), &ts->tv_nsec) ||
+	    tv_nsec != MY_TV_NSEC)
 		return 0;

 	tp_btf_called = true;