Commit 60a128b5 authored by Alexei Starovoitov's avatar Alexei Starovoitov

Merge branch 'bpf: BTF support for ksyms'

Hao Luo says:

====================
v3 -> v4:
 - Rebasing
 - Cast bpf_[per|this]_cpu_ptr's parameter to void __percpu * before
   passing into per_cpu_ptr.

v2 -> v3:
 - Rename functions and variables in verifier for better readability.
 - Stick to logging message convention in libbpf.
 - Move bpf_per_cpu_ptr and bpf_this_cpu_ptr from trace-specific
   helper set to base helper set.
 - More specific test in ksyms_btf.
 - Fix return type cast in bpf_*_cpu_ptr.
 - Fix btf leak in ksyms_btf selftest.
 - Fix return error code for kallsyms_find().

v1 -> v2:
 - Move check_pseudo_btf_id from check_ld_imm() to
   replace_map_fd_with_map_ptr() and rename the latter.
 - Add bpf_this_cpu_ptr().
 - Use bpf_core_types_are_compat() in libbpf.c for checking type
   compatibility.
 - Rewrite typed ksym extern type in BTF with int to save space.
 - Minor revision of bpf_per_cpu_ptr()'s comments.
 - Avoid using long in tests that use skeleton.
 - Refactored test_ksyms.c by moving kallsyms_find() to trace_helpers.c
 - Fold the patches that sync include/linux/uapi and
   tools/include/linux/uapi.

rfc -> v1:
 - Encode VAR's btf_id for PSEUDO_BTF_ID.
 - More checks in verifier. Checking the btf_id passed as
   PSEUDO_BTF_ID is valid VAR, its name and type.
 - Checks in libbpf on type compatibility of ksyms.
 - Add bpf_per_cpu_ptr() to access kernel percpu vars. Introduced
   new ARG and RET types for this helper.

This patch series extends the previously added __ksym externs with
btf support.

Right now the __ksym externs are treated as pure 64-bit scalar value.
Libbpf replaces ld_imm64 insn of __ksym by its kernel address at load
time. This patch series extend those externs with their btf info. Note
that btf support for __ksym must come with the kernel btf that has
VARs encoded to work properly. The corresponding chagnes in pahole
is available at [1] (with a fix at [2] for gcc 4.9+).

The first 3 patches in this series add support for general kernel
global variables, which include verifier checking (01/06), libpf
support (02/06) and selftests for getting typed ksym extern's kernel
address (03/06).

The next 3 patches extends that capability further by introducing
helpers bpf_per_cpu_ptr() and bpf_this_cpu_ptr(), which allows accessing
kernel percpu variables correctly (04/06 and 05/06).

The tests of this feature were performed against pahole that is extended
with [1] and [2]. For kernel BTF that does not have VARs encoded, the
selftests will be skipped.

[1] https://git.kernel.org/pub/scm/devel/pahole/pahole.git/commit/?id=f3d9054ba8ff1df0fc44e507e3a01c0964cabd42
[2] https://www.spinics.net/lists/dwarves/msg00451.html
====================
Signed-off-by: default avatarAlexei Starovoitov <ast@kernel.org>
parents 440c5752 00dc73e4
......@@ -293,6 +293,7 @@ enum bpf_arg_type {
ARG_PTR_TO_ALLOC_MEM_OR_NULL, /* pointer to dynamically allocated memory or NULL */
ARG_CONST_ALLOC_SIZE_OR_ZERO, /* number of allocated bytes requested */
ARG_PTR_TO_BTF_ID_SOCK_COMMON, /* pointer to in-kernel sock_common or bpf-mirrored bpf_sock */
ARG_PTR_TO_PERCPU_BTF_ID, /* pointer to in-kernel percpu type */
__BPF_ARG_TYPE_MAX,
};
......@@ -307,6 +308,8 @@ enum bpf_return_type {
RET_PTR_TO_SOCK_COMMON_OR_NULL, /* returns a pointer to a sock_common or NULL */
RET_PTR_TO_ALLOC_MEM_OR_NULL, /* returns a pointer to dynamically allocated memory or NULL */
RET_PTR_TO_BTF_ID_OR_NULL, /* returns a pointer to a btf_id or NULL */
RET_PTR_TO_MEM_OR_BTF_ID_OR_NULL, /* returns a pointer to a valid memory or a btf_id or NULL */
RET_PTR_TO_MEM_OR_BTF_ID, /* returns a pointer to a valid memory or a btf_id */
};
/* eBPF function prototype used by verifier to allow BPF_CALLs from eBPF programs
......@@ -405,6 +408,7 @@ enum bpf_reg_type {
PTR_TO_RDONLY_BUF_OR_NULL, /* reg points to a readonly buffer or NULL */
PTR_TO_RDWR_BUF, /* reg points to a read/write buffer */
PTR_TO_RDWR_BUF_OR_NULL, /* reg points to a read/write buffer or NULL */
PTR_TO_PERCPU_BTF_ID, /* reg points to a percpu kernel variable */
};
/* The information passed from prog-specific *_is_valid_access
......@@ -1828,6 +1832,8 @@ extern const struct bpf_func_proto bpf_skc_to_tcp_request_sock_proto;
extern const struct bpf_func_proto bpf_skc_to_udp6_sock_proto;
extern const struct bpf_func_proto bpf_copy_from_user_proto;
extern const struct bpf_func_proto bpf_snprintf_btf_proto;
extern const struct bpf_func_proto bpf_per_cpu_ptr_proto;
extern const struct bpf_func_proto bpf_this_cpu_ptr_proto;
const struct bpf_func_proto *bpf_tracing_func_proto(
enum bpf_func_id func_id, const struct bpf_prog *prog);
......
......@@ -308,6 +308,13 @@ struct bpf_insn_aux_data {
u32 map_index; /* index into used_maps[] */
u32 map_off; /* offset from value base address */
};
struct {
enum bpf_reg_type reg_type; /* type of pseudo_btf_id */
union {
u32 btf_id; /* btf_id for struct typed var */
u32 mem_size; /* mem_size for non-struct typed var */
};
} btf_var;
};
u64 map_key_state; /* constant (32 bit) key tracking for maps */
int ctx_field_size; /* the ctx field size for load insn, maybe 0 */
......
......@@ -110,6 +110,11 @@ btf_resolve_size(const struct btf *btf, const struct btf_type *type,
i < btf_type_vlen(struct_type); \
i++, member++)
#define for_each_vsi(i, datasec_type, member) \
for (i = 0, member = btf_type_var_secinfo(datasec_type); \
i < btf_type_vlen(datasec_type); \
i++, member++)
static inline bool btf_type_is_ptr(const struct btf_type *t)
{
return BTF_INFO_KIND(t->info) == BTF_KIND_PTR;
......@@ -145,6 +150,21 @@ static inline bool btf_type_is_func_proto(const struct btf_type *t)
return BTF_INFO_KIND(t->info) == BTF_KIND_FUNC_PROTO;
}
static inline bool btf_type_is_var(const struct btf_type *t)
{
return BTF_INFO_KIND(t->info) == BTF_KIND_VAR;
}
/* union is only a special case of struct:
* all its offsetof(member) == 0
*/
static inline bool btf_type_is_struct(const struct btf_type *t)
{
u8 kind = BTF_INFO_KIND(t->info);
return kind == BTF_KIND_STRUCT || kind == BTF_KIND_UNION;
}
static inline u16 btf_type_vlen(const struct btf_type *t)
{
return BTF_INFO_VLEN(t->info);
......@@ -179,6 +199,12 @@ static inline const struct btf_member *btf_type_member(const struct btf_type *t)
return (const struct btf_member *)(t + 1);
}
static inline const struct btf_var_secinfo *btf_type_var_secinfo(
const struct btf_type *t)
{
return (const struct btf_var_secinfo *)(t + 1);
}
#ifdef CONFIG_BPF_SYSCALL
const struct btf_type *btf_type_by_id(const struct btf *btf, u32 type_id);
const char *btf_name_by_offset(const struct btf *btf, u32 offset);
......
......@@ -356,18 +356,36 @@ enum bpf_link_type {
#define BPF_F_SLEEPABLE (1U << 4)
/* When BPF ldimm64's insn[0].src_reg != 0 then this can have
* two extensions:
*
* insn[0].src_reg: BPF_PSEUDO_MAP_FD BPF_PSEUDO_MAP_VALUE
* insn[0].imm: map fd map fd
* insn[1].imm: 0 offset into value
* insn[0].off: 0 0
* insn[1].off: 0 0
* ldimm64 rewrite: address of map address of map[0]+offset
* verifier type: CONST_PTR_TO_MAP PTR_TO_MAP_VALUE
* the following extensions:
*
* insn[0].src_reg: BPF_PSEUDO_MAP_FD
* insn[0].imm: map fd
* insn[1].imm: 0
* insn[0].off: 0
* insn[1].off: 0
* ldimm64 rewrite: address of map
* verifier type: CONST_PTR_TO_MAP
*/
#define BPF_PSEUDO_MAP_FD 1
/* insn[0].src_reg: BPF_PSEUDO_MAP_VALUE
* insn[0].imm: map fd
* insn[1].imm: offset into value
* insn[0].off: 0
* insn[1].off: 0
* ldimm64 rewrite: address of map[0]+offset
* verifier type: PTR_TO_MAP_VALUE
*/
#define BPF_PSEUDO_MAP_VALUE 2
/* insn[0].src_reg: BPF_PSEUDO_BTF_ID
* insn[0].imm: kernel btd id of VAR
* insn[1].imm: 0
* insn[0].off: 0
* insn[1].off: 0
* ldimm64 rewrite: address of the kernel variable
* verifier type: PTR_TO_BTF_ID or PTR_TO_MEM, depending on whether the var
* is struct/union.
*/
#define BPF_PSEUDO_BTF_ID 3
/* when bpf_call->src_reg == BPF_PSEUDO_CALL, bpf_call->imm == pc-relative
* offset to another bpf function
......@@ -3668,6 +3686,35 @@ union bpf_attr {
* Return
* The helper returns **TC_ACT_REDIRECT** on success or
* **TC_ACT_SHOT** on error.
*
* void *bpf_per_cpu_ptr(const void *percpu_ptr, u32 cpu)
* Description
* Take a pointer to a percpu ksym, *percpu_ptr*, and return a
* pointer to the percpu kernel variable on *cpu*. A ksym is an
* extern variable decorated with '__ksym'. For ksym, there is a
* global var (either static or global) defined of the same name
* in the kernel. The ksym is percpu if the global var is percpu.
* The returned pointer points to the global percpu var on *cpu*.
*
* bpf_per_cpu_ptr() has the same semantic as per_cpu_ptr() in the
* kernel, except that bpf_per_cpu_ptr() may return NULL. This
* happens if *cpu* is larger than nr_cpu_ids. The caller of
* bpf_per_cpu_ptr() must check the returned value.
* Return
* A pointer pointing to the kernel percpu variable on *cpu*, or
* NULL, if *cpu* is invalid.
*
* void *bpf_this_cpu_ptr(const void *percpu_ptr)
* Description
* Take a pointer to a percpu ksym, *percpu_ptr*, and return a
* pointer to the percpu kernel variable on this cpu. See the
* description of 'ksym' in **bpf_per_cpu_ptr**\ ().
*
* bpf_this_cpu_ptr() has the same semantic as this_cpu_ptr() in
* the kernel. Different from **bpf_per_cpu_ptr**\ (), it would
* never return NULL.
* Return
* A pointer pointing to the kernel percpu variable on this cpu.
*/
#define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \
......@@ -3823,6 +3870,8 @@ union bpf_attr {
FN(seq_printf_btf), \
FN(skb_cgroup_classid), \
FN(redirect_neigh), \
FN(bpf_per_cpu_ptr), \
FN(bpf_this_cpu_ptr), \
/* */
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
......
......@@ -188,11 +188,6 @@
i < btf_type_vlen(struct_type); \
i++, member++)
#define for_each_vsi(i, struct_type, member) \
for (i = 0, member = btf_type_var_secinfo(struct_type); \
i < btf_type_vlen(struct_type); \
i++, member++)
#define for_each_vsi_from(i, from, struct_type, member) \
for (i = from, member = btf_type_var_secinfo(struct_type) + from; \
i < btf_type_vlen(struct_type); \
......@@ -440,16 +435,6 @@ static bool btf_type_nosize_or_null(const struct btf_type *t)
return !t || btf_type_nosize(t);
}
/* union is only a special case of struct:
* all its offsetof(member) == 0
*/
static bool btf_type_is_struct(const struct btf_type *t)
{
u8 kind = BTF_INFO_KIND(t->info);
return kind == BTF_KIND_STRUCT || kind == BTF_KIND_UNION;
}
static bool __btf_type_is_struct(const struct btf_type *t)
{
return BTF_INFO_KIND(t->info) == BTF_KIND_STRUCT;
......@@ -460,11 +445,6 @@ static bool btf_type_is_array(const struct btf_type *t)
return BTF_INFO_KIND(t->info) == BTF_KIND_ARRAY;
}
static bool btf_type_is_var(const struct btf_type *t)
{
return BTF_INFO_KIND(t->info) == BTF_KIND_VAR;
}
static bool btf_type_is_datasec(const struct btf_type *t)
{
return BTF_INFO_KIND(t->info) == BTF_KIND_DATASEC;
......@@ -613,11 +593,6 @@ static const struct btf_var *btf_type_var(const struct btf_type *t)
return (const struct btf_var *)(t + 1);
}
static const struct btf_var_secinfo *btf_type_var_secinfo(const struct btf_type *t)
{
return (const struct btf_var_secinfo *)(t + 1);
}
static const struct btf_kind_operations *btf_type_ops(const struct btf_type *t)
{
return kind_ops[BTF_INFO_KIND(t->info)];
......
......@@ -623,6 +623,34 @@ const struct bpf_func_proto bpf_copy_from_user_proto = {
.arg3_type = ARG_ANYTHING,
};
BPF_CALL_2(bpf_per_cpu_ptr, const void *, ptr, u32, cpu)
{
if (cpu >= nr_cpu_ids)
return (unsigned long)NULL;
return (unsigned long)per_cpu_ptr((const void __percpu *)ptr, cpu);
}
const struct bpf_func_proto bpf_per_cpu_ptr_proto = {
.func = bpf_per_cpu_ptr,
.gpl_only = false,
.ret_type = RET_PTR_TO_MEM_OR_BTF_ID_OR_NULL,
.arg1_type = ARG_PTR_TO_PERCPU_BTF_ID,
.arg2_type = ARG_ANYTHING,
};
BPF_CALL_1(bpf_this_cpu_ptr, const void *, percpu_ptr)
{
return (unsigned long)this_cpu_ptr((const void __percpu *)percpu_ptr);
}
const struct bpf_func_proto bpf_this_cpu_ptr_proto = {
.func = bpf_this_cpu_ptr,
.gpl_only = false,
.ret_type = RET_PTR_TO_MEM_OR_BTF_ID,
.arg1_type = ARG_PTR_TO_PERCPU_BTF_ID,
};
const struct bpf_func_proto bpf_get_current_task_proto __weak;
const struct bpf_func_proto bpf_probe_read_user_proto __weak;
const struct bpf_func_proto bpf_probe_read_user_str_proto __weak;
......@@ -689,6 +717,10 @@ bpf_base_func_proto(enum bpf_func_id func_id)
return &bpf_snprintf_btf_proto;
case BPF_FUNC_jiffies64:
return &bpf_jiffies64_proto;
case BPF_FUNC_bpf_per_cpu_ptr:
return &bpf_per_cpu_ptr_proto;
case BPF_FUNC_bpf_this_cpu_ptr:
return &bpf_this_cpu_ptr_proto;
default:
break;
}
......
......@@ -238,6 +238,8 @@ struct bpf_call_arg_meta {
u64 msize_max_value;
int ref_obj_id;
int func_id;
u32 btf_id;
u32 ret_btf_id;
};
struct btf *btf_vmlinux;
......@@ -517,6 +519,7 @@ static const char * const reg_type_str[] = {
[PTR_TO_XDP_SOCK] = "xdp_sock",
[PTR_TO_BTF_ID] = "ptr_",
[PTR_TO_BTF_ID_OR_NULL] = "ptr_or_null_",
[PTR_TO_PERCPU_BTF_ID] = "percpu_ptr_",
[PTR_TO_MEM] = "mem",
[PTR_TO_MEM_OR_NULL] = "mem_or_null",
[PTR_TO_RDONLY_BUF] = "rdonly_buf",
......@@ -583,7 +586,9 @@ static void print_verifier_state(struct bpf_verifier_env *env,
/* reg->off should be 0 for SCALAR_VALUE */
verbose(env, "%lld", reg->var_off.value + reg->off);
} else {
if (t == PTR_TO_BTF_ID || t == PTR_TO_BTF_ID_OR_NULL)
if (t == PTR_TO_BTF_ID ||
t == PTR_TO_BTF_ID_OR_NULL ||
t == PTR_TO_PERCPU_BTF_ID)
verbose(env, "%s", kernel_type_name(reg->btf_id));
verbose(env, "(id=%d", reg->id);
if (reg_type_may_be_refcounted_or_null(t))
......@@ -2204,6 +2209,7 @@ static bool is_spillable_regtype(enum bpf_reg_type type)
case PTR_TO_RDONLY_BUF_OR_NULL:
case PTR_TO_RDWR_BUF:
case PTR_TO_RDWR_BUF_OR_NULL:
case PTR_TO_PERCPU_BTF_ID:
return true;
default:
return false;
......@@ -4017,6 +4023,7 @@ static const struct bpf_reg_types alloc_mem_types = { .types = { PTR_TO_MEM } };
static const struct bpf_reg_types const_map_ptr_types = { .types = { CONST_PTR_TO_MAP } };
static const struct bpf_reg_types btf_ptr_types = { .types = { PTR_TO_BTF_ID } };
static const struct bpf_reg_types spin_lock_types = { .types = { PTR_TO_MAP_VALUE } };
static const struct bpf_reg_types percpu_btf_ptr_types = { .types = { PTR_TO_PERCPU_BTF_ID } };
static const struct bpf_reg_types *compatible_reg_types[__BPF_ARG_TYPE_MAX] = {
[ARG_PTR_TO_MAP_KEY] = &map_key_value_types,
......@@ -4042,6 +4049,7 @@ static const struct bpf_reg_types *compatible_reg_types[__BPF_ARG_TYPE_MAX] = {
[ARG_PTR_TO_ALLOC_MEM_OR_NULL] = &alloc_mem_types,
[ARG_PTR_TO_INT] = &int_ptr_types,
[ARG_PTR_TO_LONG] = &int_ptr_types,
[ARG_PTR_TO_PERCPU_BTF_ID] = &percpu_btf_ptr_types,
};
static int check_reg_type(struct bpf_verifier_env *env, u32 regno,
......@@ -4205,6 +4213,12 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 arg,
err = check_helper_mem_access(env, regno,
meta->map_ptr->value_size, false,
meta);
} else if (arg_type == ARG_PTR_TO_PERCPU_BTF_ID) {
if (!reg->btf_id) {
verbose(env, "Helper has invalid btf_id in R%d\n", regno);
return -EACCES;
}
meta->ret_btf_id = reg->btf_id;
} else if (arg_type == ARG_PTR_TO_SPIN_LOCK) {
if (meta->func_id == BPF_FUNC_spin_lock) {
if (process_spin_lock(env, regno, true))
......@@ -5114,6 +5128,35 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn
regs[BPF_REG_0].type = PTR_TO_MEM_OR_NULL;
regs[BPF_REG_0].id = ++env->id_gen;
regs[BPF_REG_0].mem_size = meta.mem_size;
} else if (fn->ret_type == RET_PTR_TO_MEM_OR_BTF_ID_OR_NULL ||
fn->ret_type == RET_PTR_TO_MEM_OR_BTF_ID) {
const struct btf_type *t;
mark_reg_known_zero(env, regs, BPF_REG_0);
t = btf_type_skip_modifiers(btf_vmlinux, meta.ret_btf_id, NULL);
if (!btf_type_is_struct(t)) {
u32 tsize;
const struct btf_type *ret;
const char *tname;
/* resolve the type size of ksym. */
ret = btf_resolve_size(btf_vmlinux, t, &tsize);
if (IS_ERR(ret)) {
tname = btf_name_by_offset(btf_vmlinux, t->name_off);
verbose(env, "unable to resolve the size of type '%s': %ld\n",
tname, PTR_ERR(ret));
return -EINVAL;
}
regs[BPF_REG_0].type =
fn->ret_type == RET_PTR_TO_MEM_OR_BTF_ID ?
PTR_TO_MEM : PTR_TO_MEM_OR_NULL;
regs[BPF_REG_0].mem_size = tsize;
} else {
regs[BPF_REG_0].type =
fn->ret_type == RET_PTR_TO_MEM_OR_BTF_ID ?
PTR_TO_BTF_ID : PTR_TO_BTF_ID_OR_NULL;
regs[BPF_REG_0].btf_id = meta.ret_btf_id;
}
} else if (fn->ret_type == RET_PTR_TO_BTF_ID_OR_NULL) {
int ret_btf_id;
......@@ -7488,6 +7531,7 @@ static int check_ld_imm(struct bpf_verifier_env *env, struct bpf_insn *insn)
{
struct bpf_insn_aux_data *aux = cur_aux(env);
struct bpf_reg_state *regs = cur_regs(env);
struct bpf_reg_state *dst_reg;
struct bpf_map *map;
int err;
......@@ -7504,25 +7548,45 @@ static int check_ld_imm(struct bpf_verifier_env *env, struct bpf_insn *insn)
if (err)
return err;
dst_reg = &regs[insn->dst_reg];
if (insn->src_reg == 0) {
u64 imm = ((u64)(insn + 1)->imm << 32) | (u32)insn->imm;
regs[insn->dst_reg].type = SCALAR_VALUE;
dst_reg->type = SCALAR_VALUE;
__mark_reg_known(&regs[insn->dst_reg], imm);
return 0;
}
if (insn->src_reg == BPF_PSEUDO_BTF_ID) {
mark_reg_known_zero(env, regs, insn->dst_reg);
dst_reg->type = aux->btf_var.reg_type;
switch (dst_reg->type) {
case PTR_TO_MEM:
dst_reg->mem_size = aux->btf_var.mem_size;
break;
case PTR_TO_BTF_ID:
case PTR_TO_PERCPU_BTF_ID:
dst_reg->btf_id = aux->btf_var.btf_id;
break;
default:
verbose(env, "bpf verifier is misconfigured\n");
return -EFAULT;
}
return 0;
}
map = env->used_maps[aux->map_index];
mark_reg_known_zero(env, regs, insn->dst_reg);
regs[insn->dst_reg].map_ptr = map;
dst_reg->map_ptr = map;
if (insn->src_reg == BPF_PSEUDO_MAP_VALUE) {
regs[insn->dst_reg].type = PTR_TO_MAP_VALUE;
regs[insn->dst_reg].off = aux->map_off;
dst_reg->type = PTR_TO_MAP_VALUE;
dst_reg->off = aux->map_off;
if (map_value_has_spin_lock(map))
regs[insn->dst_reg].id = ++env->id_gen;
dst_reg->id = ++env->id_gen;
} else if (insn->src_reg == BPF_PSEUDO_MAP_FD) {
regs[insn->dst_reg].type = CONST_PTR_TO_MAP;
dst_reg->type = CONST_PTR_TO_MAP;
} else {
verbose(env, "bpf verifier is misconfigured\n");
return -EINVAL;
......@@ -9424,6 +9488,92 @@ static int do_check(struct bpf_verifier_env *env)
return 0;
}
/* replace pseudo btf_id with kernel symbol address */
static int check_pseudo_btf_id(struct bpf_verifier_env *env,
struct bpf_insn *insn,
struct bpf_insn_aux_data *aux)
{
u32 datasec_id, type, id = insn->imm;
const struct btf_var_secinfo *vsi;
const struct btf_type *datasec;
const struct btf_type *t;
const char *sym_name;
bool percpu = false;
u64 addr;
int i;
if (!btf_vmlinux) {
verbose(env, "kernel is missing BTF, make sure CONFIG_DEBUG_INFO_BTF=y is specified in Kconfig.\n");
return -EINVAL;
}
if (insn[1].imm != 0) {
verbose(env, "reserved field (insn[1].imm) is used in pseudo_btf_id ldimm64 insn.\n");
return -EINVAL;
}
t = btf_type_by_id(btf_vmlinux, id);
if (!t) {
verbose(env, "ldimm64 insn specifies invalid btf_id %d.\n", id);
return -ENOENT;
}
if (!btf_type_is_var(t)) {
verbose(env, "pseudo btf_id %d in ldimm64 isn't KIND_VAR.\n",
id);
return -EINVAL;
}
sym_name = btf_name_by_offset(btf_vmlinux, t->name_off);
addr = kallsyms_lookup_name(sym_name);
if (!addr) {
verbose(env, "ldimm64 failed to find the address for kernel symbol '%s'.\n",
sym_name);
return -ENOENT;
}
datasec_id = btf_find_by_name_kind(btf_vmlinux, ".data..percpu",
BTF_KIND_DATASEC);
if (datasec_id > 0) {
datasec = btf_type_by_id(btf_vmlinux, datasec_id);
for_each_vsi(i, datasec, vsi) {
if (vsi->type == id) {
percpu = true;
break;
}
}
}
insn[0].imm = (u32)addr;
insn[1].imm = addr >> 32;
type = t->type;
t = btf_type_skip_modifiers(btf_vmlinux, type, NULL);
if (percpu) {
aux->btf_var.reg_type = PTR_TO_PERCPU_BTF_ID;
aux->btf_var.btf_id = type;
} else if (!btf_type_is_struct(t)) {
const struct btf_type *ret;
const char *tname;
u32 tsize;
/* resolve the type size of ksym. */
ret = btf_resolve_size(btf_vmlinux, t, &tsize);
if (IS_ERR(ret)) {
tname = btf_name_by_offset(btf_vmlinux, t->name_off);
verbose(env, "ldimm64 unable to resolve the size of type '%s': %ld\n",
tname, PTR_ERR(ret));
return -EINVAL;
}
aux->btf_var.reg_type = PTR_TO_MEM;
aux->btf_var.mem_size = tsize;
} else {
aux->btf_var.reg_type = PTR_TO_BTF_ID;
aux->btf_var.btf_id = type;
}
return 0;
}
static int check_map_prealloc(struct bpf_map *map)
{
return (map->map_type != BPF_MAP_TYPE_HASH &&
......@@ -9534,10 +9684,14 @@ static bool bpf_map_is_cgroup_storage(struct bpf_map *map)
map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE);
}
/* look for pseudo eBPF instructions that access map FDs and
* replace them with actual map pointers
/* find and rewrite pseudo imm in ld_imm64 instructions:
*
* 1. if it accesses map FD, replace it with actual map pointer.
* 2. if it accesses btf_id of a VAR, replace it with pointer to the var.
*
* NOTE: btf_vmlinux is required for converting pseudo btf_id.
*/
static int replace_map_fd_with_map_ptr(struct bpf_verifier_env *env)
static int resolve_pseudo_ldimm64(struct bpf_verifier_env *env)
{
struct bpf_insn *insn = env->prog->insnsi;
int insn_cnt = env->prog->len;
......@@ -9578,6 +9732,14 @@ static int replace_map_fd_with_map_ptr(struct bpf_verifier_env *env)
/* valid generic load 64-bit imm */
goto next_insn;
if (insn[0].src_reg == BPF_PSEUDO_BTF_ID) {
aux = &env->insn_aux_data[i];
err = check_pseudo_btf_id(env, insn, aux);
if (err)
return err;
goto next_insn;
}
/* In final convert_pseudo_ld_imm64() step, this is
* converted into regular 64-bit imm load insn.
*/
......@@ -11633,10 +11795,6 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr,
if (is_priv)
env->test_state_freq = attr->prog_flags & BPF_F_TEST_STATE_FREQ;
ret = replace_map_fd_with_map_ptr(env);
if (ret < 0)
goto skip_full_check;
if (bpf_prog_is_dev_bound(env->prog->aux)) {
ret = bpf_prog_offload_verifier_prep(env->prog);
if (ret)
......@@ -11662,6 +11820,10 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr,
if (ret)
goto skip_full_check;
ret = resolve_pseudo_ldimm64(env);
if (ret < 0)
goto skip_full_check;
ret = check_cfg(env);
if (ret < 0)
goto skip_full_check;
......
......@@ -1327,6 +1327,10 @@ bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return prog->aux->sleepable ? &bpf_copy_from_user_proto : NULL;
case BPF_FUNC_snprintf_btf:
return &bpf_snprintf_btf_proto;
case BPF_FUNC_bpf_per_cpu_ptr:
return &bpf_per_cpu_ptr_proto;
case BPF_FUNC_bpf_this_cpu_ptr:
return &bpf_this_cpu_ptr_proto;
default:
return NULL;
}
......
......@@ -356,18 +356,36 @@ enum bpf_link_type {
#define BPF_F_SLEEPABLE (1U << 4)
/* When BPF ldimm64's insn[0].src_reg != 0 then this can have
* two extensions:
*
* insn[0].src_reg: BPF_PSEUDO_MAP_FD BPF_PSEUDO_MAP_VALUE
* insn[0].imm: map fd map fd
* insn[1].imm: 0 offset into value
* insn[0].off: 0 0
* insn[1].off: 0 0
* ldimm64 rewrite: address of map address of map[0]+offset
* verifier type: CONST_PTR_TO_MAP PTR_TO_MAP_VALUE
* the following extensions:
*
* insn[0].src_reg: BPF_PSEUDO_MAP_FD
* insn[0].imm: map fd
* insn[1].imm: 0
* insn[0].off: 0
* insn[1].off: 0
* ldimm64 rewrite: address of map
* verifier type: CONST_PTR_TO_MAP
*/
#define BPF_PSEUDO_MAP_FD 1
/* insn[0].src_reg: BPF_PSEUDO_MAP_VALUE
* insn[0].imm: map fd
* insn[1].imm: offset into value
* insn[0].off: 0
* insn[1].off: 0
* ldimm64 rewrite: address of map[0]+offset
* verifier type: PTR_TO_MAP_VALUE
*/
#define BPF_PSEUDO_MAP_VALUE 2
/* insn[0].src_reg: BPF_PSEUDO_BTF_ID
* insn[0].imm: kernel btd id of VAR
* insn[1].imm: 0
* insn[0].off: 0
* insn[1].off: 0
* ldimm64 rewrite: address of the kernel variable
* verifier type: PTR_TO_BTF_ID or PTR_TO_MEM, depending on whether the var
* is struct/union.
*/
#define BPF_PSEUDO_BTF_ID 3
/* when bpf_call->src_reg == BPF_PSEUDO_CALL, bpf_call->imm == pc-relative
* offset to another bpf function
......@@ -3668,6 +3686,35 @@ union bpf_attr {
* Return
* The helper returns **TC_ACT_REDIRECT** on success or
* **TC_ACT_SHOT** on error.
*
* void *bpf_per_cpu_ptr(const void *percpu_ptr, u32 cpu)
* Description
* Take a pointer to a percpu ksym, *percpu_ptr*, and return a
* pointer to the percpu kernel variable on *cpu*. A ksym is an
* extern variable decorated with '__ksym'. For ksym, there is a
* global var (either static or global) defined of the same name
* in the kernel. The ksym is percpu if the global var is percpu.
* The returned pointer points to the global percpu var on *cpu*.
*
* bpf_per_cpu_ptr() has the same semantic as per_cpu_ptr() in the
* kernel, except that bpf_per_cpu_ptr() may return NULL. This
* happens if *cpu* is larger than nr_cpu_ids. The caller of
* bpf_per_cpu_ptr() must check the returned value.
* Return
* A pointer pointing to the kernel percpu variable on *cpu*, or
* NULL, if *cpu* is invalid.
*
* void *bpf_this_cpu_ptr(const void *percpu_ptr)
* Description
* Take a pointer to a percpu ksym, *percpu_ptr*, and return a
* pointer to the percpu kernel variable on this cpu. See the
* description of 'ksym' in **bpf_per_cpu_ptr**\ ().
*
* bpf_this_cpu_ptr() has the same semantic as this_cpu_ptr() in
* the kernel. Different from **bpf_per_cpu_ptr**\ (), it would
* never return NULL.
* Return
* A pointer pointing to the kernel percpu variable on this cpu.
*/
#define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \
......@@ -3823,6 +3870,8 @@ union bpf_attr {
FN(seq_printf_btf), \
FN(skb_cgroup_classid), \
FN(redirect_neigh), \
FN(bpf_per_cpu_ptr), \
FN(bpf_this_cpu_ptr), \
/* */
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
......
......@@ -390,6 +390,12 @@ struct extern_desc {
} kcfg;
struct {
unsigned long long addr;
/* target btf_id of the corresponding kernel var. */
int vmlinux_btf_id;
/* local btf_id of the ksym extern's type. */
__u32 type_id;
} ksym;
};
};
......@@ -2522,12 +2528,23 @@ static int bpf_object__load_vmlinux_btf(struct bpf_object *obj)
{
bool need_vmlinux_btf = false;
struct bpf_program *prog;
int err;
int i, err;
/* CO-RE relocations need kernel BTF */
if (obj->btf_ext && obj->btf_ext->core_relo_info.len)
need_vmlinux_btf = true;
/* Support for typed ksyms needs kernel BTF */
for (i = 0; i < obj->nr_extern; i++) {
const struct extern_desc *ext;
ext = &obj->externs[i];
if (ext->type == EXT_KSYM && ext->ksym.type_id) {
need_vmlinux_btf = true;
break;
}
}
bpf_object__for_each_program(prog, obj) {
if (!prog->load)
continue;
......@@ -3156,16 +3173,10 @@ static int bpf_object__collect_externs(struct bpf_object *obj)
return -ENOTSUP;
}
} else if (strcmp(sec_name, KSYMS_SEC) == 0) {
const struct btf_type *vt;
ksym_sec = sec;
ext->type = EXT_KSYM;
vt = skip_mods_and_typedefs(obj->btf, t->type, NULL);
if (!btf_is_void(vt)) {
pr_warn("extern (ksym) '%s' is not typeless (void)\n", ext_name);
return -ENOTSUP;
}
skip_mods_and_typedefs(obj->btf, t->type,
&ext->ksym.type_id);
} else {
pr_warn("unrecognized extern section '%s'\n", sec_name);
return -ENOTSUP;
......@@ -5800,8 +5811,13 @@ bpf_object__relocate_data(struct bpf_object *obj, struct bpf_program *prog)
insn[0].imm = obj->maps[obj->kconfig_map_idx].fd;
insn[1].imm = ext->kcfg.data_off;
} else /* EXT_KSYM */ {
insn[0].imm = (__u32)ext->ksym.addr;
insn[1].imm = ext->ksym.addr >> 32;
if (ext->ksym.type_id) { /* typed ksyms */
insn[0].src_reg = BPF_PSEUDO_BTF_ID;
insn[0].imm = ext->ksym.vmlinux_btf_id;
} else { /* typeless ksyms */
insn[0].imm = (__u32)ext->ksym.addr;
insn[1].imm = ext->ksym.addr >> 32;
}
}
relo->processed = true;
break;
......@@ -6933,10 +6949,72 @@ static int bpf_object__read_kallsyms_file(struct bpf_object *obj)
return err;
}
static int bpf_object__resolve_ksyms_btf_id(struct bpf_object *obj)
{
struct extern_desc *ext;
int i, id;
for (i = 0; i < obj->nr_extern; i++) {
const struct btf_type *targ_var, *targ_type;
__u32 targ_type_id, local_type_id;
const char *targ_var_name;
int ret;
ext = &obj->externs[i];
if (ext->type != EXT_KSYM || !ext->ksym.type_id)
continue;
id = btf__find_by_name_kind(obj->btf_vmlinux, ext->name,
BTF_KIND_VAR);
if (id <= 0) {
pr_warn("extern (ksym) '%s': failed to find BTF ID in vmlinux BTF.\n",
ext->name);
return -ESRCH;
}
/* find local type_id */
local_type_id = ext->ksym.type_id;
/* find target type_id */
targ_var = btf__type_by_id(obj->btf_vmlinux, id);
targ_var_name = btf__name_by_offset(obj->btf_vmlinux,
targ_var->name_off);
targ_type = skip_mods_and_typedefs(obj->btf_vmlinux,
targ_var->type,
&targ_type_id);
ret = bpf_core_types_are_compat(obj->btf, local_type_id,
obj->btf_vmlinux, targ_type_id);
if (ret <= 0) {
const struct btf_type *local_type;
const char *targ_name, *local_name;
local_type = btf__type_by_id(obj->btf, local_type_id);
local_name = btf__name_by_offset(obj->btf,
local_type->name_off);
targ_name = btf__name_by_offset(obj->btf_vmlinux,
targ_type->name_off);
pr_warn("extern (ksym) '%s': incompatible types, expected [%d] %s %s, but kernel has [%d] %s %s\n",
ext->name, local_type_id,
btf_kind_str(local_type), local_name, targ_type_id,
btf_kind_str(targ_type), targ_name);
return -EINVAL;
}
ext->is_set = true;
ext->ksym.vmlinux_btf_id = id;
pr_debug("extern (ksym) '%s': resolved to [%d] %s %s\n",
ext->name, id, btf_kind_str(targ_var), targ_var_name);
}
return 0;
}
static int bpf_object__resolve_externs(struct bpf_object *obj,
const char *extra_kconfig)
{
bool need_config = false, need_kallsyms = false;
bool need_vmlinux_btf = false;
struct extern_desc *ext;
void *kcfg_data = NULL;
int err, i;
......@@ -6967,7 +7045,10 @@ static int bpf_object__resolve_externs(struct bpf_object *obj,
strncmp(ext->name, "CONFIG_", 7) == 0) {
need_config = true;
} else if (ext->type == EXT_KSYM) {
need_kallsyms = true;
if (ext->ksym.type_id)
need_vmlinux_btf = true;
else
need_kallsyms = true;
} else {
pr_warn("unrecognized extern '%s'\n", ext->name);
return -EINVAL;
......@@ -6996,6 +7077,11 @@ static int bpf_object__resolve_externs(struct bpf_object *obj,
if (err)
return -EINVAL;
}
if (need_vmlinux_btf) {
err = bpf_object__resolve_ksyms_btf_id(obj);
if (err)
return -EINVAL;
}
for (i = 0; i < obj->nr_extern; i++) {
ext = &obj->externs[i];
......@@ -7028,10 +7114,10 @@ int bpf_object__load_xattr(struct bpf_object_load_attr *attr)
}
err = bpf_object__probe_loading(obj);
err = err ? : bpf_object__load_vmlinux_btf(obj);
err = err ? : bpf_object__resolve_externs(obj, obj->kconfig);
err = err ? : bpf_object__sanitize_and_load_btf(obj);
err = err ? : bpf_object__sanitize_maps(obj);
err = err ? : bpf_object__load_vmlinux_btf(obj);
err = err ? : bpf_object__init_kern_struct_ops_maps(obj);
err = err ? : bpf_object__create_maps(obj);
err = err ? : bpf_object__relocate(obj, attr->target_btf_path);
......
......@@ -7,40 +7,28 @@
static int duration;
static __u64 kallsyms_find(const char *sym)
{
char type, name[500];
__u64 addr, res = 0;
FILE *f;
f = fopen("/proc/kallsyms", "r");
if (CHECK(!f, "kallsyms_fopen", "failed to open: %d\n", errno))
return 0;
while (fscanf(f, "%llx %c %499s%*[^\n]\n", &addr, &type, name) > 0) {
if (strcmp(name, sym) == 0) {
res = addr;
goto out;
}
}
CHECK(false, "not_found", "symbol %s not found\n", sym);
out:
fclose(f);
return res;
}
void test_ksyms(void)
{
__u64 per_cpu_start_addr = kallsyms_find("__per_cpu_start");
__u64 link_fops_addr = kallsyms_find("bpf_link_fops");
const char *btf_path = "/sys/kernel/btf/vmlinux";
struct test_ksyms *skel;
struct test_ksyms__data *data;
__u64 link_fops_addr, per_cpu_start_addr;
struct stat st;
__u64 btf_size;
int err;
err = kallsyms_find("bpf_link_fops", &link_fops_addr);
if (CHECK(err == -EINVAL, "kallsyms_fopen", "failed to open: %d\n", errno))
return;
if (CHECK(err == -ENOENT, "ksym_find", "symbol 'bpf_link_fops' not found\n"))
return;
err = kallsyms_find("__per_cpu_start", &per_cpu_start_addr);
if (CHECK(err == -EINVAL, "kallsyms_fopen", "failed to open: %d\n", errno))
return;
if (CHECK(err == -ENOENT, "ksym_find", "symbol 'per_cpu_start' not found\n"))
return;
if (CHECK(stat(btf_path, &st), "stat_btf", "err %d\n", errno))
return;
btf_size = st.st_size;
......
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2020 Google */
#include <test_progs.h>
#include <bpf/libbpf.h>
#include <bpf/btf.h>
#include "test_ksyms_btf.skel.h"
static int duration;
void test_ksyms_btf(void)
{
__u64 runqueues_addr, bpf_prog_active_addr;
__u32 this_rq_cpu;
int this_bpf_prog_active;
struct test_ksyms_btf *skel = NULL;
struct test_ksyms_btf__data *data;
struct btf *btf;
int percpu_datasec;
int err;
err = kallsyms_find("runqueues", &runqueues_addr);
if (CHECK(err == -EINVAL, "kallsyms_fopen", "failed to open: %d\n", errno))
return;
if (CHECK(err == -ENOENT, "ksym_find", "symbol 'runqueues' not found\n"))
return;
err = kallsyms_find("bpf_prog_active", &bpf_prog_active_addr);
if (CHECK(err == -EINVAL, "kallsyms_fopen", "failed to open: %d\n", errno))
return;
if (CHECK(err == -ENOENT, "ksym_find", "symbol 'bpf_prog_active' not found\n"))
return;
btf = libbpf_find_kernel_btf();
if (CHECK(IS_ERR(btf), "btf_exists", "failed to load kernel BTF: %ld\n",
PTR_ERR(btf)))
return;
percpu_datasec = btf__find_by_name_kind(btf, ".data..percpu",
BTF_KIND_DATASEC);
if (percpu_datasec < 0) {
printf("%s:SKIP:no PERCPU DATASEC in kernel btf\n",
__func__);
test__skip();
goto cleanup;
}
skel = test_ksyms_btf__open_and_load();
if (CHECK(!skel, "skel_open", "failed to open and load skeleton\n"))
goto cleanup;
err = test_ksyms_btf__attach(skel);
if (CHECK(err, "skel_attach", "skeleton attach failed: %d\n", err))
goto cleanup;
/* trigger tracepoint */
usleep(1);
data = skel->data;
CHECK(data->out__runqueues_addr != runqueues_addr, "runqueues_addr",
"got %llu, exp %llu\n",
(unsigned long long)data->out__runqueues_addr,
(unsigned long long)runqueues_addr);
CHECK(data->out__bpf_prog_active_addr != bpf_prog_active_addr, "bpf_prog_active_addr",
"got %llu, exp %llu\n",
(unsigned long long)data->out__bpf_prog_active_addr,
(unsigned long long)bpf_prog_active_addr);
CHECK(data->out__rq_cpu == -1, "rq_cpu",
"got %u, exp != -1\n", data->out__rq_cpu);
CHECK(data->out__bpf_prog_active < 0, "bpf_prog_active",
"got %d, exp >= 0\n", data->out__bpf_prog_active);
CHECK(data->out__cpu_0_rq_cpu != 0, "cpu_rq(0)->cpu",
"got %u, exp 0\n", data->out__cpu_0_rq_cpu);
this_rq_cpu = data->out__this_rq_cpu;
CHECK(this_rq_cpu != data->out__rq_cpu, "this_rq_cpu",
"got %u, exp %u\n", this_rq_cpu, data->out__rq_cpu);
this_bpf_prog_active = data->out__this_bpf_prog_active;
CHECK(this_bpf_prog_active != data->out__bpf_prog_active, "this_bpf_prog_active",
"got %d, exp %d\n", this_bpf_prog_active,
data->out__bpf_prog_active);
cleanup:
btf__free(btf);
test_ksyms_btf__destroy(skel);
}
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2020 Google */
#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
__u64 out__runqueues_addr = -1;
__u64 out__bpf_prog_active_addr = -1;
__u32 out__rq_cpu = -1; /* percpu struct fields */
int out__bpf_prog_active = -1; /* percpu int */
__u32 out__this_rq_cpu = -1;
int out__this_bpf_prog_active = -1;
__u32 out__cpu_0_rq_cpu = -1; /* cpu_rq(0)->cpu */
extern const struct rq runqueues __ksym; /* struct type global var. */
extern const int bpf_prog_active __ksym; /* int type global var. */
SEC("raw_tp/sys_enter")
int handler(const void *ctx)
{
struct rq *rq;
int *active;
__u32 cpu;
out__runqueues_addr = (__u64)&runqueues;
out__bpf_prog_active_addr = (__u64)&bpf_prog_active;
cpu = bpf_get_smp_processor_id();
/* test bpf_per_cpu_ptr() */
rq = (struct rq *)bpf_per_cpu_ptr(&runqueues, cpu);
if (rq)
out__rq_cpu = rq->cpu;
active = (int *)bpf_per_cpu_ptr(&bpf_prog_active, cpu);
if (active)
out__bpf_prog_active = *active;
rq = (struct rq *)bpf_per_cpu_ptr(&runqueues, 0);
if (rq) /* should always be valid, but we can't spare the check. */
out__cpu_0_rq_cpu = rq->cpu;
/* test bpf_this_cpu_ptr */
rq = (struct rq *)bpf_this_cpu_ptr(&runqueues);
out__this_rq_cpu = rq->cpu;
active = (int *)bpf_this_cpu_ptr(&bpf_prog_active);
out__this_bpf_prog_active = *active;
return 0;
}
char _license[] SEC("license") = "GPL";
......@@ -90,6 +90,33 @@ long ksym_get_addr(const char *name)
return 0;
}
/* open kallsyms and read symbol addresses on the fly. Without caching all symbols,
* this is faster than load + find.
*/
int kallsyms_find(const char *sym, unsigned long long *addr)
{
char type, name[500];
unsigned long long value;
int err = 0;
FILE *f;
f = fopen("/proc/kallsyms", "r");
if (!f)
return -EINVAL;
while (fscanf(f, "%llx %c %499s%*[^\n]\n", &value, &type, name) > 0) {
if (strcmp(name, sym) == 0) {
*addr = value;
goto out;
}
}
err = -ENOENT;
out:
fclose(f);
return err;
}
void read_trace_pipe(void)
{
int trace_fd;
......
......@@ -12,6 +12,10 @@ struct ksym {
int load_kallsyms(void);
struct ksym *ksym_search(long key);
long ksym_get_addr(const char *name);
/* open kallsyms and find addresses on the fly, faster than load + search. */
int kallsyms_find(const char *sym, unsigned long long *addr);
void read_trace_pipe(void);
#endif
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment