Commit c8e18754 authored by Dave Marchevsky, committed by Alexei Starovoitov

bpf: Support __kptr to local kptrs

If a PTR_TO_BTF_ID type comes from program BTF - not vmlinux or module
BTF - it must have been allocated by bpf_obj_new and therefore must be
free'd with bpf_obj_drop. Such a PTR_TO_BTF_ID is considered a "local
kptr" and is tagged with MEM_ALLOC type tag by bpf_obj_new.

This patch adds support for treating __kptr-tagged pointers to "local
kptrs" as having an implicit bpf_obj_drop destructor for referenced kptr
acquire / release semantics. Consider the following example:

  struct node_data {
          long key;
          long data;
          struct bpf_rb_node node;
  };

  struct map_value {
          struct node_data __kptr *node;
  };

  struct {
          __uint(type, BPF_MAP_TYPE_ARRAY);
          __type(key, int);
          __type(value, struct map_value);
          __uint(max_entries, 1);
  } some_nodes SEC(".maps");

If struct node_data had a matching definition in kernel BTF, the verifier would
expect a destructor for the type to be registered. Since struct node_data does
not match any type in kernel BTF, the verifier knows that there is no kfunc
that provides a PTR_TO_BTF_ID to this type, and that such a PTR_TO_BTF_ID can
only come from bpf_obj_new. So instead of searching for a registered dtor,
a bpf_obj_drop dtor can be assumed.

This allows the runtime to properly destruct such kptrs in
bpf_obj_free_fields, which enables maps to clean up map_vals w/ such
kptrs when going away.

Implementation notes:
  * "kernel_btf" variable is renamed to "kptr_btf" in btf_parse_kptr.
    Before this patch, the variable would only ever point to vmlinux or
    module BTFs, but now it can point to some program BTF for local kptr
    type. It's later used to populate the (btf, btf_id) pair in kptr btf
    field.
  * It's necessary to btf_get the program BTF when populating btf_field
    for local kptr. btf_record_free later does a btf_put.
  * Behavior for non-local referenced kptrs is not modified, as
    bpf_find_btf_id helper only searches vmlinux and module BTFs for
    matching BTF type. If such a type is found, btf_field_kptr's btf will
    pass btf_is_kernel check, and the associated release function is
    some one-argument dtor. If btf_is_kernel check fails, associated
    release function is two-arg bpf_obj_drop_impl. Before this patch
    only btf_field_kptr's w/ kernel or module BTFs were created.
Signed-off-by: Dave Marchevsky <davemarchevsky@fb.com>
Link: https://lore.kernel.org/r/20230310230743.2320707-2-davemarchevsky@fb.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
parent c1f9e14e
...@@ -189,10 +189,19 @@ enum btf_field_type { ...@@ -189,10 +189,19 @@ enum btf_field_type {
BPF_RB_NODE | BPF_RB_ROOT, BPF_RB_NODE | BPF_RB_ROOT,
}; };
typedef void (*btf_dtor_kfunc_t)(void *);
typedef void (*btf_dtor_obj_drop)(void *, const struct btf_record *);
struct btf_field_kptr { struct btf_field_kptr {
struct btf *btf; struct btf *btf;
struct module *module; struct module *module;
union {
/* dtor used if btf_is_kernel(btf), otherwise the type
* is program-allocated and obj_drop is used
*/
btf_dtor_kfunc_t dtor; btf_dtor_kfunc_t dtor;
btf_dtor_obj_drop obj_drop;
};
u32 btf_id; u32 btf_id;
}; };
......
...@@ -121,8 +121,6 @@ struct btf_struct_metas { ...@@ -121,8 +121,6 @@ struct btf_struct_metas {
struct btf_struct_meta types[]; struct btf_struct_meta types[];
}; };
typedef void (*btf_dtor_kfunc_t)(void *);
extern const struct file_operations btf_fops; extern const struct file_operations btf_fops;
void btf_get(struct btf *btf); void btf_get(struct btf *btf);
......
...@@ -3551,12 +3551,17 @@ static int btf_find_field(const struct btf *btf, const struct btf_type *t, ...@@ -3551,12 +3551,17 @@ static int btf_find_field(const struct btf *btf, const struct btf_type *t,
return -EINVAL; return -EINVAL;
} }
extern void __bpf_obj_drop_impl(void *p, const struct btf_record *rec);
static int btf_parse_kptr(const struct btf *btf, struct btf_field *field, static int btf_parse_kptr(const struct btf *btf, struct btf_field *field,
struct btf_field_info *info) struct btf_field_info *info)
{ {
struct module *mod = NULL; struct module *mod = NULL;
const struct btf_type *t; const struct btf_type *t;
struct btf *kernel_btf; /* If a matching btf type is found in kernel or module BTFs, kptr_btf
* is that BTF, otherwise it's program BTF
*/
struct btf *kptr_btf;
int ret; int ret;
s32 id; s32 id;
...@@ -3565,7 +3570,20 @@ static int btf_parse_kptr(const struct btf *btf, struct btf_field *field, ...@@ -3565,7 +3570,20 @@ static int btf_parse_kptr(const struct btf *btf, struct btf_field *field,
*/ */
t = btf_type_by_id(btf, info->kptr.type_id); t = btf_type_by_id(btf, info->kptr.type_id);
id = bpf_find_btf_id(__btf_name_by_offset(btf, t->name_off), BTF_INFO_KIND(t->info), id = bpf_find_btf_id(__btf_name_by_offset(btf, t->name_off), BTF_INFO_KIND(t->info),
&kernel_btf); &kptr_btf);
if (id == -ENOENT) {
/* btf_parse_kptr should only be called w/ btf = program BTF */
WARN_ON_ONCE(btf_is_kernel(btf));
/* Type exists only in program BTF. Assume that it's a MEM_ALLOC
* kptr allocated via bpf_obj_new
*/
field->kptr.dtor = (void *)&__bpf_obj_drop_impl;
id = info->kptr.type_id;
kptr_btf = (struct btf *)btf;
btf_get(kptr_btf);
goto found_dtor;
}
if (id < 0) if (id < 0)
return id; return id;
...@@ -3582,20 +3600,20 @@ static int btf_parse_kptr(const struct btf *btf, struct btf_field *field, ...@@ -3582,20 +3600,20 @@ static int btf_parse_kptr(const struct btf *btf, struct btf_field *field,
* can be used as a referenced pointer and be stored in a map at * can be used as a referenced pointer and be stored in a map at
* the same time. * the same time.
*/ */
dtor_btf_id = btf_find_dtor_kfunc(kernel_btf, id); dtor_btf_id = btf_find_dtor_kfunc(kptr_btf, id);
if (dtor_btf_id < 0) { if (dtor_btf_id < 0) {
ret = dtor_btf_id; ret = dtor_btf_id;
goto end_btf; goto end_btf;
} }
dtor_func = btf_type_by_id(kernel_btf, dtor_btf_id); dtor_func = btf_type_by_id(kptr_btf, dtor_btf_id);
if (!dtor_func) { if (!dtor_func) {
ret = -ENOENT; ret = -ENOENT;
goto end_btf; goto end_btf;
} }
if (btf_is_module(kernel_btf)) { if (btf_is_module(kptr_btf)) {
mod = btf_try_get_module(kernel_btf); mod = btf_try_get_module(kptr_btf);
if (!mod) { if (!mod) {
ret = -ENXIO; ret = -ENXIO;
goto end_btf; goto end_btf;
...@@ -3605,7 +3623,7 @@ static int btf_parse_kptr(const struct btf *btf, struct btf_field *field, ...@@ -3605,7 +3623,7 @@ static int btf_parse_kptr(const struct btf *btf, struct btf_field *field,
/* We already verified dtor_func to be btf_type_is_func /* We already verified dtor_func to be btf_type_is_func
* in register_btf_id_dtor_kfuncs. * in register_btf_id_dtor_kfuncs.
*/ */
dtor_func_name = __btf_name_by_offset(kernel_btf, dtor_func->name_off); dtor_func_name = __btf_name_by_offset(kptr_btf, dtor_func->name_off);
addr = kallsyms_lookup_name(dtor_func_name); addr = kallsyms_lookup_name(dtor_func_name);
if (!addr) { if (!addr) {
ret = -EINVAL; ret = -EINVAL;
...@@ -3614,14 +3632,15 @@ static int btf_parse_kptr(const struct btf *btf, struct btf_field *field, ...@@ -3614,14 +3632,15 @@ static int btf_parse_kptr(const struct btf *btf, struct btf_field *field,
field->kptr.dtor = (void *)addr; field->kptr.dtor = (void *)addr;
} }
found_dtor:
field->kptr.btf_id = id; field->kptr.btf_id = id;
field->kptr.btf = kernel_btf; field->kptr.btf = kptr_btf;
field->kptr.module = mod; field->kptr.module = mod;
return 0; return 0;
end_mod: end_mod:
module_put(mod); module_put(mod);
end_btf: end_btf:
btf_put(kernel_btf); btf_put(kptr_btf);
return ret; return ret;
} }
......
...@@ -1896,14 +1896,19 @@ __bpf_kfunc void *bpf_obj_new_impl(u64 local_type_id__k, void *meta__ign) ...@@ -1896,14 +1896,19 @@ __bpf_kfunc void *bpf_obj_new_impl(u64 local_type_id__k, void *meta__ign)
return p; return p;
} }
/* Common drop path for a bpf_obj_new-allocated object @p.
 * If @rec is non-NULL, first destruct the object's special fields
 * (e.g. referenced kptrs) via bpf_obj_free_fields, then return the
 * memory to the global BPF memory allocator. @rec may be NULL when
 * the type has no btf_record (no special fields to clean up).
 * Called by the bpf_obj_drop kfunc and used as the implicit dtor for
 * local kptrs set up in btf_parse_kptr.
 */
void __bpf_obj_drop_impl(void *p, const struct btf_record *rec)
{
if (rec)
bpf_obj_free_fields(rec, p);
bpf_mem_free(&bpf_global_ma, p);
}
__bpf_kfunc void bpf_obj_drop_impl(void *p__alloc, void *meta__ign) __bpf_kfunc void bpf_obj_drop_impl(void *p__alloc, void *meta__ign)
{ {
struct btf_struct_meta *meta = meta__ign; struct btf_struct_meta *meta = meta__ign;
void *p = p__alloc; void *p = p__alloc;
if (meta) __bpf_obj_drop_impl(p, meta ? meta->record : NULL);
bpf_obj_free_fields(meta->record, p);
bpf_mem_free(&bpf_global_ma, p);
} }
static void __bpf_list_add(struct bpf_list_node *node, struct bpf_list_head *head, bool tail) static void __bpf_list_add(struct bpf_list_node *node, struct bpf_list_head *head, bool tail)
......
...@@ -659,8 +659,10 @@ void bpf_obj_free_fields(const struct btf_record *rec, void *obj) ...@@ -659,8 +659,10 @@ void bpf_obj_free_fields(const struct btf_record *rec, void *obj)
return; return;
fields = rec->fields; fields = rec->fields;
for (i = 0; i < rec->cnt; i++) { for (i = 0; i < rec->cnt; i++) {
struct btf_struct_meta *pointee_struct_meta;
const struct btf_field *field = &fields[i]; const struct btf_field *field = &fields[i];
void *field_ptr = obj + field->offset; void *field_ptr = obj + field->offset;
void *xchgd_field;
switch (fields[i].type) { switch (fields[i].type) {
case BPF_SPIN_LOCK: case BPF_SPIN_LOCK:
...@@ -672,7 +674,17 @@ void bpf_obj_free_fields(const struct btf_record *rec, void *obj) ...@@ -672,7 +674,17 @@ void bpf_obj_free_fields(const struct btf_record *rec, void *obj)
WRITE_ONCE(*(u64 *)field_ptr, 0); WRITE_ONCE(*(u64 *)field_ptr, 0);
break; break;
case BPF_KPTR_REF: case BPF_KPTR_REF:
field->kptr.dtor((void *)xchg((unsigned long *)field_ptr, 0)); xchgd_field = (void *)xchg((unsigned long *)field_ptr, 0);
if (!btf_is_kernel(field->kptr.btf)) {
pointee_struct_meta = btf_find_struct_meta(field->kptr.btf,
field->kptr.btf_id);
WARN_ON_ONCE(!pointee_struct_meta);
field->kptr.obj_drop(xchgd_field, pointee_struct_meta ?
pointee_struct_meta->record :
NULL);
} else {
field->kptr.dtor(xchgd_field);
}
break; break;
case BPF_LIST_HEAD: case BPF_LIST_HEAD:
if (WARN_ON_ONCE(rec->spin_lock_off < 0)) if (WARN_ON_ONCE(rec->spin_lock_off < 0))
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment