Commit cc0f8353 authored by Alexei Starovoitov's avatar Alexei Starovoitov

Merge branch 'bpf: add bpf_for_each_map_elem() helper'

Yonghong Song says:

====================

This patch set introduced bpf_for_each_map_elem() helper.
The helper permits bpf program iterates through all elements
for a particular map.

The work originally inspired by an internal discussion where
firewall rules are kept in a map and bpf prog wants to
check packet 5 tuples against all rules in the map.
A bounded loop can be used but it has a few drawbacks.
As the loop iteration goes up, verification time goes up too.
For really large maps, verification may fail.
A helper which abstracts out the loop itself will not have
verification time issue.

A recent discussion in [1] involves to iterate all hash map
elements in bpf program. Currently iterating all hashmap elements
in bpf program is not easy if key space is really big.
Having a helper to abstract out the loop itself is even more
meaningful.

The proposed helper signature looks like:
  long bpf_for_each_map_elem(map, callback_fn, callback_ctx, flags)
where callback_fn is a static function and callback_ctx is
a piece of data allocated on the caller stack which can be
accessed by the callback_fn. The callback_fn signature might be
different for different maps. For example, for hash/array maps,
the signature is
  long callback_fn(map, key, val, callback_ctx)

In the rest of series, Patches 1/2/3/4 did some refactoring. Patch 5
implemented core kernel support for the helper. Patches 6 and 7
added hashmap and arraymap support. Patches 8/9 added libbpf
support. Patch 10 added bpftool support. Patches 11 and 12 added
selftests for hashmap and arraymap.

[1]: https://lore.kernel.org/bpf/20210122205415.113822-1-xiyou.wangcong@gmail.com/

Changelogs:
  v4 -> v5:
    - rebase on top of bpf-next.
  v3 -> v4:
    - better refactoring of check_func_call(), calculate subprogno outside
      of __check_func_call() helper. (Andrii)
    - better documentation (like the list of supported maps and their
      callback signatures) in uapi header. (Andrii)
    - implement and use ASSERT_LT in selftests. (Andrii)
    - a few other minor changes.
  v2 -> v3:
    - add comments in retrieve_ptr_limit(), which is in sanitize_ptr_alu(),
      to clarify the code is not executed for PTR_TO_MAP_KEY handling,
      but code is manually tested. (Alexei)
    - require BTF for callback function. (Alexei)
    - simplify hashmap/arraymap callback return handling as return value
      [0, 1] has been enforced by the verifier. (Alexei)
    - also mark global subprog (if used in ld_imm64) as RELO_SUBPROG_ADDR. (Andrii)
    - handle the condition to mark RELO_SUBPROG_ADDR properly. (Andrii)
    - make bpftool subprog insn offset dumping consist with pcrel calls. (Andrii)
  v1 -> v2:
    - setup callee frame in check_helper_call() and then proceed to verify
      helper return value as normal (Alexei)
    - use meta data to keep track of map/func pointer to avoid hard coding
      the register number (Alexei)
    - verify callback_fn return value range [0, 1]. (Alexei)
    - add migrate_{disable, enable} to ensure percpu value is the one
      bpf program expects to see. (Alexei)
    - change bpf_for_each_map_elem() return value to the number of iterated
      elements. (Andrii)
    - Change libbpf pseudo_func relo name to RELO_SUBPROG_ADDR and use
      more rigid checking for the relocation. (Andrii)
    - Better format to print out subprog address with bpftool. (Andrii)
    - Use bpf_prog_test_run to trigger bpf run, instead of bpf_iter. (Andrii)
    - Other misc changes.
====================
Signed-off-by: default avatarAlexei Starovoitov <ast@kernel.org>
parents 86fd1665 6b9e3331
......@@ -39,6 +39,7 @@ struct bpf_local_storage;
struct bpf_local_storage_map;
struct kobject;
struct mem_cgroup;
struct bpf_func_state;
extern struct idr btf_idr;
extern spinlock_t btf_idr_lock;
......@@ -129,6 +130,13 @@ struct bpf_map_ops {
bool (*map_meta_equal)(const struct bpf_map *meta0,
const struct bpf_map *meta1);
int (*map_set_for_each_callback_args)(struct bpf_verifier_env *env,
struct bpf_func_state *caller,
struct bpf_func_state *callee);
int (*map_for_each_callback)(struct bpf_map *map, void *callback_fn,
void *callback_ctx, u64 flags);
/* BTF name and id of struct allocated by map_alloc */
const char * const map_btf_name;
int *map_btf_id;
......@@ -295,6 +303,8 @@ enum bpf_arg_type {
ARG_CONST_ALLOC_SIZE_OR_ZERO, /* number of allocated bytes requested */
ARG_PTR_TO_BTF_ID_SOCK_COMMON, /* pointer to in-kernel sock_common or bpf-mirrored bpf_sock */
ARG_PTR_TO_PERCPU_BTF_ID, /* pointer to in-kernel percpu type */
ARG_PTR_TO_FUNC, /* pointer to a bpf program function */
ARG_PTR_TO_STACK_OR_NULL, /* pointer to stack or NULL */
__BPF_ARG_TYPE_MAX,
};
......@@ -411,6 +421,8 @@ enum bpf_reg_type {
PTR_TO_RDWR_BUF, /* reg points to a read/write buffer */
PTR_TO_RDWR_BUF_OR_NULL, /* reg points to a read/write buffer or NULL */
PTR_TO_PERCPU_BTF_ID, /* reg points to a percpu kernel variable */
PTR_TO_FUNC, /* reg points to a bpf program function */
PTR_TO_MAP_KEY, /* reg points to a map element key */
};
/* The information passed from prog-specific *_is_valid_access
......@@ -1385,6 +1397,10 @@ void bpf_iter_map_show_fdinfo(const struct bpf_iter_aux_info *aux,
int bpf_iter_map_fill_link_info(const struct bpf_iter_aux_info *aux,
struct bpf_link_info *info);
int map_set_for_each_callback_args(struct bpf_verifier_env *env,
struct bpf_func_state *caller,
struct bpf_func_state *callee);
int bpf_percpu_hash_copy(struct bpf_map *map, void *key, void *value);
int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value);
int bpf_percpu_hash_update(struct bpf_map *map, void *key, void *value,
......@@ -1887,6 +1903,7 @@ extern const struct bpf_func_proto bpf_sock_from_file_proto;
extern const struct bpf_func_proto bpf_get_socket_ptr_cookie_proto;
extern const struct bpf_func_proto bpf_task_storage_get_proto;
extern const struct bpf_func_proto bpf_task_storage_delete_proto;
extern const struct bpf_func_proto bpf_for_each_map_elem_proto;
const struct bpf_func_proto *bpf_tracing_func_proto(
enum bpf_func_id func_id, const struct bpf_prog *prog);
......
......@@ -68,6 +68,8 @@ struct bpf_reg_state {
unsigned long raw1;
unsigned long raw2;
} raw;
u32 subprogno; /* for PTR_TO_FUNC */
};
/* For PTR_TO_PACKET, used to find other pointers with the same variable
* offset, so they can share range knowledge.
......@@ -204,6 +206,7 @@ struct bpf_func_state {
int acquired_refs;
struct bpf_reference_state *refs;
int allocated_stack;
bool in_callback_fn;
struct bpf_stack_state *stack;
};
......
......@@ -393,6 +393,15 @@ enum bpf_link_type {
* is struct/union.
*/
#define BPF_PSEUDO_BTF_ID 3
/* insn[0].src_reg: BPF_PSEUDO_FUNC
* insn[0].imm: insn offset to the func
* insn[1].imm: 0
* insn[0].off: 0
* insn[1].off: 0
* ldimm64 rewrite: address of the function
* verifier type: PTR_TO_FUNC.
*/
#define BPF_PSEUDO_FUNC 4
/* when bpf_call->src_reg == BPF_PSEUDO_CALL, bpf_call->imm == pc-relative
* offset to another bpf function
......@@ -3909,6 +3918,34 @@ union bpf_attr {
* * **BPF_MTU_CHK_RET_FRAG_NEEDED**
* * **BPF_MTU_CHK_RET_SEGS_TOOBIG**
*
* long bpf_for_each_map_elem(struct bpf_map *map, void *callback_fn, void *callback_ctx, u64 flags)
* Description
* For each element in **map**, call **callback_fn** function with
* **map**, **callback_ctx** and other map-specific parameters.
* The **callback_fn** should be a static function and
* the **callback_ctx** should be a pointer to the stack.
* The **flags** is used to control certain aspects of the helper.
* Currently, the **flags** must be 0.
*
* The following are a list of supported map types and their
* respective expected callback signatures:
*
* BPF_MAP_TYPE_HASH, BPF_MAP_TYPE_PERCPU_HASH,
* BPF_MAP_TYPE_LRU_HASH, BPF_MAP_TYPE_LRU_PERCPU_HASH,
* BPF_MAP_TYPE_ARRAY, BPF_MAP_TYPE_PERCPU_ARRAY
*
* long (\*callback_fn)(struct bpf_map \*map, const void \*key, void \*value, void \*ctx);
*
* For per_cpu maps, the map_value is the value on the cpu where the
* bpf_prog is running.
*
* If **callback_fn** return 0, the helper will continue to the next
* element. If return value is 1, the helper will skip the rest of
* elements and return. Other return values are not used now.
*
* Return
* The number of traversed map elements for success, **-EINVAL** for
* invalid **flags**.
*/
#define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \
......@@ -4075,6 +4112,7 @@ union bpf_attr {
FN(ima_inode_hash), \
FN(sock_from_file), \
FN(check_mtu), \
FN(for_each_map_elem), \
/* */
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
......
......@@ -625,6 +625,42 @@ static const struct bpf_iter_seq_info iter_seq_info = {
.seq_priv_size = sizeof(struct bpf_iter_seq_array_map_info),
};
static int bpf_for_each_array_elem(struct bpf_map *map, void *callback_fn,
void *callback_ctx, u64 flags)
{
u32 i, key, num_elems = 0;
struct bpf_array *array;
bool is_percpu;
u64 ret = 0;
void *val;
if (flags != 0)
return -EINVAL;
is_percpu = map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY;
array = container_of(map, struct bpf_array, map);
if (is_percpu)
migrate_disable();
for (i = 0; i < map->max_entries; i++) {
if (is_percpu)
val = this_cpu_ptr(array->pptrs[i]);
else
val = array->value + array->elem_size * i;
num_elems++;
key = i;
ret = BPF_CAST_CALL(callback_fn)((u64)(long)map,
(u64)(long)&key, (u64)(long)val,
(u64)(long)callback_ctx, 0);
/* return value: 0 - continue, 1 - stop and return */
if (ret)
break;
}
if (is_percpu)
migrate_enable();
return num_elems;
}
static int array_map_btf_id;
const struct bpf_map_ops array_map_ops = {
.map_meta_equal = array_map_meta_equal,
......@@ -643,6 +679,8 @@ const struct bpf_map_ops array_map_ops = {
.map_check_btf = array_map_check_btf,
.map_lookup_batch = generic_map_lookup_batch,
.map_update_batch = generic_map_update_batch,
.map_set_for_each_callback_args = map_set_for_each_callback_args,
.map_for_each_callback = bpf_for_each_array_elem,
.map_btf_name = "bpf_array",
.map_btf_id = &array_map_btf_id,
.iter_seq_info = &iter_seq_info,
......@@ -660,6 +698,8 @@ const struct bpf_map_ops percpu_array_map_ops = {
.map_delete_elem = array_map_delete_elem,
.map_seq_show_elem = percpu_array_map_seq_show_elem,
.map_check_btf = array_map_check_btf,
.map_set_for_each_callback_args = map_set_for_each_callback_args,
.map_for_each_callback = bpf_for_each_array_elem,
.map_btf_name = "bpf_array",
.map_btf_id = &percpu_array_map_btf_id,
.iter_seq_info = &iter_seq_info,
......
......@@ -675,3 +675,19 @@ int bpf_iter_run_prog(struct bpf_prog *prog, void *ctx)
*/
return ret == 0 ? 0 : -EAGAIN;
}
BPF_CALL_4(bpf_for_each_map_elem, struct bpf_map *, map, void *, callback_fn,
void *, callback_ctx, u64, flags)
{
return map->ops->map_for_each_callback(map, callback_fn, callback_ctx, flags);
}
const struct bpf_func_proto bpf_for_each_map_elem_proto = {
.func = bpf_for_each_map_elem,
.gpl_only = false,
.ret_type = RET_INTEGER,
.arg1_type = ARG_CONST_MAP_PTR,
.arg2_type = ARG_PTR_TO_FUNC,
.arg3_type = ARG_PTR_TO_STACK_OR_NULL,
.arg4_type = ARG_ANYTHING,
};
......@@ -1869,6 +1869,63 @@ static const struct bpf_iter_seq_info iter_seq_info = {
.seq_priv_size = sizeof(struct bpf_iter_seq_hash_map_info),
};
static int bpf_for_each_hash_elem(struct bpf_map *map, void *callback_fn,
void *callback_ctx, u64 flags)
{
struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
struct hlist_nulls_head *head;
struct hlist_nulls_node *n;
struct htab_elem *elem;
u32 roundup_key_size;
int i, num_elems = 0;
void __percpu *pptr;
struct bucket *b;
void *key, *val;
bool is_percpu;
u64 ret = 0;
if (flags != 0)
return -EINVAL;
is_percpu = htab_is_percpu(htab);
roundup_key_size = round_up(map->key_size, 8);
/* disable migration so percpu value prepared here will be the
* same as the one seen by the bpf program with bpf_map_lookup_elem().
*/
if (is_percpu)
migrate_disable();
for (i = 0; i < htab->n_buckets; i++) {
b = &htab->buckets[i];
rcu_read_lock();
head = &b->head;
hlist_nulls_for_each_entry_rcu(elem, n, head, hash_node) {
key = elem->key;
if (is_percpu) {
/* current cpu value for percpu map */
pptr = htab_elem_get_ptr(elem, map->key_size);
val = this_cpu_ptr(pptr);
} else {
val = elem->key + roundup_key_size;
}
num_elems++;
ret = BPF_CAST_CALL(callback_fn)((u64)(long)map,
(u64)(long)key, (u64)(long)val,
(u64)(long)callback_ctx, 0);
/* return value: 0 - continue, 1 - stop and return */
if (ret) {
rcu_read_unlock();
goto out;
}
}
rcu_read_unlock();
}
out:
if (is_percpu)
migrate_enable();
return num_elems;
}
static int htab_map_btf_id;
const struct bpf_map_ops htab_map_ops = {
.map_meta_equal = bpf_map_meta_equal,
......@@ -1881,6 +1938,8 @@ const struct bpf_map_ops htab_map_ops = {
.map_delete_elem = htab_map_delete_elem,
.map_gen_lookup = htab_map_gen_lookup,
.map_seq_show_elem = htab_map_seq_show_elem,
.map_set_for_each_callback_args = map_set_for_each_callback_args,
.map_for_each_callback = bpf_for_each_hash_elem,
BATCH_OPS(htab),
.map_btf_name = "bpf_htab",
.map_btf_id = &htab_map_btf_id,
......@@ -1900,6 +1959,8 @@ const struct bpf_map_ops htab_lru_map_ops = {
.map_delete_elem = htab_lru_map_delete_elem,
.map_gen_lookup = htab_lru_map_gen_lookup,
.map_seq_show_elem = htab_map_seq_show_elem,
.map_set_for_each_callback_args = map_set_for_each_callback_args,
.map_for_each_callback = bpf_for_each_hash_elem,
BATCH_OPS(htab_lru),
.map_btf_name = "bpf_htab",
.map_btf_id = &htab_lru_map_btf_id,
......@@ -2019,6 +2080,8 @@ const struct bpf_map_ops htab_percpu_map_ops = {
.map_update_elem = htab_percpu_map_update_elem,
.map_delete_elem = htab_map_delete_elem,
.map_seq_show_elem = htab_percpu_map_seq_show_elem,
.map_set_for_each_callback_args = map_set_for_each_callback_args,
.map_for_each_callback = bpf_for_each_hash_elem,
BATCH_OPS(htab_percpu),
.map_btf_name = "bpf_htab",
.map_btf_id = &htab_percpu_map_btf_id,
......@@ -2036,6 +2099,8 @@ const struct bpf_map_ops htab_lru_percpu_map_ops = {
.map_update_elem = htab_lru_percpu_map_update_elem,
.map_delete_elem = htab_lru_map_delete_elem,
.map_seq_show_elem = htab_percpu_map_seq_show_elem,
.map_set_for_each_callback_args = map_set_for_each_callback_args,
.map_for_each_callback = bpf_for_each_hash_elem,
BATCH_OPS(htab_lru_percpu),
.map_btf_name = "bpf_htab",
.map_btf_id = &htab_lru_percpu_map_btf_id,
......
......@@ -708,6 +708,8 @@ bpf_base_func_proto(enum bpf_func_id func_id)
return &bpf_ringbuf_discard_proto;
case BPF_FUNC_ringbuf_query:
return &bpf_ringbuf_query_proto;
case BPF_FUNC_for_each_map_elem:
return &bpf_for_each_map_elem_proto;
default:
break;
}
......
......@@ -234,6 +234,12 @@ static bool bpf_pseudo_call(const struct bpf_insn *insn)
insn->src_reg == BPF_PSEUDO_CALL;
}
static bool bpf_pseudo_func(const struct bpf_insn *insn)
{
return insn->code == (BPF_LD | BPF_IMM | BPF_DW) &&
insn->src_reg == BPF_PSEUDO_FUNC;
}
struct bpf_call_arg_meta {
struct bpf_map *map_ptr;
bool raw_mode;
......@@ -248,6 +254,7 @@ struct bpf_call_arg_meta {
u32 btf_id;
struct btf *ret_btf;
u32 ret_btf_id;
u32 subprogno;
};
struct btf *btf_vmlinux;
......@@ -390,6 +397,24 @@ __printf(3, 4) static void verbose_linfo(struct bpf_verifier_env *env,
env->prev_linfo = linfo;
}
static void verbose_invalid_scalar(struct bpf_verifier_env *env,
struct bpf_reg_state *reg,
struct tnum *range, const char *ctx,
const char *reg_name)
{
char tn_buf[48];
verbose(env, "At %s the register %s ", ctx, reg_name);
if (!tnum_is_unknown(reg->var_off)) {
tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
verbose(env, "has value %s", tn_buf);
} else {
verbose(env, "has unknown scalar value");
}
tnum_strn(tn_buf, sizeof(tn_buf), *range);
verbose(env, " should have been in %s\n", tn_buf);
}
static bool type_is_pkt_pointer(enum bpf_reg_type type)
{
return type == PTR_TO_PACKET ||
......@@ -409,6 +434,7 @@ static bool reg_type_not_null(enum bpf_reg_type type)
return type == PTR_TO_SOCKET ||
type == PTR_TO_TCP_SOCK ||
type == PTR_TO_MAP_VALUE ||
type == PTR_TO_MAP_KEY ||
type == PTR_TO_SOCK_COMMON;
}
......@@ -451,7 +477,8 @@ static bool arg_type_may_be_null(enum bpf_arg_type type)
type == ARG_PTR_TO_MEM_OR_NULL ||
type == ARG_PTR_TO_CTX_OR_NULL ||
type == ARG_PTR_TO_SOCKET_OR_NULL ||
type == ARG_PTR_TO_ALLOC_MEM_OR_NULL;
type == ARG_PTR_TO_ALLOC_MEM_OR_NULL ||
type == ARG_PTR_TO_STACK_OR_NULL;
}
/* Determine whether the function releases some resources allocated by another
......@@ -534,6 +561,8 @@ static const char * const reg_type_str[] = {
[PTR_TO_RDONLY_BUF_OR_NULL] = "rdonly_buf_or_null",
[PTR_TO_RDWR_BUF] = "rdwr_buf",
[PTR_TO_RDWR_BUF_OR_NULL] = "rdwr_buf_or_null",
[PTR_TO_FUNC] = "func",
[PTR_TO_MAP_KEY] = "map_key",
};
static char slot_type_char[] = {
......@@ -605,6 +634,7 @@ static void print_verifier_state(struct bpf_verifier_env *env,
if (type_is_pkt_pointer(t))
verbose(env, ",r=%d", reg->range);
else if (t == CONST_PTR_TO_MAP ||
t == PTR_TO_MAP_KEY ||
t == PTR_TO_MAP_VALUE ||
t == PTR_TO_MAP_VALUE_OR_NULL)
verbose(env, ",ks=%d,vs=%d",
......@@ -1512,7 +1542,7 @@ static int add_subprog(struct bpf_verifier_env *env, int off)
}
ret = find_subprog(env, off);
if (ret >= 0)
return 0;
return ret;
if (env->subprog_cnt >= BPF_MAX_SUBPROGS) {
verbose(env, "too many subprograms\n");
return -E2BIG;
......@@ -1520,7 +1550,7 @@ static int add_subprog(struct bpf_verifier_env *env, int off)
env->subprog_info[env->subprog_cnt++].start = off;
sort(env->subprog_info, env->subprog_cnt,
sizeof(env->subprog_info[0]), cmp_subprogs, NULL);
return 0;
return env->subprog_cnt - 1;
}
static int check_subprogs(struct bpf_verifier_env *env)
......@@ -1537,6 +1567,19 @@ static int check_subprogs(struct bpf_verifier_env *env)
/* determine subprog starts. The end is one before the next starts */
for (i = 0; i < insn_cnt; i++) {
if (bpf_pseudo_func(insn + i)) {
if (!env->bpf_capable) {
verbose(env,
"function pointers are allowed for CAP_BPF and CAP_SYS_ADMIN\n");
return -EPERM;
}
ret = add_subprog(env, i + insn[i].imm + 1);
if (ret < 0)
return ret;
/* remember subprog */
insn[i + 1].imm = ret;
continue;
}
if (!bpf_pseudo_call(insn + i))
continue;
if (!env->bpf_capable) {
......@@ -2268,6 +2311,8 @@ static bool is_spillable_regtype(enum bpf_reg_type type)
case PTR_TO_PERCPU_BTF_ID:
case PTR_TO_MEM:
case PTR_TO_MEM_OR_NULL:
case PTR_TO_FUNC:
case PTR_TO_MAP_KEY:
return true;
default:
return false;
......@@ -2872,6 +2917,10 @@ static int __check_mem_access(struct bpf_verifier_env *env, int regno,
reg = &cur_regs(env)[regno];
switch (reg->type) {
case PTR_TO_MAP_KEY:
verbose(env, "invalid access to map key, key_size=%d off=%d size=%d\n",
mem_size, off, size);
break;
case PTR_TO_MAP_VALUE:
verbose(env, "invalid access to map value, value_size=%d off=%d size=%d\n",
mem_size, off, size);
......@@ -3277,6 +3326,9 @@ static int check_ptr_alignment(struct bpf_verifier_env *env,
case PTR_TO_FLOW_KEYS:
pointer_desc = "flow keys ";
break;
case PTR_TO_MAP_KEY:
pointer_desc = "key ";
break;
case PTR_TO_MAP_VALUE:
pointer_desc = "value ";
break;
......@@ -3378,7 +3430,7 @@ static int check_max_stack_depth(struct bpf_verifier_env *env)
continue_func:
subprog_end = subprog[idx + 1].start;
for (; i < subprog_end; i++) {
if (!bpf_pseudo_call(insn + i))
if (!bpf_pseudo_call(insn + i) && !bpf_pseudo_func(insn + i))
continue;
/* remember insn and function to return to */
ret_insn[frame] = i + 1;
......@@ -3815,7 +3867,19 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
/* for access checks, reg->off is just part of off */
off += reg->off;
if (reg->type == PTR_TO_MAP_VALUE) {
if (reg->type == PTR_TO_MAP_KEY) {
if (t == BPF_WRITE) {
verbose(env, "write to change key R%d not allowed\n", regno);
return -EACCES;
}
err = check_mem_region_access(env, regno, off, size,
reg->map_ptr->key_size, false);
if (err)
return err;
if (value_regno >= 0)
mark_reg_unknown(env, regs, value_regno);
} else if (reg->type == PTR_TO_MAP_VALUE) {
if (t == BPF_WRITE && value_regno >= 0 &&
is_pointer_value(env, value_regno)) {
verbose(env, "R%d leaks addr into map\n", value_regno);
......@@ -4231,6 +4295,9 @@ static int check_helper_mem_access(struct bpf_verifier_env *env, int regno,
case PTR_TO_PACKET_META:
return check_packet_access(env, regno, reg->off, access_size,
zero_size_allowed);
case PTR_TO_MAP_KEY:
return check_mem_region_access(env, regno, reg->off, access_size,
reg->map_ptr->key_size, false);
case PTR_TO_MAP_VALUE:
if (check_map_access_type(env, regno, reg->off, access_size,
meta && meta->raw_mode ? BPF_WRITE :
......@@ -4447,6 +4514,7 @@ static const struct bpf_reg_types map_key_value_types = {
PTR_TO_STACK,
PTR_TO_PACKET,
PTR_TO_PACKET_META,
PTR_TO_MAP_KEY,
PTR_TO_MAP_VALUE,
},
};
......@@ -4478,6 +4546,7 @@ static const struct bpf_reg_types mem_types = {
PTR_TO_STACK,
PTR_TO_PACKET,
PTR_TO_PACKET_META,
PTR_TO_MAP_KEY,
PTR_TO_MAP_VALUE,
PTR_TO_MEM,
PTR_TO_RDONLY_BUF,
......@@ -4490,6 +4559,7 @@ static const struct bpf_reg_types int_ptr_types = {
PTR_TO_STACK,
PTR_TO_PACKET,
PTR_TO_PACKET_META,
PTR_TO_MAP_KEY,
PTR_TO_MAP_VALUE,
},
};
......@@ -4502,6 +4572,8 @@ static const struct bpf_reg_types const_map_ptr_types = { .types = { CONST_PTR_T
static const struct bpf_reg_types btf_ptr_types = { .types = { PTR_TO_BTF_ID } };
static const struct bpf_reg_types spin_lock_types = { .types = { PTR_TO_MAP_VALUE } };
static const struct bpf_reg_types percpu_btf_ptr_types = { .types = { PTR_TO_PERCPU_BTF_ID } };
static const struct bpf_reg_types func_ptr_types = { .types = { PTR_TO_FUNC } };
static const struct bpf_reg_types stack_ptr_types = { .types = { PTR_TO_STACK } };
static const struct bpf_reg_types *compatible_reg_types[__BPF_ARG_TYPE_MAX] = {
[ARG_PTR_TO_MAP_KEY] = &map_key_value_types,
......@@ -4530,6 +4602,8 @@ static const struct bpf_reg_types *compatible_reg_types[__BPF_ARG_TYPE_MAX] = {
[ARG_PTR_TO_INT] = &int_ptr_types,
[ARG_PTR_TO_LONG] = &int_ptr_types,
[ARG_PTR_TO_PERCPU_BTF_ID] = &percpu_btf_ptr_types,
[ARG_PTR_TO_FUNC] = &func_ptr_types,
[ARG_PTR_TO_STACK_OR_NULL] = &stack_ptr_types,
};
static int check_reg_type(struct bpf_verifier_env *env, u32 regno,
......@@ -4711,6 +4785,8 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 arg,
verbose(env, "verifier internal error\n");
return -EFAULT;
}
} else if (arg_type == ARG_PTR_TO_FUNC) {
meta->subprogno = reg->subprogno;
} else if (arg_type_is_mem_ptr(arg_type)) {
/* The access to this pointer is only checked when we hit the
* next is_mem_size argument below.
......@@ -5231,13 +5307,19 @@ static void clear_caller_saved_regs(struct bpf_verifier_env *env,
}
}
static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
int *insn_idx)
typedef int (*set_callee_state_fn)(struct bpf_verifier_env *env,
struct bpf_func_state *caller,
struct bpf_func_state *callee,
int insn_idx);
static int __check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
int *insn_idx, int subprog,
set_callee_state_fn set_callee_state_cb)
{
struct bpf_verifier_state *state = env->cur_state;
struct bpf_func_info_aux *func_info_aux;
struct bpf_func_state *caller, *callee;
int i, err, subprog, target_insn;
int err;
bool is_global = false;
if (state->curframe + 1 >= MAX_CALL_FRAMES) {
......@@ -5246,14 +5328,6 @@ static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
return -E2BIG;
}
target_insn = *insn_idx + insn->imm;
subprog = find_subprog(env, target_insn + 1);
if (subprog < 0) {
verbose(env, "verifier bug. No program starts at insn %d\n",
target_insn + 1);
return -EFAULT;
}
caller = state->frame[state->curframe];
if (state->frame[state->curframe + 1]) {
verbose(env, "verifier bug. Frame %d already allocated\n",
......@@ -5308,11 +5382,9 @@ static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
if (err)
return err;
/* copy r1 - r5 args that callee can access. The copy includes parent
* pointers, which connects us up to the liveness chain
*/
for (i = BPF_REG_1; i <= BPF_REG_5; i++)
callee->regs[i] = caller->regs[i];
err = set_callee_state_cb(env, caller, callee, *insn_idx);
if (err)
return err;
clear_caller_saved_regs(env, caller->regs);
......@@ -5320,7 +5392,7 @@ static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
state->curframe++;
/* and go analyze first insn of the callee */
*insn_idx = target_insn;
*insn_idx = env->subprog_info[subprog].start - 1;
if (env->log.level & BPF_LOG_LEVEL) {
verbose(env, "caller:\n");
......@@ -5331,6 +5403,92 @@ static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
return 0;
}
int map_set_for_each_callback_args(struct bpf_verifier_env *env,
struct bpf_func_state *caller,
struct bpf_func_state *callee)
{
/* bpf_for_each_map_elem(struct bpf_map *map, void *callback_fn,
* void *callback_ctx, u64 flags);
* callback_fn(struct bpf_map *map, void *key, void *value,
* void *callback_ctx);
*/
callee->regs[BPF_REG_1] = caller->regs[BPF_REG_1];
callee->regs[BPF_REG_2].type = PTR_TO_MAP_KEY;
__mark_reg_known_zero(&callee->regs[BPF_REG_2]);
callee->regs[BPF_REG_2].map_ptr = caller->regs[BPF_REG_1].map_ptr;
callee->regs[BPF_REG_3].type = PTR_TO_MAP_VALUE;
__mark_reg_known_zero(&callee->regs[BPF_REG_3]);
callee->regs[BPF_REG_3].map_ptr = caller->regs[BPF_REG_1].map_ptr;
/* pointer to stack or null */
callee->regs[BPF_REG_4] = caller->regs[BPF_REG_3];
/* unused */
__mark_reg_not_init(env, &callee->regs[BPF_REG_5]);
return 0;
}
static int set_callee_state(struct bpf_verifier_env *env,
struct bpf_func_state *caller,
struct bpf_func_state *callee, int insn_idx)
{
int i;
/* copy r1 - r5 args that callee can access. The copy includes parent
* pointers, which connects us up to the liveness chain
*/
for (i = BPF_REG_1; i <= BPF_REG_5; i++)
callee->regs[i] = caller->regs[i];
return 0;
}
static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
int *insn_idx)
{
int subprog, target_insn;
target_insn = *insn_idx + insn->imm + 1;
subprog = find_subprog(env, target_insn);
if (subprog < 0) {
verbose(env, "verifier bug. No program starts at insn %d\n",
target_insn);
return -EFAULT;
}
return __check_func_call(env, insn, insn_idx, subprog, set_callee_state);
}
static int set_map_elem_callback_state(struct bpf_verifier_env *env,
struct bpf_func_state *caller,
struct bpf_func_state *callee,
int insn_idx)
{
struct bpf_insn_aux_data *insn_aux = &env->insn_aux_data[insn_idx];
struct bpf_map *map;
int err;
if (bpf_map_ptr_poisoned(insn_aux)) {
verbose(env, "tail_call abusing map_ptr\n");
return -EINVAL;
}
map = BPF_MAP_PTR(insn_aux->map_ptr_state);
if (!map->ops->map_set_for_each_callback_args ||
!map->ops->map_for_each_callback) {
verbose(env, "callback function not allowed for map\n");
return -ENOTSUPP;
}
err = map->ops->map_set_for_each_callback_args(env, caller, callee);
if (err)
return err;
callee->in_callback_fn = true;
return 0;
}
static int prepare_func_exit(struct bpf_verifier_env *env, int *insn_idx)
{
struct bpf_verifier_state *state = env->cur_state;
......@@ -5353,8 +5511,22 @@ static int prepare_func_exit(struct bpf_verifier_env *env, int *insn_idx)
state->curframe--;
caller = state->frame[state->curframe];
/* return to the caller whatever r0 had in the callee */
caller->regs[BPF_REG_0] = *r0;
if (callee->in_callback_fn) {
/* enforce R0 return value range [0, 1]. */
struct tnum range = tnum_range(0, 1);
if (r0->type != SCALAR_VALUE) {
verbose(env, "R0 not a scalar value\n");
return -EACCES;
}
if (!tnum_in(range, r0->var_off)) {
verbose_invalid_scalar(env, r0, &range, "callback return", "R0");
return -EINVAL;
}
} else {
/* return to the caller whatever r0 had in the callee */
caller->regs[BPF_REG_0] = *r0;
}
/* Transfer references to the caller */
err = transfer_reference_state(caller, callee);
......@@ -5409,7 +5581,8 @@ record_func_map(struct bpf_verifier_env *env, struct bpf_call_arg_meta *meta,
func_id != BPF_FUNC_map_delete_elem &&
func_id != BPF_FUNC_map_push_elem &&
func_id != BPF_FUNC_map_pop_elem &&
func_id != BPF_FUNC_map_peek_elem)
func_id != BPF_FUNC_map_peek_elem &&
func_id != BPF_FUNC_for_each_map_elem)
return 0;
if (map == NULL) {
......@@ -5490,15 +5663,18 @@ static int check_reference_leak(struct bpf_verifier_env *env)
return state->acquired_refs ? -EINVAL : 0;
}
static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn_idx)
static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
int *insn_idx_p)
{
const struct bpf_func_proto *fn = NULL;
struct bpf_reg_state *regs;
struct bpf_call_arg_meta meta;
int insn_idx = *insn_idx_p;
bool changes_data;
int i, err;
int i, err, func_id;
/* find function prototype */
func_id = insn->imm;
if (func_id < 0 || func_id >= __BPF_FUNC_MAX_ID) {
verbose(env, "invalid func %s#%d\n", func_id_name(func_id),
func_id);
......@@ -5594,6 +5770,13 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn
return -EINVAL;
}
if (func_id == BPF_FUNC_for_each_map_elem) {
err = __check_func_call(env, insn, insn_idx_p, meta.subprogno,
set_map_elem_callback_state);
if (err < 0)
return -EINVAL;
}
/* reset caller saved regs */
for (i = 0; i < CALLER_SAVED_REGS; i++) {
mark_reg_not_init(env, regs, caller_saved[i]);
......@@ -5847,6 +6030,19 @@ static int retrieve_ptr_limit(const struct bpf_reg_state *ptr_reg,
else
*ptr_limit = -off;
return 0;
case PTR_TO_MAP_KEY:
/* Currently, this code is not exercised as the only use
* is bpf_for_each_map_elem() helper which requires
* bpf_capble. The code has been tested manually for
* future use.
*/
if (mask_to_left) {
*ptr_limit = ptr_reg->umax_value + ptr_reg->off;
} else {
off = ptr_reg->smin_value + ptr_reg->off;
*ptr_limit = ptr_reg->map_ptr->key_size - off;
}
return 0;
case PTR_TO_MAP_VALUE:
if (mask_to_left) {
*ptr_limit = ptr_reg->umax_value + ptr_reg->off;
......@@ -6048,6 +6244,7 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
verbose(env, "R%d pointer arithmetic on %s prohibited\n",
dst, reg_type_str[ptr_reg->type]);
return -EACCES;
case PTR_TO_MAP_KEY:
case PTR_TO_MAP_VALUE:
if (!env->allow_ptr_leaks && !known && (smin_val < 0) != (smax_val < 0)) {
verbose(env, "R%d has unknown scalar with mixed signed bounds, pointer arithmetic with it prohibited for !root\n",
......@@ -8227,6 +8424,24 @@ static int check_ld_imm(struct bpf_verifier_env *env, struct bpf_insn *insn)
return 0;
}
if (insn->src_reg == BPF_PSEUDO_FUNC) {
struct bpf_prog_aux *aux = env->prog->aux;
u32 subprogno = insn[1].imm;
if (!aux->func_info) {
verbose(env, "missing btf func_info\n");
return -EINVAL;
}
if (aux->func_info_aux[subprogno].linkage != BTF_FUNC_STATIC) {
verbose(env, "callback function not static\n");
return -EINVAL;
}
dst_reg->type = PTR_TO_FUNC;
dst_reg->subprogno = subprogno;
return 0;
}
map = env->used_maps[aux->map_index];
mark_reg_known_zero(env, regs, insn->dst_reg);
dst_reg->map_ptr = map;
......@@ -8455,17 +8670,7 @@ static int check_return_code(struct bpf_verifier_env *env)
}
if (!tnum_in(range, reg->var_off)) {
char tn_buf[48];
verbose(env, "At program exit the register R0 ");
if (!tnum_is_unknown(reg->var_off)) {
tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
verbose(env, "has value %s", tn_buf);
} else {
verbose(env, "has unknown scalar value");
}
tnum_strn(tn_buf, sizeof(tn_buf), range);
verbose(env, " should have been in %s\n", tn_buf);
verbose_invalid_scalar(env, reg, &range, "program exit", "R0");
return -EINVAL;
}
......@@ -8592,6 +8797,27 @@ static int push_insn(int t, int w, int e, struct bpf_verifier_env *env,
return DONE_EXPLORING;
}
static int visit_func_call_insn(int t, int insn_cnt,
struct bpf_insn *insns,
struct bpf_verifier_env *env,
bool visit_callee)
{
int ret;
ret = push_insn(t, t + 1, FALLTHROUGH, env, false);
if (ret)
return ret;
if (t + 1 < insn_cnt)
init_explored_state(env, t + 1);
if (visit_callee) {
init_explored_state(env, t);
ret = push_insn(t, t + insns[t].imm + 1, BRANCH,
env, false);
}
return ret;
}
/* Visits the instruction at index t and returns one of the following:
* < 0 - an error occurred
* DONE_EXPLORING - the instruction was fully explored
......@@ -8602,6 +8828,9 @@ static int visit_insn(int t, int insn_cnt, struct bpf_verifier_env *env)
struct bpf_insn *insns = env->prog->insnsi;
int ret;
if (bpf_pseudo_func(insns + t))
return visit_func_call_insn(t, insn_cnt, insns, env, true);
/* All non-branch instructions have a single fall-through edge. */
if (BPF_CLASS(insns[t].code) != BPF_JMP &&
BPF_CLASS(insns[t].code) != BPF_JMP32)
......@@ -8612,18 +8841,8 @@ static int visit_insn(int t, int insn_cnt, struct bpf_verifier_env *env)
return DONE_EXPLORING;
case BPF_CALL:
ret = push_insn(t, t + 1, FALLTHROUGH, env, false);
if (ret)
return ret;
if (t + 1 < insn_cnt)
init_explored_state(env, t + 1);
if (insns[t].src_reg == BPF_PSEUDO_CALL) {
init_explored_state(env, t);
ret = push_insn(t, t + insns[t].imm + 1, BRANCH,
env, false);
}
return ret;
return visit_func_call_insn(t, insn_cnt, insns, env,
insns[t].src_reg == BPF_PSEUDO_CALL);
case BPF_JA:
if (BPF_SRC(insns[t].code) != BPF_K)
......@@ -9232,6 +9451,7 @@ static bool regsafe(struct bpf_reg_state *rold, struct bpf_reg_state *rcur,
*/
return false;
}
case PTR_TO_MAP_KEY:
case PTR_TO_MAP_VALUE:
/* If the new min/max/var_off satisfy the old ones and
* everything else matches, we are OK.
......@@ -10078,10 +10298,9 @@ static int do_check(struct bpf_verifier_env *env)
if (insn->src_reg == BPF_PSEUDO_CALL)
err = check_func_call(env, insn, &env->insn_idx);
else
err = check_helper_call(env, insn->imm, env->insn_idx);
err = check_helper_call(env, insn, &env->insn_idx);
if (err)
return err;
} else if (opcode == BPF_JA) {
if (BPF_SRC(insn->code) != BPF_K ||
insn->imm != 0 ||
......@@ -10510,6 +10729,12 @@ static int resolve_pseudo_ldimm64(struct bpf_verifier_env *env)
goto next_insn;
}
if (insn[0].src_reg == BPF_PSEUDO_FUNC) {
aux = &env->insn_aux_data[i];
aux->ptr_type = PTR_TO_FUNC;
goto next_insn;
}
/* In final convert_pseudo_ld_imm64() step, this is
* converted into regular 64-bit imm load insn.
*/
......@@ -10642,9 +10867,13 @@ static void convert_pseudo_ld_imm64(struct bpf_verifier_env *env)
int insn_cnt = env->prog->len;
int i;
for (i = 0; i < insn_cnt; i++, insn++)
if (insn->code == (BPF_LD | BPF_IMM | BPF_DW))
insn->src_reg = 0;
for (i = 0; i < insn_cnt; i++, insn++) {
if (insn->code != (BPF_LD | BPF_IMM | BPF_DW))
continue;
if (insn->src_reg == BPF_PSEUDO_FUNC)
continue;
insn->src_reg = 0;
}
}
/* single env->prog->insni[off] instruction was replaced with the range
......@@ -11285,6 +11514,12 @@ static int jit_subprogs(struct bpf_verifier_env *env)
return 0;
for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) {
if (bpf_pseudo_func(insn)) {
env->insn_aux_data[i].call_imm = insn->imm;
/* subprog is encoded in insn[1].imm */
continue;
}
if (!bpf_pseudo_call(insn))
continue;
/* Upon error here we cannot fall back to interpreter but
......@@ -11414,6 +11649,12 @@ static int jit_subprogs(struct bpf_verifier_env *env)
for (i = 0; i < env->subprog_cnt; i++) {
insn = func[i]->insnsi;
for (j = 0; j < func[i]->len; j++, insn++) {
if (bpf_pseudo_func(insn)) {
subprog = insn[1].imm;
insn[0].imm = (u32)(long)func[subprog]->bpf_func;
insn[1].imm = ((u64)(long)func[subprog]->bpf_func) >> 32;
continue;
}
if (!bpf_pseudo_call(insn))
continue;
subprog = insn->off;
......@@ -11459,6 +11700,11 @@ static int jit_subprogs(struct bpf_verifier_env *env)
* later look the same as if they were interpreted only.
*/
for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) {
if (bpf_pseudo_func(insn)) {
insn[0].imm = env->insn_aux_data[i].call_imm;
insn[1].imm = find_subprog(env, i + insn[0].imm + 1);
continue;
}
if (!bpf_pseudo_call(insn))
continue;
insn->off = env->insn_aux_data[i].call_imm;
......@@ -11523,6 +11769,14 @@ static int fixup_call_args(struct bpf_verifier_env *env)
return -EINVAL;
}
for (i = 0; i < prog->len; i++, insn++) {
if (bpf_pseudo_func(insn)) {
/* When JIT fails the progs with callback calls
* have to be rejected, since interpreter doesn't support them yet.
*/
verbose(env, "callbacks are not allowed in non-JITed programs\n");
return -EINVAL;
}
if (!bpf_pseudo_call(insn))
continue;
depth = get_callee_stack_depth(env, insn, i);
......
......@@ -1371,6 +1371,8 @@ bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_task_storage_get_proto;
case BPF_FUNC_task_storage_delete:
return &bpf_task_storage_delete_proto;
case BPF_FUNC_for_each_map_elem:
return &bpf_for_each_map_elem_proto;
default:
return NULL;
}
......
......@@ -196,6 +196,9 @@ static const char *print_imm(void *private_data,
else if (insn->src_reg == BPF_PSEUDO_MAP_VALUE)
snprintf(dd->scratch_buff, sizeof(dd->scratch_buff),
"map[id:%u][0]+%u", insn->imm, (insn + 1)->imm);
else if (insn->src_reg == BPF_PSEUDO_FUNC)
snprintf(dd->scratch_buff, sizeof(dd->scratch_buff),
"subprog[%+d]", insn->imm);
else
snprintf(dd->scratch_buff, sizeof(dd->scratch_buff),
"0x%llx", (unsigned long long)full_imm);
......
......@@ -393,6 +393,15 @@ enum bpf_link_type {
* is struct/union.
*/
#define BPF_PSEUDO_BTF_ID 3
/* insn[0].src_reg: BPF_PSEUDO_FUNC
* insn[0].imm: insn offset to the func
* insn[1].imm: 0
* insn[0].off: 0
* insn[1].off: 0
* ldimm64 rewrite: address of the function
* verifier type: PTR_TO_FUNC.
*/
#define BPF_PSEUDO_FUNC 4
/* when bpf_call->src_reg == BPF_PSEUDO_CALL, bpf_call->imm == pc-relative
* offset to another bpf function
......@@ -3909,6 +3918,34 @@ union bpf_attr {
* * **BPF_MTU_CHK_RET_FRAG_NEEDED**
* * **BPF_MTU_CHK_RET_SEGS_TOOBIG**
*
* long bpf_for_each_map_elem(struct bpf_map *map, void *callback_fn, void *callback_ctx, u64 flags)
* Description
* For each element in **map**, call **callback_fn** function with
* **map**, **callback_ctx** and other map-specific parameters.
* The **callback_fn** should be a static function and
* the **callback_ctx** should be a pointer to the stack.
* The **flags** is used to control certain aspects of the helper.
* Currently, the **flags** must be 0.
*
* The following are a list of supported map types and their
* respective expected callback signatures:
*
* BPF_MAP_TYPE_HASH, BPF_MAP_TYPE_PERCPU_HASH,
* BPF_MAP_TYPE_LRU_HASH, BPF_MAP_TYPE_LRU_PERCPU_HASH,
* BPF_MAP_TYPE_ARRAY, BPF_MAP_TYPE_PERCPU_ARRAY
*
* long (\*callback_fn)(struct bpf_map \*map, const void \*key, void \*value, void \*ctx);
*
* For per_cpu maps, the map_value is the value on the cpu where the
* bpf_prog is running.
*
* If **callback_fn** return 0, the helper will continue to the next
* element. If return value is 1, the helper will skip the rest of
* elements and return. Other return values are not used now.
*
* Return
* The number of traversed map elements for success, **-EINVAL** for
* invalid **flags**.
*/
#define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \
......@@ -4075,6 +4112,7 @@ union bpf_attr {
FN(ima_inode_hash), \
FN(sock_from_file), \
FN(check_mtu), \
FN(for_each_map_elem), \
/* */
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
......
......@@ -188,6 +188,7 @@ enum reloc_type {
RELO_CALL,
RELO_DATA,
RELO_EXTERN,
RELO_SUBPROG_ADDR,
};
struct reloc_desc {
......@@ -574,6 +575,16 @@ static bool insn_is_subprog_call(const struct bpf_insn *insn)
insn->off == 0;
}
static bool is_ldimm64(struct bpf_insn *insn)
{
return insn->code == (BPF_LD | BPF_IMM | BPF_DW);
}
static bool insn_is_pseudo_func(struct bpf_insn *insn)
{
return is_ldimm64(insn) && insn->src_reg == BPF_PSEUDO_FUNC;
}
static int
bpf_object__init_prog(struct bpf_object *obj, struct bpf_program *prog,
const char *name, size_t sec_idx, const char *sec_name,
......@@ -2974,6 +2985,23 @@ static bool sym_is_extern(const GElf_Sym *sym)
GELF_ST_TYPE(sym->st_info) == STT_NOTYPE;
}
static bool sym_is_subprog(const GElf_Sym *sym, int text_shndx)
{
int bind = GELF_ST_BIND(sym->st_info);
int type = GELF_ST_TYPE(sym->st_info);
/* in .text section */
if (sym->st_shndx != text_shndx)
return false;
/* local function */
if (bind == STB_LOCAL && type == STT_SECTION)
return true;
/* global function */
return bind == STB_GLOBAL && type == STT_FUNC;
}
static int find_extern_btf_id(const struct btf *btf, const char *ext_name)
{
const struct btf_type *t;
......@@ -3395,7 +3423,7 @@ static int bpf_program__record_reloc(struct bpf_program *prog,
return 0;
}
if (insn->code != (BPF_LD | BPF_IMM | BPF_DW)) {
if (!is_ldimm64(insn)) {
pr_warn("prog '%s': invalid relo against '%s' for insns[%d].code 0x%x\n",
prog->name, sym_name, insn_idx, insn->code);
return -LIBBPF_ERRNO__RELOC;
......@@ -3430,6 +3458,23 @@ static int bpf_program__record_reloc(struct bpf_program *prog,
return -LIBBPF_ERRNO__RELOC;
}
/* loading subprog addresses */
if (sym_is_subprog(sym, obj->efile.text_shndx)) {
/* global_func: sym->st_value = offset in the section, insn->imm = 0.
* local_func: sym->st_value = 0, insn->imm = offset in the section.
*/
if ((sym->st_value % BPF_INSN_SZ) || (insn->imm % BPF_INSN_SZ)) {
pr_warn("prog '%s': bad subprog addr relo against '%s' at offset %zu+%d\n",
prog->name, sym_name, (size_t)sym->st_value, insn->imm);
return -LIBBPF_ERRNO__RELOC;
}
reloc_desc->type = RELO_SUBPROG_ADDR;
reloc_desc->insn_idx = insn_idx;
reloc_desc->sym_off = sym->st_value;
return 0;
}
type = bpf_object__section_to_libbpf_map_type(obj, shdr_idx);
sym_sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, shdr_idx));
......@@ -5566,11 +5611,6 @@ static void bpf_core_poison_insn(struct bpf_program *prog, int relo_idx,
insn->imm = 195896080; /* => 0xbad2310 => "bad relo" */
}
static bool is_ldimm64(struct bpf_insn *insn)
{
return insn->code == (BPF_LD | BPF_IMM | BPF_DW);
}
static int insn_bpf_size_to_bytes(struct bpf_insn *insn)
{
switch (BPF_SIZE(insn->code)) {
......@@ -6172,6 +6212,10 @@ bpf_object__relocate_data(struct bpf_object *obj, struct bpf_program *prog)
}
relo->processed = true;
break;
case RELO_SUBPROG_ADDR:
insn[0].src_reg = BPF_PSEUDO_FUNC;
/* will be handled as a follow up pass */
break;
case RELO_CALL:
/* will be handled as a follow up pass */
break;
......@@ -6358,11 +6402,11 @@ bpf_object__reloc_code(struct bpf_object *obj, struct bpf_program *main_prog,
for (insn_idx = 0; insn_idx < prog->sec_insn_cnt; insn_idx++) {
insn = &main_prog->insns[prog->sub_insn_off + insn_idx];
if (!insn_is_subprog_call(insn))
if (!insn_is_subprog_call(insn) && !insn_is_pseudo_func(insn))
continue;
relo = find_prog_insn_relo(prog, insn_idx);
if (relo && relo->type != RELO_CALL) {
if (relo && relo->type != RELO_CALL && relo->type != RELO_SUBPROG_ADDR) {
pr_warn("prog '%s': unexpected relo for insn #%zu, type %d\n",
prog->name, insn_idx, relo->type);
return -LIBBPF_ERRNO__RELOC;
......@@ -6374,8 +6418,22 @@ bpf_object__reloc_code(struct bpf_object *obj, struct bpf_program *main_prog,
* call always has imm = -1, but for static functions
* relocation is against STT_SECTION and insn->imm
* points to a start of a static function
*
* for subprog addr relocation, the relo->sym_off + insn->imm is
* the byte offset in the corresponding section.
*/
sub_insn_idx = relo->sym_off / BPF_INSN_SZ + insn->imm + 1;
if (relo->type == RELO_CALL)
sub_insn_idx = relo->sym_off / BPF_INSN_SZ + insn->imm + 1;
else
sub_insn_idx = (relo->sym_off + insn->imm) / BPF_INSN_SZ;
} else if (insn_is_pseudo_func(insn)) {
/*
* RELO_SUBPROG_ADDR relo is always emitted even if both
* functions are in the same section, so it shouldn't reach here.
*/
pr_warn("prog '%s': missing subprog addr relo for insn #%zu\n",
prog->name, insn_idx);
return -LIBBPF_ERRNO__RELOC;
} else {
/* if subprogram call is to a static function within
* the same ELF section, there won't be any relocation
......
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2021 Facebook */
#include <test_progs.h>
#include <network_helpers.h>
#include "for_each_hash_map_elem.skel.h"
#include "for_each_array_map_elem.skel.h"
static unsigned int duration;
static void test_hash_map(void)
{
int i, err, hashmap_fd, max_entries, percpu_map_fd;
struct for_each_hash_map_elem *skel;
__u64 *percpu_valbuf = NULL;
__u32 key, num_cpus, retval;
__u64 val;
skel = for_each_hash_map_elem__open_and_load();
if (!ASSERT_OK_PTR(skel, "for_each_hash_map_elem__open_and_load"))
return;
hashmap_fd = bpf_map__fd(skel->maps.hashmap);
max_entries = bpf_map__max_entries(skel->maps.hashmap);
for (i = 0; i < max_entries; i++) {
key = i;
val = i + 1;
err = bpf_map_update_elem(hashmap_fd, &key, &val, BPF_ANY);
if (!ASSERT_OK(err, "map_update"))
goto out;
}
num_cpus = bpf_num_possible_cpus();
percpu_map_fd = bpf_map__fd(skel->maps.percpu_map);
percpu_valbuf = malloc(sizeof(__u64) * num_cpus);
if (!ASSERT_OK_PTR(percpu_valbuf, "percpu_valbuf"))
goto out;
key = 1;
for (i = 0; i < num_cpus; i++)
percpu_valbuf[i] = i + 1;
err = bpf_map_update_elem(percpu_map_fd, &key, percpu_valbuf, BPF_ANY);
if (!ASSERT_OK(err, "percpu_map_update"))
goto out;
err = bpf_prog_test_run(bpf_program__fd(skel->progs.test_pkt_access),
1, &pkt_v4, sizeof(pkt_v4), NULL, NULL,
&retval, &duration);
if (CHECK(err || retval, "ipv4", "err %d errno %d retval %d\n",
err, errno, retval))
goto out;
ASSERT_EQ(skel->bss->hashmap_output, 4, "hashmap_output");
ASSERT_EQ(skel->bss->hashmap_elems, max_entries, "hashmap_elems");
key = 1;
err = bpf_map_lookup_elem(hashmap_fd, &key, &val);
ASSERT_ERR(err, "hashmap_lookup");
ASSERT_EQ(skel->bss->percpu_called, 1, "percpu_called");
ASSERT_LT(skel->bss->cpu, num_cpus, "num_cpus");
ASSERT_EQ(skel->bss->percpu_map_elems, 1, "percpu_map_elems");
ASSERT_EQ(skel->bss->percpu_key, 1, "percpu_key");
ASSERT_EQ(skel->bss->percpu_val, skel->bss->cpu + 1, "percpu_val");
ASSERT_EQ(skel->bss->percpu_output, 100, "percpu_output");
out:
free(percpu_valbuf);
for_each_hash_map_elem__destroy(skel);
}
static void test_array_map(void)
{
__u32 key, num_cpus, max_entries, retval;
int i, arraymap_fd, percpu_map_fd, err;
struct for_each_array_map_elem *skel;
__u64 *percpu_valbuf = NULL;
__u64 val, expected_total;
skel = for_each_array_map_elem__open_and_load();
if (!ASSERT_OK_PTR(skel, "for_each_array_map_elem__open_and_load"))
return;
arraymap_fd = bpf_map__fd(skel->maps.arraymap);
expected_total = 0;
max_entries = bpf_map__max_entries(skel->maps.arraymap);
for (i = 0; i < max_entries; i++) {
key = i;
val = i + 1;
/* skip the last iteration for expected total */
if (i != max_entries - 1)
expected_total += val;
err = bpf_map_update_elem(arraymap_fd, &key, &val, BPF_ANY);
if (!ASSERT_OK(err, "map_update"))
goto out;
}
num_cpus = bpf_num_possible_cpus();
percpu_map_fd = bpf_map__fd(skel->maps.percpu_map);
percpu_valbuf = malloc(sizeof(__u64) * num_cpus);
if (!ASSERT_OK_PTR(percpu_valbuf, "percpu_valbuf"))
goto out;
key = 0;
for (i = 0; i < num_cpus; i++)
percpu_valbuf[i] = i + 1;
err = bpf_map_update_elem(percpu_map_fd, &key, percpu_valbuf, BPF_ANY);
if (!ASSERT_OK(err, "percpu_map_update"))
goto out;
err = bpf_prog_test_run(bpf_program__fd(skel->progs.test_pkt_access),
1, &pkt_v4, sizeof(pkt_v4), NULL, NULL,
&retval, &duration);
if (CHECK(err || retval, "ipv4", "err %d errno %d retval %d\n",
err, errno, retval))
goto out;
ASSERT_EQ(skel->bss->arraymap_output, expected_total, "array_output");
ASSERT_EQ(skel->bss->cpu + 1, skel->bss->percpu_val, "percpu_val");
out:
free(percpu_valbuf);
for_each_array_map_elem__destroy(skel);
}
void test_for_each(void)
{
if (test__start_subtest("hash_map"))
test_hash_map();
if (test__start_subtest("array_map"))
test_array_map();
}
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2021 Facebook */
#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
char _license[] SEC("license") = "GPL";
struct {
__uint(type, BPF_MAP_TYPE_ARRAY);
__uint(max_entries, 3);
__type(key, __u32);
__type(value, __u64);
} arraymap SEC(".maps");
struct {
__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
__uint(max_entries, 1);
__type(key, __u32);
__type(value, __u64);
} percpu_map SEC(".maps");
struct callback_ctx {
int output;
};
static __u64
check_array_elem(struct bpf_map *map, __u32 *key, __u64 *val,
struct callback_ctx *data)
{
data->output += *val;
if (*key == 1)
return 1; /* stop the iteration */
return 0;
}
__u32 cpu = 0;
__u64 percpu_val = 0;
static __u64
check_percpu_elem(struct bpf_map *map, __u32 *key, __u64 *val,
struct callback_ctx *data)
{
cpu = bpf_get_smp_processor_id();
percpu_val = *val;
return 0;
}
u32 arraymap_output = 0;
SEC("classifier")
int test_pkt_access(struct __sk_buff *skb)
{
struct callback_ctx data;
data.output = 0;
bpf_for_each_map_elem(&arraymap, check_array_elem, &data, 0);
arraymap_output = data.output;
bpf_for_each_map_elem(&percpu_map, check_percpu_elem, (void *)0, 0);
return 0;
}
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2021 Facebook */
#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
char _license[] SEC("license") = "GPL";
struct {
__uint(type, BPF_MAP_TYPE_HASH);
__uint(max_entries, 3);
__type(key, __u32);
__type(value, __u64);
} hashmap SEC(".maps");
struct {
__uint(type, BPF_MAP_TYPE_PERCPU_HASH);
__uint(max_entries, 1);
__type(key, __u32);
__type(value, __u64);
} percpu_map SEC(".maps");
struct callback_ctx {
struct __sk_buff *ctx;
int input;
int output;
};
static __u64
check_hash_elem(struct bpf_map *map, __u32 *key, __u64 *val,
struct callback_ctx *data)
{
struct __sk_buff *skb = data->ctx;
__u32 k;
__u64 v;
if (skb) {
k = *key;
v = *val;
if (skb->len == 10000 && k == 10 && v == 10)
data->output = 3; /* impossible path */
else
data->output = 4;
} else {
data->output = data->input;
bpf_map_delete_elem(map, key);
}
return 0;
}
__u32 cpu = 0;
__u32 percpu_called = 0;
__u32 percpu_key = 0;
__u64 percpu_val = 0;
int percpu_output = 0;
static __u64
check_percpu_elem(struct bpf_map *map, __u32 *key, __u64 *val,
struct callback_ctx *unused)
{
struct callback_ctx data;
percpu_called++;
cpu = bpf_get_smp_processor_id();
percpu_key = *key;
percpu_val = *val;
data.ctx = 0;
data.input = 100;
data.output = 0;
bpf_for_each_map_elem(&hashmap, check_hash_elem, &data, 0);
percpu_output = data.output;
return 0;
}
int hashmap_output = 0;
int hashmap_elems = 0;
int percpu_map_elems = 0;
SEC("classifier")
int test_pkt_access(struct __sk_buff *skb)
{
struct callback_ctx data;
data.ctx = skb;
data.input = 10;
data.output = 0;
hashmap_elems = bpf_for_each_map_elem(&hashmap, check_hash_elem, &data, 0);
hashmap_output = data.output;
percpu_map_elems = bpf_for_each_map_elem(&percpu_map, check_percpu_elem,
(void *)0, 0);
return 0;
}
......@@ -152,6 +152,17 @@ extern int test__join_cgroup(const char *path);
___ok; \
})
#define ASSERT_LT(actual, expected, name) ({ \
static int duration = 0; \
typeof(actual) ___act = (actual); \
typeof(expected) ___exp = (expected); \
bool ___ok = ___act < ___exp; \
CHECK(!___ok, (name), \
"unexpected %s: actual %lld >= expected %lld\n", \
(name), (long long)(___act), (long long)(___exp)); \
___ok; \
})
#define ASSERT_STREQ(actual, expected, name) ({ \
static int duration = 0; \
const char *___act = actual; \
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment