Commit cc0f8353 authored by Alexei Starovoitov

Merge branch 'bpf: add bpf_for_each_map_elem() helper'

Yonghong Song says:

====================

This patch set introduces the bpf_for_each_map_elem() helper.
The helper permits a bpf program to iterate through all elements
of a particular map.

The work was originally inspired by an internal discussion where
firewall rules are kept in a map and a bpf prog wants to check
packet 5-tuples against all rules in the map. A bounded loop can
be used, but it has a few drawbacks: as the loop iteration count
goes up, verification time goes up too, and for really large maps
verification may fail. A helper which abstracts out the loop
itself does not have this verification time issue.

A recent discussion in [1] involves iterating over all hash map
elements in a bpf program. Iterating over all hashmap elements in
a bpf program is currently not easy if the key space is really big,
so a helper which abstracts out the loop itself is even more
valuable there.

The proposed helper signature looks like:
  long bpf_for_each_map_elem(map, callback_fn, callback_ctx, flags)
where callback_fn is a static function and callback_ctx is
a piece of data allocated on the caller's stack which can be
accessed by callback_fn. The callback_fn signature may differ
between map types. For example, for hash/array maps, the
signature is
  long callback_fn(map, key, val, callback_ctx)
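
As an illustration only (not part of this series), a bpf-side use of
the helper might look roughly like the sketch below. The map, program
section and function names here are made up, and the includes follow
the selftests added later in the series:

  #include "vmlinux.h"
  #include <bpf/bpf_helpers.h>

  char _license[] SEC("license") = "GPL";

  struct {
          __uint(type, BPF_MAP_TYPE_HASH);
          __uint(max_entries, 128);
          __type(key, __u32);
          __type(value, __u64);
  } rules SEC(".maps");

  struct callback_ctx {
          __u64 threshold;
          __u32 matches;
  };

  /* callback_fn must be a static function; BTF for it is required */
  static __u64
  count_matches(struct bpf_map *map, __u32 *key, __u64 *val,
                struct callback_ctx *data)
  {
          if (*val > data->threshold)
                  data->matches++;
          return 0;       /* 0 - continue, 1 - stop iterating */
  }

  __u32 matched = 0;

  SEC("classifier")
  int scan_rules(struct __sk_buff *skb)
  {
          struct callback_ctx data = { .threshold = 100, .matches = 0 };
          long n;

          /* callback_ctx lives on the caller stack; flags must be 0 */
          n = bpf_for_each_map_elem(&rules, count_matches, &data, 0);
          if (n >= 0)     /* n = number of traversed elements */
                  matched = data.matches;
          return 0;
  }

The helper returns the number of traversed elements on success, or
-EINVAL for non-zero flags, as documented in the uapi header below.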

In the rest of the series, patches 1/2/3/4 do some refactoring. Patch 5
implements core kernel support for the helper. Patches 6 and 7
add hashmap and arraymap support. Patches 8/9 add libbpf
support. Patch 10 adds bpftool support. Patches 11 and 12 add
selftests for hashmap and arraymap.

[1]: https://lore.kernel.org/bpf/20210122205415.113822-1-xiyou.wangcong@gmail.com/

Changelogs:
  v4 -> v5:
    - rebase on top of bpf-next.
  v3 -> v4:
    - better refactoring of check_func_call(), calculate subprogno outside
      of __check_func_call() helper. (Andrii)
    - better documentation (like the list of supported maps and their
      callback signatures) in uapi header. (Andrii)
    - implement and use ASSERT_LT in selftests. (Andrii)
    - a few other minor changes.
  v2 -> v3:
    - add comments in retrieve_ptr_limit(), which is in sanitize_ptr_alu(),
      to clarify the code is not executed for PTR_TO_MAP_KEY handling,
      but code is manually tested. (Alexei)
    - require BTF for callback function. (Alexei)
    - simplify hashmap/arraymap callback return handling as return value
      [0, 1] has been enforced by the verifier. (Alexei)
    - also mark global subprog (if used in ld_imm64) as RELO_SUBPROG_ADDR. (Andrii)
    - handle the condition to mark RELO_SUBPROG_ADDR properly. (Andrii)
    - make bpftool subprog insn offset dumping consistent with pcrel calls. (Andrii)
  v1 -> v2:
    - setup callee frame in check_helper_call() and then proceed to verify
      helper return value as normal. (Alexei)
    - use meta data to keep track of map/func pointer to avoid hard coding
      the register number. (Alexei)
    - verify callback_fn return value range [0, 1]. (Alexei)
    - add migrate_{disable, enable} to ensure percpu value is the one
      bpf program expects to see. (Alexei)
    - change bpf_for_each_map_elem() return value to the number of iterated
      elements. (Andrii)
    - Change libbpf pseudo_func relo name to RELO_SUBPROG_ADDR and use
      more rigid checking for the relocation. (Andrii)
    - Better format to print out subprog address with bpftool. (Andrii)
    - Use bpf_prog_test_run to trigger bpf run, instead of bpf_iter. (Andrii)
    - Other misc changes.
====================
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
parents 86fd1665 6b9e3331
...@@ -39,6 +39,7 @@ struct bpf_local_storage;
struct bpf_local_storage_map;
struct kobject;
struct mem_cgroup;
struct bpf_func_state;
extern struct idr btf_idr;
extern spinlock_t btf_idr_lock;
...@@ -129,6 +130,13 @@ struct bpf_map_ops {
bool (*map_meta_equal)(const struct bpf_map *meta0,
const struct bpf_map *meta1);
int (*map_set_for_each_callback_args)(struct bpf_verifier_env *env,
struct bpf_func_state *caller,
struct bpf_func_state *callee);
int (*map_for_each_callback)(struct bpf_map *map, void *callback_fn,
void *callback_ctx, u64 flags);
/* BTF name and id of struct allocated by map_alloc */
const char * const map_btf_name;
int *map_btf_id;
...@@ -295,6 +303,8 @@ enum bpf_arg_type {
ARG_CONST_ALLOC_SIZE_OR_ZERO, /* number of allocated bytes requested */
ARG_PTR_TO_BTF_ID_SOCK_COMMON, /* pointer to in-kernel sock_common or bpf-mirrored bpf_sock */
ARG_PTR_TO_PERCPU_BTF_ID, /* pointer to in-kernel percpu type */
ARG_PTR_TO_FUNC, /* pointer to a bpf program function */
ARG_PTR_TO_STACK_OR_NULL, /* pointer to stack or NULL */
__BPF_ARG_TYPE_MAX,
};
...@@ -411,6 +421,8 @@ enum bpf_reg_type {
PTR_TO_RDWR_BUF, /* reg points to a read/write buffer */
PTR_TO_RDWR_BUF_OR_NULL, /* reg points to a read/write buffer or NULL */
PTR_TO_PERCPU_BTF_ID, /* reg points to a percpu kernel variable */
PTR_TO_FUNC, /* reg points to a bpf program function */
PTR_TO_MAP_KEY, /* reg points to a map element key */
};
/* The information passed from prog-specific *_is_valid_access
...@@ -1385,6 +1397,10 @@ void bpf_iter_map_show_fdinfo(const struct bpf_iter_aux_info *aux,
int bpf_iter_map_fill_link_info(const struct bpf_iter_aux_info *aux,
struct bpf_link_info *info);
int map_set_for_each_callback_args(struct bpf_verifier_env *env,
struct bpf_func_state *caller,
struct bpf_func_state *callee);
int bpf_percpu_hash_copy(struct bpf_map *map, void *key, void *value);
int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value);
int bpf_percpu_hash_update(struct bpf_map *map, void *key, void *value,
...@@ -1887,6 +1903,7 @@ extern const struct bpf_func_proto bpf_sock_from_file_proto;
extern const struct bpf_func_proto bpf_get_socket_ptr_cookie_proto;
extern const struct bpf_func_proto bpf_task_storage_get_proto;
extern const struct bpf_func_proto bpf_task_storage_delete_proto;
extern const struct bpf_func_proto bpf_for_each_map_elem_proto;
const struct bpf_func_proto *bpf_tracing_func_proto(
enum bpf_func_id func_id, const struct bpf_prog *prog);
...
...@@ -68,6 +68,8 @@ struct bpf_reg_state {
unsigned long raw1;
unsigned long raw2;
} raw;
u32 subprogno; /* for PTR_TO_FUNC */
};
/* For PTR_TO_PACKET, used to find other pointers with the same variable
* offset, so they can share range knowledge.
...@@ -204,6 +206,7 @@ struct bpf_func_state {
int acquired_refs;
struct bpf_reference_state *refs;
int allocated_stack;
bool in_callback_fn;
struct bpf_stack_state *stack;
};
...
...@@ -393,6 +393,15 @@ enum bpf_link_type {
* is struct/union.
*/
#define BPF_PSEUDO_BTF_ID 3
/* insn[0].src_reg: BPF_PSEUDO_FUNC
* insn[0].imm: insn offset to the func
* insn[1].imm: 0
* insn[0].off: 0
* insn[1].off: 0
* ldimm64 rewrite: address of the function
* verifier type: PTR_TO_FUNC.
*/
#define BPF_PSEUDO_FUNC 4
/* when bpf_call->src_reg == BPF_PSEUDO_CALL, bpf_call->imm == pc-relative
* offset to another bpf function
...@@ -3909,6 +3918,34 @@ union bpf_attr {
* * **BPF_MTU_CHK_RET_FRAG_NEEDED**
* * **BPF_MTU_CHK_RET_SEGS_TOOBIG**
*
* long bpf_for_each_map_elem(struct bpf_map *map, void *callback_fn, void *callback_ctx, u64 flags)
* Description
* For each element in **map**, call **callback_fn** function with
* **map**, **callback_ctx** and other map-specific parameters.
* The **callback_fn** should be a static function and
* the **callback_ctx** should be a pointer to the stack.
* The **flags** is used to control certain aspects of the helper.
* Currently, the **flags** must be 0.
*
* The following are a list of supported map types and their
* respective expected callback signatures:
*
* BPF_MAP_TYPE_HASH, BPF_MAP_TYPE_PERCPU_HASH,
* BPF_MAP_TYPE_LRU_HASH, BPF_MAP_TYPE_LRU_PERCPU_HASH,
* BPF_MAP_TYPE_ARRAY, BPF_MAP_TYPE_PERCPU_ARRAY
*
* long (\*callback_fn)(struct bpf_map \*map, const void \*key, void \*value, void \*ctx);
*
* For per_cpu maps, the map_value is the value on the cpu where the
* bpf_prog is running.
*
* If **callback_fn** return 0, the helper will continue to the next
* element. If return value is 1, the helper will skip the rest of
* elements and return. Other return values are not used now.
*
* Return
* The number of traversed map elements for success, **-EINVAL** for
* invalid **flags**.
*/
#define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \
...@@ -4075,6 +4112,7 @@ union bpf_attr {
FN(ima_inode_hash), \
FN(sock_from_file), \
FN(check_mtu), \
FN(for_each_map_elem), \
/* */
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
...
...@@ -625,6 +625,42 @@ static const struct bpf_iter_seq_info iter_seq_info = {
.seq_priv_size = sizeof(struct bpf_iter_seq_array_map_info),
};
static int bpf_for_each_array_elem(struct bpf_map *map, void *callback_fn,
void *callback_ctx, u64 flags)
{
u32 i, key, num_elems = 0;
struct bpf_array *array;
bool is_percpu;
u64 ret = 0;
void *val;
if (flags != 0)
return -EINVAL;
is_percpu = map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY;
array = container_of(map, struct bpf_array, map);
if (is_percpu)
migrate_disable();
for (i = 0; i < map->max_entries; i++) {
if (is_percpu)
val = this_cpu_ptr(array->pptrs[i]);
else
val = array->value + array->elem_size * i;
num_elems++;
key = i;
ret = BPF_CAST_CALL(callback_fn)((u64)(long)map,
(u64)(long)&key, (u64)(long)val,
(u64)(long)callback_ctx, 0);
/* return value: 0 - continue, 1 - stop and return */
if (ret)
break;
}
if (is_percpu)
migrate_enable();
return num_elems;
}
static int array_map_btf_id;
const struct bpf_map_ops array_map_ops = {
.map_meta_equal = array_map_meta_equal,
...@@ -643,6 +679,8 @@ const struct bpf_map_ops array_map_ops = {
.map_check_btf = array_map_check_btf,
.map_lookup_batch = generic_map_lookup_batch,
.map_update_batch = generic_map_update_batch,
.map_set_for_each_callback_args = map_set_for_each_callback_args,
.map_for_each_callback = bpf_for_each_array_elem,
.map_btf_name = "bpf_array", .map_btf_name = "bpf_array",
.map_btf_id = &array_map_btf_id, .map_btf_id = &array_map_btf_id,
.iter_seq_info = &iter_seq_info, .iter_seq_info = &iter_seq_info,
...@@ -660,6 +698,8 @@ const struct bpf_map_ops percpu_array_map_ops = { ...@@ -660,6 +698,8 @@ const struct bpf_map_ops percpu_array_map_ops = {
.map_delete_elem = array_map_delete_elem, .map_delete_elem = array_map_delete_elem,
.map_seq_show_elem = percpu_array_map_seq_show_elem, .map_seq_show_elem = percpu_array_map_seq_show_elem,
.map_check_btf = array_map_check_btf, .map_check_btf = array_map_check_btf,
.map_set_for_each_callback_args = map_set_for_each_callback_args,
.map_for_each_callback = bpf_for_each_array_elem,
.map_btf_name = "bpf_array", .map_btf_name = "bpf_array",
.map_btf_id = &percpu_array_map_btf_id, .map_btf_id = &percpu_array_map_btf_id,
.iter_seq_info = &iter_seq_info, .iter_seq_info = &iter_seq_info,
......
...@@ -675,3 +675,19 @@ int bpf_iter_run_prog(struct bpf_prog *prog, void *ctx) ...@@ -675,3 +675,19 @@ int bpf_iter_run_prog(struct bpf_prog *prog, void *ctx)
*/ */
return ret == 0 ? 0 : -EAGAIN; return ret == 0 ? 0 : -EAGAIN;
} }
BPF_CALL_4(bpf_for_each_map_elem, struct bpf_map *, map, void *, callback_fn,
void *, callback_ctx, u64, flags)
{
return map->ops->map_for_each_callback(map, callback_fn, callback_ctx, flags);
}
const struct bpf_func_proto bpf_for_each_map_elem_proto = {
.func = bpf_for_each_map_elem,
.gpl_only = false,
.ret_type = RET_INTEGER,
.arg1_type = ARG_CONST_MAP_PTR,
.arg2_type = ARG_PTR_TO_FUNC,
.arg3_type = ARG_PTR_TO_STACK_OR_NULL,
.arg4_type = ARG_ANYTHING,
};
...@@ -1869,6 +1869,63 @@ static const struct bpf_iter_seq_info iter_seq_info = {
.seq_priv_size = sizeof(struct bpf_iter_seq_hash_map_info),
};
static int bpf_for_each_hash_elem(struct bpf_map *map, void *callback_fn,
void *callback_ctx, u64 flags)
{
struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
struct hlist_nulls_head *head;
struct hlist_nulls_node *n;
struct htab_elem *elem;
u32 roundup_key_size;
int i, num_elems = 0;
void __percpu *pptr;
struct bucket *b;
void *key, *val;
bool is_percpu;
u64 ret = 0;
if (flags != 0)
return -EINVAL;
is_percpu = htab_is_percpu(htab);
roundup_key_size = round_up(map->key_size, 8);
/* disable migration so percpu value prepared here will be the
* same as the one seen by the bpf program with bpf_map_lookup_elem().
*/
if (is_percpu)
migrate_disable();
for (i = 0; i < htab->n_buckets; i++) {
b = &htab->buckets[i];
rcu_read_lock();
head = &b->head;
hlist_nulls_for_each_entry_rcu(elem, n, head, hash_node) {
key = elem->key;
if (is_percpu) {
/* current cpu value for percpu map */
pptr = htab_elem_get_ptr(elem, map->key_size);
val = this_cpu_ptr(pptr);
} else {
val = elem->key + roundup_key_size;
}
num_elems++;
ret = BPF_CAST_CALL(callback_fn)((u64)(long)map,
(u64)(long)key, (u64)(long)val,
(u64)(long)callback_ctx, 0);
/* return value: 0 - continue, 1 - stop and return */
if (ret) {
rcu_read_unlock();
goto out;
}
}
rcu_read_unlock();
}
out:
if (is_percpu)
migrate_enable();
return num_elems;
}
static int htab_map_btf_id;
const struct bpf_map_ops htab_map_ops = {
.map_meta_equal = bpf_map_meta_equal,
...@@ -1881,6 +1938,8 @@ const struct bpf_map_ops htab_map_ops = {
.map_delete_elem = htab_map_delete_elem,
.map_gen_lookup = htab_map_gen_lookup,
.map_seq_show_elem = htab_map_seq_show_elem,
.map_set_for_each_callback_args = map_set_for_each_callback_args,
.map_for_each_callback = bpf_for_each_hash_elem,
BATCH_OPS(htab),
.map_btf_name = "bpf_htab",
.map_btf_id = &htab_map_btf_id,
...@@ -1900,6 +1959,8 @@ const struct bpf_map_ops htab_lru_map_ops = {
.map_delete_elem = htab_lru_map_delete_elem,
.map_gen_lookup = htab_lru_map_gen_lookup,
.map_seq_show_elem = htab_map_seq_show_elem,
.map_set_for_each_callback_args = map_set_for_each_callback_args,
.map_for_each_callback = bpf_for_each_hash_elem,
BATCH_OPS(htab_lru),
.map_btf_name = "bpf_htab",
.map_btf_id = &htab_lru_map_btf_id,
...@@ -2019,6 +2080,8 @@ const struct bpf_map_ops htab_percpu_map_ops = {
.map_update_elem = htab_percpu_map_update_elem,
.map_delete_elem = htab_map_delete_elem,
.map_seq_show_elem = htab_percpu_map_seq_show_elem,
.map_set_for_each_callback_args = map_set_for_each_callback_args,
.map_for_each_callback = bpf_for_each_hash_elem,
BATCH_OPS(htab_percpu),
.map_btf_name = "bpf_htab",
.map_btf_id = &htab_percpu_map_btf_id,
...@@ -2036,6 +2099,8 @@ const struct bpf_map_ops htab_lru_percpu_map_ops = {
.map_update_elem = htab_lru_percpu_map_update_elem,
.map_delete_elem = htab_lru_map_delete_elem,
.map_seq_show_elem = htab_percpu_map_seq_show_elem,
.map_set_for_each_callback_args = map_set_for_each_callback_args,
.map_for_each_callback = bpf_for_each_hash_elem,
BATCH_OPS(htab_lru_percpu),
.map_btf_name = "bpf_htab",
.map_btf_id = &htab_lru_percpu_map_btf_id,
...
...@@ -708,6 +708,8 @@ bpf_base_func_proto(enum bpf_func_id func_id)
return &bpf_ringbuf_discard_proto;
case BPF_FUNC_ringbuf_query:
return &bpf_ringbuf_query_proto;
case BPF_FUNC_for_each_map_elem:
return &bpf_for_each_map_elem_proto;
default:
break;
}
...
...@@ -1371,6 +1371,8 @@ bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_task_storage_get_proto;
case BPF_FUNC_task_storage_delete:
return &bpf_task_storage_delete_proto;
case BPF_FUNC_for_each_map_elem:
return &bpf_for_each_map_elem_proto;
default:
return NULL;
}
...
...@@ -196,6 +196,9 @@ static const char *print_imm(void *private_data,
else if (insn->src_reg == BPF_PSEUDO_MAP_VALUE)
snprintf(dd->scratch_buff, sizeof(dd->scratch_buff),
"map[id:%u][0]+%u", insn->imm, (insn + 1)->imm);
else if (insn->src_reg == BPF_PSEUDO_FUNC)
snprintf(dd->scratch_buff, sizeof(dd->scratch_buff),
"subprog[%+d]", insn->imm);
else
snprintf(dd->scratch_buff, sizeof(dd->scratch_buff),
"0x%llx", (unsigned long long)full_imm);
...
...@@ -393,6 +393,15 @@ enum bpf_link_type {
* is struct/union.
*/
#define BPF_PSEUDO_BTF_ID 3
/* insn[0].src_reg: BPF_PSEUDO_FUNC
* insn[0].imm: insn offset to the func
* insn[1].imm: 0
* insn[0].off: 0
* insn[1].off: 0
* ldimm64 rewrite: address of the function
* verifier type: PTR_TO_FUNC.
*/
#define BPF_PSEUDO_FUNC 4
/* when bpf_call->src_reg == BPF_PSEUDO_CALL, bpf_call->imm == pc-relative
* offset to another bpf function
...@@ -3909,6 +3918,34 @@ union bpf_attr {
* * **BPF_MTU_CHK_RET_FRAG_NEEDED**
* * **BPF_MTU_CHK_RET_SEGS_TOOBIG**
*
* long bpf_for_each_map_elem(struct bpf_map *map, void *callback_fn, void *callback_ctx, u64 flags)
* Description
* For each element in **map**, call **callback_fn** function with
* **map**, **callback_ctx** and other map-specific parameters.
* The **callback_fn** should be a static function and
* the **callback_ctx** should be a pointer to the stack.
* The **flags** is used to control certain aspects of the helper.
* Currently, the **flags** must be 0.
*
* The following are a list of supported map types and their
* respective expected callback signatures:
*
* BPF_MAP_TYPE_HASH, BPF_MAP_TYPE_PERCPU_HASH,
* BPF_MAP_TYPE_LRU_HASH, BPF_MAP_TYPE_LRU_PERCPU_HASH,
* BPF_MAP_TYPE_ARRAY, BPF_MAP_TYPE_PERCPU_ARRAY
*
* long (\*callback_fn)(struct bpf_map \*map, const void \*key, void \*value, void \*ctx);
*
* For per_cpu maps, the map_value is the value on the cpu where the
* bpf_prog is running.
*
* If **callback_fn** return 0, the helper will continue to the next
* element. If return value is 1, the helper will skip the rest of
* elements and return. Other return values are not used now.
*
* Return
* The number of traversed map elements for success, **-EINVAL** for
* invalid **flags**.
*/
#define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \
...@@ -4075,6 +4112,7 @@ union bpf_attr {
FN(ima_inode_hash), \
FN(sock_from_file), \
FN(check_mtu), \
FN(for_each_map_elem), \
/* */
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
...
...@@ -188,6 +188,7 @@ enum reloc_type {
RELO_CALL,
RELO_DATA,
RELO_EXTERN,
RELO_SUBPROG_ADDR,
};
struct reloc_desc {
...@@ -574,6 +575,16 @@ static bool insn_is_subprog_call(const struct bpf_insn *insn)
insn->off == 0;
}
static bool is_ldimm64(struct bpf_insn *insn)
{
return insn->code == (BPF_LD | BPF_IMM | BPF_DW);
}
static bool insn_is_pseudo_func(struct bpf_insn *insn)
{
return is_ldimm64(insn) && insn->src_reg == BPF_PSEUDO_FUNC;
}
static int
bpf_object__init_prog(struct bpf_object *obj, struct bpf_program *prog,
const char *name, size_t sec_idx, const char *sec_name,
...@@ -2974,6 +2985,23 @@ static bool sym_is_extern(const GElf_Sym *sym)
GELF_ST_TYPE(sym->st_info) == STT_NOTYPE;
}
static bool sym_is_subprog(const GElf_Sym *sym, int text_shndx)
{
int bind = GELF_ST_BIND(sym->st_info);
int type = GELF_ST_TYPE(sym->st_info);
/* in .text section */
if (sym->st_shndx != text_shndx)
return false;
/* local function */
if (bind == STB_LOCAL && type == STT_SECTION)
return true;
/* global function */
return bind == STB_GLOBAL && type == STT_FUNC;
}
static int find_extern_btf_id(const struct btf *btf, const char *ext_name)
{
const struct btf_type *t;
...@@ -3395,7 +3423,7 @@ static int bpf_program__record_reloc(struct bpf_program *prog,
return 0;
}
if (insn->code != (BPF_LD | BPF_IMM | BPF_DW)) {
if (!is_ldimm64(insn)) {
pr_warn("prog '%s': invalid relo against '%s' for insns[%d].code 0x%x\n", pr_warn("prog '%s': invalid relo against '%s' for insns[%d].code 0x%x\n",
prog->name, sym_name, insn_idx, insn->code); prog->name, sym_name, insn_idx, insn->code);
return -LIBBPF_ERRNO__RELOC; return -LIBBPF_ERRNO__RELOC;
...@@ -3430,6 +3458,23 @@ static int bpf_program__record_reloc(struct bpf_program *prog, ...@@ -3430,6 +3458,23 @@ static int bpf_program__record_reloc(struct bpf_program *prog,
return -LIBBPF_ERRNO__RELOC; return -LIBBPF_ERRNO__RELOC;
} }
/* loading subprog addresses */
if (sym_is_subprog(sym, obj->efile.text_shndx)) {
/* global_func: sym->st_value = offset in the section, insn->imm = 0.
* local_func: sym->st_value = 0, insn->imm = offset in the section.
*/
if ((sym->st_value % BPF_INSN_SZ) || (insn->imm % BPF_INSN_SZ)) {
pr_warn("prog '%s': bad subprog addr relo against '%s' at offset %zu+%d\n",
prog->name, sym_name, (size_t)sym->st_value, insn->imm);
return -LIBBPF_ERRNO__RELOC;
}
reloc_desc->type = RELO_SUBPROG_ADDR;
reloc_desc->insn_idx = insn_idx;
reloc_desc->sym_off = sym->st_value;
return 0;
}
type = bpf_object__section_to_libbpf_map_type(obj, shdr_idx);
sym_sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, shdr_idx));
...@@ -5566,11 +5611,6 @@ static void bpf_core_poison_insn(struct bpf_program *prog, int relo_idx,
insn->imm = 195896080; /* => 0xbad2310 => "bad relo" */
}
static bool is_ldimm64(struct bpf_insn *insn)
{
return insn->code == (BPF_LD | BPF_IMM | BPF_DW);
}
static int insn_bpf_size_to_bytes(struct bpf_insn *insn)
{
switch (BPF_SIZE(insn->code)) {
...@@ -6172,6 +6212,10 @@ bpf_object__relocate_data(struct bpf_object *obj, struct bpf_program *prog)
}
relo->processed = true;
break;
case RELO_SUBPROG_ADDR:
insn[0].src_reg = BPF_PSEUDO_FUNC;
/* will be handled as a follow up pass */
break;
case RELO_CALL:
/* will be handled as a follow up pass */
break;
...@@ -6358,11 +6402,11 @@ bpf_object__reloc_code(struct bpf_object *obj, struct bpf_program *main_prog,
for (insn_idx = 0; insn_idx < prog->sec_insn_cnt; insn_idx++) {
insn = &main_prog->insns[prog->sub_insn_off + insn_idx];
if (!insn_is_subprog_call(insn))
if (!insn_is_subprog_call(insn) && !insn_is_pseudo_func(insn))
continue;
relo = find_prog_insn_relo(prog, insn_idx);
if (relo && relo->type != RELO_CALL) {
if (relo && relo->type != RELO_CALL && relo->type != RELO_SUBPROG_ADDR) {
pr_warn("prog '%s': unexpected relo for insn #%zu, type %d\n", pr_warn("prog '%s': unexpected relo for insn #%zu, type %d\n",
prog->name, insn_idx, relo->type); prog->name, insn_idx, relo->type);
return -LIBBPF_ERRNO__RELOC; return -LIBBPF_ERRNO__RELOC;
...@@ -6374,8 +6418,22 @@ bpf_object__reloc_code(struct bpf_object *obj, struct bpf_program *main_prog, ...@@ -6374,8 +6418,22 @@ bpf_object__reloc_code(struct bpf_object *obj, struct bpf_program *main_prog,
* call always has imm = -1, but for static functions * call always has imm = -1, but for static functions
* relocation is against STT_SECTION and insn->imm * relocation is against STT_SECTION and insn->imm
* points to a start of a static function * points to a start of a static function
*
* for subprog addr relocation, the relo->sym_off + insn->imm is
* the byte offset in the corresponding section.
*/
sub_insn_idx = relo->sym_off / BPF_INSN_SZ + insn->imm + 1;
if (relo->type == RELO_CALL)
sub_insn_idx = relo->sym_off / BPF_INSN_SZ + insn->imm + 1;
else
sub_insn_idx = (relo->sym_off + insn->imm) / BPF_INSN_SZ;
} else if (insn_is_pseudo_func(insn)) {
/*
* RELO_SUBPROG_ADDR relo is always emitted even if both
* functions are in the same section, so it shouldn't reach here.
*/
pr_warn("prog '%s': missing subprog addr relo for insn #%zu\n",
prog->name, insn_idx);
return -LIBBPF_ERRNO__RELOC;
} else {
/* if subprogram call is to a static function within
* the same ELF section, there won't be any relocation
...
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2021 Facebook */
#include <test_progs.h>
#include <network_helpers.h>
#include "for_each_hash_map_elem.skel.h"
#include "for_each_array_map_elem.skel.h"
static unsigned int duration;
static void test_hash_map(void)
{
int i, err, hashmap_fd, max_entries, percpu_map_fd;
struct for_each_hash_map_elem *skel;
__u64 *percpu_valbuf = NULL;
__u32 key, num_cpus, retval;
__u64 val;
skel = for_each_hash_map_elem__open_and_load();
if (!ASSERT_OK_PTR(skel, "for_each_hash_map_elem__open_and_load"))
return;
hashmap_fd = bpf_map__fd(skel->maps.hashmap);
max_entries = bpf_map__max_entries(skel->maps.hashmap);
for (i = 0; i < max_entries; i++) {
key = i;
val = i + 1;
err = bpf_map_update_elem(hashmap_fd, &key, &val, BPF_ANY);
if (!ASSERT_OK(err, "map_update"))
goto out;
}
num_cpus = bpf_num_possible_cpus();
percpu_map_fd = bpf_map__fd(skel->maps.percpu_map);
percpu_valbuf = malloc(sizeof(__u64) * num_cpus);
if (!ASSERT_OK_PTR(percpu_valbuf, "percpu_valbuf"))
goto out;
key = 1;
for (i = 0; i < num_cpus; i++)
percpu_valbuf[i] = i + 1;
err = bpf_map_update_elem(percpu_map_fd, &key, percpu_valbuf, BPF_ANY);
if (!ASSERT_OK(err, "percpu_map_update"))
goto out;
err = bpf_prog_test_run(bpf_program__fd(skel->progs.test_pkt_access),
1, &pkt_v4, sizeof(pkt_v4), NULL, NULL,
&retval, &duration);
if (CHECK(err || retval, "ipv4", "err %d errno %d retval %d\n",
err, errno, retval))
goto out;
ASSERT_EQ(skel->bss->hashmap_output, 4, "hashmap_output");
ASSERT_EQ(skel->bss->hashmap_elems, max_entries, "hashmap_elems");
key = 1;
err = bpf_map_lookup_elem(hashmap_fd, &key, &val);
ASSERT_ERR(err, "hashmap_lookup");
ASSERT_EQ(skel->bss->percpu_called, 1, "percpu_called");
ASSERT_LT(skel->bss->cpu, num_cpus, "num_cpus");
ASSERT_EQ(skel->bss->percpu_map_elems, 1, "percpu_map_elems");
ASSERT_EQ(skel->bss->percpu_key, 1, "percpu_key");
ASSERT_EQ(skel->bss->percpu_val, skel->bss->cpu + 1, "percpu_val");
ASSERT_EQ(skel->bss->percpu_output, 100, "percpu_output");
out:
free(percpu_valbuf);
for_each_hash_map_elem__destroy(skel);
}
static void test_array_map(void)
{
__u32 key, num_cpus, max_entries, retval;
int i, arraymap_fd, percpu_map_fd, err;
struct for_each_array_map_elem *skel;
__u64 *percpu_valbuf = NULL;
__u64 val, expected_total;
skel = for_each_array_map_elem__open_and_load();
if (!ASSERT_OK_PTR(skel, "for_each_array_map_elem__open_and_load"))
return;
arraymap_fd = bpf_map__fd(skel->maps.arraymap);
expected_total = 0;
max_entries = bpf_map__max_entries(skel->maps.arraymap);
for (i = 0; i < max_entries; i++) {
key = i;
val = i + 1;
/* skip the last iteration for expected total */
if (i != max_entries - 1)
expected_total += val;
err = bpf_map_update_elem(arraymap_fd, &key, &val, BPF_ANY);
if (!ASSERT_OK(err, "map_update"))
goto out;
}
num_cpus = bpf_num_possible_cpus();
percpu_map_fd = bpf_map__fd(skel->maps.percpu_map);
percpu_valbuf = malloc(sizeof(__u64) * num_cpus);
if (!ASSERT_OK_PTR(percpu_valbuf, "percpu_valbuf"))
goto out;
key = 0;
for (i = 0; i < num_cpus; i++)
percpu_valbuf[i] = i + 1;
err = bpf_map_update_elem(percpu_map_fd, &key, percpu_valbuf, BPF_ANY);
if (!ASSERT_OK(err, "percpu_map_update"))
goto out;
err = bpf_prog_test_run(bpf_program__fd(skel->progs.test_pkt_access),
1, &pkt_v4, sizeof(pkt_v4), NULL, NULL,
&retval, &duration);
if (CHECK(err || retval, "ipv4", "err %d errno %d retval %d\n",
err, errno, retval))
goto out;
ASSERT_EQ(skel->bss->arraymap_output, expected_total, "array_output");
ASSERT_EQ(skel->bss->cpu + 1, skel->bss->percpu_val, "percpu_val");
out:
free(percpu_valbuf);
for_each_array_map_elem__destroy(skel);
}
void test_for_each(void)
{
if (test__start_subtest("hash_map"))
test_hash_map();
if (test__start_subtest("array_map"))
test_array_map();
}
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2021 Facebook */
#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
char _license[] SEC("license") = "GPL";
struct {
__uint(type, BPF_MAP_TYPE_ARRAY);
__uint(max_entries, 3);
__type(key, __u32);
__type(value, __u64);
} arraymap SEC(".maps");
struct {
__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
__uint(max_entries, 1);
__type(key, __u32);
__type(value, __u64);
} percpu_map SEC(".maps");
struct callback_ctx {
int output;
};
static __u64
check_array_elem(struct bpf_map *map, __u32 *key, __u64 *val,
struct callback_ctx *data)
{
data->output += *val;
if (*key == 1)
return 1; /* stop the iteration */
return 0;
}
__u32 cpu = 0;
__u64 percpu_val = 0;
static __u64
check_percpu_elem(struct bpf_map *map, __u32 *key, __u64 *val,
struct callback_ctx *data)
{
cpu = bpf_get_smp_processor_id();
percpu_val = *val;
return 0;
}
u32 arraymap_output = 0;
SEC("classifier")
int test_pkt_access(struct __sk_buff *skb)
{
struct callback_ctx data;
data.output = 0;
bpf_for_each_map_elem(&arraymap, check_array_elem, &data, 0);
arraymap_output = data.output;
bpf_for_each_map_elem(&percpu_map, check_percpu_elem, (void *)0, 0);
return 0;
}
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2021 Facebook */
#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
char _license[] SEC("license") = "GPL";
struct {
__uint(type, BPF_MAP_TYPE_HASH);
__uint(max_entries, 3);
__type(key, __u32);
__type(value, __u64);
} hashmap SEC(".maps");
struct {
__uint(type, BPF_MAP_TYPE_PERCPU_HASH);
__uint(max_entries, 1);
__type(key, __u32);
__type(value, __u64);
} percpu_map SEC(".maps");
struct callback_ctx {
struct __sk_buff *ctx;
int input;
int output;
};
static __u64
check_hash_elem(struct bpf_map *map, __u32 *key, __u64 *val,
struct callback_ctx *data)
{
struct __sk_buff *skb = data->ctx;
__u32 k;
__u64 v;
if (skb) {
k = *key;
v = *val;
if (skb->len == 10000 && k == 10 && v == 10)
data->output = 3; /* impossible path */
else
data->output = 4;
} else {
data->output = data->input;
bpf_map_delete_elem(map, key);
}
return 0;
}
__u32 cpu = 0;
__u32 percpu_called = 0;
__u32 percpu_key = 0;
__u64 percpu_val = 0;
int percpu_output = 0;
static __u64
check_percpu_elem(struct bpf_map *map, __u32 *key, __u64 *val,
struct callback_ctx *unused)
{
struct callback_ctx data;
percpu_called++;
cpu = bpf_get_smp_processor_id();
percpu_key = *key;
percpu_val = *val;
data.ctx = 0;
data.input = 100;
data.output = 0;
bpf_for_each_map_elem(&hashmap, check_hash_elem, &data, 0);
percpu_output = data.output;
return 0;
}
int hashmap_output = 0;
int hashmap_elems = 0;
int percpu_map_elems = 0;
SEC("classifier")
int test_pkt_access(struct __sk_buff *skb)
{
struct callback_ctx data;
data.ctx = skb;
data.input = 10;
data.output = 0;
hashmap_elems = bpf_for_each_map_elem(&hashmap, check_hash_elem, &data, 0);
hashmap_output = data.output;
percpu_map_elems = bpf_for_each_map_elem(&percpu_map, check_percpu_elem,
(void *)0, 0);
return 0;
}
...@@ -152,6 +152,17 @@ extern int test__join_cgroup(const char *path);
___ok; \
})
#define ASSERT_LT(actual, expected, name) ({ \
static int duration = 0; \
typeof(actual) ___act = (actual); \
typeof(expected) ___exp = (expected); \
bool ___ok = ___act < ___exp; \
CHECK(!___ok, (name), \
"unexpected %s: actual %lld >= expected %lld\n", \
(name), (long long)(___act), (long long)(___exp)); \
___ok; \
})
#define ASSERT_STREQ(actual, expected, name) ({ \
static int duration = 0; \
const char *___act = actual; \
...