Commit 69c087ba authored by Yonghong Song, committed by Alexei Starovoitov

bpf: Add bpf_for_each_map_elem() helper

The bpf_for_each_map_elem() helper is introduced; it iterates over
all map elements with a callback function. The helper signature
looks like
  long bpf_for_each_map_elem(map, callback_fn, callback_ctx, flags)
and for each map element, callback_fn is called. For a hash map,
for example, the callback signature may look like
  long callback_fn(map, key, val, callback_ctx)
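
As a rough BPF-side sketch of that shape (the map layout, section name
and function names below are made up, and a libbpf/clang toolchain that
already knows the new helper is assumed):

  #include <linux/bpf.h>
  #include <bpf/bpf_helpers.h>

  struct {
          __uint(type, BPF_MAP_TYPE_HASH);
          __uint(max_entries, 64);
          __type(key, __u32);
          __type(value, __u64);
  } hashmap SEC(".maps");

  /* Called once per element; key/value types follow the map definition. */
  static long check_elem(struct bpf_map *map, __u32 *key, __u64 *val, void *ctx)
  {
          return 0;       /* 0: keep iterating */
  }

  SEC("classifier")
  int iterate_map(struct __sk_buff *skb)
  {
          /* callback_ctx may be NULL; flags must currently be 0 */
          bpf_for_each_map_elem(&hashmap, check_elem, NULL, 0);
          return 0;
  }

  char _license[] SEC("license") = "GPL";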

There are two known use cases for this. One is from upstream ([1]),
where a for_each_map_elem helper may help implement a timeout
mechanism in a more generic way. Another is from our internal
discussion of a firewall use case where a map contains all the
rules: the packet data can be compared against these rules to
decide whether to allow or deny the packet.

For array maps, users can already use a bounded loop to traverse
elements; using this helper avoids the bounded loop. For other map
types (e.g., hash maps) where a bounded loop is hard or impossible
to use, this helper provides a convenient way to operate on all
elements.
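
To make the contrast concrete, here is a hedged sketch for an array
map (arraymap, add_elem and sum_array are made-up names; the
bounded-loop variant relies on the verifier accepting the constant
loop bound):

  #include <linux/bpf.h>
  #include <bpf/bpf_helpers.h>

  struct {
          __uint(type, BPF_MAP_TYPE_ARRAY);
          __uint(max_entries, 32);
          __type(key, __u32);
          __type(value, __u64);
  } arraymap SEC(".maps");

  static long add_elem(struct bpf_map *map, __u32 *key, __u64 *val, __u64 *sum)
  {
          *sum += *val;
          return 0;
  }

  SEC("classifier")
  int sum_array(struct __sk_buff *skb)
  {
          __u64 sum = 0;
          __u32 i;

          /* Bounded loop: needs a constant bound the verifier can track. */
          for (i = 0; i < 32; i++) {
                  __u64 *val = bpf_map_lookup_elem(&arraymap, &i);

                  if (val)
                          sum += *val;
          }

          /* Same traversal with the new helper; no explicit bound needed. */
          sum = 0;
          bpf_for_each_map_elem(&arraymap, add_elem, &sum, 0);
          return 0;
  }

  char _license[] SEC("license") = "GPL";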

Besides the map and the map element, a callback_ctx, allocated on
the caller's stack, is also passed to callback_fn. This callback_ctx
argument can provide additional input and allows writing results
back to the caller's stack as output.
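
For instance (still a sketch, reusing the hashmap and includes from
the first snippet; struct callback_ctx and its fields are invented
for illustration), the ctx can carry both an input and a result:

  /* Lives on the bpf program's stack and is passed as callback_ctx. */
  struct callback_ctx {
          __u64 target;   /* input: value to look for */
          __u32 matches;  /* output: filled in by the callback */
  };

  static long count_matches(struct bpf_map *map, __u32 *key, __u64 *val,
                            struct callback_ctx *data)
  {
          if (*val == data->target)
                  data->matches++;        /* result written to caller stack */
          return 0;                       /* visit every element */
  }

  SEC("classifier")
  int count_value(struct __sk_buff *skb)
  {
          struct callback_ctx data = { .target = 42, .matches = 0 };

          bpf_for_each_map_elem(&hashmap, count_matches, &data, 0);
          /* data.matches now holds the number of matching elements */
          return 0;
  }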

If callback_fn returns 0, the helper moves on to the next element,
if any. If callback_fn returns 1, the helper stops iterating and
returns to the bpf program. Other return values are not used for
now.
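
A callback that stops early might then look like this (again
illustrative, plugging into struct callback_ctx from the previous
sketch):

  /* Stop at the first element whose value matches data->target. */
  static long find_first(struct bpf_map *map, __u32 *key, __u64 *val,
                         struct callback_ctx *data)
  {
          if (*val == data->target) {
                  data->matches = 1;      /* flag the hit for the caller */
                  return 1;               /* 1: stop, return to the program */
          }
          return 0;                       /* 0: continue with next element */
  }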

Currently, this helper is only available with the jit. It should be
possible to make it work with the interpreter with some effort, but
I leave that as future work.

[1]: https://lore.kernel.org/bpf/20210122205415.113822-1-xiyou.wangcong@gmail.com/

Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20210226204925.3884923-1-yhs@fb.com
parent 282a0f46
@@ -39,6 +39,7 @@ struct bpf_local_storage;
struct bpf_local_storage_map;
struct kobject;
struct mem_cgroup;
struct bpf_func_state;
extern struct idr btf_idr;
extern spinlock_t btf_idr_lock;
@@ -129,6 +130,13 @@ struct bpf_map_ops {
bool (*map_meta_equal)(const struct bpf_map *meta0,
const struct bpf_map *meta1);
int (*map_set_for_each_callback_args)(struct bpf_verifier_env *env,
struct bpf_func_state *caller,
struct bpf_func_state *callee);
int (*map_for_each_callback)(struct bpf_map *map, void *callback_fn,
void *callback_ctx, u64 flags);
/* BTF name and id of struct allocated by map_alloc */
const char * const map_btf_name;
int *map_btf_id;
@@ -295,6 +303,8 @@ enum bpf_arg_type {
ARG_CONST_ALLOC_SIZE_OR_ZERO, /* number of allocated bytes requested */
ARG_PTR_TO_BTF_ID_SOCK_COMMON, /* pointer to in-kernel sock_common or bpf-mirrored bpf_sock */
ARG_PTR_TO_PERCPU_BTF_ID, /* pointer to in-kernel percpu type */
ARG_PTR_TO_FUNC, /* pointer to a bpf program function */
ARG_PTR_TO_STACK_OR_NULL, /* pointer to stack or NULL */
__BPF_ARG_TYPE_MAX,
};
@@ -411,6 +421,8 @@ enum bpf_reg_type {
PTR_TO_RDWR_BUF, /* reg points to a read/write buffer */
PTR_TO_RDWR_BUF_OR_NULL, /* reg points to a read/write buffer or NULL */
PTR_TO_PERCPU_BTF_ID, /* reg points to a percpu kernel variable */
PTR_TO_FUNC, /* reg points to a bpf program function */
PTR_TO_MAP_KEY, /* reg points to a map element key */
};
/* The information passed from prog-specific *_is_valid_access
@@ -1887,6 +1899,7 @@ extern const struct bpf_func_proto bpf_sock_from_file_proto;
extern const struct bpf_func_proto bpf_get_socket_ptr_cookie_proto;
extern const struct bpf_func_proto bpf_task_storage_get_proto;
extern const struct bpf_func_proto bpf_task_storage_delete_proto;
extern const struct bpf_func_proto bpf_for_each_map_elem_proto;
const struct bpf_func_proto *bpf_tracing_func_proto(
enum bpf_func_id func_id, const struct bpf_prog *prog);
......
@@ -68,6 +68,8 @@ struct bpf_reg_state {
unsigned long raw1;
unsigned long raw2;
} raw;
u32 subprogno; /* for PTR_TO_FUNC */
};
/* For PTR_TO_PACKET, used to find other pointers with the same variable
* offset, so they can share range knowledge.
@@ -204,6 +206,7 @@ struct bpf_func_state {
int acquired_refs;
struct bpf_reference_state *refs;
int allocated_stack;
bool in_callback_fn;
struct bpf_stack_state *stack;
};
......
@@ -393,6 +393,15 @@ enum bpf_link_type {
* is struct/union.
*/
#define BPF_PSEUDO_BTF_ID 3
/* insn[0].src_reg: BPF_PSEUDO_FUNC
* insn[0].imm: insn offset to the func
* insn[1].imm: 0
* insn[0].off: 0
* insn[1].off: 0
* ldimm64 rewrite: address of the function
* verifier type: PTR_TO_FUNC.
*/
#define BPF_PSEUDO_FUNC 4
/* when bpf_call->src_reg == BPF_PSEUDO_CALL, bpf_call->imm == pc-relative
* offset to another bpf function
@@ -3909,6 +3918,34 @@ union bpf_attr {
* * **BPF_MTU_CHK_RET_FRAG_NEEDED**
* * **BPF_MTU_CHK_RET_SEGS_TOOBIG**
*
* long bpf_for_each_map_elem(struct bpf_map *map, void *callback_fn, void *callback_ctx, u64 flags)
* Description
* For each element in **map**, call **callback_fn** function with
* **map**, **callback_ctx** and other map-specific parameters.
* The **callback_fn** should be a static function and
* the **callback_ctx** should be a pointer to the stack.
* The **flags** is used to control certain aspects of the helper.
* Currently, the **flags** must be 0.
*
* The following are a list of supported map types and their
* respective expected callback signatures:
*
* BPF_MAP_TYPE_HASH, BPF_MAP_TYPE_PERCPU_HASH,
* BPF_MAP_TYPE_LRU_HASH, BPF_MAP_TYPE_LRU_PERCPU_HASH,
* BPF_MAP_TYPE_ARRAY, BPF_MAP_TYPE_PERCPU_ARRAY
*
* long (\*callback_fn)(struct bpf_map \*map, const void \*key, void \*value, void \*ctx);
*
* For per_cpu maps, the map_value is the value on the cpu where the
* bpf_prog is running.
*
* If **callback_fn** return 0, the helper will continue to the next
* element. If return value is 1, the helper will skip the rest of
* elements and return. Other return values are not used now.
*
* Return
* The number of traversed map elements for success, **-EINVAL** for
* invalid **flags**.
*/
#define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \
@@ -4075,6 +4112,7 @@ union bpf_attr {
FN(ima_inode_hash), \
FN(sock_from_file), \
FN(check_mtu), \
FN(for_each_map_elem), \
/* */
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
......
@@ -675,3 +675,19 @@ int bpf_iter_run_prog(struct bpf_prog *prog, void *ctx)
*/
return ret == 0 ? 0 : -EAGAIN;
}
BPF_CALL_4(bpf_for_each_map_elem, struct bpf_map *, map, void *, callback_fn,
void *, callback_ctx, u64, flags)
{
return map->ops->map_for_each_callback(map, callback_fn, callback_ctx, flags);
}
const struct bpf_func_proto bpf_for_each_map_elem_proto = {
.func = bpf_for_each_map_elem,
.gpl_only = false,
.ret_type = RET_INTEGER,
.arg1_type = ARG_CONST_MAP_PTR,
.arg2_type = ARG_PTR_TO_FUNC,
.arg3_type = ARG_PTR_TO_STACK_OR_NULL,
.arg4_type = ARG_ANYTHING,
};
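
For context, the per-map-type implementations live in map code that
is not shown in this excerpt; the helper above only dispatches
through ->map_for_each_callback. A much simplified, illustrative
sketch of what an array-map implementation could look like (it
ignores per-cpu maps, index masking and preemption handling; the
bpf_callback_fn_t typedef is invented for the sketch, and callback_fn
is the jited callback the verifier has already checked):

  typedef u64 (*bpf_callback_fn_t)(u64, u64, u64, u64, u64);

  static long bpf_for_each_array_elem(struct bpf_map *map, void *callback_fn,
                                      void *callback_ctx, u64 flags)
  {
          struct bpf_array *array = container_of(map, struct bpf_array, map);
          bpf_callback_fn_t fn = (bpf_callback_fn_t)callback_fn;
          u32 i, key, num_elems = 0;
          void *val;
          u64 ret;

          if (flags != 0)
                  return -EINVAL;

          for (i = 0; i < map->max_entries; i++) {
                  val = array->value + (u64)array->elem_size * i;
                  key = i;
                  num_elems++;
                  ret = fn((u64)(long)map, (u64)(long)&key,
                           (u64)(long)val, (u64)(long)callback_ctx, 0);
                  if (ret)        /* callback returned 1: stop early */
                          break;
          }
          return num_elems;       /* number of traversed elements */
  }
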
@@ -708,6 +708,8 @@ bpf_base_func_proto(enum bpf_func_id func_id)
return &bpf_ringbuf_discard_proto;
case BPF_FUNC_ringbuf_query:
return &bpf_ringbuf_query_proto;
case BPF_FUNC_for_each_map_elem:
return &bpf_for_each_map_elem_proto;
default:
break;
}
......
@@ -1371,6 +1371,8 @@ bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_task_storage_get_proto;
case BPF_FUNC_task_storage_delete:
return &bpf_task_storage_delete_proto;
case BPF_FUNC_for_each_map_elem:
return &bpf_for_each_map_elem_proto;
default:
return NULL;
}
......
@@ -393,6 +393,15 @@ enum bpf_link_type {
* is struct/union.
*/
#define BPF_PSEUDO_BTF_ID 3
/* insn[0].src_reg: BPF_PSEUDO_FUNC
* insn[0].imm: insn offset to the func
* insn[1].imm: 0
* insn[0].off: 0
* insn[1].off: 0
* ldimm64 rewrite: address of the function
* verifier type: PTR_TO_FUNC.
*/
#define BPF_PSEUDO_FUNC 4
/* when bpf_call->src_reg == BPF_PSEUDO_CALL, bpf_call->imm == pc-relative
* offset to another bpf function
@@ -3909,6 +3918,34 @@ union bpf_attr {
* * **BPF_MTU_CHK_RET_FRAG_NEEDED**
* * **BPF_MTU_CHK_RET_SEGS_TOOBIG**
*
* long bpf_for_each_map_elem(struct bpf_map *map, void *callback_fn, void *callback_ctx, u64 flags)
* Description
* For each element in **map**, call **callback_fn** function with
* **map**, **callback_ctx** and other map-specific parameters.
* The **callback_fn** should be a static function and
* the **callback_ctx** should be a pointer to the stack.
* The **flags** is used to control certain aspects of the helper.
* Currently, the **flags** must be 0.
*
* The following are a list of supported map types and their
* respective expected callback signatures:
*
* BPF_MAP_TYPE_HASH, BPF_MAP_TYPE_PERCPU_HASH,
* BPF_MAP_TYPE_LRU_HASH, BPF_MAP_TYPE_LRU_PERCPU_HASH,
* BPF_MAP_TYPE_ARRAY, BPF_MAP_TYPE_PERCPU_ARRAY
*
* long (\*callback_fn)(struct bpf_map \*map, const void \*key, void \*value, void \*ctx);
*
* For per_cpu maps, the map_value is the value on the cpu where the
* bpf_prog is running.
*
* If **callback_fn** return 0, the helper will continue to the next
* element. If return value is 1, the helper will skip the rest of
* elements and return. Other return values are not used now.
*
* Return
* The number of traversed map elements for success, **-EINVAL** for
* invalid **flags**.
*/
#define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \
@@ -4075,6 +4112,7 @@ union bpf_attr {
FN(ima_inode_hash), \
FN(sock_from_file), \
FN(check_mtu), \
FN(for_each_map_elem), \
/* */
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
......