Commit 7d08c2c9 authored by Andrii Nakryiko, committed by Daniel Borkmann

bpf: Refactor BPF_PROG_RUN_ARRAY family of macros into functions

Similar to BPF_PROG_RUN, turn BPF_PROG_RUN_ARRAY macros into proper functions
with all the same readability and maintainability benefits. Making them into
functions required shuffling around bpf_set_run_ctx/bpf_reset_run_ctx
functions. Also, explicitly specifying the type of the BPF prog run callback
required adjusting __bpf_prog_run_save_cb() to accept const void *, which is
cast internally to const struct sk_buff *.

Further, split out a cgroup-specific BPF_PROG_RUN_ARRAY_CG and
BPF_PROG_RUN_ARRAY_CG_FLAGS from the more generic BPF_PROG_RUN_ARRAY due to
the differences in bpf_run_ctx used for those two different use cases.

I think BPF_PROG_RUN_ARRAY_CG would benefit from further refactoring to accept
struct cgroup and enum bpf_attach_type instead of bpf_prog_array, fetching
cgrp->bpf.effective[type] and RCU-dereferencing it internally (see the sketch
below). But that would require including include/linux/cgroup-defs.h, which I
wasn't sure would be ok with everyone.
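
As a rough illustration only (not part of this patch; the helper name below is
hypothetical), such a cgroup-aware variant could be a thin wrapper that fetches
the effective array itself and leaves the RCU dereference to
BPF_PROG_RUN_ARRAY_CG():

  static __always_inline u32
  bpf_prog_run_array_cg(struct cgroup *cgrp, enum bpf_attach_type type,
			const void *ctx, bpf_prog_run_fn run_prog)
  {
	/* cgrp->bpf.effective[type] is an __rcu pointer;
	 * BPF_PROG_RUN_ARRAY_CG() does the rcu_dereference() internally.
	 */
	return BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[type], ctx, run_prog);
  }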

The remaining generic BPF_PROG_RUN_ARRAY function will be extended to pass
through a user-provided context value in the next patch.
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/bpf/20210815070609.987780-3-andrii@kernel.org
parent fb7dd8bc
@@ -1146,67 +1146,116 @@ struct bpf_run_ctx {};

 struct bpf_cg_run_ctx {
 	struct bpf_run_ctx run_ctx;
-	struct bpf_prog_array_item *prog_item;
+	const struct bpf_prog_array_item *prog_item;
 };

+static inline struct bpf_run_ctx *bpf_set_run_ctx(struct bpf_run_ctx *new_ctx)
+{
+	struct bpf_run_ctx *old_ctx = NULL;
+
+#ifdef CONFIG_BPF_SYSCALL
+	old_ctx = current->bpf_ctx;
+	current->bpf_ctx = new_ctx;
+#endif
+	return old_ctx;
+}
+
+static inline void bpf_reset_run_ctx(struct bpf_run_ctx *old_ctx)
+{
+#ifdef CONFIG_BPF_SYSCALL
+	current->bpf_ctx = old_ctx;
+#endif
+}
+
 /* BPF program asks to bypass CAP_NET_BIND_SERVICE in bind. */
 #define BPF_RET_BIND_NO_CAP_NET_BIND_SERVICE	(1 << 0)
 /* BPF program asks to set CN on the packet. */
 #define BPF_RET_SET_CN				(1 << 0)

-#define BPF_PROG_RUN_ARRAY_FLAGS(array, ctx, func, ret_flags)		\
-	({								\
-		struct bpf_prog_array_item *_item;			\
-		struct bpf_prog *_prog;					\
-		struct bpf_prog_array *_array;				\
-		struct bpf_run_ctx *old_run_ctx;			\
-		struct bpf_cg_run_ctx run_ctx;				\
-		u32 _ret = 1;						\
-		u32 func_ret;						\
-		migrate_disable();					\
-		rcu_read_lock();					\
-		_array = rcu_dereference(array);			\
-		_item = &_array->items[0];				\
-		old_run_ctx = bpf_set_run_ctx(&run_ctx.run_ctx);	\
-		while ((_prog = READ_ONCE(_item->prog))) {		\
-			run_ctx.prog_item = _item;			\
-			func_ret = func(_prog, ctx);			\
-			_ret &= (func_ret & 1);				\
-			*(ret_flags) |= (func_ret >> 1);		\
-			_item++;					\
-		}							\
-		bpf_reset_run_ctx(old_run_ctx);				\
-		rcu_read_unlock();					\
-		migrate_enable();					\
-		_ret;							\
-	})
-
-#define __BPF_PROG_RUN_ARRAY(array, ctx, func, check_non_null, set_cg_storage)	\
-	({						\
-		struct bpf_prog_array_item *_item;	\
-		struct bpf_prog *_prog;			\
-		struct bpf_prog_array *_array;		\
-		struct bpf_run_ctx *old_run_ctx;	\
-		struct bpf_cg_run_ctx run_ctx;		\
-		u32 _ret = 1;				\
-		migrate_disable();			\
-		rcu_read_lock();			\
-		_array = rcu_dereference(array);	\
-		if (unlikely(check_non_null && !_array))\
-			goto _out;			\
-		_item = &_array->items[0];		\
-		old_run_ctx = bpf_set_run_ctx(&run_ctx.run_ctx);\
-		while ((_prog = READ_ONCE(_item->prog))) {	\
-			run_ctx.prog_item = _item;	\
-			_ret &= func(_prog, ctx);	\
-			_item++;			\
-		}					\
-		bpf_reset_run_ctx(old_run_ctx);		\
-_out:							\
-		rcu_read_unlock();			\
-		migrate_enable();			\
-		_ret;					\
-	})
+typedef u32 (*bpf_prog_run_fn)(const struct bpf_prog *prog, const void *ctx);
+
+static __always_inline u32
+BPF_PROG_RUN_ARRAY_CG_FLAGS(const struct bpf_prog_array __rcu *array_rcu,
+			    const void *ctx, bpf_prog_run_fn run_prog,
+			    u32 *ret_flags)
+{
+	const struct bpf_prog_array_item *item;
+	const struct bpf_prog *prog;
+	const struct bpf_prog_array *array;
+	struct bpf_run_ctx *old_run_ctx;
+	struct bpf_cg_run_ctx run_ctx;
+	u32 ret = 1;
+	u32 func_ret;
+
+	migrate_disable();
+	rcu_read_lock();
+	array = rcu_dereference(array_rcu);
+	item = &array->items[0];
+	old_run_ctx = bpf_set_run_ctx(&run_ctx.run_ctx);
+	while ((prog = READ_ONCE(item->prog))) {
+		run_ctx.prog_item = item;
+		func_ret = run_prog(prog, ctx);
+		ret &= (func_ret & 1);
+		*(ret_flags) |= (func_ret >> 1);
+		item++;
+	}
+	bpf_reset_run_ctx(old_run_ctx);
+	rcu_read_unlock();
+	migrate_enable();
+	return ret;
+}
+
+static __always_inline u32
+BPF_PROG_RUN_ARRAY_CG(const struct bpf_prog_array __rcu *array_rcu,
+		      const void *ctx, bpf_prog_run_fn run_prog)
+{
+	const struct bpf_prog_array_item *item;
+	const struct bpf_prog *prog;
+	const struct bpf_prog_array *array;
+	struct bpf_run_ctx *old_run_ctx;
+	struct bpf_cg_run_ctx run_ctx;
+	u32 ret = 1;
+
+	migrate_disable();
+	rcu_read_lock();
+	array = rcu_dereference(array_rcu);
+	item = &array->items[0];
+	old_run_ctx = bpf_set_run_ctx(&run_ctx.run_ctx);
+	while ((prog = READ_ONCE(item->prog))) {
+		run_ctx.prog_item = item;
+		ret &= run_prog(prog, ctx);
+		item++;
+	}
+	bpf_reset_run_ctx(old_run_ctx);
+	rcu_read_unlock();
+	migrate_enable();
+	return ret;
+}
+
+static __always_inline u32
+BPF_PROG_RUN_ARRAY(const struct bpf_prog_array __rcu *array_rcu,
+		   const void *ctx, bpf_prog_run_fn run_prog)
+{
+	const struct bpf_prog_array_item *item;
+	const struct bpf_prog *prog;
+	const struct bpf_prog_array *array;
+	u32 ret = 1;
+
+	migrate_disable();
+	rcu_read_lock();
+	array = rcu_dereference(array_rcu);
+	if (unlikely(!array))
+		goto out;
+	item = &array->items[0];
+	while ((prog = READ_ONCE(item->prog))) {
+		ret &= run_prog(prog, ctx);
+		item++;
+	}
+out:
+	rcu_read_unlock();
+	migrate_enable();
+	return ret;
+}

 /* To be used by __cgroup_bpf_run_filter_skb for EGRESS BPF progs
  * so BPF programs can request cwr for TCP packets.
@@ -1235,7 +1284,7 @@ _out: \
 		u32 _flags = 0;					\
 		bool _cn;					\
 		u32 _ret;					\
-		_ret = BPF_PROG_RUN_ARRAY_FLAGS(array, ctx, func, &_flags);	\
+		_ret = BPF_PROG_RUN_ARRAY_CG_FLAGS(array, ctx, func, &_flags);	\
 		_cn = _flags & BPF_RET_SET_CN;			\
 		if (_ret)					\
 			_ret = (_cn ? NET_XMIT_CN : NET_XMIT_SUCCESS);	\
@@ -1244,12 +1293,6 @@ _out: \
 		_ret;						\
 	})

-#define BPF_PROG_RUN_ARRAY(array, ctx, func)		\
-	__BPF_PROG_RUN_ARRAY(array, ctx, func, false, true)
-
-#define BPF_PROG_RUN_ARRAY_CHECK(array, ctx, func)	\
-	__BPF_PROG_RUN_ARRAY(array, ctx, func, true, false)
-
 #ifdef CONFIG_BPF_SYSCALL
 DECLARE_PER_CPU(int, bpf_prog_active);
 extern struct mutex bpf_stats_enabled_mutex;
@@ -1284,20 +1327,6 @@ static inline void bpf_enable_instrumentation(void)
 	migrate_enable();
 }

-static inline struct bpf_run_ctx *bpf_set_run_ctx(struct bpf_run_ctx *new_ctx)
-{
-	struct bpf_run_ctx *old_ctx;
-
-	old_ctx = current->bpf_ctx;
-	current->bpf_ctx = new_ctx;
-
-	return old_ctx;
-}
-
-static inline void bpf_reset_run_ctx(struct bpf_run_ctx *old_ctx)
-{
-	current->bpf_ctx = old_ctx;
-}
-
 extern const struct file_operations bpf_map_fops;
 extern const struct file_operations bpf_prog_fops;
 extern const struct file_operations bpf_iter_fops;
...
@@ -723,7 +723,7 @@ static inline void bpf_restore_data_end(
 	cb->data_end = saved_data_end;
 }

-static inline u8 *bpf_skb_cb(struct sk_buff *skb)
+static inline u8 *bpf_skb_cb(const struct sk_buff *skb)
 {
 	/* eBPF programs may read/write skb->cb[] area to transfer meta
 	 * data between tail calls. Since this also needs to work with
@@ -744,8 +744,9 @@ static inline u8 *bpf_skb_cb(struct sk_buff *skb)

 /* Must be invoked with migration disabled */
 static inline u32 __bpf_prog_run_save_cb(const struct bpf_prog *prog,
-					 struct sk_buff *skb)
+					 const void *ctx)
 {
+	const struct sk_buff *skb = ctx;
 	u8 *cb_data = bpf_skb_cb(skb);
 	u8 cb_saved[BPF_SKB_CB_LEN];
 	u32 res;
...
@@ -1012,7 +1012,7 @@ int __cgroup_bpf_run_filter_skb(struct sock *sk,
 		ret = BPF_PROG_CGROUP_INET_EGRESS_RUN_ARRAY(
 			cgrp->bpf.effective[type], skb, __bpf_prog_run_save_cb);
 	} else {
-		ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], skb,
+		ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[type], skb,
					    __bpf_prog_run_save_cb);
 		ret = (ret == 1 ? 0 : -EPERM);
 	}
@@ -1043,7 +1043,7 @@ int __cgroup_bpf_run_filter_sk(struct sock *sk,
 	struct cgroup *cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
 	int ret;

-	ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], sk, bpf_prog_run);
+	ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[type], sk, bpf_prog_run);
 	return ret == 1 ? 0 : -EPERM;
 }
 EXPORT_SYMBOL(__cgroup_bpf_run_filter_sk);
@@ -1090,7 +1090,7 @@ int __cgroup_bpf_run_filter_sock_addr(struct sock *sk,
 	}

 	cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
-	ret = BPF_PROG_RUN_ARRAY_FLAGS(cgrp->bpf.effective[type], &ctx,
+	ret = BPF_PROG_RUN_ARRAY_CG_FLAGS(cgrp->bpf.effective[type], &ctx,
				          bpf_prog_run, flags);

 	return ret == 1 ? 0 : -EPERM;
@@ -1120,7 +1120,7 @@ int __cgroup_bpf_run_filter_sock_ops(struct sock *sk,
 	struct cgroup *cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
 	int ret;

-	ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], sock_ops,
+	ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[type], sock_ops,
				    bpf_prog_run);
 	return ret == 1 ? 0 : -EPERM;
 }
@@ -1139,7 +1139,7 @@ int __cgroup_bpf_check_dev_permission(short dev_type, u32 major, u32 minor,

 	rcu_read_lock();
 	cgrp = task_dfl_cgroup(current);
-	allow = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], &ctx,
+	allow = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[type], &ctx,
				      bpf_prog_run);
 	rcu_read_unlock();

@@ -1271,7 +1271,7 @@ int __cgroup_bpf_run_filter_sysctl(struct ctl_table_header *head,

 	rcu_read_lock();
 	cgrp = task_dfl_cgroup(current);
-	ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], &ctx, bpf_prog_run);
+	ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[type], &ctx, bpf_prog_run);
 	rcu_read_unlock();

 	kfree(ctx.cur_val);
@@ -1385,7 +1385,7 @@ int __cgroup_bpf_run_filter_setsockopt(struct sock *sk, int *level,
 	}

 	lock_sock(sk);
-	ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[BPF_CGROUP_SETSOCKOPT],
+	ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[BPF_CGROUP_SETSOCKOPT],
				    &ctx, bpf_prog_run);
 	release_sock(sk);
@@ -1495,7 +1495,7 @@ int __cgroup_bpf_run_filter_getsockopt(struct sock *sk, int level,
 	}

 	lock_sock(sk);
-	ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[BPF_CGROUP_GETSOCKOPT],
+	ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[BPF_CGROUP_GETSOCKOPT],
				    &ctx, bpf_prog_run);
 	release_sock(sk);
@@ -1556,7 +1556,7 @@ int __cgroup_bpf_run_filter_getsockopt_kern(struct sock *sk, int level,
 	 * be called if that data shouldn't be "exported".
 	 */

-	ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[BPF_CGROUP_GETSOCKOPT],
+	ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[BPF_CGROUP_GETSOCKOPT],
				    &ctx, bpf_prog_run);
 	if (!ret)
 		return -EPERM;
...
@@ -124,7 +124,7 @@ unsigned int trace_call_bpf(struct trace_event_call *call, void *ctx)
 	 * out of events when it was updated in between this and the
	 * rcu_dereference() which is accepted risk.
	 */
-	ret = BPF_PROG_RUN_ARRAY_CHECK(call->prog_array, ctx, bpf_prog_run);
+	ret = BPF_PROG_RUN_ARRAY(call->prog_array, ctx, bpf_prog_run);
 out:
	__this_cpu_dec(bpf_prog_active);
...