Commit 2a1aff60 authored by Alexei Starovoitov's avatar Alexei Starovoitov

Merge branch 'Introduce unstable CT lookup helpers'

Kumar Kartikeya says:

====================

This series adds unstable conntrack lookup helpers using BPF kfunc support. The
patch adding the lookup helper is based on Maxim's recent patch, to aid in
rebasing their series on top of this, all adjusted to work with module kfuncs [0].

  [0]: https://lore.kernel.org/bpf/20211019144655.3483197-8-maximmi@nvidia.com

To enable returning a reference to struct nf_conn, the verifier is extended to
support reference tracking for PTR_TO_BTF_ID, and kfunc is extended with support
for working as acquire/release functions, similar to existing BPF helpers. kfunc
returning pointer (limited to PTR_TO_BTF_ID in the kernel) can also return a
PTR_TO_BTF_ID_OR_NULL now, typically needed when acquiring a resource can fail.
kfunc can also receive PTR_TO_CTX and PTR_TO_MEM (with some limitations) as
arguments now. There is also support for passing a mem, len pair as argument
to kfunc now. In such cases, passing pointer to unsized type (void) is also
permitted.

Please see individual commits for details.

Changelog:
----------
v7 -> v8:
v7: https://lore.kernel.org/bpf/20220111180428.931466-1-memxor@gmail.com

 * Move enum btf_kfunc_hook to btf.c (Alexei)
 * Drop verbose log for unlikely failure case in __find_kfunc_desc_btf (Alexei)
 * Remove unnecessary barrier in register_btf_kfunc_id_set (Alexei)
 * Switch macro in bpf_nf test to __always_inline function (Alexei)

v6 -> v7:
v6: https://lore.kernel.org/bpf/20220102162115.1506833-1-memxor@gmail.com

 * Drop try_module_get_live patch, use flag in btf_module struct (Alexei)
 * Add comments and expand commit message detailing why we have to concatenate
   and sort vmlinux kfunc BTF ID sets (Alexei)
 * Use bpf_testmod for testing btf_try_get_module race (Alexei)
 * Use bpf_prog_type for both btf_kfunc_id_set_contains and
   register_btf_kfunc_id_set calls (Alexei)
 * In case of module set registration, directly assign set (Alexei)
 * Add CONFIG_USERFAULTFD=y to selftest config
 * Fix other nits

v5 -> v6:
v5: https://lore.kernel.org/bpf/20211230023705.3860970-1-memxor@gmail.com

 * Fix for a bug in btf_try_get_module leading to use-after-free
 * Drop *kallsyms_on_each_symbol loop, reinstate register_btf_kfunc_id_set (Alexei)
 * btf_free_kfunc_set_tab now takes struct btf, and handles resetting tab to NULL
 * Check return value btf_name_by_offset for param_name
 * Instead of using tmp_set, use btf->kfunc_set_tab directly, and simplify cleanup

v4 -> v5:
v4: https://lore.kernel.org/bpf/20211217015031.1278167-1-memxor@gmail.com

 * Move nf_conntrack helpers code to its own separate file (Toke, Pablo)
 * Remove verifier callbacks, put btf_id_sets in struct btf (Alexei)
  * Convert the in-kernel users away from the old API
 * Change len__ prefix convention to __sz suffix (Alexei)
 * Drop parent_ref_obj_id patch (Alexei)

v3 -> v4:
v3: https://lore.kernel.org/bpf/20211210130230.4128676-1-memxor@gmail.com

 * Guard unstable CT helpers with CONFIG_DEBUG_INFO_BTF_MODULES
 * Move addition of prog_test test kfuncs to selftest commit
 * Move negative kfunc tests to test_verifier suite
 * Limit struct nesting depth to 4, which should be enough for now

v2 -> v3:
v2: https://lore.kernel.org/bpf/20211209170929.3485242-1-memxor@gmail.com

 * Fix build error for !CONFIG_BPF_SYSCALL (Patchwork)

RFC v1 -> v2:
v1: https://lore.kernel.org/bpf/20211030144609.263572-1-memxor@gmail.com

 * Limit PTR_TO_MEM support to pointer to scalar, or struct with scalars (Alexei)
 * Use btf_id_set for checking acquire, release, ret type null (Alexei)
 * Introduce opts struct for CT helpers, move int err parameter to it
 * Add l4proto as parameter to CT helper's opts, remove separate tcp/udp helpers
 * Add support for mem, len argument pair to kfunc
 * Allow void * as pointer type for mem, len argument pair
 * Extend selftests to cover new additions to kfuncs
 * Copy ref_obj_id to PTR_TO_BTF_ID dst_reg on btf_struct_access, test it
 * Fix other misc nits, bugs, and expand commit messages
====================
Signed-off-by: default avatarAlexei Starovoitov <ast@kernel.org>
parents e80f2a0d 46565696
......@@ -573,7 +573,6 @@ struct bpf_verifier_ops {
const struct btf_type *t, int off, int size,
enum bpf_access_type atype,
u32 *next_btf_id);
bool (*check_kfunc_call)(u32 kfunc_btf_id, struct module *owner);
};
struct bpf_prog_offload_ops {
......@@ -1719,7 +1718,6 @@ int bpf_prog_test_run_raw_tp(struct bpf_prog *prog,
int bpf_prog_test_run_sk_lookup(struct bpf_prog *prog,
const union bpf_attr *kattr,
union bpf_attr __user *uattr);
bool bpf_prog_test_check_kfunc_call(u32 kfunc_id, struct module *owner);
bool btf_ctx_access(int off, int size, enum bpf_access_type type,
const struct bpf_prog *prog,
struct bpf_insn_access_aux *info);
......@@ -1971,12 +1969,6 @@ static inline int bpf_prog_test_run_sk_lookup(struct bpf_prog *prog,
return -ENOTSUPP;
}
static inline bool bpf_prog_test_check_kfunc_call(u32 kfunc_id,
struct module *owner)
{
return false;
}
static inline void bpf_map_put(struct bpf_map *map)
{
}
......
......@@ -521,6 +521,8 @@ bpf_prog_offload_remove_insns(struct bpf_verifier_env *env, u32 off, u32 cnt);
int check_ctx_reg(struct bpf_verifier_env *env,
const struct bpf_reg_state *reg, int regno);
int check_kfunc_mem_size_reg(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
u32 regno);
int check_mem_reg(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
u32 regno, u32 mem_size);
......@@ -564,4 +566,9 @@ static inline u32 type_flag(u32 type)
return type & ~BPF_BASE_TYPE_MASK;
}
static inline enum bpf_prog_type resolve_prog_type(struct bpf_prog *prog)
{
return prog->aux->dst_prog ? prog->aux->dst_prog->type : prog->type;
}
#endif /* _LINUX_BPF_VERIFIER_H */
......@@ -12,11 +12,33 @@
#define BTF_TYPE_EMIT(type) ((void)(type *)0)
#define BTF_TYPE_EMIT_ENUM(enum_val) ((void)enum_val)
enum btf_kfunc_type {
BTF_KFUNC_TYPE_CHECK,
BTF_KFUNC_TYPE_ACQUIRE,
BTF_KFUNC_TYPE_RELEASE,
BTF_KFUNC_TYPE_RET_NULL,
BTF_KFUNC_TYPE_MAX,
};
struct btf;
struct btf_member;
struct btf_type;
union bpf_attr;
struct btf_show;
struct btf_id_set;
struct btf_kfunc_id_set {
struct module *owner;
union {
struct {
struct btf_id_set *check_set;
struct btf_id_set *acquire_set;
struct btf_id_set *release_set;
struct btf_id_set *ret_null_set;
};
struct btf_id_set *sets[BTF_KFUNC_TYPE_MAX];
};
};
extern const struct file_operations btf_fops;
......@@ -307,6 +329,11 @@ const struct btf_type *btf_type_by_id(const struct btf *btf, u32 type_id);
const char *btf_name_by_offset(const struct btf *btf, u32 offset);
struct btf *btf_parse_vmlinux(void);
struct btf *bpf_prog_get_target_btf(const struct bpf_prog *prog);
bool btf_kfunc_id_set_contains(const struct btf *btf,
enum bpf_prog_type prog_type,
enum btf_kfunc_type type, u32 kfunc_btf_id);
int register_btf_kfunc_id_set(enum bpf_prog_type prog_type,
const struct btf_kfunc_id_set *s);
#else
static inline const struct btf_type *btf_type_by_id(const struct btf *btf,
u32 type_id)
......@@ -318,50 +345,18 @@ static inline const char *btf_name_by_offset(const struct btf *btf,
{
return NULL;
}
#endif
struct kfunc_btf_id_set {
struct list_head list;
struct btf_id_set *set;
struct module *owner;
};
struct kfunc_btf_id_list {
struct list_head list;
struct mutex mutex;
};
#ifdef CONFIG_DEBUG_INFO_BTF_MODULES
void register_kfunc_btf_id_set(struct kfunc_btf_id_list *l,
struct kfunc_btf_id_set *s);
void unregister_kfunc_btf_id_set(struct kfunc_btf_id_list *l,
struct kfunc_btf_id_set *s);
bool bpf_check_mod_kfunc_call(struct kfunc_btf_id_list *klist, u32 kfunc_id,
struct module *owner);
extern struct kfunc_btf_id_list bpf_tcp_ca_kfunc_list;
extern struct kfunc_btf_id_list prog_test_kfunc_list;
#else
static inline void register_kfunc_btf_id_set(struct kfunc_btf_id_list *l,
struct kfunc_btf_id_set *s)
{
}
static inline void unregister_kfunc_btf_id_set(struct kfunc_btf_id_list *l,
struct kfunc_btf_id_set *s)
static inline bool btf_kfunc_id_set_contains(const struct btf *btf,
enum bpf_prog_type prog_type,
enum btf_kfunc_type type,
u32 kfunc_btf_id)
{
return false;
}
static inline bool bpf_check_mod_kfunc_call(struct kfunc_btf_id_list *klist,
u32 kfunc_id, struct module *owner)
static inline int register_btf_kfunc_id_set(enum bpf_prog_type prog_type,
const struct btf_kfunc_id_set *s)
{
return false;
return 0;
}
static struct kfunc_btf_id_list bpf_tcp_ca_kfunc_list __maybe_unused;
static struct kfunc_btf_id_list prog_test_kfunc_list __maybe_unused;
#endif
#define DEFINE_KFUNC_BTF_ID_SET(set, name) \
struct kfunc_btf_id_set name = { LIST_HEAD_INIT(name.list), (set), \
THIS_MODULE }
#endif
......@@ -11,6 +11,7 @@ struct btf_id_set {
#ifdef CONFIG_DEBUG_INFO_BTF
#include <linux/compiler.h> /* for __PASTE */
#include <linux/compiler_attributes.h> /* for __maybe_unused */
/*
* Following macros help to define lists of BTF IDs placed
......@@ -146,14 +147,14 @@ extern struct btf_id_set name;
#else
#define BTF_ID_LIST(name) static u32 name[5];
#define BTF_ID_LIST(name) static u32 __maybe_unused name[5];
#define BTF_ID(prefix, name)
#define BTF_ID_UNUSED
#define BTF_ID_LIST_GLOBAL(name, n) u32 name[n];
#define BTF_ID_LIST_SINGLE(name, prefix, typename) static u32 name[1];
#define BTF_ID_LIST_GLOBAL_SINGLE(name, prefix, typename) u32 name[1];
#define BTF_SET_START(name) static struct btf_id_set name = { 0 };
#define BTF_SET_START_GLOBAL(name) static struct btf_id_set name = { 0 };
#define BTF_ID_LIST_GLOBAL(name, n) u32 __maybe_unused name[n];
#define BTF_ID_LIST_SINGLE(name, prefix, typename) static u32 __maybe_unused name[1];
#define BTF_ID_LIST_GLOBAL_SINGLE(name, prefix, typename) u32 __maybe_unused name[1];
#define BTF_SET_START(name) static struct btf_id_set __maybe_unused name = { 0 };
#define BTF_SET_START_GLOBAL(name) static struct btf_id_set __maybe_unused name = { 0 };
#define BTF_SET_END(name)
#endif /* CONFIG_DEBUG_INFO_BTF */
......
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _NF_CONNTRACK_BPF_H
#define _NF_CONNTRACK_BPF_H
#include <linux/btf.h>
#include <linux/kconfig.h>
#if (IS_BUILTIN(CONFIG_NF_CONNTRACK) && IS_ENABLED(CONFIG_DEBUG_INFO_BTF)) || \
(IS_MODULE(CONFIG_NF_CONNTRACK) && IS_ENABLED(CONFIG_DEBUG_INFO_BTF_MODULES))
extern int register_nf_conntrack_bpf(void);
#else
static inline int register_nf_conntrack_bpf(void)
{
return 0;
}
#endif
#endif /* _NF_CONNTRACK_BPF_H */
This diff is collapsed.
This diff is collapsed.
......@@ -5,6 +5,7 @@
#include <linux/btf.h>
#include <linux/btf_ids.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/vmalloc.h>
#include <linux/etherdevice.h>
#include <linux/filter.h>
......@@ -171,6 +172,8 @@ int noinline bpf_fentry_test1(int a)
{
return a + 1;
}
EXPORT_SYMBOL_GPL(bpf_fentry_test1);
ALLOW_ERROR_INJECTION(bpf_fentry_test1, ERRNO);
int noinline bpf_fentry_test2(int a, u64 b)
{
......@@ -232,22 +235,137 @@ struct sock * noinline bpf_kfunc_call_test3(struct sock *sk)
return sk;
}
/* Object handed out by the test acquire/release kfuncs below; @next points
 * back at the object itself so pointer chains can be walked in tests.
 */
struct prog_test_ref_kfunc {
	int a;
	int b;
	struct prog_test_ref_kfunc *next;
};

/* Single static instance returned by bpf_kfunc_call_test_acquire(). */
static struct prog_test_ref_kfunc prog_test_struct = {
	.a = 42,
	.b = 108,
	.next = &prog_test_struct,
};
/* Test kfunc with acquire semantics: hands out a pointer to the static
 * prog_test_struct, or NULL about half the time (based on jiffies parity)
 * so callers must handle the NULL return.
 *
 * @scalar_ptr: unused; present to exercise kfunc argument handling.
 */
noinline struct prog_test_ref_kfunc *
bpf_kfunc_call_test_acquire(unsigned long *scalar_ptr)
{
	/* randomly return NULL */
	if (get_jiffies_64() % 2)
		return NULL;
	return &prog_test_struct;
}
/* Test kfunc with release semantics: takes the pointer obtained from
 * bpf_kfunc_call_test_acquire(). No-op since prog_test_struct is static.
 */
noinline void bpf_kfunc_call_test_release(struct prog_test_ref_kfunc *p)
{
}
/* The prog_test_pass and prog_test_fail structs below are the argument
 * types for the kfunc stubs that exercise PTR_TO_MEM argument checking.
 */

/* Scalars nested in anonymous structs: acceptable as PTR_TO_MEM. */
struct prog_test_pass1 {
	int x0;
	struct {
		int x1;
		struct {
			int x2;
			struct {
				int x3;
			};
		};
	};
};

/* Scalar arrays plus a named nested struct: acceptable as PTR_TO_MEM. */
struct prog_test_pass2 {
	int len;
	short arr1[4];
	struct {
		char arr2[4];
		unsigned long arr3[8];
	} x;
};

/* Contains a pointer member, so it must be rejected as PTR_TO_MEM. */
struct prog_test_fail1 {
	void *p;
	int x;
};

/* Embeds prog_test_pass1, exceeding the struct nesting depth limit (4). */
struct prog_test_fail2 {
	int x8;
	struct prog_test_pass1 x;
};

/* Ends in a zero-sized array member: rejected variant. */
struct prog_test_fail3 {
	int len;
	char arr1[2];
	char arr2[0];
};
/* Empty kfunc stubs exercising verifier argument checking: the "pass"
 * variants use argument shapes that should be accepted, the "fail"
 * variants shapes that should be rejected (see the negative tests in the
 * test_verifier suite). All bodies are intentionally empty.
 */
noinline void bpf_kfunc_call_test_pass_ctx(struct __sk_buff *skb)
{
}

noinline void bpf_kfunc_call_test_pass1(struct prog_test_pass1 *p)
{
}

noinline void bpf_kfunc_call_test_pass2(struct prog_test_pass2 *p)
{
}

noinline void bpf_kfunc_call_test_fail1(struct prog_test_fail1 *p)
{
}

noinline void bpf_kfunc_call_test_fail2(struct prog_test_fail2 *p)
{
}

noinline void bpf_kfunc_call_test_fail3(struct prog_test_fail3 *p)
{
}

/* The __sz suffix marks mem__sz as the size of @mem (a mem, len pair). */
noinline void bpf_kfunc_call_test_mem_len_pass1(void *mem, int mem__sz)
{
}

/* Length argument lacks the __sz suffix: must be rejected. */
noinline void bpf_kfunc_call_test_mem_len_fail1(void *mem, int len)
{
}

/* Another rejected mem, len combination (typed pointer, no __sz suffix). */
noinline void bpf_kfunc_call_test_mem_len_fail2(u64 *mem, int len)
{
}
__diag_pop();
ALLOW_ERROR_INJECTION(bpf_modify_return_test, ERRNO);
BTF_SET_START(test_sk_kfunc_ids)
BTF_SET_START(test_sk_check_kfunc_ids)
BTF_ID(func, bpf_kfunc_call_test1)
BTF_ID(func, bpf_kfunc_call_test2)
BTF_ID(func, bpf_kfunc_call_test3)
BTF_SET_END(test_sk_kfunc_ids)
bool bpf_prog_test_check_kfunc_call(u32 kfunc_id, struct module *owner)
{
if (btf_id_set_contains(&test_sk_kfunc_ids, kfunc_id))
return true;
return bpf_check_mod_kfunc_call(&prog_test_kfunc_list, kfunc_id, owner);
}
BTF_ID(func, bpf_kfunc_call_test_acquire)
BTF_ID(func, bpf_kfunc_call_test_release)
BTF_ID(func, bpf_kfunc_call_test_pass_ctx)
BTF_ID(func, bpf_kfunc_call_test_pass1)
BTF_ID(func, bpf_kfunc_call_test_pass2)
BTF_ID(func, bpf_kfunc_call_test_fail1)
BTF_ID(func, bpf_kfunc_call_test_fail2)
BTF_ID(func, bpf_kfunc_call_test_fail3)
BTF_ID(func, bpf_kfunc_call_test_mem_len_pass1)
BTF_ID(func, bpf_kfunc_call_test_mem_len_fail1)
BTF_ID(func, bpf_kfunc_call_test_mem_len_fail2)
BTF_SET_END(test_sk_check_kfunc_ids)
BTF_SET_START(test_sk_acquire_kfunc_ids)
BTF_ID(func, bpf_kfunc_call_test_acquire)
BTF_SET_END(test_sk_acquire_kfunc_ids)
BTF_SET_START(test_sk_release_kfunc_ids)
BTF_ID(func, bpf_kfunc_call_test_release)
BTF_SET_END(test_sk_release_kfunc_ids)
BTF_SET_START(test_sk_ret_null_kfunc_ids)
BTF_ID(func, bpf_kfunc_call_test_acquire)
BTF_SET_END(test_sk_ret_null_kfunc_ids)
static void *bpf_test_init(const union bpf_attr *kattr, u32 size,
u32 headroom, u32 tailroom)
......@@ -1067,3 +1185,17 @@ int bpf_prog_test_run_syscall(struct bpf_prog *prog,
kfree(ctx);
return err;
}
static const struct btf_kfunc_id_set bpf_prog_test_kfunc_set = {
.owner = THIS_MODULE,
.check_set = &test_sk_check_kfunc_ids,
.acquire_set = &test_sk_acquire_kfunc_ids,
.release_set = &test_sk_release_kfunc_ids,
.ret_null_set = &test_sk_ret_null_kfunc_ids,
};
static int __init bpf_prog_test_run_init(void)
{
return register_btf_kfunc_id_set(BPF_PROG_TYPE_SCHED_CLS, &bpf_prog_test_kfunc_set);
}
late_initcall(bpf_prog_test_run_init);
......@@ -10062,7 +10062,6 @@ const struct bpf_verifier_ops tc_cls_act_verifier_ops = {
.convert_ctx_access = tc_cls_act_convert_ctx_access,
.gen_prologue = tc_cls_act_prologue,
.gen_ld_abs = bpf_gen_ld_abs,
.check_kfunc_call = bpf_prog_test_check_kfunc_call,
};
const struct bpf_prog_ops tc_cls_act_prog_ops = {
......
......@@ -299,6 +299,7 @@ struct net *get_net_ns_by_id(const struct net *net, int id)
return peer;
}
EXPORT_SYMBOL_GPL(get_net_ns_by_id);
/*
* setup_net runs the initializers for the network namespace object.
......
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2019 Facebook */
#include <linux/init.h>
#include <linux/types.h>
#include <linux/bpf_verifier.h>
#include <linux/bpf.h>
......@@ -212,26 +213,23 @@ bpf_tcp_ca_get_func_proto(enum bpf_func_id func_id,
}
}
BTF_SET_START(bpf_tcp_ca_kfunc_ids)
BTF_SET_START(bpf_tcp_ca_check_kfunc_ids)
BTF_ID(func, tcp_reno_ssthresh)
BTF_ID(func, tcp_reno_cong_avoid)
BTF_ID(func, tcp_reno_undo_cwnd)
BTF_ID(func, tcp_slow_start)
BTF_ID(func, tcp_cong_avoid_ai)
BTF_SET_END(bpf_tcp_ca_kfunc_ids)
BTF_SET_END(bpf_tcp_ca_check_kfunc_ids)
static bool bpf_tcp_ca_check_kfunc_call(u32 kfunc_btf_id, struct module *owner)
{
if (btf_id_set_contains(&bpf_tcp_ca_kfunc_ids, kfunc_btf_id))
return true;
return bpf_check_mod_kfunc_call(&bpf_tcp_ca_kfunc_list, kfunc_btf_id, owner);
}
static const struct btf_kfunc_id_set bpf_tcp_ca_kfunc_set = {
.owner = THIS_MODULE,
.check_set = &bpf_tcp_ca_check_kfunc_ids,
};
static const struct bpf_verifier_ops bpf_tcp_ca_verifier_ops = {
.get_func_proto = bpf_tcp_ca_get_func_proto,
.is_valid_access = bpf_tcp_ca_is_valid_access,
.btf_struct_access = bpf_tcp_ca_btf_struct_access,
.check_kfunc_call = bpf_tcp_ca_check_kfunc_call,
};
static int bpf_tcp_ca_init_member(const struct btf_type *t,
......@@ -300,3 +298,9 @@ struct bpf_struct_ops bpf_tcp_congestion_ops = {
.init = bpf_tcp_ca_init,
.name = "tcp_congestion_ops",
};
static int __init bpf_tcp_ca_kfunc_init(void)
{
return register_btf_kfunc_id_set(BPF_PROG_TYPE_STRUCT_OPS, &bpf_tcp_ca_kfunc_set);
}
late_initcall(bpf_tcp_ca_kfunc_init);
......@@ -1154,7 +1154,7 @@ static struct tcp_congestion_ops tcp_bbr_cong_ops __read_mostly = {
.set_state = bbr_set_state,
};
BTF_SET_START(tcp_bbr_kfunc_ids)
BTF_SET_START(tcp_bbr_check_kfunc_ids)
#ifdef CONFIG_X86
#ifdef CONFIG_DYNAMIC_FTRACE
BTF_ID(func, bbr_init)
......@@ -1167,25 +1167,27 @@ BTF_ID(func, bbr_min_tso_segs)
BTF_ID(func, bbr_set_state)
#endif
#endif
BTF_SET_END(tcp_bbr_kfunc_ids)
BTF_SET_END(tcp_bbr_check_kfunc_ids)
static DEFINE_KFUNC_BTF_ID_SET(&tcp_bbr_kfunc_ids, tcp_bbr_kfunc_btf_set);
static const struct btf_kfunc_id_set tcp_bbr_kfunc_set = {
.owner = THIS_MODULE,
.check_set = &tcp_bbr_check_kfunc_ids,
};
static int __init bbr_register(void)
{
int ret;
BUILD_BUG_ON(sizeof(struct bbr) > ICSK_CA_PRIV_SIZE);
ret = tcp_register_congestion_control(&tcp_bbr_cong_ops);
if (ret)
ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_STRUCT_OPS, &tcp_bbr_kfunc_set);
if (ret < 0)
return ret;
register_kfunc_btf_id_set(&bpf_tcp_ca_kfunc_list, &tcp_bbr_kfunc_btf_set);
return 0;
return tcp_register_congestion_control(&tcp_bbr_cong_ops);
}
static void __exit bbr_unregister(void)
{
unregister_kfunc_btf_id_set(&bpf_tcp_ca_kfunc_list, &tcp_bbr_kfunc_btf_set);
tcp_unregister_congestion_control(&tcp_bbr_cong_ops);
}
......
......@@ -485,7 +485,7 @@ static struct tcp_congestion_ops cubictcp __read_mostly = {
.name = "cubic",
};
BTF_SET_START(tcp_cubic_kfunc_ids)
BTF_SET_START(tcp_cubic_check_kfunc_ids)
#ifdef CONFIG_X86
#ifdef CONFIG_DYNAMIC_FTRACE
BTF_ID(func, cubictcp_init)
......@@ -496,9 +496,12 @@ BTF_ID(func, cubictcp_cwnd_event)
BTF_ID(func, cubictcp_acked)
#endif
#endif
BTF_SET_END(tcp_cubic_kfunc_ids)
BTF_SET_END(tcp_cubic_check_kfunc_ids)
static DEFINE_KFUNC_BTF_ID_SET(&tcp_cubic_kfunc_ids, tcp_cubic_kfunc_btf_set);
static const struct btf_kfunc_id_set tcp_cubic_kfunc_set = {
.owner = THIS_MODULE,
.check_set = &tcp_cubic_check_kfunc_ids,
};
static int __init cubictcp_register(void)
{
......@@ -534,16 +537,14 @@ static int __init cubictcp_register(void)
/* divide by bic_scale and by constant Srtt (100ms) */
do_div(cube_factor, bic_scale * 10);
ret = tcp_register_congestion_control(&cubictcp);
if (ret)
ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_STRUCT_OPS, &tcp_cubic_kfunc_set);
if (ret < 0)
return ret;
register_kfunc_btf_id_set(&bpf_tcp_ca_kfunc_list, &tcp_cubic_kfunc_btf_set);
return 0;
return tcp_register_congestion_control(&cubictcp);
}
static void __exit cubictcp_unregister(void)
{
unregister_kfunc_btf_id_set(&bpf_tcp_ca_kfunc_list, &tcp_cubic_kfunc_btf_set);
tcp_unregister_congestion_control(&cubictcp);
}
......
......@@ -238,7 +238,7 @@ static struct tcp_congestion_ops dctcp_reno __read_mostly = {
.name = "dctcp-reno",
};
BTF_SET_START(tcp_dctcp_kfunc_ids)
BTF_SET_START(tcp_dctcp_check_kfunc_ids)
#ifdef CONFIG_X86
#ifdef CONFIG_DYNAMIC_FTRACE
BTF_ID(func, dctcp_init)
......@@ -249,25 +249,27 @@ BTF_ID(func, dctcp_cwnd_undo)
BTF_ID(func, dctcp_state)
#endif
#endif
BTF_SET_END(tcp_dctcp_kfunc_ids)
BTF_SET_END(tcp_dctcp_check_kfunc_ids)
static DEFINE_KFUNC_BTF_ID_SET(&tcp_dctcp_kfunc_ids, tcp_dctcp_kfunc_btf_set);
static const struct btf_kfunc_id_set tcp_dctcp_kfunc_set = {
.owner = THIS_MODULE,
.check_set = &tcp_dctcp_check_kfunc_ids,
};
static int __init dctcp_register(void)
{
int ret;
BUILD_BUG_ON(sizeof(struct dctcp) > ICSK_CA_PRIV_SIZE);
ret = tcp_register_congestion_control(&dctcp);
if (ret)
ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_STRUCT_OPS, &tcp_dctcp_kfunc_set);
if (ret < 0)
return ret;
register_kfunc_btf_id_set(&bpf_tcp_ca_kfunc_list, &tcp_dctcp_kfunc_btf_set);
return 0;
return tcp_register_congestion_control(&dctcp);
}
static void __exit dctcp_unregister(void)
{
unregister_kfunc_btf_id_set(&bpf_tcp_ca_kfunc_list, &tcp_dctcp_kfunc_btf_set);
tcp_unregister_congestion_control(&dctcp);
}
......
......@@ -14,6 +14,11 @@ nf_conntrack-$(CONFIG_NF_CONNTRACK_LABELS) += nf_conntrack_labels.o
nf_conntrack-$(CONFIG_NF_CT_PROTO_DCCP) += nf_conntrack_proto_dccp.o
nf_conntrack-$(CONFIG_NF_CT_PROTO_SCTP) += nf_conntrack_proto_sctp.o
nf_conntrack-$(CONFIG_NF_CT_PROTO_GRE) += nf_conntrack_proto_gre.o
ifeq ($(CONFIG_NF_CONNTRACK),m)
nf_conntrack-$(CONFIG_DEBUG_INFO_BTF_MODULES) += nf_conntrack_bpf.o
else ifeq ($(CONFIG_NF_CONNTRACK),y)
nf_conntrack-$(CONFIG_DEBUG_INFO_BTF) += nf_conntrack_bpf.o
endif
obj-$(CONFIG_NETFILTER) = netfilter.o
......
// SPDX-License-Identifier: GPL-2.0-only
/* Unstable Conntrack Helpers for XDP and TC-BPF hook
*
* These are called from the XDP and SCHED_CLS BPF programs. Note that it is
* allowed to break compatibility for these functions since the interface they
* are exposed through to BPF programs is explicitly unstable.
*/
#include <linux/bpf.h>
#include <linux/btf.h>
#include <linux/types.h>
#include <linux/btf_ids.h>
#include <linux/net_namespace.h>
#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_core.h>
/* bpf_ct_opts - Options for CT lookup helpers
*
* Members:
* @netns_id - Specify the network namespace for lookup
* Values:
* BPF_F_CURRENT_NETNS (-1)
* Use namespace associated with ctx (xdp_md, __sk_buff)
* [0, S32_MAX]
* Network Namespace ID
* @error - Out parameter, set for any errors encountered
* Values:
* -EINVAL - Passed NULL for bpf_tuple pointer
* -EINVAL - opts->reserved is not 0
* -EINVAL - netns_id is less than -1
* -EINVAL - opts__sz isn't NF_BPF_CT_OPTS_SZ (12)
* -EPROTO - l4proto isn't one of IPPROTO_TCP or IPPROTO_UDP
* -ENONET - No network namespace found for netns_id
* -ENOENT - Conntrack lookup could not find entry for tuple
* -EAFNOSUPPORT - tuple__sz isn't one of sizeof(tuple->ipv4)
* or sizeof(tuple->ipv6)
* @l4proto - Layer 4 protocol
* Values:
* IPPROTO_TCP, IPPROTO_UDP
* @reserved - Reserved member, will be reused for more options in future
* Values:
* 0
*/
struct bpf_ct_opts {
s32 netns_id;
s32 error;
u8 l4proto;
u8 reserved[3];
};
enum {
NF_BPF_CT_OPTS_SZ = 12,
};
/* Core conntrack lookup shared by the XDP and TC kfuncs.
 *
 * Converts @bpf_tuple (sized by @tuple_len) into a struct
 * nf_conntrack_tuple, optionally switches to the netns identified by
 * @netns_id, and looks the tuple up in the conntrack table.
 *
 * Return: the matching nf_conn (reference taken by nf_conntrack_find_get())
 * or an ERR_PTR():
 *   -EPROTO       @protonum is neither IPPROTO_TCP nor IPPROTO_UDP
 *   -EINVAL       @netns_id is below BPF_F_CURRENT_NETNS (-1)
 *   -EAFNOSUPPORT @tuple_len matches neither the ipv4 nor the ipv6 tuple size
 *   -ENONET       no network namespace exists for @netns_id
 *   -ENOENT       no conntrack entry matches the tuple
 */
static struct nf_conn *__bpf_nf_ct_lookup(struct net *net,
					  struct bpf_sock_tuple *bpf_tuple,
					  u32 tuple_len, u8 protonum,
					  s32 netns_id)
{
	struct nf_conntrack_tuple_hash *hash;
	struct nf_conntrack_tuple tuple;

	if (unlikely(protonum != IPPROTO_TCP && protonum != IPPROTO_UDP))
		return ERR_PTR(-EPROTO);
	if (unlikely(netns_id < BPF_F_CURRENT_NETNS))
		return ERR_PTR(-EINVAL);

	/* tuple_len selects the address family: the ipv4 and ipv6 members of
	 * struct bpf_sock_tuple have distinct sizes.
	 */
	memset(&tuple, 0, sizeof(tuple));
	switch (tuple_len) {
	case sizeof(bpf_tuple->ipv4):
		tuple.src.l3num = AF_INET;
		tuple.src.u3.ip = bpf_tuple->ipv4.saddr;
		tuple.src.u.tcp.port = bpf_tuple->ipv4.sport;
		tuple.dst.u3.ip = bpf_tuple->ipv4.daddr;
		tuple.dst.u.tcp.port = bpf_tuple->ipv4.dport;
		break;
	case sizeof(bpf_tuple->ipv6):
		tuple.src.l3num = AF_INET6;
		memcpy(tuple.src.u3.ip6, bpf_tuple->ipv6.saddr, sizeof(bpf_tuple->ipv6.saddr));
		tuple.src.u.tcp.port = bpf_tuple->ipv6.sport;
		memcpy(tuple.dst.u3.ip6, bpf_tuple->ipv6.daddr, sizeof(bpf_tuple->ipv6.daddr));
		tuple.dst.u.tcp.port = bpf_tuple->ipv6.dport;
		break;
	default:
		return ERR_PTR(-EAFNOSUPPORT);
	}

	tuple.dst.protonum = protonum;

	/* A non-negative netns_id selects an explicit namespace; the
	 * reference from get_net_ns_by_id() is dropped right after the
	 * lookup below.
	 */
	if (netns_id >= 0) {
		net = get_net_ns_by_id(net, netns_id);
		if (unlikely(!net))
			return ERR_PTR(-ENONET);
	}

	hash = nf_conntrack_find_get(net, &nf_ct_zone_dflt, &tuple);
	if (netns_id >= 0)
		put_net(net);
	if (!hash)
		return ERR_PTR(-ENOENT);
	return nf_ct_tuplehash_to_ctrack(hash);
}
__diag_push();
__diag_ignore(GCC, 8, "-Wmissing-prototypes",
"Global functions as their definitions will be in nf_conntrack BTF");
/* bpf_xdp_ct_lookup - Lookup CT entry for the given tuple, and acquire a
* reference to it
*
* Parameters:
* @xdp_ctx - Pointer to ctx (xdp_md) in XDP program
* Cannot be NULL
* @bpf_tuple - Pointer to memory representing the tuple to look up
* Cannot be NULL
* @tuple__sz - Length of the tuple structure
* Must be one of sizeof(bpf_tuple->ipv4) or
* sizeof(bpf_tuple->ipv6)
* @opts - Additional options for lookup (documented above)
* Cannot be NULL
* @opts__sz - Length of the bpf_ct_opts structure
* Must be NF_BPF_CT_OPTS_SZ (12)
*/
struct nf_conn *
bpf_xdp_ct_lookup(struct xdp_md *xdp_ctx, struct bpf_sock_tuple *bpf_tuple,
		  u32 tuple__sz, struct bpf_ct_opts *opts, u32 opts__sz)
{
	struct xdp_buff *ctx = (struct xdp_buff *)xdp_ctx;
	struct net *caller_net;
	struct nf_conn *nfct;

	/* Keep NF_BPF_CT_OPTS_SZ in sync with the real struct size. */
	BUILD_BUG_ON(sizeof(struct bpf_ct_opts) != NF_BPF_CT_OPTS_SZ);

	/* Without opts there is nowhere to report an error code. */
	if (!opts)
		return NULL;
	/* reserved[] must be zero so it can carry future options. */
	if (!bpf_tuple || opts->reserved[0] || opts->reserved[1] ||
	    opts->reserved[2] || opts__sz != NF_BPF_CT_OPTS_SZ) {
		opts->error = -EINVAL;
		return NULL;
	}
	/* Default namespace is that of the receiving device. */
	caller_net = dev_net(ctx->rxq->dev);
	nfct = __bpf_nf_ct_lookup(caller_net, bpf_tuple, tuple__sz, opts->l4proto,
				  opts->netns_id);
	/* Lookup failures are reported via opts->error; NULL is returned. */
	if (IS_ERR(nfct)) {
		opts->error = PTR_ERR(nfct);
		return NULL;
	}
	return nfct;
}
/* bpf_skb_ct_lookup - Lookup CT entry for the given tuple, and acquire a
* reference to it
*
* Parameters:
* @skb_ctx - Pointer to ctx (__sk_buff) in TC program
* Cannot be NULL
* @bpf_tuple - Pointer to memory representing the tuple to look up
* Cannot be NULL
* @tuple__sz - Length of the tuple structure
* Must be one of sizeof(bpf_tuple->ipv4) or
* sizeof(bpf_tuple->ipv6)
* @opts - Additional options for lookup (documented above)
* Cannot be NULL
* @opts__sz - Length of the bpf_ct_opts structure
* Must be NF_BPF_CT_OPTS_SZ (12)
*/
struct nf_conn *
bpf_skb_ct_lookup(struct __sk_buff *skb_ctx, struct bpf_sock_tuple *bpf_tuple,
		  u32 tuple__sz, struct bpf_ct_opts *opts, u32 opts__sz)
{
	struct sk_buff *skb = (struct sk_buff *)skb_ctx;
	struct net *caller_net;
	struct nf_conn *nfct;

	/* Keep NF_BPF_CT_OPTS_SZ in sync with the real struct size. */
	BUILD_BUG_ON(sizeof(struct bpf_ct_opts) != NF_BPF_CT_OPTS_SZ);

	/* Without opts there is nowhere to report an error code. */
	if (!opts)
		return NULL;
	/* reserved[] must be zero so it can carry future options. */
	if (!bpf_tuple || opts->reserved[0] || opts->reserved[1] ||
	    opts->reserved[2] || opts__sz != NF_BPF_CT_OPTS_SZ) {
		opts->error = -EINVAL;
		return NULL;
	}
	/* Default namespace: the skb's device if set, else its socket. */
	caller_net = skb->dev ? dev_net(skb->dev) : sock_net(skb->sk);
	nfct = __bpf_nf_ct_lookup(caller_net, bpf_tuple, tuple__sz, opts->l4proto,
				  opts->netns_id);
	/* Lookup failures are reported via opts->error; NULL is returned. */
	if (IS_ERR(nfct)) {
		opts->error = PTR_ERR(nfct);
		return NULL;
	}
	return nfct;
}
/* bpf_ct_release - Release acquired nf_conn object
*
* This must be invoked for referenced PTR_TO_BTF_ID, and the verifier rejects
* the program if any references remain in the program in all of the explored
* states.
*
* Parameters:
* @nf_conn - Pointer to referenced nf_conn object, obtained using
* bpf_xdp_ct_lookup or bpf_skb_ct_lookup.
*/
/* Drop the reference held on @nfct; tolerates NULL so callers need not
 * check before releasing (see the kernel-doc comment above).
 */
void bpf_ct_release(struct nf_conn *nfct)
{
	if (nfct)
		nf_ct_put(nfct);
}
__diag_pop()
BTF_SET_START(nf_ct_xdp_check_kfunc_ids)
BTF_ID(func, bpf_xdp_ct_lookup)
BTF_ID(func, bpf_ct_release)
BTF_SET_END(nf_ct_xdp_check_kfunc_ids)
BTF_SET_START(nf_ct_tc_check_kfunc_ids)
BTF_ID(func, bpf_skb_ct_lookup)
BTF_ID(func, bpf_ct_release)
BTF_SET_END(nf_ct_tc_check_kfunc_ids)
BTF_SET_START(nf_ct_acquire_kfunc_ids)
BTF_ID(func, bpf_xdp_ct_lookup)
BTF_ID(func, bpf_skb_ct_lookup)
BTF_SET_END(nf_ct_acquire_kfunc_ids)
BTF_SET_START(nf_ct_release_kfunc_ids)
BTF_ID(func, bpf_ct_release)
BTF_SET_END(nf_ct_release_kfunc_ids)
/* Both sets are identical */
#define nf_ct_ret_null_kfunc_ids nf_ct_acquire_kfunc_ids
static const struct btf_kfunc_id_set nf_conntrack_xdp_kfunc_set = {
.owner = THIS_MODULE,
.check_set = &nf_ct_xdp_check_kfunc_ids,
.acquire_set = &nf_ct_acquire_kfunc_ids,
.release_set = &nf_ct_release_kfunc_ids,
.ret_null_set = &nf_ct_ret_null_kfunc_ids,
};
static const struct btf_kfunc_id_set nf_conntrack_tc_kfunc_set = {
.owner = THIS_MODULE,
.check_set = &nf_ct_tc_check_kfunc_ids,
.acquire_set = &nf_ct_acquire_kfunc_ids,
.release_set = &nf_ct_release_kfunc_ids,
.ret_null_set = &nf_ct_ret_null_kfunc_ids,
};
/* Register the XDP and TC conntrack kfunc id sets with the BPF core.
 *
 * Return: 0 on success, or the first non-zero error from
 * register_btf_kfunc_id_set().
 */
int register_nf_conntrack_bpf(void)
{
	int err;

	err = register_btf_kfunc_id_set(BPF_PROG_TYPE_XDP, &nf_conntrack_xdp_kfunc_set);
	if (err)
		return err;
	return register_btf_kfunc_id_set(BPF_PROG_TYPE_SCHED_CLS, &nf_conntrack_tc_kfunc_set);
}
......@@ -34,6 +34,7 @@
#include <linux/rculist_nulls.h>
#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_bpf.h>
#include <net/netfilter/nf_conntrack_l4proto.h>
#include <net/netfilter/nf_conntrack_expect.h>
#include <net/netfilter/nf_conntrack_helper.h>
......@@ -2748,8 +2749,15 @@ int nf_conntrack_init_start(void)
conntrack_gc_work_init(&conntrack_gc_work);
queue_delayed_work(system_power_efficient_wq, &conntrack_gc_work.dwork, HZ);
ret = register_nf_conntrack_bpf();
if (ret < 0)
goto err_kfunc;
return 0;
err_kfunc:
cancel_delayed_work_sync(&conntrack_gc_work.dwork);
nf_conntrack_proto_fini();
err_proto:
nf_conntrack_seqadj_fini();
err_seqadj:
......
......@@ -109,26 +109,31 @@ static struct bin_attribute bin_attr_bpf_testmod_file __ro_after_init = {
.write = bpf_testmod_test_write,
};
BTF_SET_START(bpf_testmod_kfunc_ids)
BTF_SET_START(bpf_testmod_check_kfunc_ids)
BTF_ID(func, bpf_testmod_test_mod_kfunc)
BTF_SET_END(bpf_testmod_kfunc_ids)
BTF_SET_END(bpf_testmod_check_kfunc_ids)
static DEFINE_KFUNC_BTF_ID_SET(&bpf_testmod_kfunc_ids, bpf_testmod_kfunc_btf_set);
static const struct btf_kfunc_id_set bpf_testmod_kfunc_set = {
.owner = THIS_MODULE,
.check_set = &bpf_testmod_check_kfunc_ids,
};
extern int bpf_fentry_test1(int a);
static int bpf_testmod_init(void)
{
int ret;
ret = sysfs_create_bin_file(kernel_kobj, &bin_attr_bpf_testmod_file);
if (ret)
ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_SCHED_CLS, &bpf_testmod_kfunc_set);
if (ret < 0)
return ret;
register_kfunc_btf_id_set(&prog_test_kfunc_list, &bpf_testmod_kfunc_btf_set);
return 0;
if (bpf_fentry_test1(0) < 0)
return -EINVAL;
return sysfs_create_bin_file(kernel_kobj, &bin_attr_bpf_testmod_file);
}
static void bpf_testmod_exit(void)
{
unregister_kfunc_btf_id_set(&prog_test_kfunc_list, &bpf_testmod_kfunc_btf_set);
return sysfs_remove_bin_file(kernel_kobj, &bin_attr_bpf_testmod_file);
}
......
......@@ -48,3 +48,8 @@ CONFIG_IMA_READ_POLICY=y
CONFIG_BLK_DEV_LOOP=y
CONFIG_FUNCTION_TRACER=y
CONFIG_DYNAMIC_FTRACE=y
CONFIG_NETFILTER=y
CONFIG_NF_DEFRAG_IPV4=y
CONFIG_NF_DEFRAG_IPV6=y
CONFIG_NF_CONNTRACK=y
CONFIG_USERFAULTFD=y
// SPDX-License-Identifier: GPL-2.0
#include <unistd.h>
#include <pthread.h>
#include <sys/mman.h>
#include <stdatomic.h>
#include <test_progs.h>
#include <sys/syscall.h>
#include <linux/module.h>
#include <linux/userfaultfd.h>
#include "ksym_race.skel.h"
#include "bpf_mod_race.skel.h"
#include "kfunc_call_race.skel.h"
/* This test crafts a race between btf_try_get_module and do_init_module, and
* checks whether btf_try_get_module handles the invocation for a well-formed
* but uninitialized module correctly. Unless the module has completed its
* initcalls, the verifier should fail the program load and return ENXIO.
*
* userfaultfd is used to trigger a fault in an fmod_ret program, and make it
* sleep, then the BPF program is loaded and the return value from verifier is
* inspected. After this, the userfaultfd is closed so that the module loading
* thread makes forward progress, and fmod_ret injects an error so that the
* module load fails and it is freed.
*
* If the verifier succeeded in loading the supplied program, it will end up
* taking reference to freed module, and trigger a crash when the program fd
* is closed later. This is true for both kfuncs and ksyms. In both cases,
* the crash is triggered inside bpf_prog_free_deferred, when module reference
* is finally released.
*/
struct test_config {
const char *str_open;
void *(*bpf_open_and_load)();
void (*bpf_destroy)(void *);
};
enum test_state {
_TS_INVALID,
TS_MODULE_LOAD,
TS_MODULE_LOAD_FAIL,
};
static _Atomic enum test_state state = _TS_INVALID;
/* Raw finit_module(2) wrapper; libc provides no stub for this syscall. */
static int sys_finit_module(int fd, const char *param_values, int flags)
{
	long rc = syscall(__NR_finit_module, fd, param_values, flags);

	return (int)rc;
}
/* Thin wrapper around the delete_module(2) syscall (glibc provides none). */
static int sys_delete_module(const char *name, unsigned int flags)
{
	long rc;

	rc = syscall(__NR_delete_module, name, flags);
	return (int)rc;
}
/* Load the kernel module image at path @mod via finit_module().
 *
 * Returns 0 on success, a negative value on error (the failing open()
 * fd or the syscall's error return).
 */
static int load_module(const char *mod)
{
	int ret, fd;

	/* Open the requested image; previously the @mod argument was
	 * ignored and "bpf_testmod.ko" was unconditionally opened.
	 */
	fd = open(mod, O_RDONLY);
	if (fd < 0)
		return fd;
	ret = sys_finit_module(fd, "", 0);
	close(fd);
	if (ret < 0)
		return ret;
	return 0;
}
/* Worker thread: try to load bpf_testmod while the fmod_ret program keeps
 * do_init_module() blocked.  The load is expected to fail (the BPF program
 * injects an error into mod->init()), so the expected path records
 * TS_MODULE_LOAD_FAIL; TS_MODULE_LOAD flags an unexpected success.
 */
static void *load_module_thread(void *p)
{
	int err = load_module("bpf_testmod.ko");

	if (ASSERT_NEQ(err, 0, "load_module_thread must fail"))
		atomic_store(&state, TS_MODULE_LOAD_FAIL);
	else
		atomic_store(&state, TS_MODULE_LOAD);
	return p;
}
/* Thin wrapper around the userfaultfd(2) syscall (glibc provides none). */
static int sys_userfaultfd(int flags)
{
	long fd = syscall(__NR_userfaultfd, flags);

	return (int)fd;
}
/* Create a userfaultfd and register the 4K page at @fault_addr for
 * missing-page events.  Returns the userfaultfd on success, -errno when
 * the syscall itself fails, or -1 when either ioctl fails.
 */
static int test_setup_uffd(void *fault_addr)
{
	struct uffdio_register reg = {};
	struct uffdio_api api = {};
	int ufd;

	ufd = sys_userfaultfd(O_CLOEXEC);
	if (ufd < 0)
		return -errno;
	/* Handshake: advertise API version, request no extra features */
	api.api = UFFD_API;
	api.features = 0;
	if (ioctl(ufd, UFFDIO_API, &api)) {
		close(ufd);
		return -1;
	}
	/* Track missing-page faults on the single monitored page */
	reg.mode = UFFDIO_REGISTER_MODE_MISSING;
	reg.range.start = (unsigned long)fault_addr;
	reg.range.len = 4096;
	if (ioctl(ufd, UFFDIO_REGISTER, &reg)) {
		close(ufd);
		return -1;
	}
	return ufd;
}
/* Core scenario driver: race a bpf_testmod load (blocked inside its init
 * function by a sleepable fmod_ret program faulting on a uffd-monitored
 * page) against loading a BPF program that resolves a kfunc/ksym from that
 * module.  The verifier must return ENXIO because the module is not
 * MODULE_STATE_LIVE yet.  @config selects the ksym or kfunc skeleton.
 */
static void test_bpf_mod_race_config(const struct test_config *config)
{
	void *fault_addr, *skel_fail;
	struct bpf_mod_race *skel;
	struct uffd_msg uffd_msg;
	pthread_t load_mod_thrd;
	_Atomic int *blockingp;
	int uffd, ret;

	/* Page the fmod_ret program will fault on; backed by userfaultfd so
	 * the fault blocks until we read the event / close the fd.
	 */
	fault_addr = mmap(0, 4096, PROT_READ, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (!ASSERT_NEQ(fault_addr, MAP_FAILED, "mmap for uffd registration"))
		return;
	/* bpf_testmod is loaded by the test harness; remove it so we can
	 * re-load it under our control.
	 */
	if (!ASSERT_OK(sys_delete_module("bpf_testmod", 0), "unload bpf_testmod"))
		goto end_mmap;
	skel = bpf_mod_race__open();
	if (!ASSERT_OK_PTR(skel, "bpf_mod_kfunc_race__open"))
		goto end_module;
	/* Configure the tracing programs: act only for our tgid, make
	 * mod->init() fail afterwards, and fault on fault_addr.
	 */
	skel->rodata->bpf_mod_race_config.tgid = getpid();
	skel->rodata->bpf_mod_race_config.inject_error = -4242;
	skel->rodata->bpf_mod_race_config.fault_addr = fault_addr;
	if (!ASSERT_OK(bpf_mod_race__load(skel), "bpf_mod___load"))
		goto end_destroy;
	blockingp = (_Atomic int *)&skel->bss->bpf_blocking;
	if (!ASSERT_OK(bpf_mod_race__attach(skel), "bpf_mod_kfunc_race__attach"))
		goto end_destroy;
	uffd = test_setup_uffd(fault_addr);
	if (!ASSERT_GE(uffd, 0, "userfaultfd open + register address"))
		goto end_destroy;
	if (!ASSERT_OK(pthread_create(&load_mod_thrd, NULL, load_module_thread, NULL),
		       "load module thread"))
		goto end_uffd;
	/* Now, we either fail loading module, or block in bpf prog, spin to find out */
	while (!atomic_load(&state) && !atomic_load(blockingp))
		;
	if (!ASSERT_EQ(state, _TS_INVALID, "module load should block"))
		goto end_join;
	if (!ASSERT_EQ(*blockingp, 1, "module load blocked")) {
		/* Thread neither finished nor blocked: kill it, can't join */
		pthread_kill(load_mod_thrd, SIGKILL);
		goto end_uffd;
	}
	/* We might have set bpf_blocking to 1, but may have not blocked in
	 * bpf_copy_from_user. Read userfaultfd descriptor to verify that.
	 */
	if (!ASSERT_EQ(read(uffd, &uffd_msg, sizeof(uffd_msg)), sizeof(uffd_msg),
		       "read uffd block event"))
		goto end_join;
	if (!ASSERT_EQ(uffd_msg.event, UFFD_EVENT_PAGEFAULT, "read uffd event is pagefault"))
		goto end_join;
	/* We know that load_mod_thrd is blocked in the fmod_ret program, the
	 * module state is still MODULE_STATE_COMING because mod->init hasn't
	 * returned. This is the time we try to load a program calling kfunc and
	 * check if we get ENXIO from verifier.
	 */
	skel_fail = config->bpf_open_and_load();
	ret = errno;
	if (!ASSERT_EQ(skel_fail, NULL, config->str_open)) {
		/* Close uffd to unblock load_mod_thrd */
		close(uffd);
		uffd = -1;
		/* Wait for fexit/do_init_module to observe init returning */
		while (atomic_load(blockingp) != 2)
			;
		ASSERT_OK(kern_sync_rcu(), "kern_sync_rcu");
		config->bpf_destroy(skel_fail);
		goto end_join;
	}
	ASSERT_EQ(ret, ENXIO, "verifier returns ENXIO");
	ASSERT_EQ(skel->data->res_try_get_module, false, "btf_try_get_module == false");
	close(uffd);
	uffd = -1;
end_join:
	pthread_join(load_mod_thrd, NULL);
	if (uffd < 0)
		ASSERT_EQ(atomic_load(&state), TS_MODULE_LOAD_FAIL, "load_mod_thrd success");
end_uffd:
	if (uffd >= 0)
		close(uffd);
end_destroy:
	bpf_mod_race__destroy(skel);
	ASSERT_OK(kern_sync_rcu(), "kern_sync_rcu");
end_module:
	sys_delete_module("bpf_testmod", 0);
	/* Re-load bpf_testmod for subsequent tests in the harness */
	ASSERT_OK(load_module("bpf_testmod.ko"), "restore bpf_testmod");
end_mmap:
	munmap(fault_addr, 4096);
	atomic_store(&state, _TS_INVALID);
}
/* ksym variant: program takes a per-CPU ksym reference from bpf_testmod
 * (exercises the used_btfs UAF path).
 */
static const struct test_config ksym_config = {
	.str_open = "ksym_race__open_and_load",
	.bpf_open_and_load = (void *)ksym_race__open_and_load,
	.bpf_destroy = (void *)ksym_race__destroy,
};

/* kfunc variant: program calls a kfunc exported by bpf_testmod
 * (exercises the kfunc_btf_tab UAF path).
 */
static const struct test_config kfunc_config = {
	.str_open = "kfunc_call_race__open_and_load",
	.bpf_open_and_load = (void *)kfunc_call_race__open_and_load,
	.bpf_destroy = (void *)kfunc_call_race__destroy,
};
/* Test entry point: run both race variants as serial subtests. */
void serial_test_bpf_mod_race(void)
{
	static const struct {
		const char *name;
		const struct test_config *cfg;
	} subtests[] = {
		{ "ksym (used_btfs UAF)", &ksym_config },
		{ "kfunc (kfunc_btf_tab UAF)", &kfunc_config },
	};
	int i;

	for (i = 0; i < (int)(sizeof(subtests) / sizeof(subtests[0])); i++) {
		if (test__start_subtest(subtests[i].name))
			test_bpf_mod_race_config(subtests[i].cfg);
	}
}
// SPDX-License-Identifier: GPL-2.0
#include <test_progs.h>
#include <network_helpers.h>
#include "test_bpf_nf.skel.h"
enum {
TEST_XDP,
TEST_TC_BPF,
};
/* Run the conntrack-lookup BPF program selected by @mode (TEST_XDP or
 * TEST_TC_BPF) and verify each negative-path error code it recorded.
 */
void test_bpf_nf_ct(int mode)
{
	int prog_fd, err, retval;
	struct test_bpf_nf *skel;

	skel = test_bpf_nf__open_and_load();
	if (!ASSERT_OK_PTR(skel, "test_bpf_nf__open_and_load"))
		return;
	prog_fd = bpf_program__fd(mode == TEST_XDP ? skel->progs.nf_xdp_ct_test :
						     skel->progs.nf_skb_ct_test);
	err = bpf_prog_test_run(prog_fd, 1, &pkt_v4, sizeof(pkt_v4), NULL, NULL,
				(__u32 *)&retval, NULL);
	if (!ASSERT_OK(err, "bpf_prog_test_run"))
		goto end;
	/* One assertion per error path exercised by the BPF program */
	ASSERT_EQ(skel->bss->test_einval_bpf_tuple, -EINVAL, "Test EINVAL for NULL bpf_tuple");
	ASSERT_EQ(skel->bss->test_einval_reserved, -EINVAL, "Test EINVAL for reserved not set to 0");
	ASSERT_EQ(skel->bss->test_einval_netns_id, -EINVAL, "Test EINVAL for netns_id < -1");
	ASSERT_EQ(skel->bss->test_einval_len_opts, -EINVAL, "Test EINVAL for len__opts != NF_BPF_CT_OPTS_SZ");
	ASSERT_EQ(skel->bss->test_eproto_l4proto, -EPROTO, "Test EPROTO for l4proto != TCP or UDP");
	ASSERT_EQ(skel->bss->test_enonet_netns_id, -ENONET, "Test ENONET for bad but valid netns_id");
	ASSERT_EQ(skel->bss->test_enoent_lookup, -ENOENT, "Test ENOENT for failed lookup");
	ASSERT_EQ(skel->bss->test_eafnosupport, -EAFNOSUPPORT, "Test EAFNOSUPPORT for invalid len__tuple");
end:
	test_bpf_nf__destroy(skel);
}
/* Test entry point: run the XDP and TC conntrack subtests. */
void test_bpf_nf(void)
{
	static const struct {
		const char *name;
		int mode;
	} subtests[] = {
		{ "xdp-ct", TEST_XDP },
		{ "tc-bpf-ct", TEST_TC_BPF },
	};
	int i;

	for (i = 0; i < (int)(sizeof(subtests) / sizeof(subtests[0])); i++) {
		if (test__start_subtest(subtests[i].name))
			test_bpf_nf_ct(subtests[i].mode);
	}
}
......@@ -27,6 +27,12 @@ static void test_main(void)
ASSERT_OK(err, "bpf_prog_test_run(test2)");
ASSERT_EQ(retval, 3, "test2-retval");
prog_fd = skel->progs.kfunc_call_test_ref_btf_id.prog_fd;
err = bpf_prog_test_run(prog_fd, 1, &pkt_v4, sizeof(pkt_v4),
NULL, NULL, (__u32 *)&retval, NULL);
ASSERT_OK(err, "bpf_prog_test_run(test_ref_btf_id)");
ASSERT_EQ(retval, 0, "test_ref_btf_id-retval");
kfunc_call_test_lskel__destroy(skel);
}
......
// SPDX-License-Identifier: GPL-2.0
#include <vmlinux.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
const volatile struct {
/* thread to activate trace programs for */
pid_t tgid;
/* return error from __init function */
int inject_error;
/* uffd monitored range start address */
void *fault_addr;
} bpf_mod_race_config = { -1 };
int bpf_blocking = 0;
int res_try_get_module = -1;
/* True iff the current task belongs to the configured test process, so the
 * tracing programs only interfere with the test's own module load.
 */
static __always_inline bool check_thread_id(void)
{
	struct task_struct *curr = bpf_get_current_task_btf();

	return bpf_mod_race_config.tgid == curr->tgid;
}
/* The trace of execution is something like this:
*
* finit_module()
* load_module()
* prepare_coming_module()
* notifier_call(MODULE_STATE_COMING)
* btf_parse_module()
* btf_alloc_id() // Visible to userspace at this point
* list_add(btf_mod->list, &btf_modules)
* do_init_module()
* freeinit = kmalloc()
* ret = mod->init()
* bpf_prog_widen_race()
* bpf_copy_from_user()
* ...<sleep>...
* if (ret < 0)
* ...
* free_module()
* return ret
*
* At this point, module loading thread is blocked, we now load the program:
*
* bpf_check
* add_kfunc_call/check_pseudo_btf_id
* btf_try_get_module
* try_get_module_live == false
* return -ENXIO
*
* Without the fix (try_get_module_live in btf_try_get_module):
*
* bpf_check
* add_kfunc_call/check_pseudo_btf_id
* btf_try_get_module
* try_get_module == true
* <store module reference in btf_kfunc_tab or used_btf array>
* ...
* return fd
*
* Now, if we inject an error in the blocked program, our module will be freed
* (going straight from MODULE_STATE_COMING to MODULE_STATE_GOING).
* Later, when bpf program is freed, it will try to module_put already freed
* module. This is why try_get_module_live returns false if mod->state is not
* MODULE_STATE_LIVE.
*/
/* Sleepable fmod_ret program; bpf_testmod's init calls bpf_fentry_test1,
 * so this runs inside mod->init().  It widens the race window by faulting
 * on the uffd-monitored page (blocking until userspace releases it), then
 * injects an error so the module load fails and the module is freed.
 */
SEC("fmod_ret.s/bpf_fentry_test1")
int BPF_PROG(widen_race, int a, int ret)
{
	char dst;

	if (!check_thread_id())
		return 0;
	/* Indicate that we will attempt to block; must be visible to
	 * userspace before the fault below puts us to sleep.
	 */
	bpf_blocking = 1;
	/* Sleepable helper: faults on fault_addr and blocks on the uffd */
	bpf_copy_from_user(&dst, 1, bpf_mod_race_config.fault_addr);
	/* Non-zero fmod_ret value makes mod->init() fail */
	return bpf_mod_race_config.inject_error;
}
/* Runs when do_init_module() returns, i.e. after the blocked fmod_ret
 * program above has resumed and mod->init() has finished; userspace spins
 * on bpf_blocking == 2 to know the module load made forward progress.
 */
SEC("fexit/do_init_module")
int BPF_PROG(fexit_init_module, struct module *mod, int ret)
{
	if (!check_thread_id())
		return 0;
	/* Indicate that we finished blocking */
	bpf_blocking = 2;
	return 0;
}
/* Observe btf_try_get_module()'s result: records whether the verifier was
 * handed a module reference (expected false for a still-COMING module).
 */
SEC("fexit/btf_try_get_module")
int BPF_PROG(fexit_module_get, const struct btf *btf, struct module *mod)
{
	res_try_get_module = (mod != NULL);
	return 0;
}
char _license[] SEC("license") = "GPL";
// SPDX-License-Identifier: GPL-2.0
#include <vmlinux.h>
#include <bpf/bpf_helpers.h>
extern void bpf_testmod_test_mod_kfunc(int i) __ksym;
SEC("tc")
int kfunc_call_fail(struct __sk_buff *ctx)
{
	/* Calls a kfunc exported by bpf_testmod: loading this program must
	 * fail with ENXIO while the module is not yet MODULE_STATE_LIVE.
	 */
	bpf_testmod_test_mod_kfunc(0);
	return 0;
}
char _license[] SEC("license") = "GPL";
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2021 Facebook */
#include <linux/bpf.h>
#include <vmlinux.h>
#include <bpf/bpf_helpers.h>
#include "bpf_tcp_helpers.h"
extern int bpf_kfunc_call_test2(struct sock *sk, __u32 a, __u32 b) __ksym;
extern __u64 bpf_kfunc_call_test1(struct sock *sk, __u32 a, __u64 b,
__u32 c, __u64 d) __ksym;
extern struct prog_test_ref_kfunc *bpf_kfunc_call_test_acquire(unsigned long *sp) __ksym;
extern void bpf_kfunc_call_test_release(struct prog_test_ref_kfunc *p) __ksym;
extern void bpf_kfunc_call_test_pass_ctx(struct __sk_buff *skb) __ksym;
extern void bpf_kfunc_call_test_pass1(struct prog_test_pass1 *p) __ksym;
extern void bpf_kfunc_call_test_pass2(struct prog_test_pass2 *p) __ksym;
extern void bpf_kfunc_call_test_mem_len_pass1(void *mem, int len) __ksym;
extern void bpf_kfunc_call_test_mem_len_fail2(__u64 *mem, int len) __ksym;
SEC("tc")
int kfunc_call_test2(struct __sk_buff *skb)
{
......@@ -44,4 +51,45 @@ int kfunc_call_test1(struct __sk_buff *skb)
return ret;
}
/* Exercise kfunc reference tracking: acquire a prog_test_ref_kfunc object,
 * check its expected field values (a == 42, b == 108), and release it.
 * Returns 0 on success or if acquire fails, -1 on a field mismatch.
 */
SEC("tc")
int kfunc_call_test_ref_btf_id(struct __sk_buff *skb)
{
	struct prog_test_ref_kfunc *ref;
	unsigned long sp = 0;
	int err = 0;

	ref = bpf_kfunc_call_test_acquire(&sp);
	if (!ref)
		return 0;
	if (ref->a != 42 || ref->b != 108)
		err = -1;
	bpf_kfunc_call_test_release(ref);
	return err;
}
/* Positive test: each of these kfunc calls must be accepted by the
 * verifier (PTR_TO_CTX, plain struct pointers, and mem+len pairs of
 * various scalar widths).
 */
SEC("tc")
int kfunc_call_test_pass(struct __sk_buff *skb)
{
	struct prog_test_pass1 pass1 = {};
	struct prog_test_pass2 pass2 = {};
	short v_s16 = 0;
	__u64 v_u64 = 0;
	long v_long = 0;
	char v_char = 0;
	int v_int = 0;

	bpf_kfunc_call_test_pass_ctx(skb);
	bpf_kfunc_call_test_pass1(&pass1);
	bpf_kfunc_call_test_pass2(&pass2);
	bpf_kfunc_call_test_mem_len_pass1(&v_s16, sizeof(v_s16));
	bpf_kfunc_call_test_mem_len_pass1(&v_u64, sizeof(v_u64));
	bpf_kfunc_call_test_mem_len_pass1(&v_long, sizeof(v_long));
	bpf_kfunc_call_test_mem_len_pass1(&v_char, sizeof(v_char));
	bpf_kfunc_call_test_mem_len_pass1(&v_int, sizeof(v_int));
	bpf_kfunc_call_test_mem_len_fail2(&v_u64, -1);
	return 0;
}
char _license[] SEC("license") = "GPL";
// SPDX-License-Identifier: GPL-2.0
#include <vmlinux.h>
#include <bpf/bpf_helpers.h>
extern int bpf_testmod_ksym_percpu __ksym;
SEC("tc")
int ksym_fail(struct __sk_buff *ctx)
{
	/* Dereferences a per-CPU ksym from bpf_testmod: loading this program
	 * must fail while the module is not yet MODULE_STATE_LIVE.
	 */
	return *(int *)bpf_this_cpu_ptr(&bpf_testmod_ksym_percpu);
}
char _license[] SEC("license") = "GPL";
// SPDX-License-Identifier: GPL-2.0
#include <vmlinux.h>
#include <bpf/bpf_helpers.h>
#define EAFNOSUPPORT 97
#define EPROTO 71
#define ENONET 64
#define EINVAL 22
#define ENOENT 2
int test_einval_bpf_tuple = 0;
int test_einval_reserved = 0;
int test_einval_netns_id = 0;
int test_einval_len_opts = 0;
int test_eproto_l4proto = 0;
int test_enonet_netns_id = 0;
int test_enoent_lookup = 0;
int test_eafnosupport = 0;
struct nf_conn *bpf_xdp_ct_lookup(struct xdp_md *, struct bpf_sock_tuple *, u32,
struct bpf_ct_opts *, u32) __ksym;
struct nf_conn *bpf_skb_ct_lookup(struct __sk_buff *, struct bpf_sock_tuple *, u32,
struct bpf_ct_opts *, u32) __ksym;
void bpf_ct_release(struct nf_conn *) __ksym;
/* Drive one CT lookup kfunc (@func is bpf_xdp_ct_lookup or
 * bpf_skb_ct_lookup cast to a common signature) through every error path
 * and record the error code reported via opts_def.error.  Each stanza
 * perturbs exactly one argument, then restores opts_def so the next
 * stanza starts from a valid baseline; statement order matters.
 */
static __always_inline void
nf_ct_test(struct nf_conn *(*func)(void *, struct bpf_sock_tuple *, u32,
				   struct bpf_ct_opts *, u32),
	   void *ctx)
{
	struct bpf_ct_opts opts_def = { .l4proto = IPPROTO_TCP, .netns_id = -1 };
	struct bpf_sock_tuple bpf_tuple;
	struct nf_conn *ct;

	__builtin_memset(&bpf_tuple, 0, sizeof(bpf_tuple.ipv4));
	/* NULL tuple pointer -> EINVAL */
	ct = func(ctx, NULL, 0, &opts_def, sizeof(opts_def));
	if (ct)
		bpf_ct_release(ct);
	else
		test_einval_bpf_tuple = opts_def.error;
	/* Non-zero reserved field -> EINVAL (restored right after the call) */
	opts_def.reserved[0] = 1;
	ct = func(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4), &opts_def, sizeof(opts_def));
	opts_def.reserved[0] = 0;
	opts_def.l4proto = IPPROTO_TCP;
	if (ct)
		bpf_ct_release(ct);
	else
		test_einval_reserved = opts_def.error;
	/* netns_id < -1 -> EINVAL */
	opts_def.netns_id = -2;
	ct = func(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4), &opts_def, sizeof(opts_def));
	opts_def.netns_id = -1;
	if (ct)
		bpf_ct_release(ct);
	else
		test_einval_netns_id = opts_def.error;
	/* Wrong opts length -> EINVAL */
	ct = func(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4), &opts_def, sizeof(opts_def) - 1);
	if (ct)
		bpf_ct_release(ct);
	else
		test_einval_len_opts = opts_def.error;
	/* Unsupported l4proto (not TCP/UDP) -> EPROTO */
	opts_def.l4proto = IPPROTO_ICMP;
	ct = func(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4), &opts_def, sizeof(opts_def));
	opts_def.l4proto = IPPROTO_TCP;
	if (ct)
		bpf_ct_release(ct);
	else
		test_eproto_l4proto = opts_def.error;
	/* Well-formed but nonexistent netns id -> ENONET */
	opts_def.netns_id = 0xf00f;
	ct = func(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4), &opts_def, sizeof(opts_def));
	opts_def.netns_id = -1;
	if (ct)
		bpf_ct_release(ct);
	else
		test_enonet_netns_id = opts_def.error;
	/* Valid arguments but no matching conntrack entry -> ENOENT */
	ct = func(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4), &opts_def, sizeof(opts_def));
	if (ct)
		bpf_ct_release(ct);
	else
		test_enoent_lookup = opts_def.error;
	/* Tuple length matching neither IPv4 nor IPv6 -> EAFNOSUPPORT */
	ct = func(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4) - 1, &opts_def, sizeof(opts_def));
	if (ct)
		bpf_ct_release(ct);
	else
		test_eafnosupport = opts_def.error;
}
/* XDP entry point: run the shared CT test against bpf_xdp_ct_lookup. */
SEC("xdp")
int nf_xdp_ct_test(struct xdp_md *ctx)
{
	nf_ct_test((void *)bpf_xdp_ct_lookup, ctx);
	return 0;
}
/* TC entry point: run the shared CT test against bpf_skb_ct_lookup. */
SEC("tc")
int nf_skb_ct_test(struct __sk_buff *ctx)
{
	nf_ct_test((void *)bpf_skb_ct_lookup, ctx);
	return 0;
}
char _license[] SEC("license") = "GPL";
......@@ -31,6 +31,7 @@
#include <linux/if_ether.h>
#include <linux/btf.h>
#include <bpf/btf.h>
#include <bpf/bpf.h>
#include <bpf/libbpf.h>
......@@ -66,6 +67,11 @@ static bool unpriv_disabled = false;
static int skips;
static bool verbose = false;
struct kfunc_btf_id_pair {
const char *kfunc;
int insn_idx;
};
struct bpf_test {
const char *descr;
struct bpf_insn insns[MAX_INSNS];
......@@ -92,6 +98,7 @@ struct bpf_test {
int fixup_map_reuseport_array[MAX_FIXUPS];
int fixup_map_ringbuf[MAX_FIXUPS];
int fixup_map_timer[MAX_FIXUPS];
struct kfunc_btf_id_pair fixup_kfunc_btf_id[MAX_FIXUPS];
/* Expected verifier log output for result REJECT or VERBOSE_ACCEPT.
* Can be a tab-separated sequence of expected strings. An empty string
* means no log verification.
......@@ -744,6 +751,7 @@ static void do_test_fixup(struct bpf_test *test, enum bpf_prog_type prog_type,
int *fixup_map_reuseport_array = test->fixup_map_reuseport_array;
int *fixup_map_ringbuf = test->fixup_map_ringbuf;
int *fixup_map_timer = test->fixup_map_timer;
struct kfunc_btf_id_pair *fixup_kfunc_btf_id = test->fixup_kfunc_btf_id;
if (test->fill_helper) {
test->fill_insns = calloc(MAX_TEST_INSNS, sizeof(struct bpf_insn));
......@@ -936,6 +944,26 @@ static void do_test_fixup(struct bpf_test *test, enum bpf_prog_type prog_type,
fixup_map_timer++;
} while (*fixup_map_timer);
}
/* Patch in kfunc BTF IDs */
if (fixup_kfunc_btf_id->kfunc) {
struct btf *btf;
int btf_id;
do {
btf_id = 0;
btf = btf__load_vmlinux_btf();
if (btf) {
btf_id = btf__find_by_name_kind(btf,
fixup_kfunc_btf_id->kfunc,
BTF_KIND_FUNC);
btf_id = btf_id < 0 ? 0 : btf_id;
}
btf__free(btf);
prog[fixup_kfunc_btf_id->insn_idx].imm = btf_id;
fixup_kfunc_btf_id++;
} while (fixup_kfunc_btf_id->kfunc);
}
}
struct libcap {
......
......@@ -21,6 +21,81 @@
.prog_type = BPF_PROG_TYPE_TRACEPOINT,
.result = ACCEPT,
},
{
"calls: invalid kfunc call: ptr_to_mem to struct with non-scalar",
.insns = {
BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_KFUNC_CALL, 0, 0),
BPF_EXIT_INSN(),
},
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
.result = REJECT,
.errstr = "arg#0 pointer type STRUCT prog_test_fail1 must point to scalar",
.fixup_kfunc_btf_id = {
{ "bpf_kfunc_call_test_fail1", 2 },
},
},
{
"calls: invalid kfunc call: ptr_to_mem to struct with nesting depth > 4",
.insns = {
BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_KFUNC_CALL, 0, 0),
BPF_EXIT_INSN(),
},
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
.result = REJECT,
.errstr = "max struct nesting depth exceeded\narg#0 pointer type STRUCT prog_test_fail2",
.fixup_kfunc_btf_id = {
{ "bpf_kfunc_call_test_fail2", 2 },
},
},
{
"calls: invalid kfunc call: ptr_to_mem to struct with FAM",
.insns = {
BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_KFUNC_CALL, 0, 0),
BPF_EXIT_INSN(),
},
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
.result = REJECT,
.errstr = "arg#0 pointer type STRUCT prog_test_fail3 must point to scalar",
.fixup_kfunc_btf_id = {
{ "bpf_kfunc_call_test_fail3", 2 },
},
},
{
"calls: invalid kfunc call: reg->type != PTR_TO_CTX",
.insns = {
BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_KFUNC_CALL, 0, 0),
BPF_EXIT_INSN(),
},
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
.result = REJECT,
.errstr = "arg#0 expected pointer to ctx, but got PTR",
.fixup_kfunc_btf_id = {
{ "bpf_kfunc_call_test_pass_ctx", 2 },
},
},
{
"calls: invalid kfunc call: void * not allowed in func proto without mem size arg",
.insns = {
BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_KFUNC_CALL, 0, 0),
BPF_EXIT_INSN(),
},
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
.result = REJECT,
.errstr = "arg#0 pointer type UNKNOWN must point to scalar",
.fixup_kfunc_btf_id = {
{ "bpf_kfunc_call_test_mem_len_fail1", 2 },
},
},
{
"calls: basic sanity",
.insns = {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment