Commit 2d863b14 authored by Alexei Starovoitov

Merge branch 'Introduce bpf_ct_set_nat_info kfunc helper'

Lorenzo Bianconi says:

====================

Introduce the bpf_ct_set_nat_info kfunc helper in order to set source and
destination NAT addresses/ports in a newly allocated ct entry that has not
yet been inserted into the connection tracking table.
Introduce support for per-parameter trusted args.

Changes since v2:
- use int instead of a pointer for port in bpf_ct_set_nat_info signature
- modify the KF_TRUSTED_ARGS definition in order to apply the referenced
  pointer constraint only to PTR_TO_BTF_ID
- drop patch 2/4

Changes since v1:
- enable CONFIG_NF_NAT in tools/testing/selftests/bpf/config

Kumar Kartikeya Dwivedi (1):
  bpf: Tweak definition of KF_TRUSTED_ARGS
====================
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
parents 1d8b82c6 b06b45e8
...@@ -137,14 +137,22 @@ KF_ACQUIRE and KF_RET_NULL flags. ...@@ -137,14 +137,22 @@ KF_ACQUIRE and KF_RET_NULL flags.
-------------------------- --------------------------
The KF_TRUSTED_ARGS flag is used for kfuncs taking pointer arguments. It The KF_TRUSTED_ARGS flag is used for kfuncs taking pointer arguments. It
indicates that all pointer arguments will always be refcounted, and have indicates that all pointer arguments will always have a guaranteed lifetime,
their offset set to 0. It can be used to enforce that a pointer to a refcounted and pointers to kernel objects are always passed to helpers in their unmodified
object acquired from a kfunc or BPF helper is passed as an argument to this form (as obtained from acquire kfuncs).
kfunc without any modifications (e.g. pointer arithmetic) such that it is
trusted and points to the original object. This flag is often used for kfuncs It can be used to enforce that a pointer to a refcounted object acquired from a
that operate (change some property, perform some operation) on an object that kfunc or BPF helper is passed as an argument to this kfunc without any
was obtained using an acquire kfunc. Such kfuncs need an unchanged pointer to modifications (e.g. pointer arithmetic) such that it is trusted and points to
ensure the integrity of the operation being performed on the expected object. the original object.
Meanwhile, it is also allowed to pass pointers to normal memory to such kfuncs,
but those can have a non-zero offset.
This flag is often used for kfuncs that operate (change some property, perform
some operation) on an object that was obtained using an acquire kfunc. Such
kfuncs need an unchanged pointer to ensure the integrity of the operation being
performed on the expected object.
2.4.6 KF_SLEEPABLE flag 2.4.6 KF_SLEEPABLE flag
----------------------- -----------------------
......
...@@ -6227,7 +6227,7 @@ static int btf_check_func_arg_match(struct bpf_verifier_env *env, ...@@ -6227,7 +6227,7 @@ static int btf_check_func_arg_match(struct bpf_verifier_env *env,
bool processing_call) bool processing_call)
{ {
enum bpf_prog_type prog_type = resolve_prog_type(env->prog); enum bpf_prog_type prog_type = resolve_prog_type(env->prog);
bool rel = false, kptr_get = false, trusted_arg = false; bool rel = false, kptr_get = false, trusted_args = false;
bool sleepable = false; bool sleepable = false;
struct bpf_verifier_log *log = &env->log; struct bpf_verifier_log *log = &env->log;
u32 i, nargs, ref_id, ref_obj_id = 0; u32 i, nargs, ref_id, ref_obj_id = 0;
...@@ -6265,7 +6265,7 @@ static int btf_check_func_arg_match(struct bpf_verifier_env *env, ...@@ -6265,7 +6265,7 @@ static int btf_check_func_arg_match(struct bpf_verifier_env *env,
/* Only kfunc can be release func */ /* Only kfunc can be release func */
rel = kfunc_meta->flags & KF_RELEASE; rel = kfunc_meta->flags & KF_RELEASE;
kptr_get = kfunc_meta->flags & KF_KPTR_GET; kptr_get = kfunc_meta->flags & KF_KPTR_GET;
trusted_arg = kfunc_meta->flags & KF_TRUSTED_ARGS; trusted_args = kfunc_meta->flags & KF_TRUSTED_ARGS;
sleepable = kfunc_meta->flags & KF_SLEEPABLE; sleepable = kfunc_meta->flags & KF_SLEEPABLE;
} }
...@@ -6276,6 +6276,7 @@ static int btf_check_func_arg_match(struct bpf_verifier_env *env, ...@@ -6276,6 +6276,7 @@ static int btf_check_func_arg_match(struct bpf_verifier_env *env,
enum bpf_arg_type arg_type = ARG_DONTCARE; enum bpf_arg_type arg_type = ARG_DONTCARE;
u32 regno = i + 1; u32 regno = i + 1;
struct bpf_reg_state *reg = &regs[regno]; struct bpf_reg_state *reg = &regs[regno];
bool obj_ptr = false;
t = btf_type_skip_modifiers(btf, args[i].type, NULL); t = btf_type_skip_modifiers(btf, args[i].type, NULL);
if (btf_type_is_scalar(t)) { if (btf_type_is_scalar(t)) {
...@@ -6323,10 +6324,17 @@ static int btf_check_func_arg_match(struct bpf_verifier_env *env, ...@@ -6323,10 +6324,17 @@ static int btf_check_func_arg_match(struct bpf_verifier_env *env,
return -EINVAL; return -EINVAL;
} }
/* These register types have special constraints wrt ref_obj_id
* and offset checks. The rest of trusted args don't.
*/
obj_ptr = reg->type == PTR_TO_CTX || reg->type == PTR_TO_BTF_ID ||
reg2btf_ids[base_type(reg->type)];
/* Check if argument must be a referenced pointer, args + i has /* Check if argument must be a referenced pointer, args + i has
* been verified to be a pointer (after skipping modifiers). * been verified to be a pointer (after skipping modifiers).
* PTR_TO_CTX is ok without having non-zero ref_obj_id.
*/ */
if (is_kfunc && trusted_arg && !reg->ref_obj_id) { if (is_kfunc && trusted_args && (obj_ptr && reg->type != PTR_TO_CTX) && !reg->ref_obj_id) {
bpf_log(log, "R%d must be referenced\n", regno); bpf_log(log, "R%d must be referenced\n", regno);
return -EINVAL; return -EINVAL;
} }
...@@ -6335,7 +6343,7 @@ static int btf_check_func_arg_match(struct bpf_verifier_env *env, ...@@ -6335,7 +6343,7 @@ static int btf_check_func_arg_match(struct bpf_verifier_env *env,
ref_tname = btf_name_by_offset(btf, ref_t->name_off); ref_tname = btf_name_by_offset(btf, ref_t->name_off);
/* Trusted args have the same offset checks as release arguments */ /* Trusted args have the same offset checks as release arguments */
if (trusted_arg || (rel && reg->ref_obj_id)) if ((trusted_args && obj_ptr) || (rel && reg->ref_obj_id))
arg_type |= OBJ_RELEASE; arg_type |= OBJ_RELEASE;
ret = check_func_arg_reg_off(env, reg, regno, arg_type); ret = check_func_arg_reg_off(env, reg, regno, arg_type);
if (ret < 0) if (ret < 0)
...@@ -6435,7 +6443,7 @@ static int btf_check_func_arg_match(struct bpf_verifier_env *env, ...@@ -6435,7 +6443,7 @@ static int btf_check_func_arg_match(struct bpf_verifier_env *env,
reg_ref_t->name_off); reg_ref_t->name_off);
if (!btf_struct_ids_match(log, reg_btf, reg_ref_id, if (!btf_struct_ids_match(log, reg_btf, reg_ref_id,
reg->off, btf, ref_id, reg->off, btf, ref_id,
trusted_arg || (rel && reg->ref_obj_id))) { trusted_args || (rel && reg->ref_obj_id))) {
bpf_log(log, "kernel function %s args#%d expected pointer to %s %s but R%d has a pointer to %s %s\n", bpf_log(log, "kernel function %s args#%d expected pointer to %s %s but R%d has a pointer to %s %s\n",
func_name, i, func_name, i,
btf_type_str(ref_t), ref_tname, btf_type_str(ref_t), ref_tname,
......
...@@ -17,6 +17,7 @@ ...@@ -17,6 +17,7 @@
#include <net/netfilter/nf_conntrack.h> #include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_bpf.h> #include <net/netfilter/nf_conntrack_bpf.h>
#include <net/netfilter/nf_conntrack_core.h> #include <net/netfilter/nf_conntrack_core.h>
#include <net/netfilter/nf_nat.h>
/* bpf_ct_opts - Options for CT lookup helpers /* bpf_ct_opts - Options for CT lookup helpers
* *
...@@ -137,7 +138,6 @@ __bpf_nf_ct_alloc_entry(struct net *net, struct bpf_sock_tuple *bpf_tuple, ...@@ -137,7 +138,6 @@ __bpf_nf_ct_alloc_entry(struct net *net, struct bpf_sock_tuple *bpf_tuple,
memset(&ct->proto, 0, sizeof(ct->proto)); memset(&ct->proto, 0, sizeof(ct->proto));
__nf_ct_set_timeout(ct, timeout * HZ); __nf_ct_set_timeout(ct, timeout * HZ);
ct->status |= IPS_CONFIRMED;
out: out:
if (opts->netns_id >= 0) if (opts->netns_id >= 0)
...@@ -390,6 +390,7 @@ struct nf_conn *bpf_ct_insert_entry(struct nf_conn___init *nfct_i) ...@@ -390,6 +390,7 @@ struct nf_conn *bpf_ct_insert_entry(struct nf_conn___init *nfct_i)
struct nf_conn *nfct = (struct nf_conn *)nfct_i; struct nf_conn *nfct = (struct nf_conn *)nfct_i;
int err; int err;
nfct->status |= IPS_CONFIRMED;
err = nf_conntrack_hash_check_insert(nfct); err = nf_conntrack_hash_check_insert(nfct);
if (err < 0) { if (err < 0) {
nf_conntrack_free(nfct); nf_conntrack_free(nfct);
...@@ -475,6 +476,49 @@ int bpf_ct_change_status(struct nf_conn *nfct, u32 status) ...@@ -475,6 +476,49 @@ int bpf_ct_change_status(struct nf_conn *nfct, u32 status)
return nf_ct_change_status_common(nfct, status); return nf_ct_change_status_common(nfct, status);
} }
/* bpf_ct_set_nat_info - Set source or destination NAT address and port
*
* Set the source or destination NAT address (and, optionally, port) of a
* newly allocated nf_conn before insertion. This must be invoked for a
* referenced PTR_TO_BTF_ID to nf_conn___init.
*
* Parameters:
* @nfct - Pointer to referenced nf_conn object, obtained using
* bpf_xdp_ct_alloc or bpf_skb_ct_alloc.
* @addr - NAT source/destination address. Used as both the minimum
* and the maximum of the NAT range, i.e. a single address.
* @port - NAT source/destination port, in host byte order (it is
* converted with cpu_to_be16). Non-positive values leave the
* port unspecified, which is interpreted as "select a random
* port".
* @manip - NF_NAT_MANIP_SRC or NF_NAT_MANIP_DST
*
* Return:
* 0 on success.
* -EINVAL if the L3 protocol of the entry is neither NFPROTO_IPV4 nor
* NFPROTO_IPV6.
* -ENOMEM if nf_nat_setup_info() returns NF_DROP.
* -EOPNOTSUPP if the build configuration does not satisfy the
* CONFIG_NF_NAT / CONFIG_NF_CONNTRACK condition checked below.
*/
int bpf_ct_set_nat_info(struct nf_conn___init *nfct,
union nf_inet_addr *addr, int port,
enum nf_nat_manip_type manip)
{
/* The NAT path is compiled in only when nf_nat is built in, or when
* both nf_nat and nf_conntrack are built as modules.
*/
#if ((IS_MODULE(CONFIG_NF_NAT) && IS_MODULE(CONFIG_NF_CONNTRACK)) || \
IS_BUILTIN(CONFIG_NF_NAT))
struct nf_conn *ct = (struct nf_conn *)nfct;
u16 proto = nf_ct_l3num(ct);
struct nf_nat_range2 range;
/* Only IPv4 and IPv6 entries are accepted. */
if (proto != NFPROTO_IPV4 && proto != NFPROTO_IPV6)
return -EINVAL;
/* Start from an all-zero range, then map to exactly one address. */
memset(&range, 0, sizeof(struct nf_nat_range2));
range.flags = NF_NAT_RANGE_MAP_IPS;
range.min_addr = *addr;
range.max_addr = range.min_addr;
/* A positive port pins the range to that single port; otherwise the
* NF_NAT_RANGE_PROTO_SPECIFIED flag is left unset.
*/
if (port > 0) {
range.flags |= NF_NAT_RANGE_PROTO_SPECIFIED;
range.min_proto.all = cpu_to_be16(port);
range.max_proto.all = range.min_proto.all;
}
/* NF_DROP from nf_nat_setup_info() is reported as -ENOMEM; any other
* verdict is treated as success.
*/
return nf_nat_setup_info(ct, &range, manip) == NF_DROP ? -ENOMEM : 0;
#else
return -EOPNOTSUPP;
#endif
}
__diag_pop() __diag_pop()
BTF_SET8_START(nf_ct_kfunc_set) BTF_SET8_START(nf_ct_kfunc_set)
...@@ -488,6 +532,7 @@ BTF_ID_FLAGS(func, bpf_ct_set_timeout, KF_TRUSTED_ARGS) ...@@ -488,6 +532,7 @@ BTF_ID_FLAGS(func, bpf_ct_set_timeout, KF_TRUSTED_ARGS)
BTF_ID_FLAGS(func, bpf_ct_change_timeout, KF_TRUSTED_ARGS) BTF_ID_FLAGS(func, bpf_ct_change_timeout, KF_TRUSTED_ARGS)
BTF_ID_FLAGS(func, bpf_ct_set_status, KF_TRUSTED_ARGS) BTF_ID_FLAGS(func, bpf_ct_set_status, KF_TRUSTED_ARGS)
BTF_ID_FLAGS(func, bpf_ct_change_status, KF_TRUSTED_ARGS) BTF_ID_FLAGS(func, bpf_ct_change_status, KF_TRUSTED_ARGS)
BTF_ID_FLAGS(func, bpf_ct_set_nat_info, KF_TRUSTED_ARGS)
BTF_SET8_END(nf_ct_kfunc_set) BTF_SET8_END(nf_ct_kfunc_set)
static const struct btf_kfunc_id_set nf_conntrack_kfunc_set = { static const struct btf_kfunc_id_set nf_conntrack_kfunc_set = {
......
...@@ -63,6 +63,7 @@ CONFIG_NF_CONNTRACK=y ...@@ -63,6 +63,7 @@ CONFIG_NF_CONNTRACK=y
CONFIG_NF_CONNTRACK_MARK=y CONFIG_NF_CONNTRACK_MARK=y
CONFIG_NF_DEFRAG_IPV4=y CONFIG_NF_DEFRAG_IPV4=y
CONFIG_NF_DEFRAG_IPV6=y CONFIG_NF_DEFRAG_IPV6=y
CONFIG_NF_NAT=y
CONFIG_RC_CORE=y CONFIG_RC_CORE=y
CONFIG_SECURITY=y CONFIG_SECURITY=y
CONFIG_SECURITYFS=y CONFIG_SECURITYFS=y
......
...@@ -27,6 +27,9 @@ enum { ...@@ -27,6 +27,9 @@ enum {
}; };
#define TIMEOUT_MS 3000 #define TIMEOUT_MS 3000
#define IPS_STATUS_MASK (IPS_CONFIRMED | IPS_SEEN_REPLY | \
IPS_SRC_NAT_DONE | IPS_DST_NAT_DONE | \
IPS_SRC_NAT | IPS_DST_NAT)
static int connect_to_server(int srv_fd) static int connect_to_server(int srv_fd)
{ {
...@@ -114,10 +117,11 @@ static void test_bpf_nf_ct(int mode) ...@@ -114,10 +117,11 @@ static void test_bpf_nf_ct(int mode)
ASSERT_GT(skel->bss->test_delta_timeout, 8, "Test for min ct timeout update"); ASSERT_GT(skel->bss->test_delta_timeout, 8, "Test for min ct timeout update");
ASSERT_LE(skel->bss->test_delta_timeout, 10, "Test for max ct timeout update"); ASSERT_LE(skel->bss->test_delta_timeout, 10, "Test for max ct timeout update");
ASSERT_EQ(skel->bss->test_insert_lookup_mark, 77, "Test for insert and lookup mark value"); ASSERT_EQ(skel->bss->test_insert_lookup_mark, 77, "Test for insert and lookup mark value");
ASSERT_EQ(skel->bss->test_status, IPS_CONFIRMED | IPS_SEEN_REPLY, ASSERT_EQ(skel->bss->test_status, IPS_STATUS_MASK, "Test for ct status update ");
"Test for ct status update ");
ASSERT_EQ(skel->data->test_exist_lookup, 0, "Test existing connection lookup"); ASSERT_EQ(skel->data->test_exist_lookup, 0, "Test existing connection lookup");
ASSERT_EQ(skel->bss->test_exist_lookup_mark, 43, "Test existing connection lookup ctmark"); ASSERT_EQ(skel->bss->test_exist_lookup_mark, 43, "Test existing connection lookup ctmark");
ASSERT_EQ(skel->data->test_snat_addr, 0, "Test for source natting");
ASSERT_EQ(skel->data->test_dnat_addr, 0, "Test for destination natting");
end: end:
if (srv_client_fd != -1) if (srv_client_fd != -1)
close(srv_client_fd); close(srv_client_fd);
......
// SPDX-License-Identifier: GPL-2.0 // SPDX-License-Identifier: GPL-2.0
#include <vmlinux.h> #include <vmlinux.h>
#include <bpf/bpf_helpers.h> #include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>
#define EAFNOSUPPORT 97 #define EAFNOSUPPORT 97
#define EPROTO 71 #define EPROTO 71
...@@ -24,6 +25,8 @@ int test_succ_lookup = -ENOENT; ...@@ -24,6 +25,8 @@ int test_succ_lookup = -ENOENT;
u32 test_delta_timeout = 0; u32 test_delta_timeout = 0;
u32 test_status = 0; u32 test_status = 0;
u32 test_insert_lookup_mark = 0; u32 test_insert_lookup_mark = 0;
int test_snat_addr = -EINVAL;
int test_dnat_addr = -EINVAL;
__be32 saddr = 0; __be32 saddr = 0;
__be16 sport = 0; __be16 sport = 0;
__be32 daddr = 0; __be32 daddr = 0;
...@@ -54,6 +57,8 @@ void bpf_ct_set_timeout(struct nf_conn *, u32) __ksym; ...@@ -54,6 +57,8 @@ void bpf_ct_set_timeout(struct nf_conn *, u32) __ksym;
int bpf_ct_change_timeout(struct nf_conn *, u32) __ksym; int bpf_ct_change_timeout(struct nf_conn *, u32) __ksym;
int bpf_ct_set_status(struct nf_conn *, u32) __ksym; int bpf_ct_set_status(struct nf_conn *, u32) __ksym;
int bpf_ct_change_status(struct nf_conn *, u32) __ksym; int bpf_ct_change_status(struct nf_conn *, u32) __ksym;
int bpf_ct_set_nat_info(struct nf_conn *, union nf_inet_addr *,
int port, enum nf_nat_manip_type) __ksym;
static __always_inline void static __always_inline void
nf_ct_test(struct nf_conn *(*lookup_fn)(void *, struct bpf_sock_tuple *, u32, nf_ct_test(struct nf_conn *(*lookup_fn)(void *, struct bpf_sock_tuple *, u32,
...@@ -141,11 +146,22 @@ nf_ct_test(struct nf_conn *(*lookup_fn)(void *, struct bpf_sock_tuple *, u32, ...@@ -141,11 +146,22 @@ nf_ct_test(struct nf_conn *(*lookup_fn)(void *, struct bpf_sock_tuple *, u32,
ct = alloc_fn(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4), &opts_def, ct = alloc_fn(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4), &opts_def,
sizeof(opts_def)); sizeof(opts_def));
if (ct) { if (ct) {
__u16 sport = bpf_get_prandom_u32();
__u16 dport = bpf_get_prandom_u32();
union nf_inet_addr saddr = {};
union nf_inet_addr daddr = {};
struct nf_conn *ct_ins; struct nf_conn *ct_ins;
bpf_ct_set_timeout(ct, 10000); bpf_ct_set_timeout(ct, 10000);
ct->mark = 77; ct->mark = 77;
/* snat */
saddr.ip = bpf_get_prandom_u32();
bpf_ct_set_nat_info(ct, &saddr, sport, NF_NAT_MANIP_SRC);
/* dnat */
daddr.ip = bpf_get_prandom_u32();
bpf_ct_set_nat_info(ct, &daddr, dport, NF_NAT_MANIP_DST);
ct_ins = bpf_ct_insert_entry(ct); ct_ins = bpf_ct_insert_entry(ct);
if (ct_ins) { if (ct_ins) {
struct nf_conn *ct_lk; struct nf_conn *ct_lk;
...@@ -153,6 +169,17 @@ nf_ct_test(struct nf_conn *(*lookup_fn)(void *, struct bpf_sock_tuple *, u32, ...@@ -153,6 +169,17 @@ nf_ct_test(struct nf_conn *(*lookup_fn)(void *, struct bpf_sock_tuple *, u32,
ct_lk = lookup_fn(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4), ct_lk = lookup_fn(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4),
&opts_def, sizeof(opts_def)); &opts_def, sizeof(opts_def));
if (ct_lk) { if (ct_lk) {
struct nf_conntrack_tuple *tuple;
/* check snat and dnat addresses */
tuple = &ct_lk->tuplehash[IP_CT_DIR_REPLY].tuple;
if (tuple->dst.u3.ip == saddr.ip &&
tuple->dst.u.all == bpf_htons(sport))
test_snat_addr = 0;
if (tuple->src.u3.ip == daddr.ip &&
tuple->src.u.all == bpf_htons(dport))
test_dnat_addr = 0;
/* update ct entry timeout */ /* update ct entry timeout */
bpf_ct_change_timeout(ct_lk, 10000); bpf_ct_change_timeout(ct_lk, 10000);
test_delta_timeout = ct_lk->timeout - bpf_jiffies64(); test_delta_timeout = ct_lk->timeout - bpf_jiffies64();
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment