Commit c4b5c5ba authored by Alexei Starovoitov

Merge branch 'Add skb + xdp dynptrs'

Joanne Koong says:

====================

This patchset is the 2nd in the dynptr series. The 1st can be found here [0].

This patchset adds skb and xdp type dynptrs, which have two main benefits for
packet parsing:
    * allowing operations on sizes that are not statically known at
      compile-time (e.g. variable-sized accesses).
    * more ergonomic and less brittle iteration through data (e.g. no need
      for manual 'if' checks that accesses stay within bounds of data_end)

When comparing runtime for packet parsing with and without dynptrs, there is
no noticeable difference. Patch 9 contains more details as well as examples
of how to use skb and xdp dynptrs; a minimal sketch is also shown below.
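
As a rough illustration (a minimal sketch, not taken verbatim from the
patches; see patch 9 and the selftests for the real examples), reading an
ethernet header through an skb dynptr looks like:

    #include <linux/bpf.h>
    #include <linux/if_ether.h>
    #include <linux/pkt_cls.h>
    #include <bpf/bpf_helpers.h>
    #include <bpf/bpf_endian.h>
    #include "bpf_kfuncs.h" /* declares bpf_dynptr_from_skb / bpf_dynptr_slice */

    SEC("tc")
    int parse_eth(struct __sk_buff *skb)
    {
        struct bpf_dynptr ptr;
        struct ethhdr *eth;
        char buffer[sizeof(*eth)] = {};

        if (bpf_dynptr_from_skb(skb, 0, &ptr))
            return TC_ACT_SHOT;

        /* no manual data/data_end bounds checks needed; bpf_dynptr_slice
         * falls back to the stack buffer if the data is non-linear
         */
        eth = bpf_dynptr_slice(&ptr, 0, buffer, sizeof(buffer));
        if (!eth)
            return TC_ACT_SHOT;

        return eth->h_proto == bpf_htons(ETH_P_IP) ? TC_ACT_OK : TC_ACT_SHOT;
    }

    char _license[] SEC("license") = "GPL";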

[0] https://lore.kernel.org/bpf/20220523210712.3641569-1-joannelkoong@gmail.com/
---
Changelog:

v12 = https://lore.kernel.org/bpf/20230226085120.3907863-1-joannelkoong@gmail.com/
v12 -> v13:
    * Fix missing { } for case statement

v11 = https://lore.kernel.org/bpf/20230222060747.2562549-1-joannelkoong@gmail.com/
v11 -> v12:
    * Change constant mem size checking to use "__szk" kfunc annotation
      for slices
    * Use autoloading for success selftests

v10 = https://lore.kernel.org/bpf/20230216225524.1192789-1-joannelkoong@gmail.com/
v10 -> v11:
    * Reject bpf_dynptr_slice_rdwr() for non-writable progs at load time
      instead of runtime
    * Add additional patch (__uninit kfunc annotation)
    * Expand on documentation
    * Add bpf_dynptr_write() calls for persisting writes in tests

v9 = https://lore.kernel.org/bpf/20230127191703.3864860-1-joannelkoong@gmail.com/
v9 -> v10:
    * Add bpf_dynptr_slice and bpf_dynptr_slice_rdwr interface
    * Add some more tests
    * Split up patchset into more parts to make it easier to review

v8 = https://lore.kernel.org/bpf/20230126233439.3739120-1-joannelkoong@gmail.com/
v8 -> v9:
    * Fix dynptr_get_type() to check non-stack dynptrs

v7 = https://lore.kernel.org/bpf/20221021011510.1890852-1-joannelkoong@gmail.com/
v7 -> v8:
    * Change helpers to kfuncs
    * Add 2 new patches (1/5 and 2/5)

v6 = https://lore.kernel.org/bpf/20220907183129.745846-1-joannelkoong@gmail.com/
v6 -> v7:
    * Change bpf_dynptr_data() to return read-only data slices if the skb prog
      is read-only (Martin)
    * Add test "skb_invalid_write" to test that writes to rd-only data slices
      are rejected

v5 = https://lore.kernel.org/bpf/20220831183224.3754305-1-joannelkoong@gmail.com/
v5 -> v6:
    * Address kernel test robot errors by static inlining

v4 = https://lore.kernel.org/bpf/20220822235649.2218031-1-joannelkoong@gmail.com/
v4 -> v5:
    * Address kernel test robot errors for configs w/out CONFIG_NET set
    * For data slices, return PTR_TO_MEM instead of PTR_TO_PACKET (Kumar)
    * Split selftests into subtests (Andrii)
    * Remove insn patching. Use rdonly and rdwr protos for dynptr skb
      construction (Andrii)
    * bpf_dynptr_data() returns NULL for rd-only dynptrs. There will be a
      separate bpf_dynptr_data_rdonly() added later (Andrii and Kumar)

v3 = https://lore.kernel.org/bpf/20220822193442.657638-1-joannelkoong@gmail.com/
v3 -> v4:
    * Forgot to commit --amend the kernel test robot error fixups

v2 = https://lore.kernel.org/bpf/20220811230501.2632393-1-joannelkoong@gmail.com/
v2 -> v3:
    * Fix kernel test robot build test errors

v1 = https://lore.kernel.org/bpf/20220726184706.954822-1-joannelkoong@gmail.com/
v1 -> v2:
    * Return data slices to rd-only skb dynptrs (Martin)
    * bpf_dynptr_write allows writes to frags for skb dynptrs, but always
      invalidates associated data slices (Martin)
    * Use switch casing instead of ifs (Andrii)
    * Use 0xFD for experimental kind number in the selftest (Zvi)
    * Put selftest conversions w/ dynptrs into new files (Alexei)
    * Add new selftest "test_cls_redirect_dynptr.c"
====================
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
parents ae256f95 cfa7b011
......@@ -100,6 +100,23 @@ Hence, whenever a constant scalar argument is accepted by a kfunc which is not a
size parameter, and the value of the constant matters for program safety, __k
suffix should be used.
2.2.2 __uninit Annotation
-------------------------
This annotation is used to indicate that the argument will be treated as
uninitialized.
An example is given below::
__bpf_kfunc int bpf_dynptr_from_skb(..., struct bpf_dynptr_kern *ptr__uninit)
{
...
}
Here, the dynptr will be treated as an uninitialized dynptr. Without this
annotation, the verifier will reject the program if the dynptr passed in is
not initialized.
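From the BPF program's perspective, the dynptr argument is simply passed in
without prior initialization. A minimal caller might look like this (an
illustrative sketch, not an excerpt from the kernel sources)::

    struct bpf_dynptr ptr;

    /* ptr does not need to be initialized beforehand; on success,
     * bpf_dynptr_from_skb() initializes it
     */
    bpf_dynptr_from_skb(skb, 0, &ptr);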
.. _BPF_kfunc_nodef:
2.3 Using an existing kernel function
......
......@@ -607,11 +607,18 @@ enum bpf_type_flag {
*/
NON_OWN_REF = BIT(14 + BPF_BASE_TYPE_BITS),
/* DYNPTR points to sk_buff */
DYNPTR_TYPE_SKB = BIT(15 + BPF_BASE_TYPE_BITS),
/* DYNPTR points to xdp_buff */
DYNPTR_TYPE_XDP = BIT(16 + BPF_BASE_TYPE_BITS),
__BPF_TYPE_FLAG_MAX,
__BPF_TYPE_LAST_FLAG = __BPF_TYPE_FLAG_MAX - 1,
};
#define DYNPTR_TYPE_FLAG_MASK (DYNPTR_TYPE_LOCAL | DYNPTR_TYPE_RINGBUF)
#define DYNPTR_TYPE_FLAG_MASK (DYNPTR_TYPE_LOCAL | DYNPTR_TYPE_RINGBUF | DYNPTR_TYPE_SKB \
| DYNPTR_TYPE_XDP)
/* Max number of base types. */
#define BPF_BASE_TYPE_LIMIT (1UL << BPF_BASE_TYPE_BITS)
......@@ -1124,6 +1131,37 @@ static __always_inline __nocfi unsigned int bpf_dispatcher_nop_func(
return bpf_func(ctx, insnsi);
}
/* the implementation of the opaque uapi struct bpf_dynptr */
struct bpf_dynptr_kern {
void *data;
/* Size represents the number of usable bytes of dynptr data.
* If for example the offset is at 4 for a local dynptr whose data is
* of type u64, the number of usable bytes is 4.
*
* The upper 8 bits are reserved. It is as follows:
* Bits 0 - 23 = size
* Bits 24 - 30 = dynptr type
* Bit 31 = whether dynptr is read-only
*/
u32 size;
u32 offset;
} __aligned(8);
enum bpf_dynptr_type {
BPF_DYNPTR_TYPE_INVALID,
/* Points to memory that is local to the bpf program */
BPF_DYNPTR_TYPE_LOCAL,
/* Underlying data is a ringbuf record */
BPF_DYNPTR_TYPE_RINGBUF,
/* Underlying data is a sk_buff */
BPF_DYNPTR_TYPE_SKB,
/* Underlying data is a xdp_buff */
BPF_DYNPTR_TYPE_XDP,
};
int bpf_dynptr_check_size(u32 size);
u32 bpf_dynptr_get_size(const struct bpf_dynptr_kern *ptr);
#ifdef CONFIG_BPF_JIT
int bpf_trampoline_link_prog(struct bpf_tramp_link *link, struct bpf_trampoline *tr);
int bpf_trampoline_unlink_prog(struct bpf_tramp_link *link, struct bpf_trampoline *tr);
......@@ -2266,6 +2304,11 @@ static inline bool has_current_bpf_ctx(void)
}
void notrace bpf_prog_inc_misses_counter(struct bpf_prog *prog);
void bpf_dynptr_init(struct bpf_dynptr_kern *ptr, void *data,
enum bpf_dynptr_type type, u32 offset, u32 size);
void bpf_dynptr_set_null(struct bpf_dynptr_kern *ptr);
void bpf_dynptr_set_rdonly(struct bpf_dynptr_kern *ptr);
#else /* !CONFIG_BPF_SYSCALL */
static inline struct bpf_prog *bpf_prog_get(u32 ufd)
{
......@@ -2495,6 +2538,19 @@ static inline void bpf_prog_inc_misses_counter(struct bpf_prog *prog)
static inline void bpf_cgrp_storage_free(struct cgroup *cgroup)
{
}
static inline void bpf_dynptr_init(struct bpf_dynptr_kern *ptr, void *data,
enum bpf_dynptr_type type, u32 offset, u32 size)
{
}
static inline void bpf_dynptr_set_null(struct bpf_dynptr_kern *ptr)
{
}
static inline void bpf_dynptr_set_rdonly(struct bpf_dynptr_kern *ptr)
{
}
#endif /* CONFIG_BPF_SYSCALL */
void __bpf_free_used_btfs(struct bpf_prog_aux *aux,
......@@ -2801,6 +2857,8 @@ u32 bpf_sock_convert_ctx_access(enum bpf_access_type type,
struct bpf_insn *insn_buf,
struct bpf_prog *prog,
u32 *target_size);
int bpf_dynptr_from_skb_rdonly(struct sk_buff *skb, u64 flags,
struct bpf_dynptr_kern *ptr);
#else
static inline bool bpf_sock_common_is_valid_access(int off, int size,
enum bpf_access_type type,
......@@ -2822,6 +2880,11 @@ static inline u32 bpf_sock_convert_ctx_access(enum bpf_access_type type,
{
return 0;
}
static inline int bpf_dynptr_from_skb_rdonly(struct sk_buff *skb, u64 flags,
struct bpf_dynptr_kern *ptr)
{
return -EOPNOTSUPP;
}
#endif
#ifdef CONFIG_INET
......@@ -2913,36 +2976,6 @@ int bpf_bprintf_prepare(char *fmt, u32 fmt_size, const u64 *raw_args,
u32 num_args, struct bpf_bprintf_data *data);
void bpf_bprintf_cleanup(struct bpf_bprintf_data *data);
/* the implementation of the opaque uapi struct bpf_dynptr */
struct bpf_dynptr_kern {
void *data;
/* Size represents the number of usable bytes of dynptr data.
* If for example the offset is at 4 for a local dynptr whose data is
* of type u64, the number of usable bytes is 4.
*
* The upper 8 bits are reserved. It is as follows:
* Bits 0 - 23 = size
* Bits 24 - 30 = dynptr type
* Bit 31 = whether dynptr is read-only
*/
u32 size;
u32 offset;
} __aligned(8);
enum bpf_dynptr_type {
BPF_DYNPTR_TYPE_INVALID,
/* Points to memory that is local to the bpf program */
BPF_DYNPTR_TYPE_LOCAL,
/* Underlying data is a kernel-produced ringbuf record */
BPF_DYNPTR_TYPE_RINGBUF,
};
void bpf_dynptr_init(struct bpf_dynptr_kern *ptr, void *data,
enum bpf_dynptr_type type, u32 offset, u32 size);
void bpf_dynptr_set_null(struct bpf_dynptr_kern *ptr);
int bpf_dynptr_check_size(u32 size);
u32 bpf_dynptr_get_size(const struct bpf_dynptr_kern *ptr);
#ifdef CONFIG_BPF_LSM
void bpf_cgroup_atype_get(u32 attach_btf_id, int cgroup_atype);
void bpf_cgroup_atype_put(int cgroup_atype);
......
......@@ -616,9 +616,6 @@ int check_func_arg_reg_off(struct bpf_verifier_env *env,
enum bpf_arg_type arg_type);
int check_mem_reg(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
u32 regno, u32 mem_size);
struct bpf_call_arg_meta;
int process_dynptr_func(struct bpf_verifier_env *env, int regno,
enum bpf_arg_type arg_type, struct bpf_call_arg_meta *meta);
/* this lives here instead of in bpf.h because it needs to dereference tgt_prog */
static inline u64 bpf_trampoline_compute_key(const struct bpf_prog *tgt_prog,
......
......@@ -1542,4 +1542,50 @@ static __always_inline int __bpf_xdp_redirect_map(struct bpf_map *map, u64 index
return XDP_REDIRECT;
}
#ifdef CONFIG_NET
int __bpf_skb_load_bytes(const struct sk_buff *skb, u32 offset, void *to, u32 len);
int __bpf_skb_store_bytes(struct sk_buff *skb, u32 offset, const void *from,
u32 len, u64 flags);
int __bpf_xdp_load_bytes(struct xdp_buff *xdp, u32 offset, void *buf, u32 len);
int __bpf_xdp_store_bytes(struct xdp_buff *xdp, u32 offset, void *buf, u32 len);
void *bpf_xdp_pointer(struct xdp_buff *xdp, u32 offset, u32 len);
void bpf_xdp_copy_buf(struct xdp_buff *xdp, unsigned long off,
void *buf, unsigned long len, bool flush);
#else /* CONFIG_NET */
static inline int __bpf_skb_load_bytes(const struct sk_buff *skb, u32 offset,
void *to, u32 len)
{
return -EOPNOTSUPP;
}
static inline int __bpf_skb_store_bytes(struct sk_buff *skb, u32 offset,
const void *from, u32 len, u64 flags)
{
return -EOPNOTSUPP;
}
static inline int __bpf_xdp_load_bytes(struct xdp_buff *xdp, u32 offset,
void *buf, u32 len)
{
return -EOPNOTSUPP;
}
static inline int __bpf_xdp_store_bytes(struct xdp_buff *xdp, u32 offset,
void *buf, u32 len)
{
return -EOPNOTSUPP;
}
static inline void *bpf_xdp_pointer(struct xdp_buff *xdp, u32 offset, u32 len)
{
return NULL;
}
static inline void *bpf_xdp_copy_buf(struct xdp_buff *xdp, unsigned long off, void *buf,
unsigned long len, bool flush)
{
return NULL;
}
#endif /* CONFIG_NET */
#endif /* __LINUX_FILTER_H__ */
......@@ -5325,11 +5325,22 @@ union bpf_attr {
* Description
* Write *len* bytes from *src* into *dst*, starting from *offset*
* into *dst*.
* *flags* is currently unused.
*
* *flags* must be 0 except for skb-type dynptrs.
*
* For skb-type dynptrs:
* * All data slices of the dynptr are automatically
* invalidated after **bpf_dynptr_write**\ (). This is
* because writing may pull the skb and change the
* underlying packet buffer.
*
* * For *flags*, please see the flags accepted by
* **bpf_skb_store_bytes**\ ().
* Return
* 0 on success, -E2BIG if *offset* + *len* exceeds the length
* of *dst*'s data, -EINVAL if *dst* is an invalid dynptr or if *dst*
* is a read-only dynptr or if *flags* is not 0.
* is a read-only dynptr or if *flags* is not correct. For skb-type dynptrs,
* other errors correspond to errors returned by **bpf_skb_store_bytes**\ ().
*
* void *bpf_dynptr_data(const struct bpf_dynptr *ptr, u32 offset, u32 len)
* Description
......@@ -5337,6 +5348,9 @@ union bpf_attr {
*
* *len* must be a statically known value. The returned data slice
* is invalidated whenever the dynptr is invalidated.
*
* skb and xdp type dynptrs may not use bpf_dynptr_data. They should
* instead use bpf_dynptr_slice and bpf_dynptr_slice_rdwr.
* Return
* Pointer to the underlying dynptr data, NULL if the dynptr is
* read-only, if the dynptr is invalid, or if the offset and length
......
......@@ -207,6 +207,11 @@ enum btf_kfunc_hook {
BTF_KFUNC_HOOK_TRACING,
BTF_KFUNC_HOOK_SYSCALL,
BTF_KFUNC_HOOK_FMODRET,
BTF_KFUNC_HOOK_CGROUP_SKB,
BTF_KFUNC_HOOK_SCHED_ACT,
BTF_KFUNC_HOOK_SK_SKB,
BTF_KFUNC_HOOK_SOCKET_FILTER,
BTF_KFUNC_HOOK_LWT,
BTF_KFUNC_HOOK_MAX,
};
......@@ -5683,6 +5688,10 @@ btf_get_prog_ctx_type(struct bpf_verifier_log *log, const struct btf *btf,
* int socket_filter_bpf_prog(struct __sk_buff *skb)
* { // no fields of skb are ever used }
*/
if (strcmp(ctx_tname, "__sk_buff") == 0 && strcmp(tname, "sk_buff") == 0)
return ctx_type;
if (strcmp(ctx_tname, "xdp_md") == 0 && strcmp(tname, "xdp_buff") == 0)
return ctx_type;
if (strcmp(ctx_tname, tname)) {
/* bpf_user_pt_regs_t is a typedef, so resolve it to
* underlying struct and check name again
......@@ -7704,6 +7713,19 @@ static int bpf_prog_type_to_kfunc_hook(enum bpf_prog_type prog_type)
return BTF_KFUNC_HOOK_TRACING;
case BPF_PROG_TYPE_SYSCALL:
return BTF_KFUNC_HOOK_SYSCALL;
case BPF_PROG_TYPE_CGROUP_SKB:
return BTF_KFUNC_HOOK_CGROUP_SKB;
case BPF_PROG_TYPE_SCHED_ACT:
return BTF_KFUNC_HOOK_SCHED_ACT;
case BPF_PROG_TYPE_SK_SKB:
return BTF_KFUNC_HOOK_SK_SKB;
case BPF_PROG_TYPE_SOCKET_FILTER:
return BTF_KFUNC_HOOK_SOCKET_FILTER;
case BPF_PROG_TYPE_LWT_OUT:
case BPF_PROG_TYPE_LWT_IN:
case BPF_PROG_TYPE_LWT_XMIT:
case BPF_PROG_TYPE_LWT_SEG6LOCAL:
return BTF_KFUNC_HOOK_LWT;
default:
return BTF_KFUNC_HOOK_MAX;
}
......
......@@ -1420,11 +1420,21 @@ static bool bpf_dynptr_is_rdonly(const struct bpf_dynptr_kern *ptr)
return ptr->size & DYNPTR_RDONLY_BIT;
}
void bpf_dynptr_set_rdonly(struct bpf_dynptr_kern *ptr)
{
ptr->size |= DYNPTR_RDONLY_BIT;
}
static void bpf_dynptr_set_type(struct bpf_dynptr_kern *ptr, enum bpf_dynptr_type type)
{
ptr->size |= type << DYNPTR_TYPE_SHIFT;
}
static enum bpf_dynptr_type bpf_dynptr_get_type(const struct bpf_dynptr_kern *ptr)
{
return (ptr->size & ~(DYNPTR_RDONLY_BIT)) >> DYNPTR_TYPE_SHIFT;
}
u32 bpf_dynptr_get_size(const struct bpf_dynptr_kern *ptr)
{
return ptr->size & DYNPTR_SIZE_MASK;
......@@ -1497,6 +1507,7 @@ static const struct bpf_func_proto bpf_dynptr_from_mem_proto = {
BPF_CALL_5(bpf_dynptr_read, void *, dst, u32, len, const struct bpf_dynptr_kern *, src,
u32, offset, u64, flags)
{
enum bpf_dynptr_type type;
int err;
if (!src->data || flags)
......@@ -1506,13 +1517,25 @@ BPF_CALL_5(bpf_dynptr_read, void *, dst, u32, len, const struct bpf_dynptr_kern
if (err)
return err;
type = bpf_dynptr_get_type(src);
switch (type) {
case BPF_DYNPTR_TYPE_LOCAL:
case BPF_DYNPTR_TYPE_RINGBUF:
/* Source and destination may possibly overlap, hence use memmove to
* copy the data. E.g. bpf_dynptr_from_mem may create two dynptrs
* pointing to overlapping PTR_TO_MAP_VALUE regions.
*/
memmove(dst, src->data + src->offset + offset, len);
return 0;
case BPF_DYNPTR_TYPE_SKB:
return __bpf_skb_load_bytes(src->data, src->offset + offset, dst, len);
case BPF_DYNPTR_TYPE_XDP:
return __bpf_xdp_load_bytes(src->data, src->offset + offset, dst, len);
default:
WARN_ONCE(true, "bpf_dynptr_read: unknown dynptr type %d\n", type);
return -EFAULT;
}
}
static const struct bpf_func_proto bpf_dynptr_read_proto = {
......@@ -1529,22 +1552,40 @@ static const struct bpf_func_proto bpf_dynptr_read_proto = {
BPF_CALL_5(bpf_dynptr_write, const struct bpf_dynptr_kern *, dst, u32, offset, void *, src,
u32, len, u64, flags)
{
enum bpf_dynptr_type type;
int err;
if (!dst->data || flags || bpf_dynptr_is_rdonly(dst))
if (!dst->data || bpf_dynptr_is_rdonly(dst))
return -EINVAL;
err = bpf_dynptr_check_off_len(dst, offset, len);
if (err)
return err;
type = bpf_dynptr_get_type(dst);
switch (type) {
case BPF_DYNPTR_TYPE_LOCAL:
case BPF_DYNPTR_TYPE_RINGBUF:
if (flags)
return -EINVAL;
/* Source and destination may possibly overlap, hence use memmove to
* copy the data. E.g. bpf_dynptr_from_mem may create two dynptrs
* pointing to overlapping PTR_TO_MAP_VALUE regions.
*/
memmove(dst->data + dst->offset + offset, src, len);
return 0;
case BPF_DYNPTR_TYPE_SKB:
return __bpf_skb_store_bytes(dst->data, dst->offset + offset, src, len,
flags);
case BPF_DYNPTR_TYPE_XDP:
if (flags)
return -EINVAL;
return __bpf_xdp_store_bytes(dst->data, dst->offset + offset, src, len);
default:
WARN_ONCE(true, "bpf_dynptr_write: unknown dynptr type %d\n", type);
return -EFAULT;
}
}
static const struct bpf_func_proto bpf_dynptr_write_proto = {
......@@ -1560,6 +1601,7 @@ static const struct bpf_func_proto bpf_dynptr_write_proto = {
BPF_CALL_3(bpf_dynptr_data, const struct bpf_dynptr_kern *, ptr, u32, offset, u32, len)
{
enum bpf_dynptr_type type;
int err;
if (!ptr->data)
......@@ -1572,7 +1614,20 @@ BPF_CALL_3(bpf_dynptr_data, const struct bpf_dynptr_kern *, ptr, u32, offset, u3
if (bpf_dynptr_is_rdonly(ptr))
return 0;
type = bpf_dynptr_get_type(ptr);
switch (type) {
case BPF_DYNPTR_TYPE_LOCAL:
case BPF_DYNPTR_TYPE_RINGBUF:
return (unsigned long)(ptr->data + ptr->offset + offset);
case BPF_DYNPTR_TYPE_SKB:
case BPF_DYNPTR_TYPE_XDP:
/* skb and xdp dynptrs should use bpf_dynptr_slice / bpf_dynptr_slice_rdwr */
return 0;
default:
WARN_ONCE(true, "bpf_dynptr_data: unknown dynptr type %d\n", type);
return 0;
}
}
static const struct bpf_func_proto bpf_dynptr_data_proto = {
......@@ -2138,6 +2193,142 @@ __bpf_kfunc struct task_struct *bpf_task_from_pid(s32 pid)
return p;
}
/**
* bpf_dynptr_slice - Obtain a read-only pointer to the dynptr data.
*
* For non-skb and non-xdp type dynptrs, there is no difference between
* bpf_dynptr_slice and bpf_dynptr_data.
*
* If the intention is to write to the data slice, please use
* bpf_dynptr_slice_rdwr.
*
* The user must check that the returned pointer is not null before using it.
*
* Please note that in the case of skb and xdp dynptrs, bpf_dynptr_slice
* does not change the underlying packet data pointers, so a call to
* bpf_dynptr_slice will not invalidate any ctx->data/data_end pointers in
* the bpf program.
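*
* An illustrative usage pattern (a sketch in the same spirit as the
* bpf_dynptr_slice_rdwr example below; buffer is a user-provided array):
*
* struct ethhdr *eth = bpf_dynptr_slice(&dynptr, 0, buffer, sizeof(buffer));
* if (!eth)
*	return TC_ACT_SHOT;
*
* // read (but do not write to) the header fields //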
*
* @ptr: The dynptr whose data slice to retrieve
* @offset: Offset into the dynptr
* @buffer: User-provided buffer to copy contents into
* @buffer__szk: Size (in bytes) of the buffer. This is the length of the
* requested slice. This must be a constant.
*
* @returns: NULL if the call failed (e.g. invalid dynptr), otherwise a pointer
* to a read-only data slice (either a direct pointer to the data or a pointer
* to the user-provided buffer, with its contents containing the data, if a
* direct pointer could not be obtained)
*/
__bpf_kfunc void *bpf_dynptr_slice(const struct bpf_dynptr_kern *ptr, u32 offset,
void *buffer, u32 buffer__szk)
{
enum bpf_dynptr_type type;
u32 len = buffer__szk;
int err;
if (!ptr->data)
return 0;
err = bpf_dynptr_check_off_len(ptr, offset, len);
if (err)
return 0;
type = bpf_dynptr_get_type(ptr);
switch (type) {
case BPF_DYNPTR_TYPE_LOCAL:
case BPF_DYNPTR_TYPE_RINGBUF:
return ptr->data + ptr->offset + offset;
case BPF_DYNPTR_TYPE_SKB:
return skb_header_pointer(ptr->data, ptr->offset + offset, len, buffer);
case BPF_DYNPTR_TYPE_XDP:
{
void *xdp_ptr = bpf_xdp_pointer(ptr->data, ptr->offset + offset, len);
if (xdp_ptr)
return xdp_ptr;
bpf_xdp_copy_buf(ptr->data, ptr->offset + offset, buffer, len, false);
return buffer;
}
default:
WARN_ONCE(true, "unknown dynptr type %d\n", type);
return 0;
}
}
/**
* bpf_dynptr_slice_rdwr - Obtain a writable pointer to the dynptr data.
*
* For non-skb and non-xdp type dynptrs, there is no difference between
* bpf_dynptr_slice and bpf_dynptr_data.
*
* The returned pointer is writable and may point either directly to the dynptr
* data at the requested offset or to the buffer if a direct data pointer could
* not be obtained (for example, when the requested slice is in the paged area
* of an skb packet). In the case where the returned pointer is to the buffer,
* the user is responsible for persisting writes through calling
* bpf_dynptr_write(). This usually looks something like this pattern:
*
* struct eth_hdr *eth = bpf_dynptr_slice_rdwr(&dynptr, 0, buffer, sizeof(buffer));
* if (!eth)
* return TC_ACT_SHOT;
*
* // mutate eth header //
*
* if (eth == buffer)
* bpf_dynptr_write(&dynptr, 0, buffer, sizeof(buffer), 0);
*
* Please note that, as in the example above, the user must check that the
* returned pointer is not null before using it.
*
* Please also note that in the case of skb and xdp dynptrs, bpf_dynptr_slice_rdwr
* does not change the underlying packet data pointers, so a call to
* bpf_dynptr_slice_rdwr will not invalidate any ctx->data/data_end pointers in
* the bpf program.
*
* @ptr: The dynptr whose data slice to retrieve
* @offset: Offset into the dynptr
* @buffer: User-provided buffer to copy contents into
* @buffer__szk: Size (in bytes) of the buffer. This is the length of the
* requested slice. This must be a constant.
*
* @returns: NULL if the call failed (e.g. invalid dynptr), otherwise a pointer
* to a writable data slice (either a direct pointer to the data or a pointer
* to the user-provided buffer, with its contents containing the data, if a
* direct pointer could not be obtained)
*/
__bpf_kfunc void *bpf_dynptr_slice_rdwr(const struct bpf_dynptr_kern *ptr, u32 offset,
void *buffer, u32 buffer__szk)
{
if (!ptr->data || bpf_dynptr_is_rdonly(ptr))
return 0;
/* bpf_dynptr_slice_rdwr is the same logic as bpf_dynptr_slice.
*
* For skb-type dynptrs, it is safe to write into the returned pointer
* if the bpf program allows skb data writes. There are two possibilities
* that may occur when calling bpf_dynptr_slice_rdwr:
*
* 1) The requested slice is in the head of the skb. In this case, the
* returned pointer points directly to skb data, and if the skb is cloned, the
* verifier will have uncloned it (see bpf_unclone_prologue()) already.
* The pointer can be directly written into.
*
* 2) Some portion of the requested slice is in the paged buffer area.
* In this case, the requested data will be copied out into the buffer
* and the returned pointer will be a pointer to the buffer. The skb
* will not be pulled. To persist the write, the user will need to call
* bpf_dynptr_write(), which will pull the skb and commit the write.
*
* Similarly for xdp programs, if the requested slice is not across xdp
* fragments, then a direct pointer will be returned, otherwise the data
* will be copied out into the buffer and the user will need to call
* bpf_dynptr_write() to commit changes.
*/
return bpf_dynptr_slice(ptr, offset, buffer, buffer__szk);
}
__bpf_kfunc void *bpf_cast_to_kern_ctx(void *obj)
{
return obj;
......@@ -2207,6 +2398,8 @@ BTF_ID_FLAGS(func, bpf_cast_to_kern_ctx)
BTF_ID_FLAGS(func, bpf_rdonly_cast)
BTF_ID_FLAGS(func, bpf_rcu_read_lock)
BTF_ID_FLAGS(func, bpf_rcu_read_unlock)
BTF_ID_FLAGS(func, bpf_dynptr_slice, KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_dynptr_slice_rdwr, KF_RET_NULL)
BTF_SET8_END(common_btf_ids)
static const struct btf_kfunc_id_set common_kfunc_set = {
......
......@@ -1721,6 +1721,12 @@ static const struct bpf_func_proto bpf_skb_store_bytes_proto = {
.arg5_type = ARG_ANYTHING,
};
int __bpf_skb_store_bytes(struct sk_buff *skb, u32 offset, const void *from,
u32 len, u64 flags)
{
return ____bpf_skb_store_bytes(skb, offset, from, len, flags);
}
BPF_CALL_4(bpf_skb_load_bytes, const struct sk_buff *, skb, u32, offset,
void *, to, u32, len)
{
......@@ -1751,6 +1757,11 @@ static const struct bpf_func_proto bpf_skb_load_bytes_proto = {
.arg4_type = ARG_CONST_SIZE,
};
int __bpf_skb_load_bytes(const struct sk_buff *skb, u32 offset, void *to, u32 len)
{
return ____bpf_skb_load_bytes(skb, offset, to, len);
}
BPF_CALL_4(bpf_flow_dissector_load_bytes,
const struct bpf_flow_dissector *, ctx, u32, offset,
void *, to, u32, len)
......@@ -3883,7 +3894,7 @@ static const struct bpf_func_proto bpf_xdp_adjust_head_proto = {
.arg2_type = ARG_ANYTHING,
};
static void bpf_xdp_copy_buf(struct xdp_buff *xdp, unsigned long off,
void bpf_xdp_copy_buf(struct xdp_buff *xdp, unsigned long off,
void *buf, unsigned long len, bool flush)
{
unsigned long ptr_len, ptr_off = 0;
......@@ -3930,7 +3941,7 @@ static void bpf_xdp_copy_buf(struct xdp_buff *xdp, unsigned long off,
}
}
static void *bpf_xdp_pointer(struct xdp_buff *xdp, u32 offset, u32 len)
void *bpf_xdp_pointer(struct xdp_buff *xdp, u32 offset, u32 len)
{
struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp);
u32 size = xdp->data_end - xdp->data;
......@@ -3988,6 +3999,11 @@ static const struct bpf_func_proto bpf_xdp_load_bytes_proto = {
.arg4_type = ARG_CONST_SIZE,
};
int __bpf_xdp_load_bytes(struct xdp_buff *xdp, u32 offset, void *buf, u32 len)
{
return ____bpf_xdp_load_bytes(xdp, offset, buf, len);
}
BPF_CALL_4(bpf_xdp_store_bytes, struct xdp_buff *, xdp, u32, offset,
void *, buf, u32, len)
{
......@@ -4015,6 +4031,11 @@ static const struct bpf_func_proto bpf_xdp_store_bytes_proto = {
.arg4_type = ARG_CONST_SIZE,
};
int __bpf_xdp_store_bytes(struct xdp_buff *xdp, u32 offset, void *buf, u32 len)
{
return ____bpf_xdp_store_bytes(xdp, offset, buf, len);
}
static int bpf_xdp_frags_increase_tail(struct xdp_buff *xdp, int offset)
{
struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp);
......@@ -11621,3 +11642,82 @@ bpf_sk_base_func_proto(enum bpf_func_id func_id)
return func;
}
__diag_push();
__diag_ignore_all("-Wmissing-prototypes",
"Global functions as their definitions will be in vmlinux BTF");
__bpf_kfunc int bpf_dynptr_from_skb(struct sk_buff *skb, u64 flags,
struct bpf_dynptr_kern *ptr__uninit)
{
if (flags) {
bpf_dynptr_set_null(ptr__uninit);
return -EINVAL;
}
bpf_dynptr_init(ptr__uninit, skb, BPF_DYNPTR_TYPE_SKB, 0, skb->len);
return 0;
}
__bpf_kfunc int bpf_dynptr_from_xdp(struct xdp_buff *xdp, u64 flags,
struct bpf_dynptr_kern *ptr__uninit)
{
if (flags) {
bpf_dynptr_set_null(ptr__uninit);
return -EINVAL;
}
bpf_dynptr_init(ptr__uninit, xdp, BPF_DYNPTR_TYPE_XDP, 0, xdp_get_buff_len(xdp));
return 0;
}
__diag_pop();
int bpf_dynptr_from_skb_rdonly(struct sk_buff *skb, u64 flags,
struct bpf_dynptr_kern *ptr__uninit)
{
int err;
err = bpf_dynptr_from_skb(skb, flags, ptr__uninit);
if (err)
return err;
bpf_dynptr_set_rdonly(ptr__uninit);
return 0;
}
BTF_SET8_START(bpf_kfunc_check_set_skb)
BTF_ID_FLAGS(func, bpf_dynptr_from_skb)
BTF_SET8_END(bpf_kfunc_check_set_skb)
BTF_SET8_START(bpf_kfunc_check_set_xdp)
BTF_ID_FLAGS(func, bpf_dynptr_from_xdp)
BTF_SET8_END(bpf_kfunc_check_set_xdp)
static const struct btf_kfunc_id_set bpf_kfunc_set_skb = {
.owner = THIS_MODULE,
.set = &bpf_kfunc_check_set_skb,
};
static const struct btf_kfunc_id_set bpf_kfunc_set_xdp = {
.owner = THIS_MODULE,
.set = &bpf_kfunc_check_set_xdp,
};
static int __init bpf_kfunc_init(void)
{
int ret;
ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_SCHED_CLS, &bpf_kfunc_set_skb);
ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_SCHED_ACT, &bpf_kfunc_set_skb);
ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_SK_SKB, &bpf_kfunc_set_skb);
ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_SOCKET_FILTER, &bpf_kfunc_set_skb);
ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_CGROUP_SKB, &bpf_kfunc_set_skb);
ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_LWT_OUT, &bpf_kfunc_set_skb);
ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_LWT_IN, &bpf_kfunc_set_skb);
ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_LWT_XMIT, &bpf_kfunc_set_skb);
ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_LWT_SEG6LOCAL, &bpf_kfunc_set_skb);
return ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_XDP, &bpf_kfunc_set_xdp);
}
late_initcall(bpf_kfunc_init);
......@@ -5325,11 +5325,22 @@ union bpf_attr {
* Description
* Write *len* bytes from *src* into *dst*, starting from *offset*
* into *dst*.
* *flags* is currently unused.
*
* *flags* must be 0 except for skb-type dynptrs.
*
* For skb-type dynptrs:
* * All data slices of the dynptr are automatically
* invalidated after **bpf_dynptr_write**\ (). This is
* because writing may pull the skb and change the
* underlying packet buffer.
*
* * For *flags*, please see the flags accepted by
* **bpf_skb_store_bytes**\ ().
* Return
* 0 on success, -E2BIG if *offset* + *len* exceeds the length
* of *dst*'s data, -EINVAL if *dst* is an invalid dynptr or if *dst*
* is a read-only dynptr or if *flags* is not 0.
* is a read-only dynptr or if *flags* is not correct. For skb-type dynptrs,
* other errors correspond to errors returned by **bpf_skb_store_bytes**\ ().
*
* void *bpf_dynptr_data(const struct bpf_dynptr *ptr, u32 offset, u32 len)
* Description
......@@ -5337,6 +5348,9 @@ union bpf_attr {
*
* *len* must be a statically known value. The returned data slice
* is invalidated whenever the dynptr is invalidated.
*
* skb and xdp type dynptrs may not use bpf_dynptr_data. They should
* instead use bpf_dynptr_slice and bpf_dynptr_slice_rdwr.
* Return
* Pointer to the underlying dynptr data, NULL if the dynptr is
* read-only, if the dynptr is invalid, or if the offset and length
......
......@@ -4,6 +4,8 @@ bloom_filter_map # failed to find kernel BTF type ID of
bpf_cookie # failed to open_and_load program: -524 (trampoline)
bpf_loop # attaches to __x64_sys_nanosleep
cgrp_local_storage # prog_attach unexpected error: -524 (trampoline)
dynptr/test_dynptr_skb_data
dynptr/test_skb_readonly
fexit_sleep # fexit_skel_load fexit skeleton failed (trampoline)
get_stack_raw_tp # user_stack corrupted user stack (no backchain userspace)
kprobe_multi_bench_attach # bpf_program__attach_kprobe_multi_opts unexpected error: -95
......
#ifndef __BPF_KFUNCS__
#define __BPF_KFUNCS__
/* Description
* Initializes an skb-type dynptr
* Returns
* Error code
*/
extern int bpf_dynptr_from_skb(struct __sk_buff *skb, __u64 flags,
struct bpf_dynptr *ptr__uninit) __ksym;
/* Description
* Initializes an xdp-type dynptr
* Returns
* Error code
*/
extern int bpf_dynptr_from_xdp(struct xdp_md *xdp, __u64 flags,
struct bpf_dynptr *ptr__uninit) __ksym;
/* Description
* Obtain a read-only pointer to the dynptr's data
* Returns
* Either a direct pointer to the dynptr data or a pointer to the user-provided
* buffer if unable to obtain a direct pointer
*/
extern void *bpf_dynptr_slice(const struct bpf_dynptr *ptr, __u32 offset,
void *buffer, __u32 buffer__szk) __ksym;
/* Description
* Obtain a read-write pointer to the dynptr's data
* Returns
* Either a direct pointer to the dynptr data or a pointer to the user-provided
* buffer if unable to obtain a direct pointer
*/
extern void *bpf_dynptr_slice_rdwr(const struct bpf_dynptr *ptr, __u32 offset,
void *buffer, __u32 buffer__szk) __ksym;
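/* Example usage of the above (an illustrative sketch; assumes an skb-type
 * program where "skb" is the context):
 *
 *	struct bpf_dynptr ptr;
 *	char buffer[sizeof(struct ethhdr)] = {};
 *	struct ethhdr *eth;
 *
 *	bpf_dynptr_from_skb(skb, 0, &ptr);
 *	eth = bpf_dynptr_slice(&ptr, 0, buffer, sizeof(buffer));
 *	if (!eth)
 *		return 0;
 */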
#endif
......@@ -13,6 +13,7 @@
#include "progs/test_cls_redirect.h"
#include "test_cls_redirect.skel.h"
#include "test_cls_redirect_dynptr.skel.h"
#include "test_cls_redirect_subprogs.skel.h"
#define ENCAP_IP INADDR_LOOPBACK
......@@ -446,6 +447,28 @@ static void test_cls_redirect_common(struct bpf_program *prog)
close_fds((int *)conns, sizeof(conns) / sizeof(conns[0][0]));
}
static void test_cls_redirect_dynptr(void)
{
struct test_cls_redirect_dynptr *skel;
int err;
skel = test_cls_redirect_dynptr__open();
if (!ASSERT_OK_PTR(skel, "skel_open"))
return;
skel->rodata->ENCAPSULATION_IP = htonl(ENCAP_IP);
skel->rodata->ENCAPSULATION_PORT = htons(ENCAP_PORT);
err = test_cls_redirect_dynptr__load(skel);
if (!ASSERT_OK(err, "skel_load"))
goto cleanup;
test_cls_redirect_common(skel->progs.cls_redirect);
cleanup:
test_cls_redirect_dynptr__destroy(skel);
}
static void test_cls_redirect_inlined(void)
{
struct test_cls_redirect *skel;
......@@ -496,4 +519,6 @@ void test_cls_redirect(void)
test_cls_redirect_inlined();
if (test__start_subtest("cls_redirect_subprogs"))
test_cls_redirect_subprogs();
if (test__start_subtest("cls_redirect_dynptr"))
test_cls_redirect_dynptr();
}
......@@ -2,20 +2,32 @@
/* Copyright (c) 2022 Facebook */
#include <test_progs.h>
#include <network_helpers.h>
#include "dynptr_fail.skel.h"
#include "dynptr_success.skel.h"
static const char * const success_tests[] = {
"test_read_write",
"test_data_slice",
"test_ringbuf",
enum test_setup_type {
SETUP_SYSCALL_SLEEP,
SETUP_SKB_PROG,
};
static void verify_success(const char *prog_name)
static struct {
const char *prog_name;
enum test_setup_type type;
} success_tests[] = {
{"test_read_write", SETUP_SYSCALL_SLEEP},
{"test_dynptr_data", SETUP_SYSCALL_SLEEP},
{"test_ringbuf", SETUP_SYSCALL_SLEEP},
{"test_skb_readonly", SETUP_SKB_PROG},
{"test_dynptr_skb_data", SETUP_SKB_PROG},
};
static void verify_success(const char *prog_name, enum test_setup_type setup_type)
{
struct dynptr_success *skel;
struct bpf_program *prog;
struct bpf_link *link;
int err;
skel = dynptr_success__open();
if (!ASSERT_OK_PTR(skel, "dynptr_success__open"))
......@@ -23,23 +35,53 @@ static void verify_success(const char *prog_name)
skel->bss->pid = getpid();
dynptr_success__load(skel);
if (!ASSERT_OK_PTR(skel, "dynptr_success__load"))
goto cleanup;
prog = bpf_object__find_program_by_name(skel->obj, prog_name);
if (!ASSERT_OK_PTR(prog, "bpf_object__find_program_by_name"))
goto cleanup;
bpf_program__set_autoload(prog, true);
err = dynptr_success__load(skel);
if (!ASSERT_OK(err, "dynptr_success__load"))
goto cleanup;
switch (setup_type) {
case SETUP_SYSCALL_SLEEP:
link = bpf_program__attach(prog);
if (!ASSERT_OK_PTR(link, "bpf_program__attach"))
goto cleanup;
usleep(1);
ASSERT_EQ(skel->bss->err, 0, "err");
bpf_link__destroy(link);
break;
case SETUP_SKB_PROG:
{
int prog_fd;
char buf[64];
LIBBPF_OPTS(bpf_test_run_opts, topts,
.data_in = &pkt_v4,
.data_size_in = sizeof(pkt_v4),
.data_out = buf,
.data_size_out = sizeof(buf),
.repeat = 1,
);
prog_fd = bpf_program__fd(prog);
if (!ASSERT_GE(prog_fd, 0, "prog_fd"))
goto cleanup;
err = bpf_prog_test_run_opts(prog_fd, &topts);
if (!ASSERT_OK(err, "test_run"))
goto cleanup;
break;
}
}
ASSERT_EQ(skel->bss->err, 0, "err");
cleanup:
dynptr_success__destroy(skel);
......@@ -50,10 +92,10 @@ void test_dynptr(void)
int i;
for (i = 0; i < ARRAY_SIZE(success_tests); i++) {
if (!test__start_subtest(success_tests[i]))
if (!test__start_subtest(success_tests[i].prog_name))
continue;
verify_success(success_tests[i]);
verify_success(success_tests[i].prog_name, success_tests[i].type);
}
RUN_TESTS(dynptr_fail);
......
......@@ -93,4 +93,6 @@ void test_l4lb_all(void)
test_l4lb("test_l4lb.bpf.o");
if (test__start_subtest("l4lb_noinline"))
test_l4lb("test_l4lb_noinline.bpf.o");
if (test__start_subtest("l4lb_noinline_dynptr"))
test_l4lb("test_l4lb_noinline_dynptr.bpf.o");
}
// SPDX-License-Identifier: GPL-2.0
#include <test_progs.h>
#include <network_helpers.h>
#include "test_parse_tcp_hdr_opt.skel.h"
#include "test_parse_tcp_hdr_opt_dynptr.skel.h"
#include "test_tcp_hdr_options.h"
struct test_pkt {
struct ipv6_packet pk6_v6;
u8 options[16];
} __packed;
struct test_pkt pkt = {
.pk6_v6.eth.h_proto = __bpf_constant_htons(ETH_P_IPV6),
.pk6_v6.iph.nexthdr = IPPROTO_TCP,
.pk6_v6.iph.payload_len = __bpf_constant_htons(MAGIC_BYTES),
.pk6_v6.tcp.urg_ptr = 123,
.pk6_v6.tcp.doff = 9, /* 16 bytes of options */
.options = {
TCPOPT_MSS, 4, 0x05, 0xB4, TCPOPT_NOP, TCPOPT_NOP,
0, 6, 0xBB, 0xBB, 0xBB, 0xBB, TCPOPT_EOL
},
};
static void test_parse_opt(void)
{
struct test_parse_tcp_hdr_opt *skel;
struct bpf_program *prog;
char buf[128];
int err;
LIBBPF_OPTS(bpf_test_run_opts, topts,
.data_in = &pkt,
.data_size_in = sizeof(pkt),
.data_out = buf,
.data_size_out = sizeof(buf),
.repeat = 3,
);
skel = test_parse_tcp_hdr_opt__open_and_load();
if (!ASSERT_OK_PTR(skel, "skel_open_and_load"))
return;
pkt.options[6] = skel->rodata->tcp_hdr_opt_kind_tpr;
prog = skel->progs.xdp_ingress_v6;
err = bpf_prog_test_run_opts(bpf_program__fd(prog), &topts);
ASSERT_OK(err, "ipv6 test_run");
ASSERT_EQ(topts.retval, XDP_PASS, "ipv6 test_run retval");
ASSERT_EQ(skel->bss->server_id, 0xBBBBBBBB, "server id");
test_parse_tcp_hdr_opt__destroy(skel);
}
static void test_parse_opt_dynptr(void)
{
struct test_parse_tcp_hdr_opt_dynptr *skel;
struct bpf_program *prog;
char buf[128];
int err;
LIBBPF_OPTS(bpf_test_run_opts, topts,
.data_in = &pkt,
.data_size_in = sizeof(pkt),
.data_out = buf,
.data_size_out = sizeof(buf),
.repeat = 3,
);
skel = test_parse_tcp_hdr_opt_dynptr__open_and_load();
if (!ASSERT_OK_PTR(skel, "skel_open_and_load"))
return;
pkt.options[6] = skel->rodata->tcp_hdr_opt_kind_tpr;
prog = skel->progs.xdp_ingress_v6;
err = bpf_prog_test_run_opts(bpf_program__fd(prog), &topts);
ASSERT_OK(err, "ipv6 test_run");
ASSERT_EQ(topts.retval, XDP_PASS, "ipv6 test_run retval");
ASSERT_EQ(skel->bss->server_id, 0xBBBBBBBB, "server id");
test_parse_tcp_hdr_opt_dynptr__destroy(skel);
}
void test_parse_tcp_hdr_opt(void)
{
if (test__start_subtest("parse_tcp_hdr_opt"))
test_parse_opt();
if (test__start_subtest("parse_tcp_hdr_opt_dynptr"))
test_parse_opt_dynptr();
}
......@@ -4,11 +4,10 @@
#define IFINDEX_LO 1
#define XDP_FLAGS_REPLACE (1U << 4)
void serial_test_xdp_attach(void)
static void test_xdp_attach(const char *file)
{
__u32 duration = 0, id1, id2, id0 = 0, len;
struct bpf_object *obj1, *obj2, *obj3;
const char *file = "./test_xdp.bpf.o";
struct bpf_prog_info info = {};
int err, fd1, fd2, fd3;
LIBBPF_OPTS(bpf_xdp_attach_opts, opts);
......@@ -85,3 +84,11 @@ void serial_test_xdp_attach(void)
out_1:
bpf_object__close(obj1);
}
void serial_test_xdp_attach(void)
{
if (test__start_subtest("xdp_attach"))
test_xdp_attach("./test_xdp.bpf.o");
if (test__start_subtest("xdp_attach_dynptr"))
test_xdp_attach("./test_xdp_dynptr.bpf.o");
}
......@@ -5,7 +5,9 @@
#include <string.h>
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
#include <linux/if_ether.h>
#include "bpf_misc.h"
#include "bpf_kfuncs.h"
char _license[] SEC("license") = "GPL";
......@@ -244,6 +246,27 @@ int data_slice_out_of_bounds_ringbuf(void *ctx)
return 0;
}
/* A data slice can't be accessed out of bounds */
SEC("?tc")
__failure __msg("value is outside of the allowed memory range")
int data_slice_out_of_bounds_skb(struct __sk_buff *skb)
{
struct bpf_dynptr ptr;
struct ethhdr *hdr;
char buffer[sizeof(*hdr)] = {};
bpf_dynptr_from_skb(skb, 0, &ptr);
hdr = bpf_dynptr_slice_rdwr(&ptr, 0, buffer, sizeof(buffer));
if (!hdr)
return SK_DROP;
/* this should fail */
*(__u8*)(hdr + 1) = 1;
return SK_PASS;
}
SEC("?raw_tp")
__failure __msg("value is outside of the allowed memory range")
int data_slice_out_of_bounds_map_value(void *ctx)
......@@ -399,7 +422,6 @@ int invalid_helper2(void *ctx)
/* this should fail */
bpf_dynptr_read(read_data, sizeof(read_data), (void *)&ptr + 8, 0, 0);
return 0;
}
......@@ -1044,6 +1066,193 @@ int dynptr_read_into_slot(void *ctx)
return 0;
}
/* bpf_dynptr_slice()s are read-only and cannot be written to */
SEC("?tc")
__failure __msg("R0 cannot write into rdonly_mem")
int skb_invalid_slice_write(struct __sk_buff *skb)
{
struct bpf_dynptr ptr;
struct ethhdr *hdr;
char buffer[sizeof(*hdr)] = {};
bpf_dynptr_from_skb(skb, 0, &ptr);
hdr = bpf_dynptr_slice(&ptr, 0, buffer, sizeof(buffer));
if (!hdr)
return SK_DROP;
/* this should fail */
hdr->h_proto = 1;
return SK_PASS;
}
/* The read-only data slice is invalidated whenever a helper changes packet data */
SEC("?tc")
__failure __msg("invalid mem access 'scalar'")
int skb_invalid_data_slice1(struct __sk_buff *skb)
{
struct bpf_dynptr ptr;
struct ethhdr *hdr;
char buffer[sizeof(*hdr)] = {};
bpf_dynptr_from_skb(skb, 0, &ptr);
hdr = bpf_dynptr_slice(&ptr, 0, buffer, sizeof(buffer));
if (!hdr)
return SK_DROP;
val = hdr->h_proto;
if (bpf_skb_pull_data(skb, skb->len))
return SK_DROP;
/* this should fail */
val = hdr->h_proto;
return SK_PASS;
}
/* The read-write data slice is invalidated whenever a helper changes packet data */
SEC("?tc")
__failure __msg("invalid mem access 'scalar'")
int skb_invalid_data_slice2(struct __sk_buff *skb)
{
struct bpf_dynptr ptr;
struct ethhdr *hdr;
char buffer[sizeof(*hdr)] = {};
bpf_dynptr_from_skb(skb, 0, &ptr);
hdr = bpf_dynptr_slice_rdwr(&ptr, 0, buffer, sizeof(buffer));
if (!hdr)
return SK_DROP;
hdr->h_proto = 123;
if (bpf_skb_pull_data(skb, skb->len))
return SK_DROP;
/* this should fail */
hdr->h_proto = 1;
return SK_PASS;
}
/* The read-only data slice is invalidated whenever bpf_dynptr_write() is called */
SEC("?tc")
__failure __msg("invalid mem access 'scalar'")
int skb_invalid_data_slice3(struct __sk_buff *skb)
{
char write_data[64] = "hello there, world!!";
struct bpf_dynptr ptr;
struct ethhdr *hdr;
char buffer[sizeof(*hdr)] = {};
bpf_dynptr_from_skb(skb, 0, &ptr);
hdr = bpf_dynptr_slice(&ptr, 0, buffer, sizeof(buffer));
if (!hdr)
return SK_DROP;
val = hdr->h_proto;
bpf_dynptr_write(&ptr, 0, write_data, sizeof(write_data), 0);
/* this should fail */
val = hdr->h_proto;
return SK_PASS;
}
/* The read-write data slice is invalidated whenever bpf_dynptr_write() is called */
SEC("?tc")
__failure __msg("invalid mem access 'scalar'")
int skb_invalid_data_slice4(struct __sk_buff *skb)
{
char write_data[64] = "hello there, world!!";
struct bpf_dynptr ptr;
struct ethhdr *hdr;
char buffer[sizeof(*hdr)] = {};
bpf_dynptr_from_skb(skb, 0, &ptr);
hdr = bpf_dynptr_slice_rdwr(&ptr, 0, buffer, sizeof(buffer));
if (!hdr)
return SK_DROP;
hdr->h_proto = 123;
bpf_dynptr_write(&ptr, 0, write_data, sizeof(write_data), 0);
/* this should fail */
hdr->h_proto = 1;
return SK_PASS;
}
/* The read-only data slice is invalidated whenever a helper changes packet data */
SEC("?xdp")
__failure __msg("invalid mem access 'scalar'")
int xdp_invalid_data_slice1(struct xdp_md *xdp)
{
struct bpf_dynptr ptr;
struct ethhdr *hdr;
char buffer[sizeof(*hdr)] = {};
bpf_dynptr_from_xdp(xdp, 0, &ptr);
hdr = bpf_dynptr_slice(&ptr, 0, buffer, sizeof(buffer));
if (!hdr)
return SK_DROP;
val = hdr->h_proto;
if (bpf_xdp_adjust_head(xdp, 0 - (int)sizeof(*hdr)))
return XDP_DROP;
/* this should fail */
val = hdr->h_proto;
return XDP_PASS;
}
/* The read-write data slice is invalidated whenever a helper changes packet data */
SEC("?xdp")
__failure __msg("invalid mem access 'scalar'")
int xdp_invalid_data_slice2(struct xdp_md *xdp)
{
struct bpf_dynptr ptr;
struct ethhdr *hdr;
char buffer[sizeof(*hdr)] = {};
bpf_dynptr_from_xdp(xdp, 0, &ptr);
hdr = bpf_dynptr_slice_rdwr(&ptr, 0, buffer, sizeof(buffer));
if (!hdr)
return SK_DROP;
hdr->h_proto = 9;
if (bpf_xdp_adjust_head(xdp, 0 - (int)sizeof(*hdr)))
return XDP_DROP;
/* this should fail */
hdr->h_proto = 1;
return XDP_PASS;
}
/* Only supported prog type can create skb-type dynptrs */
SEC("?raw_tp")
__failure __msg("calling kernel function bpf_dynptr_from_skb is not allowed")
int skb_invalid_ctx(void *ctx)
{
struct bpf_dynptr ptr;
/* this should fail */
bpf_dynptr_from_skb(ctx, 0, &ptr);
return 0;
}
/* Reject writes to dynptr slot for uninit arg */
SEC("?raw_tp")
__failure __msg("potential write to dynptr at off=-16")
......@@ -1061,6 +1270,61 @@ int uninit_write_into_slot(void *ctx)
return 0;
}
/* Only supported prog type can create xdp-type dynptrs */
SEC("?raw_tp")
__failure __msg("calling kernel function bpf_dynptr_from_xdp is not allowed")
int xdp_invalid_ctx(void *ctx)
{
struct bpf_dynptr ptr;
/* this should fail */
bpf_dynptr_from_xdp(ctx, 0, &ptr);
return 0;
}
__u32 hdr_size = sizeof(struct ethhdr);
/* Can't pass in variable-sized len to bpf_dynptr_slice */
SEC("?tc")
__failure __msg("unbounded memory access")
int dynptr_slice_var_len1(struct __sk_buff *skb)
{
struct bpf_dynptr ptr;
struct ethhdr *hdr;
char buffer[sizeof(*hdr)] = {};
bpf_dynptr_from_skb(skb, 0, &ptr);
/* this should fail */
hdr = bpf_dynptr_slice(&ptr, 0, buffer, hdr_size);
if (!hdr)
return SK_DROP;
return SK_PASS;
}
/* Can't pass in variable-sized len to bpf_dynptr_slice */
SEC("?tc")
__failure __msg("must be a known constant")
int dynptr_slice_var_len2(struct __sk_buff *skb)
{
char buffer[sizeof(struct ethhdr)] = {};
struct bpf_dynptr ptr;
struct ethhdr *hdr;
bpf_dynptr_from_skb(skb, 0, &ptr);
if (hdr_size <= sizeof(buffer)) {
/* this should fail */
hdr = bpf_dynptr_slice_rdwr(&ptr, 0, buffer, hdr_size);
if (!hdr)
return SK_DROP;
hdr->h_proto = 12;
}
return SK_PASS;
}
static int callback(__u32 index, void *data)
{
*(__u32 *)data = 123;
......@@ -1092,3 +1356,24 @@ int invalid_data_slices(void *ctx)
return 0;
}
/* Program types that don't allow writes to packet data should fail if
* bpf_dynptr_slice_rdwr is called
*/
SEC("cgroup_skb/ingress")
__failure __msg("the prog does not allow writes to packet data")
int invalid_slice_rdwr_rdonly(struct __sk_buff *skb)
{
char buffer[sizeof(struct ethhdr)] = {};
struct bpf_dynptr ptr;
struct ethhdr *hdr;
bpf_dynptr_from_skb(skb, 0, &ptr);
/* this should fail since cgroup_skb doesn't allow
* changing packet data
*/
hdr = bpf_dynptr_slice_rdwr(&ptr, 0, buffer, sizeof(buffer));
return 0;
}
......@@ -5,6 +5,7 @@
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
#include "bpf_misc.h"
#include "bpf_kfuncs.h"
#include "errno.h"
char _license[] SEC("license") = "GPL";
......@@ -30,7 +31,7 @@ struct {
__type(value, __u32);
} array_map SEC(".maps");
SEC("tp/syscalls/sys_enter_nanosleep")
SEC("?tp/syscalls/sys_enter_nanosleep")
int test_read_write(void *ctx)
{
char write_data[64] = "hello there, world!!";
......@@ -61,8 +62,8 @@ int test_read_write(void *ctx)
return 0;
}
SEC("tp/syscalls/sys_enter_nanosleep")
int test_data_slice(void *ctx)
SEC("?tp/syscalls/sys_enter_nanosleep")
int test_dynptr_data(void *ctx)
{
__u32 key = 0, val = 235, *map_val;
struct bpf_dynptr ptr;
......@@ -131,7 +132,7 @@ static int ringbuf_callback(__u32 index, void *data)
return 0;
}
SEC("tp/syscalls/sys_enter_nanosleep")
SEC("?tp/syscalls/sys_enter_nanosleep")
int test_ringbuf(void *ctx)
{
struct bpf_dynptr ptr;
......@@ -163,3 +164,49 @@ int test_ringbuf(void *ctx)
bpf_ringbuf_discard_dynptr(&ptr, 0);
return 0;
}
SEC("?cgroup_skb/egress")
int test_skb_readonly(struct __sk_buff *skb)
{
__u8 write_data[2] = {1, 2};
struct bpf_dynptr ptr;
__u64 *data;
int ret;
if (bpf_dynptr_from_skb(skb, 0, &ptr)) {
err = 1;
return 1;
}
/* since cgroup skbs are read only, writes should fail */
ret = bpf_dynptr_write(&ptr, 0, write_data, sizeof(write_data), 0);
if (ret != -EINVAL) {
err = 2;
return 1;
}
return 1;
}
SEC("?cgroup_skb/egress")
int test_dynptr_skb_data(struct __sk_buff *skb)
{
__u8 write_data[2] = {1, 2};
struct bpf_dynptr ptr;
__u64 *data;
int ret;
if (bpf_dynptr_from_skb(skb, 0, &ptr)) {
err = 1;
return 1;
}
/* This should return NULL. Must use bpf_dynptr_slice API */
data = bpf_dynptr_data(&ptr, 0, 1);
if (data) {
err = 2;
return 1;
}
return 1;
}
// SPDX-License-Identifier: GPL-2.0
/* This parsing logic is taken from the open source library katran, a layer 4
* load balancer.
*
* This code logic using dynptrs can be found in test_parse_tcp_hdr_opt_dynptr.c
*
* https://github.com/facebookincubator/katran/blob/main/katran/lib/bpf/pckt_parsing.h
*/
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
#include <linux/tcp.h>
#include <stdbool.h>
#include <linux/ipv6.h>
#include <linux/if_ether.h>
#include "test_tcp_hdr_options.h"
char _license[] SEC("license") = "GPL";
/* Kind number used for experiments */
const __u32 tcp_hdr_opt_kind_tpr = 0xFD;
/* Length of the tcp header option */
const __u32 tcp_hdr_opt_len_tpr = 6;
/* maximum number of header options to check to look up server_id */
const __u32 tcp_hdr_opt_max_opt_checks = 15;
__u32 server_id;
struct hdr_opt_state {
__u32 server_id;
__u8 byte_offset;
__u8 hdr_bytes_remaining;
};
static int parse_hdr_opt(const struct xdp_md *xdp, struct hdr_opt_state *state)
{
const void *data = (void *)(long)xdp->data;
const void *data_end = (void *)(long)xdp->data_end;
__u8 *tcp_opt, kind, hdr_len;
tcp_opt = (__u8 *)(data + state->byte_offset);
if (tcp_opt + 1 > data_end)
return -1;
kind = tcp_opt[0];
if (kind == TCPOPT_EOL)
return -1;
if (kind == TCPOPT_NOP) {
state->hdr_bytes_remaining--;
state->byte_offset++;
return 0;
}
if (state->hdr_bytes_remaining < 2 ||
tcp_opt + sizeof(__u8) + sizeof(__u8) > data_end)
return -1;
hdr_len = tcp_opt[1];
if (hdr_len > state->hdr_bytes_remaining)
return -1;
if (kind == tcp_hdr_opt_kind_tpr) {
if (hdr_len != tcp_hdr_opt_len_tpr)
return -1;
if (tcp_opt + tcp_hdr_opt_len_tpr > data_end)
return -1;
state->server_id = *(__u32 *)&tcp_opt[2];
return 1;
}
state->hdr_bytes_remaining -= hdr_len;
state->byte_offset += hdr_len;
return 0;
}
SEC("xdp")
int xdp_ingress_v6(struct xdp_md *xdp)
{
const void *data = (void *)(long)xdp->data;
const void *data_end = (void *)(long)xdp->data_end;
struct hdr_opt_state opt_state = {};
__u8 tcp_hdr_opt_len = 0;
struct tcphdr *tcp_hdr;
__u64 tcp_offset = 0;
__u32 off;
int err;
tcp_offset = sizeof(struct ethhdr) + sizeof(struct ipv6hdr);
tcp_hdr = (struct tcphdr *)(data + tcp_offset);
if (tcp_hdr + 1 > data_end)
return XDP_DROP;
tcp_hdr_opt_len = (tcp_hdr->doff * 4) - sizeof(struct tcphdr);
if (tcp_hdr_opt_len < tcp_hdr_opt_len_tpr)
return XDP_DROP;
opt_state.hdr_bytes_remaining = tcp_hdr_opt_len;
opt_state.byte_offset = sizeof(struct tcphdr) + tcp_offset;
/* max number of bytes of options in tcp header is 40 bytes */
for (int i = 0; i < tcp_hdr_opt_max_opt_checks; i++) {
err = parse_hdr_opt(xdp, &opt_state);
if (err || !opt_state.hdr_bytes_remaining)
break;
}
if (!opt_state.server_id)
return XDP_DROP;
server_id = opt_state.server_id;
return XDP_PASS;
}
// SPDX-License-Identifier: GPL-2.0
/* This logic is lifted from a real-world use case of packet parsing, used in
* the open source library katran, a layer 4 load balancer.
*
* This test demonstrates how to parse packet contents using dynptrs. The
* original code (parsing without dynptrs) can be found in test_parse_tcp_hdr_opt.c
*/
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
#include <linux/tcp.h>
#include <stdbool.h>
#include <linux/ipv6.h>
#include <linux/if_ether.h>
#include "test_tcp_hdr_options.h"
#include "bpf_kfuncs.h"
char _license[] SEC("license") = "GPL";
/* Kind number used for experiments */
const __u32 tcp_hdr_opt_kind_tpr = 0xFD;
/* Length of the tcp header option */
const __u32 tcp_hdr_opt_len_tpr = 6;
/* maximum number of header options to check to look up server_id */
const __u32 tcp_hdr_opt_max_opt_checks = 15;
__u32 server_id;
static int parse_hdr_opt(struct bpf_dynptr *ptr, __u32 *off, __u8 *hdr_bytes_remaining,
__u32 *server_id)
{
__u8 *tcp_opt, kind, hdr_len;
__u8 buffer[sizeof(kind) + sizeof(hdr_len) + sizeof(*server_id)];
__u8 *data;
__builtin_memset(buffer, 0, sizeof(buffer));
data = bpf_dynptr_slice(ptr, *off, buffer, sizeof(buffer));
if (!data)
return -1;
kind = data[0];
if (kind == TCPOPT_EOL)
return -1;
if (kind == TCPOPT_NOP) {
*off += 1;
*hdr_bytes_remaining -= 1;
return 0;
}
if (*hdr_bytes_remaining < 2)
return -1;
hdr_len = data[1];
if (hdr_len > *hdr_bytes_remaining)
return -1;
if (kind == tcp_hdr_opt_kind_tpr) {
if (hdr_len != tcp_hdr_opt_len_tpr)
return -1;
__builtin_memcpy(server_id, (__u32 *)(data + 2), sizeof(*server_id));
return 1;
}
*off += hdr_len;
*hdr_bytes_remaining -= hdr_len;
return 0;
}
SEC("xdp")
int xdp_ingress_v6(struct xdp_md *xdp)
{
__u8 buffer[sizeof(struct tcphdr)] = {};
__u8 hdr_bytes_remaining;
struct tcphdr *tcp_hdr;
__u8 tcp_hdr_opt_len;
int err = 0;
__u32 off;
struct bpf_dynptr ptr;
bpf_dynptr_from_xdp(xdp, 0, &ptr);
off = sizeof(struct ethhdr) + sizeof(struct ipv6hdr);
tcp_hdr = bpf_dynptr_slice(&ptr, off, buffer, sizeof(buffer));
if (!tcp_hdr)
return XDP_DROP;
tcp_hdr_opt_len = (tcp_hdr->doff * 4) - sizeof(struct tcphdr);
if (tcp_hdr_opt_len < tcp_hdr_opt_len_tpr)
return XDP_DROP;
hdr_bytes_remaining = tcp_hdr_opt_len;
off += sizeof(struct tcphdr);
/* max number of bytes of options in tcp header is 40 bytes */
for (int i = 0; i < tcp_hdr_opt_max_opt_checks; i++) {
err = parse_hdr_opt(&ptr, &off, &hdr_bytes_remaining, &server_id);
if (err || !hdr_bytes_remaining)
break;
}
if (!server_id)
return XDP_DROP;
return XDP_PASS;
}
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2022 Meta */
#include <stddef.h>
#include <string.h>
#include <linux/bpf.h>
#include <linux/if_ether.h>
#include <linux/if_packet.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
#include <linux/in.h>
#include <linux/udp.h>
#include <linux/tcp.h>
#include <linux/pkt_cls.h>
#include <sys/socket.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>
#include "test_iptunnel_common.h"
#include "bpf_kfuncs.h"
const size_t tcphdr_sz = sizeof(struct tcphdr);
const size_t udphdr_sz = sizeof(struct udphdr);
const size_t ethhdr_sz = sizeof(struct ethhdr);
const size_t iphdr_sz = sizeof(struct iphdr);
const size_t ipv6hdr_sz = sizeof(struct ipv6hdr);
struct {
__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
__uint(max_entries, 256);
__type(key, __u32);
__type(value, __u64);
} rxcnt SEC(".maps");
struct {
__uint(type, BPF_MAP_TYPE_HASH);
__uint(max_entries, MAX_IPTNL_ENTRIES);
__type(key, struct vip);
__type(value, struct iptnl_info);
} vip2tnl SEC(".maps");
static __always_inline void count_tx(__u32 protocol)
{
__u64 *rxcnt_count;
rxcnt_count = bpf_map_lookup_elem(&rxcnt, &protocol);
if (rxcnt_count)
*rxcnt_count += 1;
}
static __always_inline int get_dport(void *trans_data, __u8 protocol)
{
struct tcphdr *th;
struct udphdr *uh;
switch (protocol) {
case IPPROTO_TCP:
th = (struct tcphdr *)trans_data;
return th->dest;
case IPPROTO_UDP:
uh = (struct udphdr *)trans_data;
return uh->dest;
default:
return 0;
}
}
static __always_inline void set_ethhdr(struct ethhdr *new_eth,
const struct ethhdr *old_eth,
const struct iptnl_info *tnl,
__be16 h_proto)
{
memcpy(new_eth->h_source, old_eth->h_dest, sizeof(new_eth->h_source));
memcpy(new_eth->h_dest, tnl->dmac, sizeof(new_eth->h_dest));
new_eth->h_proto = h_proto;
}
static __always_inline int handle_ipv4(struct xdp_md *xdp, struct bpf_dynptr *xdp_ptr)
{
__u8 eth_buffer[ethhdr_sz + iphdr_sz + ethhdr_sz];
__u8 iph_buffer_tcp[iphdr_sz + tcphdr_sz];
__u8 iph_buffer_udp[iphdr_sz + udphdr_sz];
struct bpf_dynptr new_xdp_ptr;
struct iptnl_info *tnl;
struct ethhdr *new_eth;
struct ethhdr *old_eth;
__u32 transport_hdr_sz;
struct iphdr *iph;
__u16 *next_iph;
__u16 payload_len;
struct vip vip = {};
int dport;
__u32 csum = 0;
int i;
__builtin_memset(eth_buffer, 0, sizeof(eth_buffer));
__builtin_memset(iph_buffer_tcp, 0, sizeof(iph_buffer_tcp));
__builtin_memset(iph_buffer_udp, 0, sizeof(iph_buffer_udp));
if (ethhdr_sz + iphdr_sz + tcphdr_sz > xdp->data_end - xdp->data)
iph = bpf_dynptr_slice(xdp_ptr, ethhdr_sz, iph_buffer_udp, sizeof(iph_buffer_udp));
else
iph = bpf_dynptr_slice(xdp_ptr, ethhdr_sz, iph_buffer_tcp, sizeof(iph_buffer_tcp));
if (!iph)
return XDP_DROP;
dport = get_dport(iph + 1, iph->protocol);
if (dport == -1)
return XDP_DROP;
vip.protocol = iph->protocol;
vip.family = AF_INET;
vip.daddr.v4 = iph->daddr;
vip.dport = dport;
payload_len = bpf_ntohs(iph->tot_len);
tnl = bpf_map_lookup_elem(&vip2tnl, &vip);
/* It only does v4-in-v4 */
if (!tnl || tnl->family != AF_INET)
return XDP_PASS;
if (bpf_xdp_adjust_head(xdp, 0 - (int)iphdr_sz))
return XDP_DROP;
bpf_dynptr_from_xdp(xdp, 0, &new_xdp_ptr);
new_eth = bpf_dynptr_slice_rdwr(&new_xdp_ptr, 0, eth_buffer, sizeof(eth_buffer));
if (!new_eth)
return XDP_DROP;
iph = (struct iphdr *)(new_eth + 1);
old_eth = (struct ethhdr *)(iph + 1);
set_ethhdr(new_eth, old_eth, tnl, bpf_htons(ETH_P_IP));
if (new_eth == eth_buffer)
bpf_dynptr_write(&new_xdp_ptr, 0, eth_buffer, sizeof(eth_buffer), 0);
iph->version = 4;
iph->ihl = iphdr_sz >> 2;
iph->frag_off = 0;
iph->protocol = IPPROTO_IPIP;
iph->check = 0;
iph->tos = 0;
iph->tot_len = bpf_htons(payload_len + iphdr_sz);
iph->daddr = tnl->daddr.v4;
iph->saddr = tnl->saddr.v4;
iph->ttl = 8;
next_iph = (__u16 *)iph;
for (i = 0; i < iphdr_sz >> 1; i++)
csum += *next_iph++;
iph->check = ~((csum & 0xffff) + (csum >> 16));
count_tx(vip.protocol);
return XDP_TX;
}
static __always_inline int handle_ipv6(struct xdp_md *xdp, struct bpf_dynptr *xdp_ptr)
{
__u8 eth_buffer[ethhdr_sz + ipv6hdr_sz + ethhdr_sz];
__u8 ip6h_buffer_tcp[ipv6hdr_sz + tcphdr_sz];
__u8 ip6h_buffer_udp[ipv6hdr_sz + udphdr_sz];
struct bpf_dynptr new_xdp_ptr;
struct iptnl_info *tnl;
struct ethhdr *new_eth;
struct ethhdr *old_eth;
__u32 transport_hdr_sz;
struct ipv6hdr *ip6h;
__u16 payload_len;
struct vip vip = {};
int dport;
__builtin_memset(eth_buffer, 0, sizeof(eth_buffer));
__builtin_memset(ip6h_buffer_tcp, 0, sizeof(ip6h_buffer_tcp));
__builtin_memset(ip6h_buffer_udp, 0, sizeof(ip6h_buffer_udp));
if (ethhdr_sz + iphdr_sz + tcphdr_sz > xdp->data_end - xdp->data)
ip6h = bpf_dynptr_slice(xdp_ptr, ethhdr_sz, ip6h_buffer_udp, sizeof(ip6h_buffer_udp));
else
ip6h = bpf_dynptr_slice(xdp_ptr, ethhdr_sz, ip6h_buffer_tcp, sizeof(ip6h_buffer_tcp));
if (!ip6h)
return XDP_DROP;
dport = get_dport(ip6h + 1, ip6h->nexthdr);
if (dport == -1)
return XDP_DROP;
vip.protocol = ip6h->nexthdr;
vip.family = AF_INET6;
memcpy(vip.daddr.v6, ip6h->daddr.s6_addr32, sizeof(vip.daddr));
vip.dport = dport;
payload_len = ip6h->payload_len;
tnl = bpf_map_lookup_elem(&vip2tnl, &vip);
/* It only does v6-in-v6 */
if (!tnl || tnl->family != AF_INET6)
return XDP_PASS;
if (bpf_xdp_adjust_head(xdp, 0 - (int)ipv6hdr_sz))
return XDP_DROP;
bpf_dynptr_from_xdp(xdp, 0, &new_xdp_ptr);
new_eth = bpf_dynptr_slice_rdwr(&new_xdp_ptr, 0, eth_buffer, sizeof(eth_buffer));
if (!new_eth)
return XDP_DROP;
ip6h = (struct ipv6hdr *)(new_eth + 1);
old_eth = (struct ethhdr *)(ip6h + 1);
set_ethhdr(new_eth, old_eth, tnl, bpf_htons(ETH_P_IPV6));
if (new_eth == eth_buffer)
bpf_dynptr_write(&new_xdp_ptr, 0, eth_buffer, sizeof(eth_buffer), 0);
ip6h->version = 6;
ip6h->priority = 0;
memset(ip6h->flow_lbl, 0, sizeof(ip6h->flow_lbl));
ip6h->payload_len = bpf_htons(bpf_ntohs(payload_len) + ipv6hdr_sz);
ip6h->nexthdr = IPPROTO_IPV6;
ip6h->hop_limit = 8;
memcpy(ip6h->saddr.s6_addr32, tnl->saddr.v6, sizeof(tnl->saddr.v6));
memcpy(ip6h->daddr.s6_addr32, tnl->daddr.v6, sizeof(tnl->daddr.v6));
count_tx(vip.protocol);
return XDP_TX;
}
SEC("xdp")
int _xdp_tx_iptunnel(struct xdp_md *xdp)
{
__u8 buffer[ethhdr_sz];
struct bpf_dynptr ptr;
struct ethhdr *eth;
__u16 h_proto;
__builtin_memset(buffer, 0, sizeof(buffer));
bpf_dynptr_from_xdp(xdp, 0, &ptr);
eth = bpf_dynptr_slice(&ptr, 0, buffer, sizeof(buffer));
if (!eth)
return XDP_DROP;
h_proto = eth->h_proto;
if (h_proto == bpf_htons(ETH_P_IP))
return handle_ipv4(xdp, &ptr);
else if (h_proto == bpf_htons(ETH_P_IPV6))
return handle_ipv6(xdp, &ptr);
else
return XDP_DROP;
}
char _license[] SEC("license") = "GPL";
......@@ -50,6 +50,7 @@ struct linum_err {
#define TCPOPT_EOL 0
#define TCPOPT_NOP 1
#define TCPOPT_MSS 2
#define TCPOPT_WINDOW 3
#define TCPOPT_EXP 254
......