Commit 74765da1 authored by Alexei Starovoitov

Merge branch 'bpf_iter_tcp_udp'

Yonghong Song says:

====================
bpf iterator implements traversal of kernel data structures and these
data structures are passed to a bpf program for processing.
This gives great flexibility for users to examine kernel data
structure without using e.g. /proc/net which has limited and
fixed format.

Commit 138d0be3 ("net: bpf: Add netlink and ipv6_route bpf_iter targets")
implemented bpf iterators for netlink and ipv6_route.
This patch set intends to implement bpf iterators for tcp and udp.

Currently, /proc/net/tcp is used to print tcp4 stats and /proc/net/tcp6
is used to print tcp6 stats. /proc/net/udp[6] have similar usage model.
In contrast, only one tcp iterator is implemented and it is the bpf program's
responsibility to filter based on socket family. The same is for udp.
This will avoid another unnecessary traversal pass if users want
to check both tcp4 and tcp6.

Several helpers are also implemented in this patch
  bpf_skc_to_{tcp, tcp6, tcp_timewait, tcp_request, udp6}_sock
The argument for these helpers is not a fixed btf_id. For example,
  bpf_skc_to_tcp(struct sock_common *), or
  bpf_skc_to_tcp(struct sock *), or
  bpf_skc_to_tcp(struct inet_sock *), ...
are all valid. At runtime, the helper will check whether pointer cast
is legal or not. Please see Patch #5 for details.

Since btf_id's for both arguments and return value are known at
build time, the btf_id's are pre-computed once vmlinux btf becomes
valid. Jiri's "adding d_path helper" patch set
  https://lore.kernel.org/bpf/20200616100512.2168860-1-jolsa@kernel.org/T/
provides a way to pre-compute btf id during vmlinux build time.
This can be applied here as well. A followup patch can convert
to build time btf id computation after Jiri's patch landed.

Changelogs:
  v4 -> v5:
    - fix bpf_skc_to_udp6_sock helper as besides sk_protocol, sk_family,
      sk_type == SOCK_DGRAM is also needed to differentiate from
      SOCK_RAW (Eric)
  v3 -> v4:
    - fix bpf_skc_to_{tcp_timewait, tcp_request}_sock helper implementation
      as just checking sk->sk_state is not enough (Martin)
    - fix a few kernel test robot reported failures
    - move bpf_tracing_net.h from libbpf to selftests (Andrii)
    - remove __weak attribute from selftests CONFIG_HZ variables (Andrii)
  v2 -> v3:
    - change sock_cast*/SOCK_CAST* names to btf_sock* names for generality (Martin)
    - change gpl_license to false (Martin)
    - fix helper to cast to tcp timewait/request socket. (Martin)
  v1 -> v2:
    - guard init_sock_cast_types() definition properly with CONFIG_NET (Martin)
    - reuse the btf_ids, computed for new helper argument, for return
      values (Martin)
    - using BTF_TYPE_EMIT to express intent of btf type generation (Andrii)
    - abstract out common net macros into bpf_tracing_net.h (Andrii)
====================
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
parents f9bcf968 cfcd75f9
...@@ -265,6 +265,7 @@ enum bpf_return_type { ...@@ -265,6 +265,7 @@ enum bpf_return_type {
RET_PTR_TO_TCP_SOCK_OR_NULL, /* returns a pointer to a tcp_sock or NULL */ RET_PTR_TO_TCP_SOCK_OR_NULL, /* returns a pointer to a tcp_sock or NULL */
RET_PTR_TO_SOCK_COMMON_OR_NULL, /* returns a pointer to a sock_common or NULL */ RET_PTR_TO_SOCK_COMMON_OR_NULL, /* returns a pointer to a sock_common or NULL */
RET_PTR_TO_ALLOC_MEM_OR_NULL, /* returns a pointer to dynamically allocated memory or NULL */ RET_PTR_TO_ALLOC_MEM_OR_NULL, /* returns a pointer to dynamically allocated memory or NULL */
RET_PTR_TO_BTF_ID_OR_NULL, /* returns a pointer to a btf_id or NULL */
}; };
/* eBPF function prototype used by verifier to allow BPF_CALLs from eBPF programs /* eBPF function prototype used by verifier to allow BPF_CALLs from eBPF programs
...@@ -287,6 +288,12 @@ struct bpf_func_proto { ...@@ -287,6 +288,12 @@ struct bpf_func_proto {
enum bpf_arg_type arg_type[5]; enum bpf_arg_type arg_type[5];
}; };
int *btf_id; /* BTF ids of arguments */ int *btf_id; /* BTF ids of arguments */
bool (*check_btf_id)(u32 btf_id, u32 arg); /* if the argument btf_id is
* valid. Often used if more
* than one btf id is permitted
* for this argument.
*/
int *ret_btf_id; /* return value btf_id */
}; };
/* bpf_context is intentionally undefined structure. Pointer to bpf_context is /* bpf_context is intentionally undefined structure. Pointer to bpf_context is
...@@ -1524,6 +1531,7 @@ static inline bool bpf_map_is_dev_bound(struct bpf_map *map) ...@@ -1524,6 +1531,7 @@ static inline bool bpf_map_is_dev_bound(struct bpf_map *map)
struct bpf_map *bpf_map_offload_map_alloc(union bpf_attr *attr); struct bpf_map *bpf_map_offload_map_alloc(union bpf_attr *attr);
void bpf_map_offload_map_free(struct bpf_map *map); void bpf_map_offload_map_free(struct bpf_map *map);
void init_btf_sock_ids(struct btf *btf);
#else #else
static inline int bpf_prog_offload_init(struct bpf_prog *prog, static inline int bpf_prog_offload_init(struct bpf_prog *prog,
union bpf_attr *attr) union bpf_attr *attr)
...@@ -1549,6 +1557,9 @@ static inline struct bpf_map *bpf_map_offload_map_alloc(union bpf_attr *attr) ...@@ -1549,6 +1557,9 @@ static inline struct bpf_map *bpf_map_offload_map_alloc(union bpf_attr *attr)
static inline void bpf_map_offload_map_free(struct bpf_map *map) static inline void bpf_map_offload_map_free(struct bpf_map *map)
{ {
} }
static inline void init_btf_sock_ids(struct btf *btf)
{
}
#endif /* CONFIG_NET && CONFIG_BPF_SYSCALL */ #endif /* CONFIG_NET && CONFIG_BPF_SYSCALL */
#if defined(CONFIG_BPF_STREAM_PARSER) #if defined(CONFIG_BPF_STREAM_PARSER)
...@@ -1638,6 +1649,11 @@ extern const struct bpf_func_proto bpf_ringbuf_reserve_proto; ...@@ -1638,6 +1649,11 @@ extern const struct bpf_func_proto bpf_ringbuf_reserve_proto;
extern const struct bpf_func_proto bpf_ringbuf_submit_proto; extern const struct bpf_func_proto bpf_ringbuf_submit_proto;
extern const struct bpf_func_proto bpf_ringbuf_discard_proto; extern const struct bpf_func_proto bpf_ringbuf_discard_proto;
extern const struct bpf_func_proto bpf_ringbuf_query_proto; extern const struct bpf_func_proto bpf_ringbuf_query_proto;
extern const struct bpf_func_proto bpf_skc_to_tcp6_sock_proto;
extern const struct bpf_func_proto bpf_skc_to_tcp_sock_proto;
extern const struct bpf_func_proto bpf_skc_to_tcp_timewait_sock_proto;
extern const struct bpf_func_proto bpf_skc_to_tcp_request_sock_proto;
extern const struct bpf_func_proto bpf_skc_to_udp6_sock_proto;
const struct bpf_func_proto *bpf_tracing_func_proto( const struct bpf_func_proto *bpf_tracing_func_proto(
enum bpf_func_id func_id, const struct bpf_prog *prog); enum bpf_func_id func_id, const struct bpf_prog *prog);
......
...@@ -1935,6 +1935,7 @@ struct tcp_iter_state { ...@@ -1935,6 +1935,7 @@ struct tcp_iter_state {
struct seq_net_private p; struct seq_net_private p;
enum tcp_seq_states state; enum tcp_seq_states state;
struct sock *syn_wait_sk; struct sock *syn_wait_sk;
struct tcp_seq_afinfo *bpf_seq_afinfo;
int bucket, offset, sbucket, num; int bucket, offset, sbucket, num;
loff_t last_pos; loff_t last_pos;
}; };
......
...@@ -440,6 +440,7 @@ struct udp_seq_afinfo { ...@@ -440,6 +440,7 @@ struct udp_seq_afinfo {
struct udp_iter_state { struct udp_iter_state {
struct seq_net_private p; struct seq_net_private p;
int bucket; int bucket;
struct udp_seq_afinfo *bpf_seq_afinfo;
}; };
void *udp_seq_start(struct seq_file *seq, loff_t *pos); void *udp_seq_start(struct seq_file *seq, loff_t *pos);
......
...@@ -3255,6 +3255,36 @@ union bpf_attr { ...@@ -3255,6 +3255,36 @@ union bpf_attr {
* case of **BPF_CSUM_LEVEL_QUERY**, the current skb->csum_level * case of **BPF_CSUM_LEVEL_QUERY**, the current skb->csum_level
* is returned or the error code -EACCES in case the skb is not * is returned or the error code -EACCES in case the skb is not
* subject to CHECKSUM_UNNECESSARY. * subject to CHECKSUM_UNNECESSARY.
*
* struct tcp6_sock *bpf_skc_to_tcp6_sock(void *sk)
* Description
* Dynamically cast a *sk* pointer to a *tcp6_sock* pointer.
* Return
* *sk* if casting is valid, or NULL otherwise.
*
* struct tcp_sock *bpf_skc_to_tcp_sock(void *sk)
* Description
* Dynamically cast a *sk* pointer to a *tcp_sock* pointer.
* Return
* *sk* if casting is valid, or NULL otherwise.
*
* struct tcp_timewait_sock *bpf_skc_to_tcp_timewait_sock(void *sk)
* Description
* Dynamically cast a *sk* pointer to a *tcp_timewait_sock* pointer.
* Return
* *sk* if casting is valid, or NULL otherwise.
*
* struct tcp_request_sock *bpf_skc_to_tcp_request_sock(void *sk)
* Description
* Dynamically cast a *sk* pointer to a *tcp_request_sock* pointer.
* Return
* *sk* if casting is valid, or NULL otherwise.
*
* struct udp6_sock *bpf_skc_to_udp6_sock(void *sk)
* Description
* Dynamically cast a *sk* pointer to a *udp6_sock* pointer.
* Return
* *sk* if casting is valid, or NULL otherwise.
*/ */
#define __BPF_FUNC_MAPPER(FN) \ #define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \ FN(unspec), \
...@@ -3392,7 +3422,12 @@ union bpf_attr { ...@@ -3392,7 +3422,12 @@ union bpf_attr {
FN(ringbuf_submit), \ FN(ringbuf_submit), \
FN(ringbuf_discard), \ FN(ringbuf_discard), \
FN(ringbuf_query), \ FN(ringbuf_query), \
FN(csum_level), FN(csum_level), \
FN(skc_to_tcp6_sock), \
FN(skc_to_tcp_sock), \
FN(skc_to_tcp_timewait_sock), \
FN(skc_to_tcp_request_sock), \
FN(skc_to_udp6_sock),
/* integer value in 'imm' field of BPF_CALL instruction selects which helper /* integer value in 'imm' field of BPF_CALL instruction selects which helper
* function eBPF program intends to call * function eBPF program intends to call
......
...@@ -3674,6 +3674,7 @@ struct btf *btf_parse_vmlinux(void) ...@@ -3674,6 +3674,7 @@ struct btf *btf_parse_vmlinux(void)
goto errout; goto errout;
bpf_struct_ops_init(btf, log); bpf_struct_ops_init(btf, log);
init_btf_sock_ids(btf);
btf_verifier_env_free(env); btf_verifier_env_free(env);
refcount_set(&btf->refcnt, 1); refcount_set(&btf->refcnt, 1);
......
...@@ -3800,12 +3800,14 @@ static int int_ptr_type_to_size(enum bpf_arg_type type) ...@@ -3800,12 +3800,14 @@ static int int_ptr_type_to_size(enum bpf_arg_type type)
return -EINVAL; return -EINVAL;
} }
static int check_func_arg(struct bpf_verifier_env *env, u32 regno, static int check_func_arg(struct bpf_verifier_env *env, u32 arg,
enum bpf_arg_type arg_type, struct bpf_call_arg_meta *meta,
struct bpf_call_arg_meta *meta) const struct bpf_func_proto *fn)
{ {
u32 regno = BPF_REG_1 + arg;
struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno]; struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
enum bpf_reg_type expected_type, type = reg->type; enum bpf_reg_type expected_type, type = reg->type;
enum bpf_arg_type arg_type = fn->arg_type[arg];
int err = 0; int err = 0;
if (arg_type == ARG_DONTCARE) if (arg_type == ARG_DONTCARE)
...@@ -3885,6 +3887,7 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 regno, ...@@ -3885,6 +3887,7 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 regno,
expected_type = PTR_TO_BTF_ID; expected_type = PTR_TO_BTF_ID;
if (type != expected_type) if (type != expected_type)
goto err_type; goto err_type;
if (!fn->check_btf_id) {
if (reg->btf_id != meta->btf_id) { if (reg->btf_id != meta->btf_id) {
verbose(env, "Helper has type %s got %s in R%d\n", verbose(env, "Helper has type %s got %s in R%d\n",
kernel_type_name(meta->btf_id), kernel_type_name(meta->btf_id),
...@@ -3892,6 +3895,12 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 regno, ...@@ -3892,6 +3895,12 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 regno,
return -EACCES; return -EACCES;
} }
} else if (!fn->check_btf_id(reg->btf_id, arg)) {
verbose(env, "Helper does not support %s in R%d\n",
kernel_type_name(reg->btf_id), regno);
return -EACCES;
}
if (!tnum_is_const(reg->var_off) || reg->var_off.value || reg->off) { if (!tnum_is_const(reg->var_off) || reg->var_off.value || reg->off) {
verbose(env, "R%d is a pointer to in-kernel struct with non-zero offset\n", verbose(env, "R%d is a pointer to in-kernel struct with non-zero offset\n",
regno); regno);
...@@ -4709,10 +4718,12 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn ...@@ -4709,10 +4718,12 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn
meta.func_id = func_id; meta.func_id = func_id;
/* check args */ /* check args */
for (i = 0; i < 5; i++) { for (i = 0; i < 5; i++) {
if (!fn->check_btf_id) {
err = btf_resolve_helper_id(&env->log, fn, i); err = btf_resolve_helper_id(&env->log, fn, i);
if (err > 0) if (err > 0)
meta.btf_id = err; meta.btf_id = err;
err = check_func_arg(env, BPF_REG_1 + i, fn->arg_type[i], &meta); }
err = check_func_arg(env, i, &meta, fn);
if (err) if (err)
return err; return err;
} }
...@@ -4815,6 +4826,18 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn ...@@ -4815,6 +4826,18 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn
regs[BPF_REG_0].type = PTR_TO_MEM_OR_NULL; regs[BPF_REG_0].type = PTR_TO_MEM_OR_NULL;
regs[BPF_REG_0].id = ++env->id_gen; regs[BPF_REG_0].id = ++env->id_gen;
regs[BPF_REG_0].mem_size = meta.mem_size; regs[BPF_REG_0].mem_size = meta.mem_size;
} else if (fn->ret_type == RET_PTR_TO_BTF_ID_OR_NULL) {
int ret_btf_id;
mark_reg_known_zero(env, regs, BPF_REG_0);
regs[BPF_REG_0].type = PTR_TO_BTF_ID_OR_NULL;
ret_btf_id = *fn->ret_btf_id;
if (ret_btf_id == 0) {
verbose(env, "invalid return type %d of func %s#%d\n",
fn->ret_type, func_id_name(func_id), func_id);
return -EINVAL;
}
regs[BPF_REG_0].btf_id = ret_btf_id;
} else { } else {
verbose(env, "unknown return type %d of func %s#%d\n", verbose(env, "unknown return type %d of func %s#%d\n",
fn->ret_type, func_id_name(func_id), func_id); fn->ret_type, func_id_name(func_id), func_id);
......
...@@ -681,7 +681,8 @@ BPF_CALL_5(bpf_seq_printf, struct seq_file *, m, char *, fmt, u32, fmt_size, ...@@ -681,7 +681,8 @@ BPF_CALL_5(bpf_seq_printf, struct seq_file *, m, char *, fmt, u32, fmt_size,
} }
if (fmt[i] != 'i' && fmt[i] != 'd' && if (fmt[i] != 'i' && fmt[i] != 'd' &&
fmt[i] != 'u' && fmt[i] != 'x') { fmt[i] != 'u' && fmt[i] != 'x' &&
fmt[i] != 'X') {
err = -EINVAL; err = -EINVAL;
goto out; goto out;
} }
...@@ -1134,6 +1135,8 @@ bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) ...@@ -1134,6 +1135,8 @@ bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_ringbuf_discard_proto; return &bpf_ringbuf_discard_proto;
case BPF_FUNC_ringbuf_query: case BPF_FUNC_ringbuf_query:
return &bpf_ringbuf_query_proto; return &bpf_ringbuf_query_proto;
case BPF_FUNC_jiffies64:
return &bpf_jiffies64_proto;
default: default:
return NULL; return NULL;
} }
...@@ -1512,6 +1515,16 @@ tracing_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) ...@@ -1512,6 +1515,16 @@ tracing_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_skb_output_proto; return &bpf_skb_output_proto;
case BPF_FUNC_xdp_output: case BPF_FUNC_xdp_output:
return &bpf_xdp_output_proto; return &bpf_xdp_output_proto;
case BPF_FUNC_skc_to_tcp6_sock:
return &bpf_skc_to_tcp6_sock_proto;
case BPF_FUNC_skc_to_tcp_sock:
return &bpf_skc_to_tcp_sock_proto;
case BPF_FUNC_skc_to_tcp_timewait_sock:
return &bpf_skc_to_tcp_timewait_sock_proto;
case BPF_FUNC_skc_to_tcp_request_sock:
return &bpf_skc_to_tcp_request_sock_proto;
case BPF_FUNC_skc_to_udp6_sock:
return &bpf_skc_to_udp6_sock_proto;
#endif #endif
case BPF_FUNC_seq_printf: case BPF_FUNC_seq_printf:
return prog->expected_attach_type == BPF_TRACE_ITER ? return prog->expected_attach_type == BPF_TRACE_ITER ?
......
...@@ -47,6 +47,7 @@ ...@@ -47,6 +47,7 @@
#include <linux/seccomp.h> #include <linux/seccomp.h>
#include <linux/if_vlan.h> #include <linux/if_vlan.h>
#include <linux/bpf.h> #include <linux/bpf.h>
#include <linux/btf.h>
#include <net/sch_generic.h> #include <net/sch_generic.h>
#include <net/cls_cgroup.h> #include <net/cls_cgroup.h>
#include <net/dst_metadata.h> #include <net/dst_metadata.h>
...@@ -73,6 +74,7 @@ ...@@ -73,6 +74,7 @@
#include <net/lwtunnel.h> #include <net/lwtunnel.h>
#include <net/ipv6_stubs.h> #include <net/ipv6_stubs.h>
#include <net/bpf_sk_storage.h> #include <net/bpf_sk_storage.h>
#include <net/transp_v6.h>
/** /**
* sk_filter_trim_cap - run a packet through a socket filter * sk_filter_trim_cap - run a packet through a socket filter
...@@ -9225,3 +9227,167 @@ void bpf_prog_change_xdp(struct bpf_prog *prev_prog, struct bpf_prog *prog) ...@@ -9225,3 +9227,167 @@ void bpf_prog_change_xdp(struct bpf_prog *prev_prog, struct bpf_prog *prog)
{ {
bpf_dispatcher_change_prog(BPF_DISPATCHER_PTR(xdp), prev_prog, prog); bpf_dispatcher_change_prog(BPF_DISPATCHER_PTR(xdp), prev_prog, prog);
} }
/* Define a list of socket types which can be the argument for
 * skc_to_*_sock() helpers. All these sockets should have
 * sock_common as the first member in their memory layout.
 *
 * This is an X-macro list: a user defines BTF_SOCK_TYPE(name, str) and
 * then expands BTF_SOCK_TYPE_xxx. "name" becomes an enum constant and
 * "str" is the struct name that init_btf_sock_ids() looks up in BTF.
 */
#define BTF_SOCK_TYPE_xxx \
BTF_SOCK_TYPE(BTF_SOCK_TYPE_INET, "inet_sock") \
BTF_SOCK_TYPE(BTF_SOCK_TYPE_INET_CONN, "inet_connection_sock") \
BTF_SOCK_TYPE(BTF_SOCK_TYPE_INET_REQ, "inet_request_sock") \
BTF_SOCK_TYPE(BTF_SOCK_TYPE_INET_TW, "inet_timewait_sock") \
BTF_SOCK_TYPE(BTF_SOCK_TYPE_REQ, "request_sock") \
BTF_SOCK_TYPE(BTF_SOCK_TYPE_SOCK, "sock") \
BTF_SOCK_TYPE(BTF_SOCK_TYPE_SOCK_COMMON, "sock_common") \
BTF_SOCK_TYPE(BTF_SOCK_TYPE_TCP, "tcp_sock") \
BTF_SOCK_TYPE(BTF_SOCK_TYPE_TCP_REQ, "tcp_request_sock") \
BTF_SOCK_TYPE(BTF_SOCK_TYPE_TCP_TW, "tcp_timewait_sock") \
BTF_SOCK_TYPE(BTF_SOCK_TYPE_TCP6, "tcp6_sock") \
BTF_SOCK_TYPE(BTF_SOCK_TYPE_UDP, "udp_sock") \
BTF_SOCK_TYPE(BTF_SOCK_TYPE_UDP6, "udp6_sock")
/* One enum constant per BTF_SOCK_TYPE_xxx entry, used as an index into
 * btf_sock_ids[]. MAX_BTF_SOCK_TYPE comes last and so equals the number
 * of entries.
 */
enum {
#define BTF_SOCK_TYPE(name, str) name,
BTF_SOCK_TYPE_xxx
#undef BTF_SOCK_TYPE
MAX_BTF_SOCK_TYPE,
};
/* BTF type id of each socket struct, indexed by BTF_SOCK_TYPE_*. Being
 * static storage, entries are 0 until init_btf_sock_ids() stores a
 * positive id for them.
 */
static int btf_sock_ids[MAX_BTF_SOCK_TYPE];
#ifdef CONFIG_BPF_SYSCALL
/* Struct names of the socket types, in the same order as the
 * BTF_SOCK_TYPE_* enum so that bpf_sock_types[i] names the struct whose
 * id is stored in btf_sock_ids[i].
 */
static const char *bpf_sock_types[] = {
#define BTF_SOCK_TYPE(name, str) str,
BTF_SOCK_TYPE_xxx
#undef BTF_SOCK_TYPE
};
/* init_btf_sock_ids - resolve the BTF ids of all listed socket structs
 * @btf: BTF object to search
 *
 * Looks up every name in bpf_sock_types[] as a BTF_KIND_STRUCT and records
 * the id in btf_sock_ids[]. A lookup that does not yield a positive id
 * leaves the corresponding entry untouched.
 */
void init_btf_sock_ids(struct btf *btf)
{
	int idx;

	for (idx = 0; idx < MAX_BTF_SOCK_TYPE; idx++) {
		int id = btf_find_by_name_kind(btf, bpf_sock_types[idx],
					       BTF_KIND_STRUCT);

		if (id <= 0)
			continue;

		btf_sock_ids[idx] = id;
	}
}
#endif
/* check_arg_btf_id - check_btf_id callback shared by the skc_to_*_sock helpers
 * @btf_id: BTF id of the pointer held in the argument register
 * @arg: argument index (unused)
 *
 * Return: true if @btf_id equals any id stored in btf_sock_ids[],
 * false otherwise.
 */
static bool check_arg_btf_id(u32 btf_id, u32 arg)
{
	int idx = 0;

	/* only one argument, no need to check arg */
	while (idx < MAX_BTF_SOCK_TYPE) {
		if (btf_sock_ids[idx] == btf_id)
			return true;
		idx++;
	}

	return false;
}
/* bpf_skc_to_tcp6_sock - runtime-checked cast of a socket pointer to tcp6_sock
 * @sk: socket pointer supplied by the BPF program
 *
 * Return: @sk when it is non-NULL, is a full socket, and has
 * sk_protocol == IPPROTO_TCP and sk_family == AF_INET6; NULL otherwise.
 */
BPF_CALL_1(bpf_skc_to_tcp6_sock, struct sock *, sk)
{
	/* tcp6_sock type is not generated in dwarf and hence btf,
	 * trigger an explicit type generation here.
	 */
	BTF_TYPE_EMIT(struct tcp6_sock);

	/* The verifier-side check for this helper validates only the BTF
	 * type of the argument, so guard against a NULL pointer before
	 * dereferencing it.
	 */
	if (sk && sk_fullsock(sk) && sk->sk_protocol == IPPROTO_TCP &&
	    sk->sk_family == AF_INET6)
		return (unsigned long)sk;

	return (unsigned long)NULL;
}
/* Prototype for bpf_skc_to_tcp6_sock(): callable from non-GPL programs,
 * takes one BTF-typed pointer whose type is validated by
 * check_arg_btf_id(), and returns a pointer typed with the tcp6_sock
 * BTF id, or NULL.
 */
const struct bpf_func_proto bpf_skc_to_tcp6_sock_proto = {
.func = bpf_skc_to_tcp6_sock,
.gpl_only = false,
.ret_type = RET_PTR_TO_BTF_ID_OR_NULL,
.arg1_type = ARG_PTR_TO_BTF_ID,
.check_btf_id = check_arg_btf_id,
.ret_btf_id = &btf_sock_ids[BTF_SOCK_TYPE_TCP6],
};
/* bpf_skc_to_tcp_sock - runtime-checked cast of a socket pointer to tcp_sock
 * @sk: socket pointer supplied by the BPF program
 *
 * Return: @sk when it is non-NULL, is a full socket, and has
 * sk_protocol == IPPROTO_TCP; NULL otherwise.
 */
BPF_CALL_1(bpf_skc_to_tcp_sock, struct sock *, sk)
{
	/* The verifier-side check for this helper validates only the BTF
	 * type of the argument, so guard against a NULL pointer before
	 * dereferencing it.
	 */
	if (sk && sk_fullsock(sk) && sk->sk_protocol == IPPROTO_TCP)
		return (unsigned long)sk;

	return (unsigned long)NULL;
}
/* Prototype for bpf_skc_to_tcp_sock(): callable from non-GPL programs,
 * takes one BTF-typed pointer whose type is validated by
 * check_arg_btf_id(), and returns a pointer typed with the tcp_sock
 * BTF id, or NULL.
 */
const struct bpf_func_proto bpf_skc_to_tcp_sock_proto = {
.func = bpf_skc_to_tcp_sock,
.gpl_only = false,
.ret_type = RET_PTR_TO_BTF_ID_OR_NULL,
.arg1_type = ARG_PTR_TO_BTF_ID,
.check_btf_id = check_arg_btf_id,
.ret_btf_id = &btf_sock_ids[BTF_SOCK_TYPE_TCP],
};
/* bpf_skc_to_tcp_timewait_sock - runtime-checked cast to tcp_timewait_sock
 * @sk: socket pointer supplied by the BPF program
 *
 * Return: @sk when it is non-NULL, its sk_prot is tcp_prot (or tcpv6_prot
 * when IPv6 is built in) and sk_state == TCP_TIME_WAIT; NULL otherwise.
 */
BPF_CALL_1(bpf_skc_to_tcp_timewait_sock, struct sock *, sk)
{
	/* The verifier-side check for this helper validates only the BTF
	 * type of the argument, so guard against a NULL pointer before
	 * dereferencing it.
	 */
	if (!sk)
		return (unsigned long)NULL;

	if (sk->sk_prot == &tcp_prot && sk->sk_state == TCP_TIME_WAIT)
		return (unsigned long)sk;

#if IS_BUILTIN(CONFIG_IPV6)
	if (sk->sk_prot == &tcpv6_prot && sk->sk_state == TCP_TIME_WAIT)
		return (unsigned long)sk;
#endif

	return (unsigned long)NULL;
}
/* Prototype for bpf_skc_to_tcp_timewait_sock(): callable from non-GPL
 * programs, takes one BTF-typed pointer whose type is validated by
 * check_arg_btf_id(), and returns a pointer typed with the
 * tcp_timewait_sock BTF id, or NULL.
 */
const struct bpf_func_proto bpf_skc_to_tcp_timewait_sock_proto = {
.func = bpf_skc_to_tcp_timewait_sock,
.gpl_only = false,
.ret_type = RET_PTR_TO_BTF_ID_OR_NULL,
.arg1_type = ARG_PTR_TO_BTF_ID,
.check_btf_id = check_arg_btf_id,
.ret_btf_id = &btf_sock_ids[BTF_SOCK_TYPE_TCP_TW],
};
/* bpf_skc_to_tcp_request_sock - runtime-checked cast to tcp_request_sock
 * @sk: socket pointer supplied by the BPF program
 *
 * Return: @sk when it is non-NULL, its sk_prot is tcp_prot (or tcpv6_prot
 * when IPv6 is built in) and sk_state == TCP_NEW_SYN_RECV; NULL otherwise.
 */
BPF_CALL_1(bpf_skc_to_tcp_request_sock, struct sock *, sk)
{
	/* The verifier-side check for this helper validates only the BTF
	 * type of the argument, so guard against a NULL pointer before
	 * dereferencing it.
	 */
	if (!sk)
		return (unsigned long)NULL;

	if (sk->sk_prot == &tcp_prot && sk->sk_state == TCP_NEW_SYN_RECV)
		return (unsigned long)sk;

#if IS_BUILTIN(CONFIG_IPV6)
	if (sk->sk_prot == &tcpv6_prot && sk->sk_state == TCP_NEW_SYN_RECV)
		return (unsigned long)sk;
#endif

	return (unsigned long)NULL;
}
/* Prototype for bpf_skc_to_tcp_request_sock(): callable from non-GPL
 * programs, takes one BTF-typed pointer whose type is validated by
 * check_arg_btf_id(), and returns a pointer typed with the
 * tcp_request_sock BTF id, or NULL.
 */
const struct bpf_func_proto bpf_skc_to_tcp_request_sock_proto = {
.func = bpf_skc_to_tcp_request_sock,
.gpl_only = false,
.ret_type = RET_PTR_TO_BTF_ID_OR_NULL,
.arg1_type = ARG_PTR_TO_BTF_ID,
.check_btf_id = check_arg_btf_id,
.ret_btf_id = &btf_sock_ids[BTF_SOCK_TYPE_TCP_REQ],
};
/* bpf_skc_to_udp6_sock - runtime-checked cast of a socket pointer to udp6_sock
 * @sk: socket pointer supplied by the BPF program
 *
 * Return: @sk when it is non-NULL, is a full socket, and has
 * sk_protocol == IPPROTO_UDP, sk_type == SOCK_DGRAM and
 * sk_family == AF_INET6; NULL otherwise.
 */
BPF_CALL_1(bpf_skc_to_udp6_sock, struct sock *, sk)
{
	/* udp6_sock type is not generated in dwarf and hence btf,
	 * trigger an explicit type generation here.
	 */
	BTF_TYPE_EMIT(struct udp6_sock);

	/* The verifier-side check for this helper validates only the BTF
	 * type of the argument, so guard against a NULL pointer before
	 * dereferencing it.
	 */
	if (sk && sk_fullsock(sk) && sk->sk_protocol == IPPROTO_UDP &&
	    sk->sk_type == SOCK_DGRAM && sk->sk_family == AF_INET6)
		return (unsigned long)sk;

	return (unsigned long)NULL;
}
/* Prototype for bpf_skc_to_udp6_sock(): callable from non-GPL programs,
 * takes one BTF-typed pointer whose type is validated by
 * check_arg_btf_id(), and returns a pointer typed with the udp6_sock
 * BTF id, or NULL.
 */
const struct bpf_func_proto bpf_skc_to_udp6_sock_proto = {
.func = bpf_skc_to_udp6_sock,
.gpl_only = false,
.ret_type = RET_PTR_TO_BTF_ID_OR_NULL,
.arg1_type = ARG_PTR_TO_BTF_ID,
.check_btf_id = check_arg_btf_id,
.ret_btf_id = &btf_sock_ids[BTF_SOCK_TYPE_UDP6],
};
...@@ -2211,13 +2211,18 @@ EXPORT_SYMBOL(tcp_v4_destroy_sock); ...@@ -2211,13 +2211,18 @@ EXPORT_SYMBOL(tcp_v4_destroy_sock);
*/ */
static void *listening_get_next(struct seq_file *seq, void *cur) static void *listening_get_next(struct seq_file *seq, void *cur)
{ {
struct tcp_seq_afinfo *afinfo = PDE_DATA(file_inode(seq->file)); struct tcp_seq_afinfo *afinfo;
struct tcp_iter_state *st = seq->private; struct tcp_iter_state *st = seq->private;
struct net *net = seq_file_net(seq); struct net *net = seq_file_net(seq);
struct inet_listen_hashbucket *ilb; struct inet_listen_hashbucket *ilb;
struct hlist_nulls_node *node; struct hlist_nulls_node *node;
struct sock *sk = cur; struct sock *sk = cur;
if (st->bpf_seq_afinfo)
afinfo = st->bpf_seq_afinfo;
else
afinfo = PDE_DATA(file_inode(seq->file));
if (!sk) { if (!sk) {
get_head: get_head:
ilb = &tcp_hashinfo.listening_hash[st->bucket]; ilb = &tcp_hashinfo.listening_hash[st->bucket];
...@@ -2235,7 +2240,8 @@ static void *listening_get_next(struct seq_file *seq, void *cur) ...@@ -2235,7 +2240,8 @@ static void *listening_get_next(struct seq_file *seq, void *cur)
sk_nulls_for_each_from(sk, node) { sk_nulls_for_each_from(sk, node) {
if (!net_eq(sock_net(sk), net)) if (!net_eq(sock_net(sk), net))
continue; continue;
if (sk->sk_family == afinfo->family) if (afinfo->family == AF_UNSPEC ||
sk->sk_family == afinfo->family)
return sk; return sk;
} }
spin_unlock(&ilb->lock); spin_unlock(&ilb->lock);
...@@ -2272,11 +2278,16 @@ static inline bool empty_bucket(const struct tcp_iter_state *st) ...@@ -2272,11 +2278,16 @@ static inline bool empty_bucket(const struct tcp_iter_state *st)
*/ */
static void *established_get_first(struct seq_file *seq) static void *established_get_first(struct seq_file *seq)
{ {
struct tcp_seq_afinfo *afinfo = PDE_DATA(file_inode(seq->file)); struct tcp_seq_afinfo *afinfo;
struct tcp_iter_state *st = seq->private; struct tcp_iter_state *st = seq->private;
struct net *net = seq_file_net(seq); struct net *net = seq_file_net(seq);
void *rc = NULL; void *rc = NULL;
if (st->bpf_seq_afinfo)
afinfo = st->bpf_seq_afinfo;
else
afinfo = PDE_DATA(file_inode(seq->file));
st->offset = 0; st->offset = 0;
for (; st->bucket <= tcp_hashinfo.ehash_mask; ++st->bucket) { for (; st->bucket <= tcp_hashinfo.ehash_mask; ++st->bucket) {
struct sock *sk; struct sock *sk;
...@@ -2289,7 +2300,8 @@ static void *established_get_first(struct seq_file *seq) ...@@ -2289,7 +2300,8 @@ static void *established_get_first(struct seq_file *seq)
spin_lock_bh(lock); spin_lock_bh(lock);
sk_nulls_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) { sk_nulls_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) {
if (sk->sk_family != afinfo->family || if ((afinfo->family != AF_UNSPEC &&
sk->sk_family != afinfo->family) ||
!net_eq(sock_net(sk), net)) { !net_eq(sock_net(sk), net)) {
continue; continue;
} }
...@@ -2304,19 +2316,25 @@ static void *established_get_first(struct seq_file *seq) ...@@ -2304,19 +2316,25 @@ static void *established_get_first(struct seq_file *seq)
static void *established_get_next(struct seq_file *seq, void *cur) static void *established_get_next(struct seq_file *seq, void *cur)
{ {
struct tcp_seq_afinfo *afinfo = PDE_DATA(file_inode(seq->file)); struct tcp_seq_afinfo *afinfo;
struct sock *sk = cur; struct sock *sk = cur;
struct hlist_nulls_node *node; struct hlist_nulls_node *node;
struct tcp_iter_state *st = seq->private; struct tcp_iter_state *st = seq->private;
struct net *net = seq_file_net(seq); struct net *net = seq_file_net(seq);
if (st->bpf_seq_afinfo)
afinfo = st->bpf_seq_afinfo;
else
afinfo = PDE_DATA(file_inode(seq->file));
++st->num; ++st->num;
++st->offset; ++st->offset;
sk = sk_nulls_next(sk); sk = sk_nulls_next(sk);
sk_nulls_for_each_from(sk, node) { sk_nulls_for_each_from(sk, node) {
if (sk->sk_family == afinfo->family && if ((afinfo->family == AF_UNSPEC ||
sk->sk_family == afinfo->family) &&
net_eq(sock_net(sk), net)) net_eq(sock_net(sk), net))
return sk; return sk;
} }
...@@ -2595,6 +2613,74 @@ static int tcp4_seq_show(struct seq_file *seq, void *v) ...@@ -2595,6 +2613,74 @@ static int tcp4_seq_show(struct seq_file *seq, void *v)
return 0; return 0;
} }
#ifdef CONFIG_BPF_SYSCALL
/* Context passed to the BPF program run by tcp_prog_seq_show(). */
struct bpf_iter__tcp {
__bpf_md_ptr(struct bpf_iter_meta *, meta); /* iterator metadata */
__bpf_md_ptr(struct sock_common *, sk_common); /* current socket; NULL when called from bpf_iter_tcp_seq_stop() */
uid_t uid __aligned(8); /* uid computed by bpf_iter_tcp_seq_show(); 0 from the stop path */
};
/* tcp_prog_seq_show - build the tcp iterator context and run the program
 * @prog: BPF program to run
 * @meta: iterator metadata; its seq_num is decremented by one
 * @sk_common: socket to expose to the program (may be NULL)
 * @uid: uid to expose to the program
 *
 * Return: the value returned by bpf_iter_run_prog().
 */
static int tcp_prog_seq_show(struct bpf_prog *prog, struct bpf_iter_meta *meta,
			     struct sock_common *sk_common, uid_t uid)
{
	struct bpf_iter__tcp iter_ctx;

	iter_ctx.uid = uid;
	iter_ctx.sk_common = sk_common;
	iter_ctx.meta = meta;

	/* skip SEQ_START_TOKEN */
	meta->seq_num -= 1;

	return bpf_iter_run_prog(prog, &iter_ctx);
}
/* bpf_iter_tcp_seq_show - seq_file ->show for the tcp BPF iterator
 * @seq: seq_file being iterated
 * @v: current element, or SEQ_START_TOKEN
 *
 * Nothing is done for SEQ_START_TOKEN. Otherwise the uid is 0 for
 * TCP_TIME_WAIT sockets, the listener's uid for TCP_NEW_SYN_RECV request
 * sockets, and the socket's own uid in every other state. The program
 * obtained from bpf_iter_get_info() is then run on @v.
 *
 * Return: 0 for SEQ_START_TOKEN, else the result of tcp_prog_seq_show().
 */
static int bpf_iter_tcp_seq_show(struct seq_file *seq, void *v)
{
	struct sock *sk = v;
	struct bpf_iter_meta meta;
	struct bpf_prog *prog;
	uid_t uid = 0;

	if (v == SEQ_START_TOKEN)
		return 0;

	if (sk->sk_state != TCP_TIME_WAIT) {
		struct sock *owner = sk;

		if (sk->sk_state == TCP_NEW_SYN_RECV) {
			const struct request_sock *req = v;

			owner = req->rsk_listener;
		}

		uid = from_kuid_munged(seq_user_ns(seq), sock_i_uid(owner));
	}

	meta.seq = seq;
	prog = bpf_iter_get_info(&meta, false);

	return tcp_prog_seq_show(prog, &meta, v, uid);
}
static void bpf_iter_tcp_seq_stop(struct seq_file *seq, void *v)
{
struct bpf_iter_meta meta;
struct bpf_prog *prog;
if (!v) {
meta.seq = seq;
prog = bpf_iter_get_info(&meta, true);
if (prog)
(void)tcp_prog_seq_show(prog, &meta, v, 0);
}
tcp_seq_stop(seq, v);
}
/* seq_file operations for the tcp BPF iterator: start and next use
 * tcp_seq_start/tcp_seq_next directly, while show and stop are the
 * BPF-specific wrappers defined above.
 */
static const struct seq_operations bpf_iter_tcp_seq_ops = {
.show = bpf_iter_tcp_seq_show,
.start = tcp_seq_start,
.next = tcp_seq_next,
.stop = bpf_iter_tcp_seq_stop,
};
#endif
static const struct seq_operations tcp4_seq_ops = { static const struct seq_operations tcp4_seq_ops = {
.show = tcp4_seq_show, .show = tcp4_seq_show,
.start = tcp_seq_start, .start = tcp_seq_start,
...@@ -2826,8 +2912,63 @@ static struct pernet_operations __net_initdata tcp_sk_ops = { ...@@ -2826,8 +2912,63 @@ static struct pernet_operations __net_initdata tcp_sk_ops = {
.exit_batch = tcp_sk_exit_batch, .exit_batch = tcp_sk_exit_batch,
}; };
#if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS)
/* Declare the "tcp" iterator entry; the parameters mirror the fields of
 * struct bpf_iter__tcp.
 * NOTE(review): DEFINE_BPF_ITER_FUNC is defined outside this view -
 * confirm what it expands to.
 */
DEFINE_BPF_ITER_FUNC(tcp, struct bpf_iter_meta *meta,
struct sock_common *sk_common, uid_t uid)
static int bpf_iter_init_tcp(void *priv_data)
{
struct tcp_iter_state *st = priv_data;
struct tcp_seq_afinfo *afinfo;
int ret;
afinfo = kmalloc(sizeof(*afinfo), GFP_USER | __GFP_NOWARN);
if (!afinfo)
return -ENOMEM;
afinfo->family = AF_UNSPEC;
st->bpf_seq_afinfo = afinfo;
ret = bpf_iter_init_seq_net(priv_data);
if (ret)
kfree(afinfo);
return ret;
}
/* bpf_iter_fini_tcp - release private seq state of the tcp BPF iterator
 * @priv_data: pointer to the iterator's struct tcp_iter_state
 *
 * Frees the afinfo stored in the state, then calls
 * bpf_iter_fini_seq_net().
 */
static void bpf_iter_fini_tcp(void *priv_data)
{
	struct tcp_iter_state *iter_state = priv_data;
	struct tcp_seq_afinfo *afinfo = iter_state->bpf_seq_afinfo;

	kfree(afinfo);
	bpf_iter_fini_seq_net(priv_data);
}
/* Registration record for the "tcp" BPF iterator target. The private seq
 * state is a struct tcp_iter_state, set up and torn down by
 * bpf_iter_init_tcp()/bpf_iter_fini_tcp(). One context argument is
 * described: the sk_common field of struct bpf_iter__tcp, typed as
 * PTR_TO_BTF_ID_OR_NULL.
 */
static const struct bpf_iter_reg tcp_reg_info = {
.target = "tcp",
.seq_ops = &bpf_iter_tcp_seq_ops,
.init_seq_private = bpf_iter_init_tcp,
.fini_seq_private = bpf_iter_fini_tcp,
.seq_priv_size = sizeof(struct tcp_iter_state),
.ctx_arg_info_size = 1,
.ctx_arg_info = {
{ offsetof(struct bpf_iter__tcp, sk_common),
PTR_TO_BTF_ID_OR_NULL },
},
};
/* bpf_iter_register - register the tcp BPF iterator target
 *
 * Registration failure is only reported with a warning.
 */
static void __init bpf_iter_register(void)
{
	if (!bpf_iter_reg_target(&tcp_reg_info))
		return;

	pr_warn("Warning: could not register bpf iterator tcp\n");
}
#endif
void __init tcp_v4_init(void) void __init tcp_v4_init(void)
{ {
if (register_pernet_subsys(&tcp_sk_ops)) if (register_pernet_subsys(&tcp_sk_ops))
panic("Failed to create the TCP control socket.\n"); panic("Failed to create the TCP control socket.\n");
#if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS)
bpf_iter_register();
#endif
} }
...@@ -2826,10 +2826,15 @@ EXPORT_SYMBOL(udp_prot); ...@@ -2826,10 +2826,15 @@ EXPORT_SYMBOL(udp_prot);
static struct sock *udp_get_first(struct seq_file *seq, int start) static struct sock *udp_get_first(struct seq_file *seq, int start)
{ {
struct sock *sk; struct sock *sk;
struct udp_seq_afinfo *afinfo = PDE_DATA(file_inode(seq->file)); struct udp_seq_afinfo *afinfo;
struct udp_iter_state *state = seq->private; struct udp_iter_state *state = seq->private;
struct net *net = seq_file_net(seq); struct net *net = seq_file_net(seq);
if (state->bpf_seq_afinfo)
afinfo = state->bpf_seq_afinfo;
else
afinfo = PDE_DATA(file_inode(seq->file));
for (state->bucket = start; state->bucket <= afinfo->udp_table->mask; for (state->bucket = start; state->bucket <= afinfo->udp_table->mask;
++state->bucket) { ++state->bucket) {
struct udp_hslot *hslot = &afinfo->udp_table->hash[state->bucket]; struct udp_hslot *hslot = &afinfo->udp_table->hash[state->bucket];
...@@ -2841,7 +2846,8 @@ static struct sock *udp_get_first(struct seq_file *seq, int start) ...@@ -2841,7 +2846,8 @@ static struct sock *udp_get_first(struct seq_file *seq, int start)
sk_for_each(sk, &hslot->head) { sk_for_each(sk, &hslot->head) {
if (!net_eq(sock_net(sk), net)) if (!net_eq(sock_net(sk), net))
continue; continue;
if (sk->sk_family == afinfo->family) if (afinfo->family == AF_UNSPEC ||
sk->sk_family == afinfo->family)
goto found; goto found;
} }
spin_unlock_bh(&hslot->lock); spin_unlock_bh(&hslot->lock);
...@@ -2853,13 +2859,20 @@ static struct sock *udp_get_first(struct seq_file *seq, int start) ...@@ -2853,13 +2859,20 @@ static struct sock *udp_get_first(struct seq_file *seq, int start)
static struct sock *udp_get_next(struct seq_file *seq, struct sock *sk) static struct sock *udp_get_next(struct seq_file *seq, struct sock *sk)
{ {
struct udp_seq_afinfo *afinfo = PDE_DATA(file_inode(seq->file)); struct udp_seq_afinfo *afinfo;
struct udp_iter_state *state = seq->private; struct udp_iter_state *state = seq->private;
struct net *net = seq_file_net(seq); struct net *net = seq_file_net(seq);
if (state->bpf_seq_afinfo)
afinfo = state->bpf_seq_afinfo;
else
afinfo = PDE_DATA(file_inode(seq->file));
do { do {
sk = sk_next(sk); sk = sk_next(sk);
} while (sk && (!net_eq(sock_net(sk), net) || sk->sk_family != afinfo->family)); } while (sk && (!net_eq(sock_net(sk), net) ||
(afinfo->family != AF_UNSPEC &&
sk->sk_family != afinfo->family)));
if (!sk) { if (!sk) {
if (state->bucket <= afinfo->udp_table->mask) if (state->bucket <= afinfo->udp_table->mask)
...@@ -2904,9 +2917,14 @@ EXPORT_SYMBOL(udp_seq_next); ...@@ -2904,9 +2917,14 @@ EXPORT_SYMBOL(udp_seq_next);
void udp_seq_stop(struct seq_file *seq, void *v) void udp_seq_stop(struct seq_file *seq, void *v)
{ {
struct udp_seq_afinfo *afinfo = PDE_DATA(file_inode(seq->file)); struct udp_seq_afinfo *afinfo;
struct udp_iter_state *state = seq->private; struct udp_iter_state *state = seq->private;
if (state->bpf_seq_afinfo)
afinfo = state->bpf_seq_afinfo;
else
afinfo = PDE_DATA(file_inode(seq->file));
if (state->bucket <= afinfo->udp_table->mask) if (state->bucket <= afinfo->udp_table->mask)
spin_unlock_bh(&afinfo->udp_table->hash[state->bucket].lock); spin_unlock_bh(&afinfo->udp_table->hash[state->bucket].lock);
} }
...@@ -2950,6 +2968,67 @@ int udp4_seq_show(struct seq_file *seq, void *v) ...@@ -2950,6 +2968,67 @@ int udp4_seq_show(struct seq_file *seq, void *v)
return 0; return 0;
} }
#ifdef CONFIG_BPF_SYSCALL
/* Context handed to a BPF program attached to the "udp" iterator.
 * It is filled in by udp_prog_seq_show() once per visited socket.
 * Every member is placed on an 8-byte boundary.
 */
struct bpf_iter__udp {
/* iterator metadata (seq_file and sequence number) */
__bpf_md_ptr(struct bpf_iter_meta *, meta);
/* socket being visited; NULL when called from the stop callback */
__bpf_md_ptr(struct udp_sock *, udp_sk);
/* owner uid of the socket, as computed by the show callback */
uid_t uid __aligned(8);
/* hash bucket index taken from the iterator state */
int bucket __aligned(8);
};
/* Build the bpf_iter__udp context for one element and run @prog on it.
 *
 * @meta->seq_num is decremented first so that the SEQ_START_TOKEN element
 * is not counted in the sequence number seen by the program.
 * Returns whatever bpf_iter_run_prog() returns.
 */
static int udp_prog_seq_show(struct bpf_prog *prog, struct bpf_iter_meta *meta,
			     struct udp_sock *udp_sk, uid_t uid, int bucket)
{
	struct bpf_iter__udp iter_ctx;

	meta->seq_num -= 1; /* skip SEQ_START_TOKEN */

	iter_ctx.bucket = bucket;
	iter_ctx.uid = uid;
	iter_ctx.udp_sk = udp_sk;
	iter_ctx.meta = meta;

	return bpf_iter_run_prog(prog, &iter_ctx);
}
/* seq_file ->show callback of the "udp" bpf iterator.
 *
 * The start token carries no socket and is silently accepted. For every
 * real element the owner uid is resolved in the reader's user namespace
 * and the attached BPF program is run with the current bucket index.
 */
static int bpf_iter_udp_seq_show(struct seq_file *seq, void *v)
{
	struct udp_iter_state *iter_state = seq->private;
	struct bpf_iter_meta meta;
	struct bpf_prog *prog;
	struct sock *sk;
	uid_t uid;

	if (v != SEQ_START_TOKEN) {
		sk = v;
		uid = from_kuid_munged(seq_user_ns(seq), sock_i_uid(sk));

		meta.seq = seq;
		prog = bpf_iter_get_info(&meta, false);

		return udp_prog_seq_show(prog, &meta, v, uid,
					 iter_state->bucket);
	}

	return 0;
}
static void bpf_iter_udp_seq_stop(struct seq_file *seq, void *v)
{
struct bpf_iter_meta meta;
struct bpf_prog *prog;
if (!v) {
meta.seq = seq;
prog = bpf_iter_get_info(&meta, true);
if (prog)
(void)udp_prog_seq_show(prog, &meta, v, 0, 0);
}
udp_seq_stop(seq, v);
}
/* seq_file operations used by the "udp" bpf iterator: traversal (start/next)
 * is shared with the regular udp seq code, while stop and show are the
 * bpf-aware variants defined above.
 */
static const struct seq_operations bpf_iter_udp_seq_ops = {
.start = udp_seq_start,
.next = udp_seq_next,
.stop = bpf_iter_udp_seq_stop,
.show = bpf_iter_udp_seq_show,
};
#endif
const struct seq_operations udp_seq_ops = { const struct seq_operations udp_seq_ops = {
.start = udp_seq_start, .start = udp_seq_start,
.next = udp_seq_next, .next = udp_seq_next,
...@@ -3067,6 +3146,57 @@ static struct pernet_operations __net_initdata udp_sysctl_ops = { ...@@ -3067,6 +3146,57 @@ static struct pernet_operations __net_initdata udp_sysctl_ops = {
.init = udp_sysctl_init, .init = udp_sysctl_init,
}; };
#if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS)
/* Declares the program signature for the "udp" iterator target; the
 * parameter list mirrors the members of struct bpf_iter__udp.
 * NOTE(review): the expansion of DEFINE_BPF_ITER_FUNC lives elsewhere —
 * confirm details there.
 */
DEFINE_BPF_ITER_FUNC(udp, struct bpf_iter_meta *meta,
struct udp_sock *udp_sk, uid_t uid, int bucket)
static int bpf_iter_init_udp(void *priv_data)
{
struct udp_iter_state *st = priv_data;
struct udp_seq_afinfo *afinfo;
int ret;
afinfo = kmalloc(sizeof(*afinfo), GFP_USER | __GFP_NOWARN);
if (!afinfo)
return -ENOMEM;
afinfo->family = AF_UNSPEC;
afinfo->udp_table = &udp_table;
st->bpf_seq_afinfo = afinfo;
ret = bpf_iter_init_seq_net(priv_data);
if (ret)
kfree(afinfo);
return ret;
}
/* Tear down what bpf_iter_init_udp() set up: release the afinfo stored
 * in the iterator state, then undo the netns initialisation.
 */
static void bpf_iter_fini_udp(void *priv_data)
{
	struct udp_iter_state *iter_state = priv_data;
	struct udp_seq_afinfo *afinfo = iter_state->bpf_seq_afinfo;

	kfree(afinfo);
	bpf_iter_fini_seq_net(priv_data);
}
/* Registration record for the "udp" bpf iterator target: names the target,
 * wires up the seq_file operations and the private-data init/fini hooks,
 * and sizes the private area as a struct udp_iter_state.
 */
static const struct bpf_iter_reg udp_reg_info = {
.target = "udp",
.seq_ops = &bpf_iter_udp_seq_ops,
.init_seq_private = bpf_iter_init_udp,
.fini_seq_private = bpf_iter_fini_udp,
.seq_priv_size = sizeof(struct udp_iter_state),
.ctx_arg_info_size = 1,
.ctx_arg_info = {
/* udp_sk may be NULL (the stop callback passes a NULL socket) */
{ offsetof(struct bpf_iter__udp, udp_sk),
PTR_TO_BTF_ID_OR_NULL },
},
};
/* Register the "udp" iterator target at init time; a failure is only
 * reported with a warning and is otherwise ignored.
 */
static void __init bpf_iter_register(void)
{
	int err = bpf_iter_reg_target(&udp_reg_info);

	if (err)
		pr_warn("Warning: could not register bpf iterator udp\n");
}
#endif
void __init udp_init(void) void __init udp_init(void)
{ {
unsigned long limit; unsigned long limit;
...@@ -3092,4 +3222,8 @@ void __init udp_init(void) ...@@ -3092,4 +3222,8 @@ void __init udp_init(void)
if (register_pernet_subsys(&udp_sysctl_ops)) if (register_pernet_subsys(&udp_sysctl_ops))
panic("UDP: failed to init sysctl parameters.\n"); panic("UDP: failed to init sysctl parameters.\n");
#if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS)
bpf_iter_register();
#endif
} }
...@@ -421,6 +421,11 @@ class PrinterHelpers(Printer): ...@@ -421,6 +421,11 @@ class PrinterHelpers(Printer):
'struct sockaddr', 'struct sockaddr',
'struct tcphdr', 'struct tcphdr',
'struct seq_file', 'struct seq_file',
'struct tcp6_sock',
'struct tcp_sock',
'struct tcp_timewait_sock',
'struct tcp_request_sock',
'struct udp6_sock',
'struct __sk_buff', 'struct __sk_buff',
'struct sk_msg_md', 'struct sk_msg_md',
...@@ -458,6 +463,11 @@ class PrinterHelpers(Printer): ...@@ -458,6 +463,11 @@ class PrinterHelpers(Printer):
'struct sockaddr', 'struct sockaddr',
'struct tcphdr', 'struct tcphdr',
'struct seq_file', 'struct seq_file',
'struct tcp6_sock',
'struct tcp_sock',
'struct tcp_timewait_sock',
'struct tcp_request_sock',
'struct udp6_sock',
} }
mapped_types = { mapped_types = {
'u8': '__u8', 'u8': '__u8',
......
...@@ -3255,6 +3255,36 @@ union bpf_attr { ...@@ -3255,6 +3255,36 @@ union bpf_attr {
* case of **BPF_CSUM_LEVEL_QUERY**, the current skb->csum_level * case of **BPF_CSUM_LEVEL_QUERY**, the current skb->csum_level
* is returned or the error code -EACCES in case the skb is not * is returned or the error code -EACCES in case the skb is not
* subject to CHECKSUM_UNNECESSARY. * subject to CHECKSUM_UNNECESSARY.
*
* struct tcp6_sock *bpf_skc_to_tcp6_sock(void *sk)
* Description
* Dynamically cast a *sk* pointer to a *tcp6_sock* pointer.
* Return
* *sk* if casting is valid, or NULL otherwise.
*
* struct tcp_sock *bpf_skc_to_tcp_sock(void *sk)
* Description
* Dynamically cast a *sk* pointer to a *tcp_sock* pointer.
* Return
* *sk* if casting is valid, or NULL otherwise.
*
* struct tcp_timewait_sock *bpf_skc_to_tcp_timewait_sock(void *sk)
* Description
* Dynamically cast a *sk* pointer to a *tcp_timewait_sock* pointer.
* Return
* *sk* if casting is valid, or NULL otherwise.
*
* struct tcp_request_sock *bpf_skc_to_tcp_request_sock(void *sk)
* Description
* Dynamically cast a *sk* pointer to a *tcp_request_sock* pointer.
* Return
* *sk* if casting is valid, or NULL otherwise.
*
* struct udp6_sock *bpf_skc_to_udp6_sock(void *sk)
* Description
* Dynamically cast a *sk* pointer to a *udp6_sock* pointer.
* Return
* *sk* if casting is valid, or NULL otherwise.
*/ */
#define __BPF_FUNC_MAPPER(FN) \ #define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \ FN(unspec), \
...@@ -3392,7 +3422,12 @@ union bpf_attr { ...@@ -3392,7 +3422,12 @@ union bpf_attr {
FN(ringbuf_submit), \ FN(ringbuf_submit), \
FN(ringbuf_discard), \ FN(ringbuf_discard), \
FN(ringbuf_query), \ FN(ringbuf_query), \
FN(csum_level), FN(csum_level), \
FN(skc_to_tcp6_sock), \
FN(skc_to_tcp_sock), \
FN(skc_to_tcp_timewait_sock), \
FN(skc_to_tcp_request_sock), \
FN(skc_to_udp6_sock),
/* integer value in 'imm' field of BPF_CALL instruction selects which helper /* integer value in 'imm' field of BPF_CALL instruction selects which helper
* function eBPF program intends to call * function eBPF program intends to call
......
...@@ -6,6 +6,10 @@ ...@@ -6,6 +6,10 @@
#include "bpf_iter_bpf_map.skel.h" #include "bpf_iter_bpf_map.skel.h"
#include "bpf_iter_task.skel.h" #include "bpf_iter_task.skel.h"
#include "bpf_iter_task_file.skel.h" #include "bpf_iter_task_file.skel.h"
#include "bpf_iter_tcp4.skel.h"
#include "bpf_iter_tcp6.skel.h"
#include "bpf_iter_udp4.skel.h"
#include "bpf_iter_udp6.skel.h"
#include "bpf_iter_test_kern1.skel.h" #include "bpf_iter_test_kern1.skel.h"
#include "bpf_iter_test_kern2.skel.h" #include "bpf_iter_test_kern2.skel.h"
#include "bpf_iter_test_kern3.skel.h" #include "bpf_iter_test_kern3.skel.h"
...@@ -120,6 +124,62 @@ static void test_task_file(void) ...@@ -120,6 +124,62 @@ static void test_task_file(void)
bpf_iter_task_file__destroy(skel); bpf_iter_task_file__destroy(skel);
} }
/* Load the tcp4 iterator skeleton and run its program through a dummy read. */
static void test_tcp4(void)
{
	struct bpf_iter_tcp4 *skel = bpf_iter_tcp4__open_and_load();

	if (!CHECK(!skel, "bpf_iter_tcp4__open_and_load",
		   "skeleton open_and_load failed\n")) {
		do_dummy_read(skel->progs.dump_tcp4);
		bpf_iter_tcp4__destroy(skel);
	}
}
/* Load the tcp6 iterator skeleton and run its program through a dummy read. */
static void test_tcp6(void)
{
	struct bpf_iter_tcp6 *skel = bpf_iter_tcp6__open_and_load();

	if (!CHECK(!skel, "bpf_iter_tcp6__open_and_load",
		   "skeleton open_and_load failed\n")) {
		do_dummy_read(skel->progs.dump_tcp6);
		bpf_iter_tcp6__destroy(skel);
	}
}
/* Load the udp4 iterator skeleton and run its program through a dummy read. */
static void test_udp4(void)
{
	struct bpf_iter_udp4 *skel = bpf_iter_udp4__open_and_load();

	if (!CHECK(!skel, "bpf_iter_udp4__open_and_load",
		   "skeleton open_and_load failed\n")) {
		do_dummy_read(skel->progs.dump_udp4);
		bpf_iter_udp4__destroy(skel);
	}
}
/* Load the udp6 iterator skeleton and run its program through a dummy read. */
static void test_udp6(void)
{
	struct bpf_iter_udp6 *skel = bpf_iter_udp6__open_and_load();

	if (!CHECK(!skel, "bpf_iter_udp6__open_and_load",
		   "skeleton open_and_load failed\n")) {
		do_dummy_read(skel->progs.dump_udp6);
		bpf_iter_udp6__destroy(skel);
	}
}
/* The expected string is less than 16 bytes */ /* The expected string is less than 16 bytes */
static int do_read_with_fd(int iter_fd, const char *expected, static int do_read_with_fd(int iter_fd, const char *expected,
bool read_one_char) bool read_one_char)
...@@ -394,6 +454,14 @@ void test_bpf_iter(void) ...@@ -394,6 +454,14 @@ void test_bpf_iter(void)
test_task(); test_task();
if (test__start_subtest("task_file")) if (test__start_subtest("task_file"))
test_task_file(); test_task_file();
if (test__start_subtest("tcp4"))
test_tcp4();
if (test__start_subtest("tcp6"))
test_tcp6();
if (test__start_subtest("udp4"))
test_udp4();
if (test__start_subtest("udp6"))
test_udp6();
if (test__start_subtest("anon")) if (test__start_subtest("anon"))
test_anon_iter(false); test_anon_iter(false);
if (test__start_subtest("anon-read-one-char")) if (test__start_subtest("anon-read-one-char"))
......
/* SPDX-License-Identifier: GPL-2.0 */
/* Copyright (c) 2020 Facebook */
/* "undefine" structs in vmlinux.h, because we "override" them below */
#define bpf_iter_meta bpf_iter_meta___not_used
#define bpf_iter__bpf_map bpf_iter__bpf_map___not_used
#define bpf_iter__ipv6_route bpf_iter__ipv6_route___not_used
#define bpf_iter__netlink bpf_iter__netlink___not_used
#define bpf_iter__task bpf_iter__task___not_used
#define bpf_iter__task_file bpf_iter__task_file___not_used
#define bpf_iter__tcp bpf_iter__tcp___not_used
#define tcp6_sock tcp6_sock___not_used
#define bpf_iter__udp bpf_iter__udp___not_used
#define udp6_sock udp6_sock___not_used
#include "vmlinux.h"
#undef bpf_iter_meta
#undef bpf_iter__bpf_map
#undef bpf_iter__ipv6_route
#undef bpf_iter__netlink
#undef bpf_iter__task
#undef bpf_iter__task_file
#undef bpf_iter__tcp
#undef tcp6_sock
#undef bpf_iter__udp
#undef udp6_sock
/* Local override of the vmlinux.h definition (renamed out of the way by the
 * #define/#undef pair around the include). Shared by every iterator context.
 */
struct bpf_iter_meta {
/* seq_file the programs print into */
struct seq_file *seq;
__u64 session_id;
/* element counter; the programs print their header when it is 0 */
__u64 seq_num;
} __attribute__((preserve_access_index));
struct bpf_iter__ipv6_route {
struct bpf_iter_meta *meta;
struct fib6_info *rt;
} __attribute__((preserve_access_index));
/* Local override of the vmlinux.h context type for the netlink iterator. */
struct bpf_iter__netlink {
struct bpf_iter_meta *meta;
struct netlink_sock *sk;
} __attribute__((preserve_access_index));
struct bpf_iter__task {
struct bpf_iter_meta *meta;
struct task_struct *task;
} __attribute__((preserve_access_index));
/* Local override of the vmlinux.h context type for SEC("iter/task_file"). */
struct bpf_iter__task_file {
struct bpf_iter_meta *meta;
struct task_struct *task;
__u32 fd;
struct file *file;
} __attribute__((preserve_access_index));
struct bpf_iter__bpf_map {
struct bpf_iter_meta *meta;
struct bpf_map *map;
} __attribute__((preserve_access_index));
/* Local override of the vmlinux.h context type for SEC("iter/tcp"). */
struct bpf_iter__tcp {
struct bpf_iter_meta *meta;
/* socket being visited; the programs check it against NULL before use */
struct sock_common *sk_common;
uid_t uid;
} __attribute__((preserve_access_index));
/* Local override of tcp6_sock: the generic tcp_sock comes first, followed
 * by the IPv6 specific state.
 */
struct tcp6_sock {
struct tcp_sock tcp;
struct ipv6_pinfo inet6;
} __attribute__((preserve_access_index));
/* Local override of the vmlinux.h context type for SEC("iter/udp").
 * uid and bucket are each forced onto an 8-byte boundary.
 */
struct bpf_iter__udp {
struct bpf_iter_meta *meta;
/* socket being visited; the programs check it against NULL before use */
struct udp_sock *udp_sk;
uid_t uid __attribute__((aligned(8)));
int bucket __attribute__((aligned(8)));
} __attribute__((preserve_access_index));
/* Local override of udp6_sock: the generic udp_sock comes first, followed
 * by the IPv6 specific state.
 */
struct udp6_sock {
struct udp_sock udp;
struct ipv6_pinfo inet6;
} __attribute__((preserve_access_index));
// SPDX-License-Identifier: GPL-2.0 // SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2020 Facebook */ /* Copyright (c) 2020 Facebook */
/* "undefine" structs in vmlinux.h, because we "override" them below */ #include "bpf_iter.h"
#define bpf_iter_meta bpf_iter_meta___not_used
#define bpf_iter__bpf_map bpf_iter__bpf_map___not_used
#include "vmlinux.h"
#undef bpf_iter_meta
#undef bpf_iter__bpf_map
#include <bpf/bpf_helpers.h> #include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h> #include <bpf/bpf_tracing.h>
char _license[] SEC("license") = "GPL"; char _license[] SEC("license") = "GPL";
struct bpf_iter_meta {
struct seq_file *seq;
__u64 session_id;
__u64 seq_num;
} __attribute__((preserve_access_index));
struct bpf_iter__bpf_map {
struct bpf_iter_meta *meta;
struct bpf_map *map;
} __attribute__((preserve_access_index));
SEC("iter/bpf_map") SEC("iter/bpf_map")
int dump_bpf_map(struct bpf_iter__bpf_map *ctx) int dump_bpf_map(struct bpf_iter__bpf_map *ctx)
{ {
......
// SPDX-License-Identifier: GPL-2.0 // SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2020 Facebook */ /* Copyright (c) 2020 Facebook */
/* "undefine" structs in vmlinux.h, because we "override" them below */ #include "bpf_iter.h"
#define bpf_iter_meta bpf_iter_meta___not_used #include "bpf_tracing_net.h"
#define bpf_iter__ipv6_route bpf_iter__ipv6_route___not_used
#include "vmlinux.h"
#undef bpf_iter_meta
#undef bpf_iter__ipv6_route
#include <bpf/bpf_helpers.h> #include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h> #include <bpf/bpf_tracing.h>
struct bpf_iter_meta {
struct seq_file *seq;
__u64 session_id;
__u64 seq_num;
} __attribute__((preserve_access_index));
struct bpf_iter__ipv6_route {
struct bpf_iter_meta *meta;
struct fib6_info *rt;
} __attribute__((preserve_access_index));
char _license[] SEC("license") = "GPL"; char _license[] SEC("license") = "GPL";
extern bool CONFIG_IPV6_SUBTREES __kconfig __weak; extern bool CONFIG_IPV6_SUBTREES __kconfig __weak;
#define RTF_GATEWAY 0x0002
#define IFNAMSIZ 16
#define fib_nh_gw_family nh_common.nhc_gw_family
#define fib_nh_gw6 nh_common.nhc_gw.ipv6
#define fib_nh_dev nh_common.nhc_dev
SEC("iter/ipv6_route") SEC("iter/ipv6_route")
int dump_ipv6_route(struct bpf_iter__ipv6_route *ctx) int dump_ipv6_route(struct bpf_iter__ipv6_route *ctx)
{ {
......
// SPDX-License-Identifier: GPL-2.0 // SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2020 Facebook */ /* Copyright (c) 2020 Facebook */
/* "undefine" structs in vmlinux.h, because we "override" them below */ #include "bpf_iter.h"
#define bpf_iter_meta bpf_iter_meta___not_used #include "bpf_tracing_net.h"
#define bpf_iter__netlink bpf_iter__netlink___not_used
#include "vmlinux.h"
#undef bpf_iter_meta
#undef bpf_iter__netlink
#include <bpf/bpf_helpers.h> #include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h> #include <bpf/bpf_tracing.h>
char _license[] SEC("license") = "GPL"; char _license[] SEC("license") = "GPL";
#define sk_rmem_alloc sk_backlog.rmem_alloc
#define sk_refcnt __sk_common.skc_refcnt
struct bpf_iter_meta {
struct seq_file *seq;
__u64 session_id;
__u64 seq_num;
} __attribute__((preserve_access_index));
struct bpf_iter__netlink {
struct bpf_iter_meta *meta;
struct netlink_sock *sk;
} __attribute__((preserve_access_index));
static inline struct inode *SOCK_INODE(struct socket *socket) static inline struct inode *SOCK_INODE(struct socket *socket)
{ {
return &container_of(socket, struct socket_alloc, socket)->vfs_inode; return &container_of(socket, struct socket_alloc, socket)->vfs_inode;
......
// SPDX-License-Identifier: GPL-2.0 // SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2020 Facebook */ /* Copyright (c) 2020 Facebook */
/* "undefine" structs in vmlinux.h, because we "override" them below */ #include "bpf_iter.h"
#define bpf_iter_meta bpf_iter_meta___not_used
#define bpf_iter__task bpf_iter__task___not_used
#include "vmlinux.h"
#undef bpf_iter_meta
#undef bpf_iter__task
#include <bpf/bpf_helpers.h> #include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h> #include <bpf/bpf_tracing.h>
char _license[] SEC("license") = "GPL"; char _license[] SEC("license") = "GPL";
struct bpf_iter_meta {
struct seq_file *seq;
__u64 session_id;
__u64 seq_num;
} __attribute__((preserve_access_index));
struct bpf_iter__task {
struct bpf_iter_meta *meta;
struct task_struct *task;
} __attribute__((preserve_access_index));
SEC("iter/task") SEC("iter/task")
int dump_task(struct bpf_iter__task *ctx) int dump_task(struct bpf_iter__task *ctx)
{ {
......
// SPDX-License-Identifier: GPL-2.0 // SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2020 Facebook */ /* Copyright (c) 2020 Facebook */
/* "undefine" structs in vmlinux.h, because we "override" them below */ #include "bpf_iter.h"
#define bpf_iter_meta bpf_iter_meta___not_used
#define bpf_iter__task_file bpf_iter__task_file___not_used
#include "vmlinux.h"
#undef bpf_iter_meta
#undef bpf_iter__task_file
#include <bpf/bpf_helpers.h> #include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h> #include <bpf/bpf_tracing.h>
char _license[] SEC("license") = "GPL"; char _license[] SEC("license") = "GPL";
struct bpf_iter_meta {
struct seq_file *seq;
__u64 session_id;
__u64 seq_num;
} __attribute__((preserve_access_index));
struct bpf_iter__task_file {
struct bpf_iter_meta *meta;
struct task_struct *task;
__u32 fd;
struct file *file;
} __attribute__((preserve_access_index));
SEC("iter/task_file") SEC("iter/task_file")
int dump_task_file(struct bpf_iter__task_file *ctx) int dump_task_file(struct bpf_iter__task_file *ctx)
{ {
......
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2020 Facebook */
#include "bpf_iter.h"
#include "bpf_tracing_net.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
#include <bpf/bpf_endian.h>
char _license[] SEC("license") = "GPL";
/* Returns 1 when @h has no back pointer (pprev is NULL), 0 otherwise. */
static int hlist_unhashed_lockless(const struct hlist_node *h)
{
	return h->pprev ? 0 : 1;
}
/* A timer counts as pending while its list entry is still hashed. */
static int timer_pending(const struct timer_list * timer)
{
	const struct hlist_node *node = &timer->entry;

	return !hlist_unhashed_lockless(node);
}
extern unsigned CONFIG_HZ __kconfig;
#define USER_HZ 100
#define NSEC_PER_SEC 1000000000ULL
/* Convert a jiffies count into USER_HZ clock ticks.
 *
 * This version is tailored to one particular USER_HZ setting. When the
 * jiffy length is an exact multiple of the user tick length the
 * conversion is a plain integer scale; otherwise it goes through
 * nanoseconds.
 */
static clock_t jiffies_to_clock_t(unsigned long x)
{
	u64 ns_per_user_tick = NSEC_PER_SEC / USER_HZ;
	u64 ns_per_jiffy = (NSEC_PER_SEC + CONFIG_HZ/2) / CONFIG_HZ;

	if ((ns_per_jiffy % ns_per_user_tick) != 0)
		return x * ns_per_jiffy / ns_per_user_tick;

	if (CONFIG_HZ < USER_HZ)
		return x * (USER_HZ / CONFIG_HZ);

	return x / (CONFIG_HZ / USER_HZ);
}
/* Like jiffies_to_clock_t(), but a zero or negative delta yields 0. */
static clock_t jiffies_delta_to_clock_t(long delta)
{
	return delta <= 0 ? 0 : jiffies_to_clock_t(delta);
}
static long sock_i_ino(const struct sock *sk)
{
const struct socket *sk_socket = sk->sk_socket;
const struct inode *inode;
unsigned long ino;
if (!sk_socket)
return 0;
inode = &container_of(sk_socket, struct socket_alloc, socket)->vfs_inode;
bpf_probe_read(&ino, sizeof(ino), &inode->i_ino);
return ino;
}
static bool
inet_csk_in_pingpong_mode(const struct inet_connection_sock *icsk)
{
return icsk->icsk_ack.pingpong >= TCP_PINGPONG_THRESH;
}
static bool tcp_in_initial_slowstart(const struct tcp_sock *tcp)
{
return tcp->snd_ssthresh >= TCP_INFINITE_SSTHRESH;
}
/* Print one line for a full IPv4 TCP socket.
 *
 * @seq:     seq_file to print into
 * @tp:      socket to describe
 * @uid:     owner uid supplied by the iterator context
 * @seq_num: sequence number printed in the first column
 *
 * The line is emitted with three BPF_SEQ_PRINTF calls: the
 * local/remote address pair, the state/queue/timer block, and the
 * trailing pointer/rto/ato/cwnd block. Always returns 0.
 */
static int dump_tcp_sock(struct seq_file *seq, struct tcp_sock *tp,
			 uid_t uid, __u32 seq_num)
{
	const struct inet_connection_sock *icsk;
	const struct fastopen_queue *fastopenq;
	const struct inet_sock *inet;
	unsigned long timer_expires;
	const struct sock *sp;
	__u16 destp, srcp;
	__be32 dest, src;
	int timer_active;
	int rx_queue;
	int state;

	icsk = &tp->inet_conn;
	inet = &icsk->icsk_inet;
	sp = &inet->sk;
	fastopenq = &icsk->icsk_accept_queue.fastopenq;

	dest = inet->inet_daddr;
	src = inet->inet_rcv_saddr;
	destp = bpf_ntohs(inet->inet_dport);
	srcp = bpf_ntohs(inet->inet_sport);

	/* Pick the timer to report: a pending icsk timer wins over sk_timer */
	if (icsk->icsk_pending == ICSK_TIME_RETRANS ||
	    icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT ||
	    icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
		timer_active = 1;
		timer_expires = icsk->icsk_timeout;
	} else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
		timer_active = 4;
		timer_expires = icsk->icsk_timeout;
	} else if (timer_pending(&sp->sk_timer)) {
		timer_active = 2;
		timer_expires = sp->sk_timer.expires;
	} else {
		/* no timer: "expires now" so the printed delta is 0 */
		timer_active = 0;
		timer_expires = bpf_jiffies64();
	}

	state = sp->sk_state;
	if (state == TCP_LISTEN) {
		rx_queue = sp->sk_ack_backlog;
	} else {
		rx_queue = tp->rcv_nxt - tp->copied_seq;
		if (rx_queue < 0)
			rx_queue = 0;
	}

	/* The remote column is address:port, i.e. dest followed by destp.
	 * Passing destp twice printed the port in the address slot.
	 */
	BPF_SEQ_PRINTF(seq, "%4d: %08X:%04X %08X:%04X ",
		       seq_num, src, srcp, dest, destp);
	BPF_SEQ_PRINTF(seq, "%02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d ",
		       state,
		       tp->write_seq - tp->snd_una, rx_queue,
		       timer_active,
		       jiffies_delta_to_clock_t(timer_expires - bpf_jiffies64()),
		       icsk->icsk_retransmits, uid,
		       icsk->icsk_probes_out,
		       sock_i_ino(sp),
		       sp->sk_refcnt.refs.counter);
	BPF_SEQ_PRINTF(seq, "%pK %lu %lu %u %u %d\n",
		       tp,
		       jiffies_to_clock_t(icsk->icsk_rto),
		       jiffies_to_clock_t(icsk->icsk_ack.ato),
		       (icsk->icsk_ack.quick << 1) | inet_csk_in_pingpong_mode(icsk),
		       tp->snd_cwnd,
		       state == TCP_LISTEN ? fastopenq->max_qlen
				: (tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh)
		      );

	return 0;
}
/* Print one line for an IPv4 TIME_WAIT socket. Columns that have no
 * meaning for a timewait sock are printed as 0; the timer type is the
 * constant 3. @uid is accepted for symmetry but not printed.
 * Always returns 0.
 */
static int dump_tw_sock(struct seq_file *seq, struct tcp_timewait_sock *ttw,
			uid_t uid, __u32 seq_num)
{
	struct inet_timewait_sock *tw = &ttw->tw_sk;
	long remaining = tw->tw_timer.expires - bpf_jiffies64();
	__be32 raddr = tw->tw_daddr;
	__be32 laddr = tw->tw_rcv_saddr;
	__u16 rport = bpf_ntohs(tw->tw_dport);
	__u16 lport = bpf_ntohs(tw->tw_sport);

	BPF_SEQ_PRINTF(seq, "%4d: %08X:%04X %08X:%04X ",
		       seq_num, laddr, lport, raddr, rport);

	BPF_SEQ_PRINTF(seq, "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK\n",
		       tw->tw_substate, 0, 0,
		       3, jiffies_delta_to_clock_t(remaining), 0, 0, 0, 0,
		       tw->tw_refcnt.refs.counter, tw);

	return 0;
}
/* Print one line for an IPv4 request sock. The state column is always
 * TCP_SYN_RECV and the timer type is the constant 1; a timer that has
 * already expired is reported as 0 remaining. Always returns 0.
 */
static int dump_req_sock(struct seq_file *seq, struct tcp_request_sock *treq,
			 uid_t uid, __u32 seq_num)
{
	struct inet_request_sock *irsk = &treq->req;
	struct request_sock *req = &irsk->req;
	long ttd = req->rsk_timer.expires - bpf_jiffies64();

	ttd = ttd < 0 ? 0 : ttd;

	BPF_SEQ_PRINTF(seq, "%4d: %08X:%04X %08X:%04X ",
		       seq_num, irsk->ir_loc_addr,
		       irsk->ir_num, irsk->ir_rmt_addr,
		       bpf_ntohs(irsk->ir_rmt_port));

	BPF_SEQ_PRINTF(seq, "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK\n",
		       TCP_SYN_RECV, 0, 0, 1, jiffies_to_clock_t(ttd),
		       req->num_timeout, uid, 0, 0, 0, req);

	return 0;
}
/* Iterator program printing IPv4 TCP sockets.
 *
 * A NULL socket is ignored. The header goes out with the first element
 * (seq_num 0), before the family filter. AF_INET sockets are then
 * dispatched on the first cast helper that succeeds: full socket,
 * timewait socket, request socket.
 */
SEC("iter/tcp")
int dump_tcp4(struct bpf_iter__tcp *ctx)
{
	struct sock_common *skc = ctx->sk_common;
	struct seq_file *seq = ctx->meta->seq;
	struct tcp_request_sock *req_sk;
	struct tcp_timewait_sock *tw_sk;
	struct tcp_sock *full_sk;
	uid_t uid = ctx->uid;
	__u32 seq_num;

	if (skc == (void *)0)
		return 0;

	seq_num = ctx->meta->seq_num;
	if (seq_num == 0)
		BPF_SEQ_PRINTF(seq, " sl "
				    "local_address "
				    "rem_address "
				    "st tx_queue rx_queue tr tm->when retrnsmt"
				    " uid timeout inode\n");

	if (skc->skc_family != AF_INET)
		return 0;

	full_sk = bpf_skc_to_tcp_sock(skc);
	if (full_sk)
		return dump_tcp_sock(seq, full_sk, uid, seq_num);

	tw_sk = bpf_skc_to_tcp_timewait_sock(skc);
	if (tw_sk)
		return dump_tw_sock(seq, tw_sk, uid, seq_num);

	req_sk = bpf_skc_to_tcp_request_sock(skc);
	if (req_sk)
		return dump_req_sock(seq, req_sk, uid, seq_num);

	return 0;
}
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2020 Facebook */
#include "bpf_iter.h"
#include "bpf_tracing_net.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
#include <bpf/bpf_endian.h>
char _license[] SEC("license") = "GPL";
/* Returns 1 when @h has no back pointer (pprev is NULL), 0 otherwise. */
static int hlist_unhashed_lockless(const struct hlist_node *h)
{
	return h->pprev ? 0 : 1;
}
/* A timer counts as pending while its list entry is still hashed. */
static int timer_pending(const struct timer_list * timer)
{
	const struct hlist_node *node = &timer->entry;

	return !hlist_unhashed_lockless(node);
}
extern unsigned CONFIG_HZ __kconfig;
#define USER_HZ 100
#define NSEC_PER_SEC 1000000000ULL
/* Convert a jiffies count into USER_HZ clock ticks.
 *
 * This version is tailored to one particular USER_HZ setting. When the
 * jiffy length is an exact multiple of the user tick length the
 * conversion is a plain integer scale; otherwise it goes through
 * nanoseconds.
 */
static clock_t jiffies_to_clock_t(unsigned long x)
{
	u64 ns_per_user_tick = NSEC_PER_SEC / USER_HZ;
	u64 ns_per_jiffy = (NSEC_PER_SEC + CONFIG_HZ/2) / CONFIG_HZ;

	if ((ns_per_jiffy % ns_per_user_tick) != 0)
		return x * ns_per_jiffy / ns_per_user_tick;

	if (CONFIG_HZ < USER_HZ)
		return x * (USER_HZ / CONFIG_HZ);

	return x / (CONFIG_HZ / USER_HZ);
}
/* Like jiffies_to_clock_t(), but a zero or negative delta yields 0. */
static clock_t jiffies_delta_to_clock_t(long delta)
{
	return delta <= 0 ? 0 : jiffies_to_clock_t(delta);
}
static long sock_i_ino(const struct sock *sk)
{
const struct socket *sk_socket = sk->sk_socket;
const struct inode *inode;
unsigned long ino;
if (!sk_socket)
return 0;
inode = &container_of(sk_socket, struct socket_alloc, socket)->vfs_inode;
bpf_probe_read(&ino, sizeof(ino), &inode->i_ino);
return ino;
}
static bool
inet_csk_in_pingpong_mode(const struct inet_connection_sock *icsk)
{
return icsk->icsk_ack.pingpong >= TCP_PINGPONG_THRESH;
}
static bool tcp_in_initial_slowstart(const struct tcp_sock *tcp)
{
return tcp->snd_ssthresh >= TCP_INFINITE_SSTHRESH;
}
/* Print one line for a full IPv6 TCP socket.
 *
 * The line is emitted in three pieces: the local/remote address pair
 * (four 32-bit words each plus port), the state/queue/timer block, and
 * the trailing pointer/rto/ato/cwnd block. Always returns 0.
 */
static int dump_tcp6_sock(struct seq_file *seq, struct tcp6_sock *tp,
			  uid_t uid, __u32 seq_num)
{
	const struct inet_connection_sock *icsk = &tp->tcp.inet_conn;
	const struct inet_sock *inet = &icsk->icsk_inet;
	const struct sock *sp = &inet->sk;
	const struct fastopen_queue *fastopenq;
	const struct in6_addr *raddr, *laddr;
	unsigned long timer_expires;
	__u16 rport, lport;
	int timer_active;
	int rx_queue;
	int state;

	fastopenq = &icsk->icsk_accept_queue.fastopenq;
	raddr = &sp->sk_v6_daddr;
	laddr = &sp->sk_v6_rcv_saddr;
	rport = bpf_ntohs(inet->inet_dport);
	lport = bpf_ntohs(inet->inet_sport);

	/* Pick the timer to report: a pending icsk timer wins over sk_timer */
	switch (icsk->icsk_pending) {
	case ICSK_TIME_RETRANS:
	case ICSK_TIME_REO_TIMEOUT:
	case ICSK_TIME_LOSS_PROBE:
		timer_active = 1;
		timer_expires = icsk->icsk_timeout;
		break;
	case ICSK_TIME_PROBE0:
		timer_active = 4;
		timer_expires = icsk->icsk_timeout;
		break;
	default:
		if (timer_pending(&sp->sk_timer)) {
			timer_active = 2;
			timer_expires = sp->sk_timer.expires;
		} else {
			/* no timer: "expires now" so the delta is 0 */
			timer_active = 0;
			timer_expires = bpf_jiffies64();
		}
		break;
	}

	state = sp->sk_state;
	if (state != TCP_LISTEN) {
		rx_queue = tp->tcp.rcv_nxt - tp->tcp.copied_seq;
		if (rx_queue < 0)
			rx_queue = 0;
	} else {
		rx_queue = sp->sk_ack_backlog;
	}

	BPF_SEQ_PRINTF(seq, "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X ",
		       seq_num,
		       laddr->s6_addr32[0], laddr->s6_addr32[1],
		       laddr->s6_addr32[2], laddr->s6_addr32[3], lport,
		       raddr->s6_addr32[0], raddr->s6_addr32[1],
		       raddr->s6_addr32[2], raddr->s6_addr32[3], rport);
	BPF_SEQ_PRINTF(seq, "%02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d ",
		       state,
		       tp->tcp.write_seq - tp->tcp.snd_una, rx_queue,
		       timer_active,
		       jiffies_delta_to_clock_t(timer_expires - bpf_jiffies64()),
		       icsk->icsk_retransmits, uid,
		       icsk->icsk_probes_out,
		       sock_i_ino(sp),
		       sp->sk_refcnt.refs.counter);
	BPF_SEQ_PRINTF(seq, "%pK %lu %lu %u %u %d\n",
		       tp,
		       jiffies_to_clock_t(icsk->icsk_rto),
		       jiffies_to_clock_t(icsk->icsk_ack.ato),
		       (icsk->icsk_ack.quick << 1) | inet_csk_in_pingpong_mode(icsk),
		       tp->tcp.snd_cwnd,
		       state == TCP_LISTEN ? fastopenq->max_qlen
				: (tcp_in_initial_slowstart(&tp->tcp) ? -1
								      : tp->tcp.snd_ssthresh)
		      );

	return 0;
}
/* Print one line for an IPv6 TIME_WAIT socket. Columns that have no
 * meaning for a timewait sock are printed as 0; the timer type is the
 * constant 3. @uid is accepted for symmetry but not printed.
 * Always returns 0.
 */
static int dump_tw_sock(struct seq_file *seq, struct tcp_timewait_sock *ttw,
			uid_t uid, __u32 seq_num)
{
	struct inet_timewait_sock *tw = &ttw->tw_sk;
	long remaining = tw->tw_timer.expires - bpf_jiffies64();
	const struct in6_addr *raddr = &tw->tw_v6_daddr;
	const struct in6_addr *laddr = &tw->tw_v6_rcv_saddr;
	__u16 rport = bpf_ntohs(tw->tw_dport);
	__u16 lport = bpf_ntohs(tw->tw_sport);

	BPF_SEQ_PRINTF(seq, "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X ",
		       seq_num,
		       laddr->s6_addr32[0], laddr->s6_addr32[1],
		       laddr->s6_addr32[2], laddr->s6_addr32[3], lport,
		       raddr->s6_addr32[0], raddr->s6_addr32[1],
		       raddr->s6_addr32[2], raddr->s6_addr32[3], rport);

	BPF_SEQ_PRINTF(seq, "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK\n",
		       tw->tw_substate, 0, 0,
		       3, jiffies_delta_to_clock_t(remaining), 0, 0, 0, 0,
		       tw->tw_refcnt.refs.counter, tw);

	return 0;
}
/* Print one line for an IPv6 request sock. The state column is always
 * TCP_SYN_RECV and the timer type is the constant 1; a timer that has
 * already expired is reported as 0 remaining. Always returns 0.
 */
static int dump_req_sock(struct seq_file *seq, struct tcp_request_sock *treq,
			 uid_t uid, __u32 seq_num)
{
	struct inet_request_sock *irsk = &treq->req;
	struct request_sock *req = &irsk->req;
	struct in6_addr *laddr = &irsk->ir_v6_loc_addr;
	struct in6_addr *raddr = &irsk->ir_v6_rmt_addr;
	long ttd = req->rsk_timer.expires - bpf_jiffies64();

	ttd = ttd < 0 ? 0 : ttd;

	BPF_SEQ_PRINTF(seq, "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X ",
		       seq_num,
		       laddr->s6_addr32[0], laddr->s6_addr32[1],
		       laddr->s6_addr32[2], laddr->s6_addr32[3],
		       irsk->ir_num,
		       raddr->s6_addr32[0], raddr->s6_addr32[1],
		       raddr->s6_addr32[2], raddr->s6_addr32[3],
		       bpf_ntohs(irsk->ir_rmt_port));

	BPF_SEQ_PRINTF(seq, "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK\n",
		       TCP_SYN_RECV, 0, 0, 1, jiffies_to_clock_t(ttd),
		       req->num_timeout, uid, 0, 0, 0, req);

	return 0;
}
/* Iterator program printing IPv6 TCP sockets.
 *
 * A NULL socket is ignored. The header goes out with the first element
 * (seq_num 0), before the family filter. AF_INET6 sockets are then
 * dispatched on the first cast helper that succeeds: full socket,
 * timewait socket, request socket.
 */
SEC("iter/tcp")
int dump_tcp6(struct bpf_iter__tcp *ctx)
{
	struct sock_common *skc = ctx->sk_common;
	struct seq_file *seq = ctx->meta->seq;
	struct tcp_request_sock *req_sk;
	struct tcp_timewait_sock *tw_sk;
	struct tcp6_sock *full_sk;
	uid_t uid = ctx->uid;
	__u32 seq_num;

	if (skc == (void *)0)
		return 0;

	seq_num = ctx->meta->seq_num;
	if (seq_num == 0)
		BPF_SEQ_PRINTF(seq, " sl "
				    "local_address "
				    "remote_address "
				    "st tx_queue rx_queue tr tm->when retrnsmt"
				    " uid timeout inode\n");

	if (skc->skc_family != AF_INET6)
		return 0;

	full_sk = bpf_skc_to_tcp6_sock(skc);
	if (full_sk)
		return dump_tcp6_sock(seq, full_sk, uid, seq_num);

	tw_sk = bpf_skc_to_tcp_timewait_sock(skc);
	if (tw_sk)
		return dump_tw_sock(seq, tw_sk, uid, seq_num);

	req_sk = bpf_skc_to_tcp_request_sock(skc);
	if (req_sk)
		return dump_req_sock(seq, req_sk, uid, seq_num);

	return 0;
}
// SPDX-License-Identifier: GPL-2.0 // SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2020 Facebook */ /* Copyright (c) 2020 Facebook */
#define bpf_iter_meta bpf_iter_meta___not_used #include "bpf_iter.h"
#define bpf_iter__task bpf_iter__task___not_used
#include "vmlinux.h"
#undef bpf_iter_meta
#undef bpf_iter__task
#include <bpf/bpf_helpers.h> #include <bpf/bpf_helpers.h>
char _license[] SEC("license") = "GPL"; char _license[] SEC("license") = "GPL";
struct bpf_iter_meta {
struct seq_file *seq;
__u64 session_id;
__u64 seq_num;
} __attribute__((preserve_access_index));
struct bpf_iter__task {
struct bpf_iter_meta *meta;
struct task_struct *task;
} __attribute__((preserve_access_index));
SEC("iter/task") SEC("iter/task")
int dump_task(struct bpf_iter__task *ctx) int dump_task(struct bpf_iter__task *ctx)
{ {
......
// SPDX-License-Identifier: GPL-2.0 // SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2020 Facebook */ /* Copyright (c) 2020 Facebook */
#define bpf_iter_meta bpf_iter_meta___not_used #include "bpf_iter.h"
#define bpf_iter__bpf_map bpf_iter__bpf_map___not_used
#include "vmlinux.h"
#undef bpf_iter_meta
#undef bpf_iter__bpf_map
#include <bpf/bpf_helpers.h> #include <bpf/bpf_helpers.h>
char _license[] SEC("license") = "GPL"; char _license[] SEC("license") = "GPL";
struct bpf_iter_meta {
struct seq_file *seq;
__u64 session_id;
__u64 seq_num;
} __attribute__((preserve_access_index));
struct bpf_iter__bpf_map {
struct bpf_iter_meta *meta;
struct bpf_map *map;
} __attribute__((preserve_access_index));
__u32 map1_id = 0, map2_id = 0; __u32 map1_id = 0, map2_id = 0;
__u32 map1_accessed = 0, map2_accessed = 0; __u32 map1_accessed = 0, map2_accessed = 0;
__u64 map1_seqnum = 0, map2_seqnum1 = 0, map2_seqnum2 = 0; __u64 map1_seqnum = 0, map2_seqnum1 = 0, map2_seqnum2 = 0;
......
/* SPDX-License-Identifier: GPL-2.0 */ /* SPDX-License-Identifier: GPL-2.0 */
/* Copyright (c) 2020 Facebook */ /* Copyright (c) 2020 Facebook */
/* "undefine" structs in vmlinux.h, because we "override" them below */ #include "bpf_iter.h"
#define bpf_iter_meta bpf_iter_meta___not_used
#define bpf_iter__task bpf_iter__task___not_used
#include "vmlinux.h"
#undef bpf_iter_meta
#undef bpf_iter__task
#include <bpf/bpf_helpers.h> #include <bpf/bpf_helpers.h>
char _license[] SEC("license") = "GPL"; char _license[] SEC("license") = "GPL";
int count = 0; int count = 0;
struct bpf_iter_meta {
struct seq_file *seq;
__u64 session_id;
__u64 seq_num;
} __attribute__((preserve_access_index));
struct bpf_iter__task {
struct bpf_iter_meta *meta;
struct task_struct *task;
} __attribute__((preserve_access_index));
SEC("iter/task") SEC("iter/task")
int dump_task(struct bpf_iter__task *ctx) int dump_task(struct bpf_iter__task *ctx)
{ {
......
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2020 Facebook */
#include "bpf_iter.h"
#include "bpf_tracing_net.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
#include <bpf/bpf_endian.h>
char _license[] SEC("license") = "GPL";
static long sock_i_ino(const struct sock *sk)
{
const struct socket *sk_socket = sk->sk_socket;
const struct inode *inode;
unsigned long ino;
if (!sk_socket)
return 0;
inode = &container_of(sk_socket, struct socket_alloc, socket)->vfs_inode;
bpf_probe_read(&ino, sizeof(ino), &inode->i_ino);
return ino;
}
/* Iterator program printing IPv4 UDP sockets.
 *
 * A NULL socket is ignored. The header goes out with the first element
 * (seq_num 0), before the family filter. AF_INET6 sockets are skipped;
 * every other socket gets one line keyed by the hash bucket index.
 */
SEC("iter/udp")
int dump_udp4(struct bpf_iter__udp *ctx)
{
	struct seq_file *seq = ctx->meta->seq;
	struct udp_sock *udp_sk = ctx->udp_sk;
	struct inet_sock *inet;
	__be32 raddr, laddr;
	__u16 lport, rport;
	__u32 seq_num;
	int rqueue;

	if (udp_sk == (void *)0)
		return 0;

	seq_num = ctx->meta->seq_num;
	if (seq_num == 0)
		BPF_SEQ_PRINTF(seq,
			       " sl local_address rem_address st tx_queue "
			       "rx_queue tr tm->when retrnsmt uid timeout "
			       "inode ref pointer drops\n");

	/* filter out udp6 sockets */
	inet = &udp_sk->inet;
	if (inet->sk.sk_family == AF_INET6)
		return 0;

	raddr = inet->inet_daddr;
	laddr = inet->inet_rcv_saddr;
	lport = bpf_ntohs(inet->inet_sport);
	rport = bpf_ntohs(inet->inet_dport);
	rqueue = inet->sk.sk_rmem_alloc.counter - udp_sk->forward_deficit;

	BPF_SEQ_PRINTF(seq, "%5d: %08X:%04X %08X:%04X ",
		       ctx->bucket, laddr, lport, raddr, rport);

	BPF_SEQ_PRINTF(seq, "%02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %u\n",
		       inet->sk.sk_state,
		       inet->sk.sk_wmem_alloc.refs.counter - 1,
		       rqueue,
		       0, 0L, 0, ctx->uid, 0,
		       sock_i_ino(&inet->sk),
		       inet->sk.sk_refcnt.refs.counter, udp_sk,
		       inet->sk.sk_drops.counter);

	return 0;
}
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2020 Facebook */
#include "bpf_iter.h"
#include "bpf_tracing_net.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
#include <bpf/bpf_endian.h>
char _license[] SEC("license") = "GPL";
/*
 * Column-header line for the IPv6 UDP dump. dump_udp6() passes it to
 * BPF_SEQ_PRINTF() when the iterator sequence number is 0, i.e. before any
 * socket line has been written by that program.
 */
#define IPV6_SEQ_DGRAM_HEADER \
" sl " \
"local_address " \
"remote_address " \
"st tx_queue rx_queue tr tm->when retrnsmt" \
" uid timeout inode ref pointer drops\n"
static long sock_i_ino(const struct sock *sk)
{
const struct socket *sk_socket = sk->sk_socket;
const struct inode *inode;
unsigned long ino;
if (!sk_socket)
return 0;
inode = &container_of(sk_socket, struct socket_alloc, socket)->vfs_inode;
bpf_probe_read(&ino, sizeof(ino), &inode->i_ino);
return ino;
}
/*
 * dump_udp6 - "iter/udp" program that writes one text line per UDP socket
 * accepted by bpf_skc_to_udp6_sock().
 *
 * @ctx: iterator context; supplies the output seq_file and sequence number
 *       (ctx->meta), the current socket (ctx->udp_sk), and the bucket and
 *       uid values that are printed verbatim.
 *
 * IPV6_SEQ_DGRAM_HEADER is written when the sequence number is 0. The
 * sequence number is tested at its full width rather than through a 32-bit
 * copy, so the header cannot be repeated once the low 32 bits wrap around.
 *
 * Always returns 0.
 */
SEC("iter/udp")
int dump_udp6(struct bpf_iter__udp *ctx)
{
	struct seq_file *seq = ctx->meta->seq;
	struct udp_sock *udp_sk = ctx->udp_sk;
	const struct in6_addr *dest, *src;
	struct udp6_sock *udp6_sk;
	struct inet_sock *inet;
	__u16 srcp, destp;
	int rqueue;

	/* nothing to print when no socket is supplied */
	if (udp_sk == (void *)0)
		return 0;

	if (ctx->meta->seq_num == 0)
		BPF_SEQ_PRINTF(seq, IPV6_SEQ_DGRAM_HEADER);

	/* the cast result is used only as a filter: skip when it is NULL */
	udp6_sk = bpf_skc_to_udp6_sock(udp_sk);
	if (udp6_sk == (void *)0)
		return 0;

	inet = &udp_sk->inet;
	/* ports are converted with bpf_ntohs() before printing */
	srcp = bpf_ntohs(inet->inet_sport);
	destp = bpf_ntohs(inet->inet_dport);
	/* receive queue: allocated receive memory minus the forward deficit */
	rqueue = inet->sk.sk_rmem_alloc.counter - udp_sk->forward_deficit;
	dest = &inet->sk.sk_v6_daddr;
	src = &inet->sk.sk_v6_rcv_saddr;

	/* each address is printed as its four 32-bit words, followed by the port */
	BPF_SEQ_PRINTF(seq, "%5d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X ",
		       ctx->bucket,
		       src->s6_addr32[0], src->s6_addr32[1],
		       src->s6_addr32[2], src->s6_addr32[3], srcp,
		       dest->s6_addr32[0], dest->s6_addr32[1],
		       dest->s6_addr32[2], dest->s6_addr32[3], destp);

	/* columns with no value to report here are printed as constant zeroes */
	BPF_SEQ_PRINTF(seq, "%02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %u\n",
		       inet->sk.sk_state,
		       inet->sk.sk_wmem_alloc.refs.counter - 1,
		       rqueue,
		       0, 0L, 0, ctx->uid, 0,
		       sock_i_ino(&inet->sk),
		       inet->sk.sk_refcnt.refs.counter, udp_sk,
		       inet->sk.sk_drops.counter);

	return 0;
}
/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
/*
 * Numeric constants and member-name aliases shared by the BPF iterator
 * programs that include this header.
 */
#ifndef __BPF_TRACING_NET_H__
#define __BPF_TRACING_NET_H__
/* Address-family values; the UDP dump compares sk_family against AF_INET6. */
#define AF_INET 2
#define AF_INET6 10
/*
 * NOTE(review): the constants below are not referenced in the visible
 * programs; they presumably mirror the kernel macros of the same names --
 * confirm the values against the kernel headers.
 */
#define ICSK_TIME_RETRANS 1
#define ICSK_TIME_PROBE0 3
#define ICSK_TIME_LOSS_PROBE 5
#define ICSK_TIME_REO_TIMEOUT 6
#define IFNAMSIZ 16
#define RTF_GATEWAY 0x0002
#define TCP_INFINITE_SSTHRESH 0x7fffffff
#define TCP_PINGPONG_THRESH 3
/*
 * Member-name aliases: each macro expands a short member name into the
 * nested member path on the right, e.g. "inet->sk.sk_family" becomes
 * "inet->sk.__sk_common.skc_family".
 */
/* aliases into nh_common */
#define fib_nh_dev nh_common.nhc_dev
#define fib_nh_gw_family nh_common.nhc_gw_family
#define fib_nh_gw6 nh_common.nhc_gw.ipv6
/* inet_* aliases into sk.__sk_common */
#define inet_daddr sk.__sk_common.skc_daddr
#define inet_rcv_saddr sk.__sk_common.skc_rcv_saddr
#define inet_dport sk.__sk_common.skc_dport
/* ir_* aliases into req.__req_common */
#define ir_loc_addr req.__req_common.skc_rcv_saddr
#define ir_num req.__req_common.skc_num
#define ir_rmt_addr req.__req_common.skc_daddr
#define ir_rmt_port req.__req_common.skc_dport
#define ir_v6_rmt_addr req.__req_common.skc_v6_daddr
#define ir_v6_loc_addr req.__req_common.skc_v6_rcv_saddr
/* sk_* aliases into __sk_common (sk_rmem_alloc goes into sk_backlog) */
#define sk_family __sk_common.skc_family
#define sk_rmem_alloc sk_backlog.rmem_alloc
#define sk_refcnt __sk_common.skc_refcnt
#define sk_state __sk_common.skc_state
#define sk_v6_daddr __sk_common.skc_v6_daddr
#define sk_v6_rcv_saddr __sk_common.skc_v6_rcv_saddr
/* 32-bit word view of an IPv6 address */
#define s6_addr32 in6_u.u6_addr32
/* tw_* aliases into __tw_common */
#define tw_daddr __tw_common.skc_daddr
#define tw_rcv_saddr __tw_common.skc_rcv_saddr
#define tw_dport __tw_common.skc_dport
#define tw_refcnt __tw_common.skc_refcnt
#define tw_v6_daddr __tw_common.skc_v6_daddr
#define tw_v6_rcv_saddr __tw_common.skc_v6_rcv_saddr
#endif
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment