Commit 1e0ab707 authored by Alexei Starovoitov

Merge branch 'sock_map: clean up and refactor code for BPF_SK_SKB_VERDICT'

Cong Wang says:

====================

From: Cong Wang <cong.wang@bytedance.com>

This patchset is the first series split out from the original large
patchset to make review easier. It does not add any new feature or
change any functionality; it merely cleans up and refactors the existing
sockmap and skmsg code to prepare for the follow-up patches. It passes
all BPF selftests.

To see the big picture, the original whole patchset is available
on github: https://github.com/congwang/linux/tree/sockmap

and this patchset is also available on github:
https://github.com/congwang/linux/tree/sockmap1
---
v7: add 1 trivial cleanup patch
    define a mask for sk_redir
    fix CONFIG_BPF_SYSCALL in include/net/udp.h
    make sk_psock_done_strp() static
    move skb_bpf_redirect_clear() to sk_psock_backlog()

v6: fix !CONFIG_INET case

v5: improve CONFIG_BPF_SYSCALL dependency
    add 3 trivial cleanup patches

v4: reuse skb dst instead of skb ext
    fix another Kconfig error

v3: fix a few Kconfig compile errors
    remove an unused variable
    add a comment for bpf_convert_data_end_access()

v2: split the original patchset
    compute data_end with bpf_convert_data_end_access()
    get rid of psock->bpf_running
    reduce the scope of CONFIG_BPF_STREAM_PARSER
    do not add CONFIG_BPF_SOCK_MAP
====================
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
parents a83586a7 ff9614b8
...@@ -1778,22 +1778,24 @@ static inline void bpf_map_offload_map_free(struct bpf_map *map) ...@@ -1778,22 +1778,24 @@ static inline void bpf_map_offload_map_free(struct bpf_map *map)
} }
#endif /* CONFIG_NET && CONFIG_BPF_SYSCALL */ #endif /* CONFIG_NET && CONFIG_BPF_SYSCALL */
#if defined(CONFIG_BPF_STREAM_PARSER) #if defined(CONFIG_INET) && defined(CONFIG_BPF_SYSCALL)
int sock_map_prog_update(struct bpf_map *map, struct bpf_prog *prog,
struct bpf_prog *old, u32 which);
int sock_map_get_from_fd(const union bpf_attr *attr, struct bpf_prog *prog); int sock_map_get_from_fd(const union bpf_attr *attr, struct bpf_prog *prog);
int sock_map_prog_detach(const union bpf_attr *attr, enum bpf_prog_type ptype); int sock_map_prog_detach(const union bpf_attr *attr, enum bpf_prog_type ptype);
int sock_map_update_elem_sys(struct bpf_map *map, void *key, void *value, u64 flags); int sock_map_update_elem_sys(struct bpf_map *map, void *key, void *value, u64 flags);
void sock_map_unhash(struct sock *sk); void sock_map_unhash(struct sock *sk);
void sock_map_close(struct sock *sk, long timeout); void sock_map_close(struct sock *sk, long timeout);
void bpf_sk_reuseport_detach(struct sock *sk);
int bpf_fd_reuseport_array_lookup_elem(struct bpf_map *map, void *key,
void *value);
int bpf_fd_reuseport_array_update_elem(struct bpf_map *map, void *key,
void *value, u64 map_flags);
#else #else
static inline int sock_map_prog_update(struct bpf_map *map, static inline void bpf_sk_reuseport_detach(struct sock *sk)
struct bpf_prog *prog,
struct bpf_prog *old, u32 which)
{ {
return -EOPNOTSUPP;
} }
#ifdef CONFIG_BPF_SYSCALL
static inline int sock_map_get_from_fd(const union bpf_attr *attr, static inline int sock_map_get_from_fd(const union bpf_attr *attr,
struct bpf_prog *prog) struct bpf_prog *prog)
{ {
...@@ -1811,20 +1813,7 @@ static inline int sock_map_update_elem_sys(struct bpf_map *map, void *key, void ...@@ -1811,20 +1813,7 @@ static inline int sock_map_update_elem_sys(struct bpf_map *map, void *key, void
{ {
return -EOPNOTSUPP; return -EOPNOTSUPP;
} }
#endif /* CONFIG_BPF_STREAM_PARSER */
#if defined(CONFIG_INET) && defined(CONFIG_BPF_SYSCALL)
void bpf_sk_reuseport_detach(struct sock *sk);
int bpf_fd_reuseport_array_lookup_elem(struct bpf_map *map, void *key,
void *value);
int bpf_fd_reuseport_array_update_elem(struct bpf_map *map, void *key,
void *value, u64 map_flags);
#else
static inline void bpf_sk_reuseport_detach(struct sock *sk)
{
}
#ifdef CONFIG_BPF_SYSCALL
static inline int bpf_fd_reuseport_array_lookup_elem(struct bpf_map *map, static inline int bpf_fd_reuseport_array_lookup_elem(struct bpf_map *map,
void *key, void *value) void *key, void *value)
{ {
......
...@@ -103,10 +103,6 @@ BPF_MAP_TYPE(BPF_MAP_TYPE_HASH_OF_MAPS, htab_of_maps_map_ops) ...@@ -103,10 +103,6 @@ BPF_MAP_TYPE(BPF_MAP_TYPE_HASH_OF_MAPS, htab_of_maps_map_ops)
BPF_MAP_TYPE(BPF_MAP_TYPE_DEVMAP, dev_map_ops) BPF_MAP_TYPE(BPF_MAP_TYPE_DEVMAP, dev_map_ops)
BPF_MAP_TYPE(BPF_MAP_TYPE_DEVMAP_HASH, dev_map_hash_ops) BPF_MAP_TYPE(BPF_MAP_TYPE_DEVMAP_HASH, dev_map_hash_ops)
BPF_MAP_TYPE(BPF_MAP_TYPE_SK_STORAGE, sk_storage_map_ops) BPF_MAP_TYPE(BPF_MAP_TYPE_SK_STORAGE, sk_storage_map_ops)
#if defined(CONFIG_BPF_STREAM_PARSER)
BPF_MAP_TYPE(BPF_MAP_TYPE_SOCKMAP, sock_map_ops)
BPF_MAP_TYPE(BPF_MAP_TYPE_SOCKHASH, sock_hash_ops)
#endif
#ifdef CONFIG_BPF_LSM #ifdef CONFIG_BPF_LSM
BPF_MAP_TYPE(BPF_MAP_TYPE_INODE_STORAGE, inode_storage_map_ops) BPF_MAP_TYPE(BPF_MAP_TYPE_INODE_STORAGE, inode_storage_map_ops)
#endif #endif
...@@ -116,6 +112,8 @@ BPF_MAP_TYPE(BPF_MAP_TYPE_CPUMAP, cpu_map_ops) ...@@ -116,6 +112,8 @@ BPF_MAP_TYPE(BPF_MAP_TYPE_CPUMAP, cpu_map_ops)
BPF_MAP_TYPE(BPF_MAP_TYPE_XSKMAP, xsk_map_ops) BPF_MAP_TYPE(BPF_MAP_TYPE_XSKMAP, xsk_map_ops)
#endif #endif
#ifdef CONFIG_INET #ifdef CONFIG_INET
BPF_MAP_TYPE(BPF_MAP_TYPE_SOCKMAP, sock_map_ops)
BPF_MAP_TYPE(BPF_MAP_TYPE_SOCKHASH, sock_hash_ops)
BPF_MAP_TYPE(BPF_MAP_TYPE_REUSEPORT_SOCKARRAY, reuseport_array_ops) BPF_MAP_TYPE(BPF_MAP_TYPE_REUSEPORT_SOCKARRAY, reuseport_array_ops)
#endif #endif
#endif #endif
......
...@@ -755,6 +755,9 @@ struct sk_buff { ...@@ -755,6 +755,9 @@ struct sk_buff {
void (*destructor)(struct sk_buff *skb); void (*destructor)(struct sk_buff *skb);
}; };
struct list_head tcp_tsorted_anchor; struct list_head tcp_tsorted_anchor;
#ifdef CONFIG_NET_SOCK_MSG
unsigned long _sk_redir;
#endif
}; };
#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
......
...@@ -56,8 +56,8 @@ struct sk_msg { ...@@ -56,8 +56,8 @@ struct sk_msg {
struct sk_psock_progs { struct sk_psock_progs {
struct bpf_prog *msg_parser; struct bpf_prog *msg_parser;
struct bpf_prog *skb_parser; struct bpf_prog *stream_parser;
struct bpf_prog *skb_verdict; struct bpf_prog *stream_verdict;
}; };
enum sk_psock_state_bits { enum sk_psock_state_bits {
...@@ -70,12 +70,6 @@ struct sk_psock_link { ...@@ -70,12 +70,6 @@ struct sk_psock_link {
void *link_raw; void *link_raw;
}; };
struct sk_psock_parser {
struct strparser strp;
bool enabled;
void (*saved_data_ready)(struct sock *sk);
};
struct sk_psock_work_state { struct sk_psock_work_state {
struct sk_buff *skb; struct sk_buff *skb;
u32 len; u32 len;
...@@ -90,7 +84,9 @@ struct sk_psock { ...@@ -90,7 +84,9 @@ struct sk_psock {
u32 eval; u32 eval;
struct sk_msg *cork; struct sk_msg *cork;
struct sk_psock_progs progs; struct sk_psock_progs progs;
struct sk_psock_parser parser; #if IS_ENABLED(CONFIG_BPF_STREAM_PARSER)
struct strparser strp;
#endif
struct sk_buff_head ingress_skb; struct sk_buff_head ingress_skb;
struct list_head ingress_msg; struct list_head ingress_msg;
unsigned long state; unsigned long state;
...@@ -100,6 +96,7 @@ struct sk_psock { ...@@ -100,6 +96,7 @@ struct sk_psock {
void (*saved_unhash)(struct sock *sk); void (*saved_unhash)(struct sock *sk);
void (*saved_close)(struct sock *sk, long timeout); void (*saved_close)(struct sock *sk, long timeout);
void (*saved_write_space)(struct sock *sk); void (*saved_write_space)(struct sock *sk);
void (*saved_data_ready)(struct sock *sk);
struct proto *sk_proto; struct proto *sk_proto;
struct sk_psock_work_state work_state; struct sk_psock_work_state work_state;
struct work_struct work; struct work_struct work;
...@@ -305,9 +302,25 @@ static inline void sk_psock_report_error(struct sk_psock *psock, int err) ...@@ -305,9 +302,25 @@ static inline void sk_psock_report_error(struct sk_psock *psock, int err)
struct sk_psock *sk_psock_init(struct sock *sk, int node); struct sk_psock *sk_psock_init(struct sock *sk, int node);
#if IS_ENABLED(CONFIG_BPF_STREAM_PARSER)
int sk_psock_init_strp(struct sock *sk, struct sk_psock *psock); int sk_psock_init_strp(struct sock *sk, struct sk_psock *psock);
void sk_psock_start_strp(struct sock *sk, struct sk_psock *psock); void sk_psock_start_strp(struct sock *sk, struct sk_psock *psock);
void sk_psock_stop_strp(struct sock *sk, struct sk_psock *psock); void sk_psock_stop_strp(struct sock *sk, struct sk_psock *psock);
#else
/*
 * Fallback used when CONFIG_BPF_STREAM_PARSER is not enabled: there is no
 * stream parser to set up, so initialisation always fails.
 *
 * Returns -EOPNOTSUPP unconditionally; @sk and @psock are not touched.
 */
static inline int sk_psock_init_strp(struct sock *sk, struct sk_psock *psock)
{
	return -EOPNOTSUPP;
}
/*
 * Fallback used when CONFIG_BPF_STREAM_PARSER is not enabled: starting the
 * stream parser is a no-op; @sk and @psock are not touched.
 */
static inline void sk_psock_start_strp(struct sock *sk, struct sk_psock *psock)
{
}
/*
 * Fallback used when CONFIG_BPF_STREAM_PARSER is not enabled: stopping the
 * stream parser is a no-op; @sk and @psock are not touched.
 */
static inline void sk_psock_stop_strp(struct sock *sk, struct sk_psock *psock)
{
}
#endif
void sk_psock_start_verdict(struct sock *sk, struct sk_psock *psock); void sk_psock_start_verdict(struct sock *sk, struct sk_psock *psock);
void sk_psock_stop_verdict(struct sock *sk, struct sk_psock *psock); void sk_psock_stop_verdict(struct sock *sk, struct sk_psock *psock);
...@@ -327,8 +340,6 @@ static inline void sk_psock_free_link(struct sk_psock_link *link) ...@@ -327,8 +340,6 @@ static inline void sk_psock_free_link(struct sk_psock_link *link)
struct sk_psock_link *sk_psock_link_pop(struct sk_psock *psock); struct sk_psock_link *sk_psock_link_pop(struct sk_psock *psock);
void __sk_psock_purge_ingress_msg(struct sk_psock *psock);
static inline void sk_psock_cork_free(struct sk_psock *psock) static inline void sk_psock_cork_free(struct sk_psock *psock)
{ {
if (psock->cork) { if (psock->cork) {
...@@ -389,7 +400,6 @@ static inline struct sk_psock *sk_psock_get(struct sock *sk) ...@@ -389,7 +400,6 @@ static inline struct sk_psock *sk_psock_get(struct sock *sk)
return psock; return psock;
} }
void sk_psock_stop(struct sock *sk, struct sk_psock *psock);
void sk_psock_drop(struct sock *sk, struct sk_psock *psock); void sk_psock_drop(struct sock *sk, struct sk_psock *psock);
static inline void sk_psock_put(struct sock *sk, struct sk_psock *psock) static inline void sk_psock_put(struct sock *sk, struct sk_psock *psock)
...@@ -400,8 +410,8 @@ static inline void sk_psock_put(struct sock *sk, struct sk_psock *psock) ...@@ -400,8 +410,8 @@ static inline void sk_psock_put(struct sock *sk, struct sk_psock *psock)
static inline void sk_psock_data_ready(struct sock *sk, struct sk_psock *psock) static inline void sk_psock_data_ready(struct sock *sk, struct sk_psock *psock)
{ {
if (psock->parser.enabled) if (psock->saved_data_ready)
psock->parser.saved_data_ready(sk); psock->saved_data_ready(sk);
else else
sk->sk_data_ready(sk); sk->sk_data_ready(sk);
} }
...@@ -430,8 +440,8 @@ static inline int psock_replace_prog(struct bpf_prog **pprog, ...@@ -430,8 +440,8 @@ static inline int psock_replace_prog(struct bpf_prog **pprog,
static inline void psock_progs_drop(struct sk_psock_progs *progs) static inline void psock_progs_drop(struct sk_psock_progs *progs)
{ {
psock_set_prog(&progs->msg_parser, NULL); psock_set_prog(&progs->msg_parser, NULL);
psock_set_prog(&progs->skb_parser, NULL); psock_set_prog(&progs->stream_parser, NULL);
psock_set_prog(&progs->skb_verdict, NULL); psock_set_prog(&progs->stream_verdict, NULL);
} }
int sk_psock_tls_strp_read(struct sk_psock *psock, struct sk_buff *skb); int sk_psock_tls_strp_read(struct sk_psock *psock, struct sk_buff *skb);
...@@ -440,6 +450,44 @@ static inline bool sk_psock_strp_enabled(struct sk_psock *psock) ...@@ -440,6 +450,44 @@ static inline bool sk_psock_strp_enabled(struct sk_psock *psock)
{ {
if (!psock) if (!psock)
return false; return false;
return psock->parser.enabled; return !!psock->saved_data_ready;
}
#if IS_ENABLED(CONFIG_NET_SOCK_MSG)
/*
 * skb->_sk_redir carries a struct sock pointer together with flag bits.
 * BPF_F_INGRESS is the only flag stored so far, so every other bit belongs
 * to the pointer. The expansion is fully parenthesized so the mask can be
 * dropped into any surrounding expression.
 */
#define BPF_F_PTR_MASK (~(BPF_F_INGRESS))
/*
 * Report whether the redirect state stored in @skb has the BPF_F_INGRESS
 * flag set.
 */
static inline bool skb_bpf_ingress(const struct sk_buff *skb)
{
	return skb->_sk_redir & BPF_F_INGRESS;
}
/*
 * Set the BPF_F_INGRESS flag in @skb's redirect state, leaving the rest of
 * the stored value unchanged.
 */
static inline void skb_bpf_set_ingress(struct sk_buff *skb)
{
	skb->_sk_redir = skb->_sk_redir | BPF_F_INGRESS;
}
/*
 * Record a redirect target on @skb.
 *
 * @skb:      buffer whose redirect state is overwritten.
 * @sk_redir: target socket; its address is stored as an unsigned long.
 * @ingress:  when true, BPF_F_INGRESS is OR-ed into the stored value.
 *
 * Any previously stored redirect state is replaced.
 */
static inline void skb_bpf_set_redir(struct sk_buff *skb, struct sock *sk_redir,
				     bool ingress)
{
	unsigned long packed = (unsigned long)sk_redir;

	if (ingress)
		packed |= BPF_F_INGRESS;

	skb->_sk_redir = packed;
}
/*
 * Return the socket pointer stored in @skb's redirect state, with the flag
 * bit(s) excluded by BPF_F_PTR_MASK stripped off.
 */
static inline struct sock *skb_bpf_redirect_fetch(const struct sk_buff *skb)
{
	return (struct sock *)(skb->_sk_redir & BPF_F_PTR_MASK);
}
/*
 * Forget any redirect recorded on @skb by zeroing the whole stored value
 * (pointer and flag bits alike).
 */
static inline void skb_bpf_redirect_clear(struct sk_buff *skb)
{
	skb->_sk_redir = 0UL;
}
#endif /* CONFIG_NET_SOCK_MSG */
#endif /* _LINUX_SKMSG_H */ #endif /* _LINUX_SKMSG_H */
...@@ -883,36 +883,11 @@ struct tcp_skb_cb { ...@@ -883,36 +883,11 @@ struct tcp_skb_cb {
struct inet6_skb_parm h6; struct inet6_skb_parm h6;
#endif #endif
} header; /* For incoming skbs */ } header; /* For incoming skbs */
struct {
__u32 flags;
struct sock *sk_redir;
void *data_end;
} bpf;
}; };
}; };
#define TCP_SKB_CB(__skb) ((struct tcp_skb_cb *)&((__skb)->cb[0])) #define TCP_SKB_CB(__skb) ((struct tcp_skb_cb *)&((__skb)->cb[0]))
static inline void bpf_compute_data_end_sk_skb(struct sk_buff *skb)
{
TCP_SKB_CB(skb)->bpf.data_end = skb->data + skb_headlen(skb);
}
static inline bool tcp_skb_bpf_ingress(const struct sk_buff *skb)
{
return TCP_SKB_CB(skb)->bpf.flags & BPF_F_INGRESS;
}
static inline struct sock *tcp_skb_bpf_redirect_fetch(struct sk_buff *skb)
{
return TCP_SKB_CB(skb)->bpf.sk_redir;
}
static inline void tcp_skb_bpf_redirect_clear(struct sk_buff *skb)
{
TCP_SKB_CB(skb)->bpf.sk_redir = NULL;
}
extern const struct inet_connection_sock_af_ops ipv4_specific; extern const struct inet_connection_sock_af_ops ipv4_specific;
#if IS_ENABLED(CONFIG_IPV6) #if IS_ENABLED(CONFIG_IPV6)
...@@ -2222,25 +2197,27 @@ void tcp_update_ulp(struct sock *sk, struct proto *p, ...@@ -2222,25 +2197,27 @@ void tcp_update_ulp(struct sock *sk, struct proto *p,
__MODULE_INFO(alias, alias_userspace, name); \ __MODULE_INFO(alias, alias_userspace, name); \
__MODULE_INFO(alias, alias_tcp_ulp, "tcp-ulp-" name) __MODULE_INFO(alias, alias_tcp_ulp, "tcp-ulp-" name)
#ifdef CONFIG_NET_SOCK_MSG
struct sk_msg; struct sk_msg;
struct sk_psock; struct sk_psock;
#ifdef CONFIG_BPF_STREAM_PARSER #ifdef CONFIG_BPF_SYSCALL
struct proto *tcp_bpf_get_proto(struct sock *sk, struct sk_psock *psock); struct proto *tcp_bpf_get_proto(struct sock *sk, struct sk_psock *psock);
void tcp_bpf_clone(const struct sock *sk, struct sock *newsk); void tcp_bpf_clone(const struct sock *sk, struct sock *newsk);
#else #endif /* CONFIG_BPF_SYSCALL */
static inline void tcp_bpf_clone(const struct sock *sk, struct sock *newsk)
{
}
#endif /* CONFIG_BPF_STREAM_PARSER */
#ifdef CONFIG_NET_SOCK_MSG
int tcp_bpf_sendmsg_redir(struct sock *sk, struct sk_msg *msg, u32 bytes, int tcp_bpf_sendmsg_redir(struct sock *sk, struct sk_msg *msg, u32 bytes,
int flags); int flags);
int __tcp_bpf_recvmsg(struct sock *sk, struct sk_psock *psock, int __tcp_bpf_recvmsg(struct sock *sk, struct sk_psock *psock,
struct msghdr *msg, int len, int flags); struct msghdr *msg, int len, int flags);
#endif /* CONFIG_NET_SOCK_MSG */ #endif /* CONFIG_NET_SOCK_MSG */
#if !defined(CONFIG_BPF_SYSCALL) || !defined(CONFIG_NET_SOCK_MSG)
/*
 * Fallback used when CONFIG_BPF_SYSCALL or CONFIG_NET_SOCK_MSG is not
 * defined: a no-op that touches neither @sk nor @newsk.
 */
static inline void tcp_bpf_clone(const struct sock *sk, struct sock *newsk)
{
}
#endif
#ifdef CONFIG_CGROUP_BPF #ifdef CONFIG_CGROUP_BPF
static inline void bpf_skops_init_skb(struct bpf_sock_ops_kern *skops, static inline void bpf_skops_init_skb(struct bpf_sock_ops_kern *skops,
struct sk_buff *skb, struct sk_buff *skb,
......
...@@ -515,9 +515,9 @@ static inline struct sk_buff *udp_rcv_segment(struct sock *sk, ...@@ -515,9 +515,9 @@ static inline struct sk_buff *udp_rcv_segment(struct sock *sk,
return segs; return segs;
} }
#ifdef CONFIG_BPF_STREAM_PARSER #ifdef CONFIG_BPF_SYSCALL
struct sk_psock; struct sk_psock;
struct proto *udp_bpf_get_proto(struct sock *sk, struct sk_psock *psock); struct proto *udp_bpf_get_proto(struct sock *sk, struct sk_psock *psock);
#endif /* BPF_STREAM_PARSER */ #endif
#endif /* _UDP_H */ #endif /* _UDP_H */
...@@ -1702,6 +1702,7 @@ config BPF_SYSCALL ...@@ -1702,6 +1702,7 @@ config BPF_SYSCALL
select BPF select BPF
select IRQ_WORK select IRQ_WORK
select TASKS_TRACE_RCU select TASKS_TRACE_RCU
select NET_SOCK_MSG if INET
default n default n
help help
Enable the bpf() system call that allows to manipulate eBPF Enable the bpf() system call that allows to manipulate eBPF
......
...@@ -317,13 +317,9 @@ config BPF_STREAM_PARSER ...@@ -317,13 +317,9 @@ config BPF_STREAM_PARSER
select STREAM_PARSER select STREAM_PARSER
select NET_SOCK_MSG select NET_SOCK_MSG
help help
Enabling this allows a stream parser to be used with Enabling this allows a TCP stream parser to be used with
BPF_MAP_TYPE_SOCKMAP. BPF_MAP_TYPE_SOCKMAP.
BPF_MAP_TYPE_SOCKMAP provides a map type to use with network sockets.
It can be used to enforce socket policy, implement socket redirects,
etc.
config NET_FLOW_LIMIT config NET_FLOW_LIMIT
bool bool
depends on RPS depends on RPS
......
...@@ -16,7 +16,6 @@ obj-y += dev.o dev_addr_lists.o dst.o netevent.o \ ...@@ -16,7 +16,6 @@ obj-y += dev.o dev_addr_lists.o dst.o netevent.o \
obj-y += net-sysfs.o obj-y += net-sysfs.o
obj-$(CONFIG_PAGE_POOL) += page_pool.o obj-$(CONFIG_PAGE_POOL) += page_pool.o
obj-$(CONFIG_PROC_FS) += net-procfs.o obj-$(CONFIG_PROC_FS) += net-procfs.o
obj-$(CONFIG_NET_SOCK_MSG) += skmsg.o
obj-$(CONFIG_NET_PKTGEN) += pktgen.o obj-$(CONFIG_NET_PKTGEN) += pktgen.o
obj-$(CONFIG_NETPOLL) += netpoll.o obj-$(CONFIG_NETPOLL) += netpoll.o
obj-$(CONFIG_FIB_RULES) += fib_rules.o obj-$(CONFIG_FIB_RULES) += fib_rules.o
...@@ -28,10 +27,13 @@ obj-$(CONFIG_CGROUP_NET_PRIO) += netprio_cgroup.o ...@@ -28,10 +27,13 @@ obj-$(CONFIG_CGROUP_NET_PRIO) += netprio_cgroup.o
obj-$(CONFIG_CGROUP_NET_CLASSID) += netclassid_cgroup.o obj-$(CONFIG_CGROUP_NET_CLASSID) += netclassid_cgroup.o
obj-$(CONFIG_LWTUNNEL) += lwtunnel.o obj-$(CONFIG_LWTUNNEL) += lwtunnel.o
obj-$(CONFIG_LWTUNNEL_BPF) += lwt_bpf.o obj-$(CONFIG_LWTUNNEL_BPF) += lwt_bpf.o
obj-$(CONFIG_BPF_STREAM_PARSER) += sock_map.o
obj-$(CONFIG_DST_CACHE) += dst_cache.o obj-$(CONFIG_DST_CACHE) += dst_cache.o
obj-$(CONFIG_HWBM) += hwbm.o obj-$(CONFIG_HWBM) += hwbm.o
obj-$(CONFIG_NET_DEVLINK) += devlink.o obj-$(CONFIG_NET_DEVLINK) += devlink.o
obj-$(CONFIG_GRO_CELLS) += gro_cells.o obj-$(CONFIG_GRO_CELLS) += gro_cells.o
obj-$(CONFIG_FAILOVER) += failover.o obj-$(CONFIG_FAILOVER) += failover.o
ifeq ($(CONFIG_INET),y)
obj-$(CONFIG_NET_SOCK_MSG) += skmsg.o
obj-$(CONFIG_BPF_SYSCALL) += sock_map.o
endif
obj-$(CONFIG_BPF_SYSCALL) += bpf_sk_storage.o obj-$(CONFIG_BPF_SYSCALL) += bpf_sk_storage.o
...@@ -1863,10 +1863,7 @@ static const struct bpf_func_proto bpf_sk_fullsock_proto = { ...@@ -1863,10 +1863,7 @@ static const struct bpf_func_proto bpf_sk_fullsock_proto = {
static inline int sk_skb_try_make_writable(struct sk_buff *skb, static inline int sk_skb_try_make_writable(struct sk_buff *skb,
unsigned int write_len) unsigned int write_len)
{ {
int err = __bpf_try_make_writable(skb, write_len); return __bpf_try_make_writable(skb, write_len);
bpf_compute_data_end_sk_skb(skb);
return err;
} }
BPF_CALL_2(sk_skb_pull_data, struct sk_buff *, skb, u32, len) BPF_CALL_2(sk_skb_pull_data, struct sk_buff *, skb, u32, len)
...@@ -3577,7 +3574,6 @@ BPF_CALL_4(sk_skb_adjust_room, struct sk_buff *, skb, s32, len_diff, ...@@ -3577,7 +3574,6 @@ BPF_CALL_4(sk_skb_adjust_room, struct sk_buff *, skb, s32, len_diff,
return -ENOMEM; return -ENOMEM;
__skb_pull(skb, len_diff_abs); __skb_pull(skb, len_diff_abs);
} }
bpf_compute_data_end_sk_skb(skb);
if (tls_sw_has_ctx_rx(skb->sk)) { if (tls_sw_has_ctx_rx(skb->sk)) {
struct strp_msg *rxm = strp_msg(skb); struct strp_msg *rxm = strp_msg(skb);
...@@ -3742,10 +3738,7 @@ static const struct bpf_func_proto bpf_skb_change_tail_proto = { ...@@ -3742,10 +3738,7 @@ static const struct bpf_func_proto bpf_skb_change_tail_proto = {
BPF_CALL_3(sk_skb_change_tail, struct sk_buff *, skb, u32, new_len, BPF_CALL_3(sk_skb_change_tail, struct sk_buff *, skb, u32, new_len,
u64, flags) u64, flags)
{ {
int ret = __bpf_skb_change_tail(skb, new_len, flags); return __bpf_skb_change_tail(skb, new_len, flags);
bpf_compute_data_end_sk_skb(skb);
return ret;
} }
static const struct bpf_func_proto sk_skb_change_tail_proto = { static const struct bpf_func_proto sk_skb_change_tail_proto = {
...@@ -3808,10 +3801,7 @@ static const struct bpf_func_proto bpf_skb_change_head_proto = { ...@@ -3808,10 +3801,7 @@ static const struct bpf_func_proto bpf_skb_change_head_proto = {
BPF_CALL_3(sk_skb_change_head, struct sk_buff *, skb, u32, head_room, BPF_CALL_3(sk_skb_change_head, struct sk_buff *, skb, u32, head_room,
u64, flags) u64, flags)
{ {
int ret = __bpf_skb_change_head(skb, head_room, flags); return __bpf_skb_change_head(skb, head_room, flags);
bpf_compute_data_end_sk_skb(skb);
return ret;
} }
static const struct bpf_func_proto sk_skb_change_head_proto = { static const struct bpf_func_proto sk_skb_change_head_proto = {
...@@ -9655,22 +9645,40 @@ static u32 sock_ops_convert_ctx_access(enum bpf_access_type type, ...@@ -9655,22 +9645,40 @@ static u32 sock_ops_convert_ctx_access(enum bpf_access_type type,
return insn - insn_buf; return insn - insn_buf;
} }
/*
 * Emit the instructions that compute data_end for a context access:
 *
 *	data_end = skb->data + skb_headlen()
 *	         = skb->data + skb->len - skb->data_len
 *
 * @si:   the instruction being rewritten; the skb fields are loaded through
 *        si->src_reg and the result is left in si->dst_reg.
 * @insn: first free slot of the output instruction buffer.
 *
 * Five instructions are written, using BPF_REG_AX as the scratch register.
 * Returns the slot following the last instruction written.
 *
 * NOTE(review): the first load writes si->dst_reg while the later loads
 * still read through si->src_reg. If both name the same register, the skb
 * pointer would already be overwritten when len and data_len are fetched --
 * confirm whether such an instruction can reach this helper.
 */
static struct bpf_insn *bpf_convert_data_end_access(const struct bpf_insn *si,
						    struct bpf_insn *insn)
{
	/* dst = skb->data */
	insn[0] = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, data),
			      si->dst_reg, si->src_reg,
			      offsetof(struct sk_buff, data));

	/* AX = skb->len; dst = skb->data + skb->len */
	insn[1] = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, len),
			      BPF_REG_AX, si->src_reg,
			      offsetof(struct sk_buff, len));
	insn[2] = BPF_ALU64_REG(BPF_ADD, si->dst_reg, BPF_REG_AX);

	/* AX = skb->data_len; dst = skb->data + skb->len - skb->data_len */
	insn[3] = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, data_len),
			      BPF_REG_AX, si->src_reg,
			      offsetof(struct sk_buff, data_len));
	insn[4] = BPF_ALU64_REG(BPF_SUB, si->dst_reg, BPF_REG_AX);

	return insn + 5;
}
static u32 sk_skb_convert_ctx_access(enum bpf_access_type type, static u32 sk_skb_convert_ctx_access(enum bpf_access_type type,
const struct bpf_insn *si, const struct bpf_insn *si,
struct bpf_insn *insn_buf, struct bpf_insn *insn_buf,
struct bpf_prog *prog, u32 *target_size) struct bpf_prog *prog, u32 *target_size)
{ {
struct bpf_insn *insn = insn_buf; struct bpf_insn *insn = insn_buf;
int off;
switch (si->off) { switch (si->off) {
case offsetof(struct __sk_buff, data_end): case offsetof(struct __sk_buff, data_end):
off = si->off; insn = bpf_convert_data_end_access(si, insn);
off -= offsetof(struct __sk_buff, data_end);
off += offsetof(struct sk_buff, cb);
off += offsetof(struct tcp_skb_cb, bpf.data_end);
*insn++ = BPF_LDX_MEM(BPF_SIZEOF(void *), si->dst_reg,
si->src_reg, off);
break; break;
default: default:
return bpf_convert_ctx_access(type, si, insn_buf, prog, return bpf_convert_ctx_access(type, si, insn_buf, prog,
......
This diff is collapsed.
...@@ -24,6 +24,9 @@ struct bpf_stab { ...@@ -24,6 +24,9 @@ struct bpf_stab {
#define SOCK_CREATE_FLAG_MASK \ #define SOCK_CREATE_FLAG_MASK \
(BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY) (BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY)
static int sock_map_prog_update(struct bpf_map *map, struct bpf_prog *prog,
struct bpf_prog *old, u32 which);
static struct bpf_map *sock_map_alloc(union bpf_attr *attr) static struct bpf_map *sock_map_alloc(union bpf_attr *attr)
{ {
struct bpf_stab *stab; struct bpf_stab *stab;
...@@ -148,9 +151,9 @@ static void sock_map_del_link(struct sock *sk, ...@@ -148,9 +151,9 @@ static void sock_map_del_link(struct sock *sk,
struct bpf_map *map = link->map; struct bpf_map *map = link->map;
struct bpf_stab *stab = container_of(map, struct bpf_stab, struct bpf_stab *stab = container_of(map, struct bpf_stab,
map); map);
if (psock->parser.enabled && stab->progs.skb_parser) if (psock->saved_data_ready && stab->progs.stream_parser)
strp_stop = true; strp_stop = true;
if (psock->parser.enabled && stab->progs.skb_verdict) if (psock->saved_data_ready && stab->progs.stream_verdict)
verdict_stop = true; verdict_stop = true;
list_del(&link->list); list_del(&link->list);
sk_psock_free_link(link); sk_psock_free_link(link);
...@@ -224,23 +227,23 @@ static struct sk_psock *sock_map_psock_get_checked(struct sock *sk) ...@@ -224,23 +227,23 @@ static struct sk_psock *sock_map_psock_get_checked(struct sock *sk)
static int sock_map_link(struct bpf_map *map, struct sk_psock_progs *progs, static int sock_map_link(struct bpf_map *map, struct sk_psock_progs *progs,
struct sock *sk) struct sock *sk)
{ {
struct bpf_prog *msg_parser, *skb_parser, *skb_verdict; struct bpf_prog *msg_parser, *stream_parser, *stream_verdict;
struct sk_psock *psock; struct sk_psock *psock;
int ret; int ret;
skb_verdict = READ_ONCE(progs->skb_verdict); stream_verdict = READ_ONCE(progs->stream_verdict);
if (skb_verdict) { if (stream_verdict) {
skb_verdict = bpf_prog_inc_not_zero(skb_verdict); stream_verdict = bpf_prog_inc_not_zero(stream_verdict);
if (IS_ERR(skb_verdict)) if (IS_ERR(stream_verdict))
return PTR_ERR(skb_verdict); return PTR_ERR(stream_verdict);
} }
skb_parser = READ_ONCE(progs->skb_parser); stream_parser = READ_ONCE(progs->stream_parser);
if (skb_parser) { if (stream_parser) {
skb_parser = bpf_prog_inc_not_zero(skb_parser); stream_parser = bpf_prog_inc_not_zero(stream_parser);
if (IS_ERR(skb_parser)) { if (IS_ERR(stream_parser)) {
ret = PTR_ERR(skb_parser); ret = PTR_ERR(stream_parser);
goto out_put_skb_verdict; goto out_put_stream_verdict;
} }
} }
...@@ -249,7 +252,7 @@ static int sock_map_link(struct bpf_map *map, struct sk_psock_progs *progs, ...@@ -249,7 +252,7 @@ static int sock_map_link(struct bpf_map *map, struct sk_psock_progs *progs,
msg_parser = bpf_prog_inc_not_zero(msg_parser); msg_parser = bpf_prog_inc_not_zero(msg_parser);
if (IS_ERR(msg_parser)) { if (IS_ERR(msg_parser)) {
ret = PTR_ERR(msg_parser); ret = PTR_ERR(msg_parser);
goto out_put_skb_parser; goto out_put_stream_parser;
} }
} }
...@@ -261,8 +264,8 @@ static int sock_map_link(struct bpf_map *map, struct sk_psock_progs *progs, ...@@ -261,8 +264,8 @@ static int sock_map_link(struct bpf_map *map, struct sk_psock_progs *progs,
if (psock) { if (psock) {
if ((msg_parser && READ_ONCE(psock->progs.msg_parser)) || if ((msg_parser && READ_ONCE(psock->progs.msg_parser)) ||
(skb_parser && READ_ONCE(psock->progs.skb_parser)) || (stream_parser && READ_ONCE(psock->progs.stream_parser)) ||
(skb_verdict && READ_ONCE(psock->progs.skb_verdict))) { (stream_verdict && READ_ONCE(psock->progs.stream_verdict))) {
sk_psock_put(sk, psock); sk_psock_put(sk, psock);
ret = -EBUSY; ret = -EBUSY;
goto out_progs; goto out_progs;
...@@ -283,15 +286,15 @@ static int sock_map_link(struct bpf_map *map, struct sk_psock_progs *progs, ...@@ -283,15 +286,15 @@ static int sock_map_link(struct bpf_map *map, struct sk_psock_progs *progs,
goto out_drop; goto out_drop;
write_lock_bh(&sk->sk_callback_lock); write_lock_bh(&sk->sk_callback_lock);
if (skb_parser && skb_verdict && !psock->parser.enabled) { if (stream_parser && stream_verdict && !psock->saved_data_ready) {
ret = sk_psock_init_strp(sk, psock); ret = sk_psock_init_strp(sk, psock);
if (ret) if (ret)
goto out_unlock_drop; goto out_unlock_drop;
psock_set_prog(&psock->progs.skb_verdict, skb_verdict); psock_set_prog(&psock->progs.stream_verdict, stream_verdict);
psock_set_prog(&psock->progs.skb_parser, skb_parser); psock_set_prog(&psock->progs.stream_parser, stream_parser);
sk_psock_start_strp(sk, psock); sk_psock_start_strp(sk, psock);
} else if (!skb_parser && skb_verdict && !psock->parser.enabled) { } else if (!stream_parser && stream_verdict && !psock->saved_data_ready) {
psock_set_prog(&psock->progs.skb_verdict, skb_verdict); psock_set_prog(&psock->progs.stream_verdict, stream_verdict);
sk_psock_start_verdict(sk,psock); sk_psock_start_verdict(sk,psock);
} }
write_unlock_bh(&sk->sk_callback_lock); write_unlock_bh(&sk->sk_callback_lock);
...@@ -303,12 +306,12 @@ static int sock_map_link(struct bpf_map *map, struct sk_psock_progs *progs, ...@@ -303,12 +306,12 @@ static int sock_map_link(struct bpf_map *map, struct sk_psock_progs *progs,
out_progs: out_progs:
if (msg_parser) if (msg_parser)
bpf_prog_put(msg_parser); bpf_prog_put(msg_parser);
out_put_skb_parser: out_put_stream_parser:
if (skb_parser) if (stream_parser)
bpf_prog_put(skb_parser); bpf_prog_put(stream_parser);
out_put_skb_verdict: out_put_stream_verdict:
if (skb_verdict) if (stream_verdict)
bpf_prog_put(skb_verdict); bpf_prog_put(stream_verdict);
return ret; return ret;
} }
...@@ -657,7 +660,6 @@ const struct bpf_func_proto bpf_sock_map_update_proto = { ...@@ -657,7 +660,6 @@ const struct bpf_func_proto bpf_sock_map_update_proto = {
BPF_CALL_4(bpf_sk_redirect_map, struct sk_buff *, skb, BPF_CALL_4(bpf_sk_redirect_map, struct sk_buff *, skb,
struct bpf_map *, map, u32, key, u64, flags) struct bpf_map *, map, u32, key, u64, flags)
{ {
struct tcp_skb_cb *tcb = TCP_SKB_CB(skb);
struct sock *sk; struct sock *sk;
if (unlikely(flags & ~(BPF_F_INGRESS))) if (unlikely(flags & ~(BPF_F_INGRESS)))
...@@ -667,8 +669,7 @@ BPF_CALL_4(bpf_sk_redirect_map, struct sk_buff *, skb, ...@@ -667,8 +669,7 @@ BPF_CALL_4(bpf_sk_redirect_map, struct sk_buff *, skb,
if (unlikely(!sk || !sock_map_redirect_allowed(sk))) if (unlikely(!sk || !sock_map_redirect_allowed(sk)))
return SK_DROP; return SK_DROP;
tcb->bpf.flags = flags; skb_bpf_set_redir(skb, sk, flags & BPF_F_INGRESS);
tcb->bpf.sk_redir = sk;
return SK_PASS; return SK_PASS;
} }
...@@ -1250,7 +1251,6 @@ const struct bpf_func_proto bpf_sock_hash_update_proto = { ...@@ -1250,7 +1251,6 @@ const struct bpf_func_proto bpf_sock_hash_update_proto = {
BPF_CALL_4(bpf_sk_redirect_hash, struct sk_buff *, skb, BPF_CALL_4(bpf_sk_redirect_hash, struct sk_buff *, skb,
struct bpf_map *, map, void *, key, u64, flags) struct bpf_map *, map, void *, key, u64, flags)
{ {
struct tcp_skb_cb *tcb = TCP_SKB_CB(skb);
struct sock *sk; struct sock *sk;
if (unlikely(flags & ~(BPF_F_INGRESS))) if (unlikely(flags & ~(BPF_F_INGRESS)))
...@@ -1260,8 +1260,7 @@ BPF_CALL_4(bpf_sk_redirect_hash, struct sk_buff *, skb, ...@@ -1260,8 +1260,7 @@ BPF_CALL_4(bpf_sk_redirect_hash, struct sk_buff *, skb,
if (unlikely(!sk || !sock_map_redirect_allowed(sk))) if (unlikely(!sk || !sock_map_redirect_allowed(sk)))
return SK_DROP; return SK_DROP;
tcb->bpf.flags = flags; skb_bpf_set_redir(skb, sk, flags & BPF_F_INGRESS);
tcb->bpf.sk_redir = sk;
return SK_PASS; return SK_PASS;
} }
...@@ -1448,8 +1447,8 @@ static struct sk_psock_progs *sock_map_progs(struct bpf_map *map) ...@@ -1448,8 +1447,8 @@ static struct sk_psock_progs *sock_map_progs(struct bpf_map *map)
return NULL; return NULL;
} }
int sock_map_prog_update(struct bpf_map *map, struct bpf_prog *prog, static int sock_map_prog_update(struct bpf_map *map, struct bpf_prog *prog,
struct bpf_prog *old, u32 which) struct bpf_prog *old, u32 which)
{ {
struct sk_psock_progs *progs = sock_map_progs(map); struct sk_psock_progs *progs = sock_map_progs(map);
struct bpf_prog **pprog; struct bpf_prog **pprog;
...@@ -1461,11 +1460,13 @@ int sock_map_prog_update(struct bpf_map *map, struct bpf_prog *prog, ...@@ -1461,11 +1460,13 @@ int sock_map_prog_update(struct bpf_map *map, struct bpf_prog *prog,
case BPF_SK_MSG_VERDICT: case BPF_SK_MSG_VERDICT:
pprog = &progs->msg_parser; pprog = &progs->msg_parser;
break; break;
#if IS_ENABLED(CONFIG_BPF_STREAM_PARSER)
case BPF_SK_SKB_STREAM_PARSER: case BPF_SK_SKB_STREAM_PARSER:
pprog = &progs->skb_parser; pprog = &progs->stream_parser;
break; break;
#endif
case BPF_SK_SKB_STREAM_VERDICT: case BPF_SK_SKB_STREAM_VERDICT:
pprog = &progs->skb_verdict; pprog = &progs->stream_verdict;
break; break;
default: default:
return -EOPNOTSUPP; return -EOPNOTSUPP;
......
...@@ -62,7 +62,7 @@ obj-$(CONFIG_TCP_CONG_LP) += tcp_lp.o ...@@ -62,7 +62,7 @@ obj-$(CONFIG_TCP_CONG_LP) += tcp_lp.o
obj-$(CONFIG_TCP_CONG_YEAH) += tcp_yeah.o obj-$(CONFIG_TCP_CONG_YEAH) += tcp_yeah.o
obj-$(CONFIG_TCP_CONG_ILLINOIS) += tcp_illinois.o obj-$(CONFIG_TCP_CONG_ILLINOIS) += tcp_illinois.o
obj-$(CONFIG_NET_SOCK_MSG) += tcp_bpf.o obj-$(CONFIG_NET_SOCK_MSG) += tcp_bpf.o
obj-$(CONFIG_BPF_STREAM_PARSER) += udp_bpf.o obj-$(CONFIG_BPF_SYSCALL) += udp_bpf.o
obj-$(CONFIG_NETLABEL) += cipso_ipv4.o obj-$(CONFIG_NETLABEL) += cipso_ipv4.o
obj-$(CONFIG_XFRM) += xfrm4_policy.o xfrm4_state.o xfrm4_input.o \ obj-$(CONFIG_XFRM) += xfrm4_policy.o xfrm4_state.o xfrm4_input.o \
......
...@@ -229,7 +229,7 @@ int tcp_bpf_sendmsg_redir(struct sock *sk, struct sk_msg *msg, ...@@ -229,7 +229,7 @@ int tcp_bpf_sendmsg_redir(struct sock *sk, struct sk_msg *msg,
} }
EXPORT_SYMBOL_GPL(tcp_bpf_sendmsg_redir); EXPORT_SYMBOL_GPL(tcp_bpf_sendmsg_redir);
#ifdef CONFIG_BPF_STREAM_PARSER #ifdef CONFIG_BPF_SYSCALL
static bool tcp_bpf_stream_read(const struct sock *sk) static bool tcp_bpf_stream_read(const struct sock *sk)
{ {
struct sk_psock *psock; struct sk_psock *psock;
...@@ -629,4 +629,4 @@ void tcp_bpf_clone(const struct sock *sk, struct sock *newsk) ...@@ -629,4 +629,4 @@ void tcp_bpf_clone(const struct sock *sk, struct sock *newsk)
if (prot == &tcp_bpf_prots[family][TCP_BPF_BASE]) if (prot == &tcp_bpf_prots[family][TCP_BPF_BASE])
newsk->sk_prot = sk->sk_prot_creator; newsk->sk_prot = sk->sk_prot_creator;
} }
#endif /* CONFIG_BPF_STREAM_PARSER */ #endif /* CONFIG_BPF_SYSCALL */
...@@ -1014,8 +1014,8 @@ static void test_skb_redir_to_connected(struct test_sockmap_listen *skel, ...@@ -1014,8 +1014,8 @@ static void test_skb_redir_to_connected(struct test_sockmap_listen *skel,
struct bpf_map *inner_map, int family, struct bpf_map *inner_map, int family,
int sotype) int sotype)
{ {
int verdict = bpf_program__fd(skel->progs.prog_skb_verdict); int verdict = bpf_program__fd(skel->progs.prog_stream_verdict);
int parser = bpf_program__fd(skel->progs.prog_skb_parser); int parser = bpf_program__fd(skel->progs.prog_stream_parser);
int verdict_map = bpf_map__fd(skel->maps.verdict_map); int verdict_map = bpf_map__fd(skel->maps.verdict_map);
int sock_map = bpf_map__fd(inner_map); int sock_map = bpf_map__fd(inner_map);
int err; int err;
...@@ -1125,8 +1125,8 @@ static void test_skb_redir_to_listening(struct test_sockmap_listen *skel, ...@@ -1125,8 +1125,8 @@ static void test_skb_redir_to_listening(struct test_sockmap_listen *skel,
struct bpf_map *inner_map, int family, struct bpf_map *inner_map, int family,
int sotype) int sotype)
{ {
int verdict = bpf_program__fd(skel->progs.prog_skb_verdict); int verdict = bpf_program__fd(skel->progs.prog_stream_verdict);
int parser = bpf_program__fd(skel->progs.prog_skb_parser); int parser = bpf_program__fd(skel->progs.prog_stream_parser);
int verdict_map = bpf_map__fd(skel->maps.verdict_map); int verdict_map = bpf_map__fd(skel->maps.verdict_map);
int sock_map = bpf_map__fd(inner_map); int sock_map = bpf_map__fd(inner_map);
int err; int err;
......
...@@ -31,13 +31,13 @@ struct { ...@@ -31,13 +31,13 @@ struct {
static volatile bool test_sockmap; /* toggled by user-space */ static volatile bool test_sockmap; /* toggled by user-space */
SEC("sk_skb/stream_parser") SEC("sk_skb/stream_parser")
int prog_skb_parser(struct __sk_buff *skb) int prog_stream_parser(struct __sk_buff *skb)
{ {
return skb->len; return skb->len;
} }
SEC("sk_skb/stream_verdict") SEC("sk_skb/stream_verdict")
int prog_skb_verdict(struct __sk_buff *skb) int prog_stream_verdict(struct __sk_buff *skb)
{ {
unsigned int *count; unsigned int *count;
__u32 zero = 0; __u32 zero = 0;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment