Commit 0c615f1c authored by Jakub Kicinski

Merge tag 'for-netdev' of https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf

Daniel Borkmann says:

====================
pull-request: bpf 2023-05-24

We've added 19 non-merge commits during the last 10 day(s) which contain
a total of 20 files changed, 738 insertions(+), 448 deletions(-).

The main changes are:

1) Batch of BPF sockmap fixes found when running against NGINX TCP tests,
   from John Fastabend.

2) Fix a memleak in the LRU{,_PERCPU} hash map when bucket locking fails,
   from Anton Protopopov.

3) Init the BPF offload table earlier than just late_initcall,
   from Jakub Kicinski.

4) Fix ctx access mask generation for 32-bit narrow loads of 64-bit fields,
   from Will Deacon.

5) Remove a now unsupported __fallthrough in BPF samples,
   from Andrii Nakryiko.

6) Fix a typo in pkg-config call for building sign-file,
   from Jeremy Sowden.

* tag 'for-netdev' of https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf:
  bpf, sockmap: Test progs verifier error with latest clang
  bpf, sockmap: Test FIONREAD returns correct bytes in rx buffer with drops
  bpf, sockmap: Test FIONREAD returns correct bytes in rx buffer
  bpf, sockmap: Test shutdown() correctly exits epoll and recv()=0
  bpf, sockmap: Build helper to create connected socket pair
  bpf, sockmap: Pull socket helpers out of listen test for general use
  bpf, sockmap: Incorrectly handling copied_seq
  bpf, sockmap: Wake up polling after data copy
  bpf, sockmap: TCP data stall on recv before accept
  bpf, sockmap: Handle fin correctly
  bpf, sockmap: Improved check for empty queue
  bpf, sockmap: Reschedule is now done through backlog
  bpf, sockmap: Convert schedule_work into delayed_work
  bpf, sockmap: Pass skb ownership through read_skb
  bpf: fix a memory leak in the LRU and LRU_PERCPU hash maps
  bpf: Fix mask generation for 32-bit narrow loads of 64-bit fields
  samples/bpf: Drop unnecessary fallthrough
  bpf: netdev: init the offload table earlier
  selftests/bpf: Fix pkg-config call building sign-file
====================

Link: https://lore.kernel.org/r/20230524170839.13905-1-daniel@iogearbox.net
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
parents 878ecb08 f726e035
......@@ -71,7 +71,6 @@ struct sk_psock_link {
};
struct sk_psock_work_state {
struct sk_buff *skb;
u32 len;
u32 off;
};
......@@ -105,7 +104,7 @@ struct sk_psock {
struct proto *sk_proto;
struct mutex work_mutex;
struct sk_psock_work_state work_state;
struct work_struct work;
struct delayed_work work;
struct rcu_work rwork;
};
......
......@@ -1470,6 +1470,8 @@ static inline void tcp_adjust_rcv_ssthresh(struct sock *sk)
}
void tcp_cleanup_rbuf(struct sock *sk, int copied);
void __tcp_cleanup_rbuf(struct sock *sk, int copied);
/* We provision sk_rcvbuf around 200% of sk_rcvlowat.
* If 87.5 % (7/8) of the space has been consumed, we want to override
......@@ -2326,6 +2328,14 @@ int tcp_bpf_update_proto(struct sock *sk, struct sk_psock *psock, bool restore);
void tcp_bpf_clone(const struct sock *sk, struct sock *newsk);
#endif /* CONFIG_BPF_SYSCALL */
#ifdef CONFIG_INET
void tcp_eat_skb(struct sock *sk, struct sk_buff *skb);
#else
static inline void tcp_eat_skb(struct sock *sk, struct sk_buff *skb)
{
}
#endif
int tcp_bpf_sendmsg_redir(struct sock *sk, bool ingress,
struct sk_msg *msg, u32 bytes, int flags);
#endif /* CONFIG_NET_SOCK_MSG */
......
......@@ -1215,7 +1215,7 @@ static long htab_lru_map_update_elem(struct bpf_map *map, void *key, void *value
ret = htab_lock_bucket(htab, b, hash, &flags);
if (ret)
return ret;
goto err_lock_bucket;
l_old = lookup_elem_raw(head, hash, key, key_size);
......@@ -1236,6 +1236,7 @@ static long htab_lru_map_update_elem(struct bpf_map *map, void *key, void *value
err:
htab_unlock_bucket(htab, b, hash, flags);
err_lock_bucket:
if (ret)
htab_lru_push_free(htab, l_new);
else if (l_old)
......@@ -1338,7 +1339,7 @@ static long __htab_lru_percpu_map_update_elem(struct bpf_map *map, void *key,
ret = htab_lock_bucket(htab, b, hash, &flags);
if (ret)
return ret;
goto err_lock_bucket;
l_old = lookup_elem_raw(head, hash, key, key_size);
......@@ -1361,6 +1362,7 @@ static long __htab_lru_percpu_map_update_elem(struct bpf_map *map, void *key,
ret = 0;
err:
htab_unlock_bucket(htab, b, hash, flags);
err_lock_bucket:
if (l_new)
bpf_lru_push_free(&htab->lru, &l_new->lru_node);
return ret;
......
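The hashtab.c hunks above restore the free path for the preallocated LRU node when htab_lock_bucket() fails. A minimal userspace sketch of the same allocate/try-lock/free-on-failure pattern, with hypothetical names standing in for the kernel helpers:

#include <errno.h>
#include <pthread.h>
#include <stdlib.h>

static pthread_mutex_t bucket = PTHREAD_MUTEX_INITIALIZER;
static void *table_slot;

static int update_elem(void)
{
	void *l_new = malloc(64);              /* stand-in for the prealloc'd LRU node */
	int ret;

	if (!l_new)
		return -ENOMEM;

	ret = pthread_mutex_trylock(&bucket);  /* stand-in for htab_lock_bucket() */
	if (ret)
		goto err_lock_bucket;          /* returning here directly would leak l_new */

	table_slot = l_new;                    /* on success the node is published into the table */
	pthread_mutex_unlock(&bucket);
err_lock_bucket:
	if (ret)
		free(l_new);                   /* give the node back, as the fix does */
	return ret;
}

int main(void)
{
	return update_elem();
}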
......@@ -859,4 +859,4 @@ static int __init bpf_offload_init(void)
return rhashtable_init(&offdevs, &offdevs_params);
}
late_initcall(bpf_offload_init);
core_initcall(bpf_offload_init);
......@@ -17033,7 +17033,7 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env)
insn_buf[cnt++] = BPF_ALU64_IMM(BPF_RSH,
insn->dst_reg,
shift);
insn_buf[cnt++] = BPF_ALU64_IMM(BPF_AND, insn->dst_reg,
insn_buf[cnt++] = BPF_ALU32_IMM(BPF_AND, insn->dst_reg,
(1ULL << size * 8) - 1);
}
}
......
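For the narrow-load fix above, the problem is that a 4-byte mask of 0xffffffff stored in the instruction's 32-bit immediate becomes -1 and is sign-extended by the 64-bit BPF_AND, turning the mask into a no-op, whereas a 32-bit BPF_AND zero-extends its result. A small standalone C demonstration of that arithmetic (the preceding right shift is elided and the variable names are illustrative only):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t field = 0xaabbccdd11223344ULL;          /* 64-bit ctx field */
	uint32_t size = 4;                               /* 4-byte narrow load */
	int32_t imm = (int32_t)((1ULL << size * 8) - 1); /* 0xffffffff stored as -1 */

	/* BPF_ALU64_IMM(BPF_AND, ...): imm is sign-extended to 64 bits -> all ones */
	uint64_t broken = field & (uint64_t)(int64_t)imm;

	/* BPF_ALU32_IMM(BPF_AND, ...): 32-bit AND, result zero-extended to 64 bits */
	uint64_t fixed = (uint64_t)((uint32_t)field & (uint32_t)imm);

	printf("broken mask: %#llx\nfixed mask:  %#llx\n",
	       (unsigned long long)broken, (unsigned long long)fixed);
	return 0;
}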
......@@ -481,8 +481,6 @@ int sk_msg_recvmsg(struct sock *sk, struct sk_psock *psock, struct msghdr *msg,
msg_rx = sk_psock_peek_msg(psock);
}
out:
if (psock->work_state.skb && copied > 0)
schedule_work(&psock->work);
return copied;
}
EXPORT_SYMBOL_GPL(sk_msg_recvmsg);
......@@ -624,42 +622,33 @@ static int sk_psock_handle_skb(struct sk_psock *psock, struct sk_buff *skb,
static void sk_psock_skb_state(struct sk_psock *psock,
struct sk_psock_work_state *state,
struct sk_buff *skb,
int len, int off)
{
spin_lock_bh(&psock->ingress_lock);
if (sk_psock_test_state(psock, SK_PSOCK_TX_ENABLED)) {
state->skb = skb;
state->len = len;
state->off = off;
} else {
sock_drop(psock->sk, skb);
}
spin_unlock_bh(&psock->ingress_lock);
}
static void sk_psock_backlog(struct work_struct *work)
{
struct sk_psock *psock = container_of(work, struct sk_psock, work);
struct delayed_work *dwork = to_delayed_work(work);
struct sk_psock *psock = container_of(dwork, struct sk_psock, work);
struct sk_psock_work_state *state = &psock->work_state;
struct sk_buff *skb = NULL;
u32 len = 0, off = 0;
bool ingress;
u32 len, off;
int ret;
mutex_lock(&psock->work_mutex);
if (unlikely(state->skb)) {
spin_lock_bh(&psock->ingress_lock);
skb = state->skb;
if (unlikely(state->len)) {
len = state->len;
off = state->off;
state->skb = NULL;
spin_unlock_bh(&psock->ingress_lock);
}
if (skb)
goto start;
while ((skb = skb_dequeue(&psock->ingress_skb))) {
while ((skb = skb_peek(&psock->ingress_skb))) {
len = skb->len;
off = 0;
if (skb_bpf_strparser(skb)) {
......@@ -668,7 +657,6 @@ static void sk_psock_backlog(struct work_struct *work)
off = stm->offset;
len = stm->full_len;
}
start:
ingress = skb_bpf_ingress(skb);
skb_bpf_redirect_clear(skb);
do {
......@@ -678,22 +666,28 @@ static void sk_psock_backlog(struct work_struct *work)
len, ingress);
if (ret <= 0) {
if (ret == -EAGAIN) {
sk_psock_skb_state(psock, state, skb,
len, off);
sk_psock_skb_state(psock, state, len, off);
/* Delay slightly to prioritize any
* other work that might be here.
*/
if (sk_psock_test_state(psock, SK_PSOCK_TX_ENABLED))
schedule_delayed_work(&psock->work, 1);
goto end;
}
/* Hard errors break pipe and stop xmit. */
sk_psock_report_error(psock, ret ? -ret : EPIPE);
sk_psock_clear_state(psock, SK_PSOCK_TX_ENABLED);
sock_drop(psock->sk, skb);
goto end;
}
off += ret;
len -= ret;
} while (len);
if (!ingress)
skb = skb_dequeue(&psock->ingress_skb);
if (!ingress) {
kfree_skb(skb);
}
}
end:
mutex_unlock(&psock->work_mutex);
......@@ -734,7 +728,7 @@ struct sk_psock *sk_psock_init(struct sock *sk, int node)
INIT_LIST_HEAD(&psock->link);
spin_lock_init(&psock->link_lock);
INIT_WORK(&psock->work, sk_psock_backlog);
INIT_DELAYED_WORK(&psock->work, sk_psock_backlog);
mutex_init(&psock->work_mutex);
INIT_LIST_HEAD(&psock->ingress_msg);
spin_lock_init(&psock->ingress_lock);
......@@ -786,11 +780,6 @@ static void __sk_psock_zap_ingress(struct sk_psock *psock)
skb_bpf_redirect_clear(skb);
sock_drop(psock->sk, skb);
}
kfree_skb(psock->work_state.skb);
/* We null the skb here to ensure that calls to sk_psock_backlog
* do not pick up the free'd skb.
*/
psock->work_state.skb = NULL;
__sk_psock_purge_ingress_msg(psock);
}
......@@ -809,7 +798,6 @@ void sk_psock_stop(struct sk_psock *psock)
spin_lock_bh(&psock->ingress_lock);
sk_psock_clear_state(psock, SK_PSOCK_TX_ENABLED);
sk_psock_cork_free(psock);
__sk_psock_zap_ingress(psock);
spin_unlock_bh(&psock->ingress_lock);
}
......@@ -823,7 +811,8 @@ static void sk_psock_destroy(struct work_struct *work)
sk_psock_done_strp(psock);
cancel_work_sync(&psock->work);
cancel_delayed_work_sync(&psock->work);
__sk_psock_zap_ingress(psock);
mutex_destroy(&psock->work_mutex);
psock_progs_drop(&psock->progs);
......@@ -938,7 +927,7 @@ static int sk_psock_skb_redirect(struct sk_psock *from, struct sk_buff *skb)
}
skb_queue_tail(&psock_other->ingress_skb, skb);
schedule_work(&psock_other->work);
schedule_delayed_work(&psock_other->work, 0);
spin_unlock_bh(&psock_other->ingress_lock);
return 0;
}
......@@ -990,10 +979,8 @@ static int sk_psock_verdict_apply(struct sk_psock *psock, struct sk_buff *skb,
err = -EIO;
sk_other = psock->sk;
if (sock_flag(sk_other, SOCK_DEAD) ||
!sk_psock_test_state(psock, SK_PSOCK_TX_ENABLED)) {
skb_bpf_redirect_clear(skb);
!sk_psock_test_state(psock, SK_PSOCK_TX_ENABLED))
goto out_free;
}
skb_bpf_set_ingress(skb);
......@@ -1018,22 +1005,23 @@ static int sk_psock_verdict_apply(struct sk_psock *psock, struct sk_buff *skb,
spin_lock_bh(&psock->ingress_lock);
if (sk_psock_test_state(psock, SK_PSOCK_TX_ENABLED)) {
skb_queue_tail(&psock->ingress_skb, skb);
schedule_work(&psock->work);
schedule_delayed_work(&psock->work, 0);
err = 0;
}
spin_unlock_bh(&psock->ingress_lock);
if (err < 0) {
skb_bpf_redirect_clear(skb);
if (err < 0)
goto out_free;
}
}
break;
case __SK_REDIRECT:
tcp_eat_skb(psock->sk, skb);
err = sk_psock_skb_redirect(psock, skb);
break;
case __SK_DROP:
default:
out_free:
skb_bpf_redirect_clear(skb);
tcp_eat_skb(psock->sk, skb);
sock_drop(psock->sk, skb);
}
......@@ -1049,7 +1037,7 @@ static void sk_psock_write_space(struct sock *sk)
psock = sk_psock(sk);
if (likely(psock)) {
if (sk_psock_test_state(psock, SK_PSOCK_TX_ENABLED))
schedule_work(&psock->work);
schedule_delayed_work(&psock->work, 0);
write_space = psock->saved_write_space;
}
rcu_read_unlock();
......@@ -1078,8 +1066,7 @@ static void sk_psock_strp_read(struct strparser *strp, struct sk_buff *skb)
skb_dst_drop(skb);
skb_bpf_redirect_clear(skb);
ret = bpf_prog_run_pin_on_cpu(prog, skb);
if (ret == SK_PASS)
skb_bpf_set_strparser(skb);
skb_bpf_set_strparser(skb);
ret = sk_psock_map_verd(ret, skb_bpf_redirect_fetch(skb));
skb->sk = NULL;
}
......@@ -1183,12 +1170,11 @@ static int sk_psock_verdict_recv(struct sock *sk, struct sk_buff *skb)
int ret = __SK_DROP;
int len = skb->len;
skb_get(skb);
rcu_read_lock();
psock = sk_psock(sk);
if (unlikely(!psock)) {
len = 0;
tcp_eat_skb(sk, skb);
sock_drop(sk, skb);
goto out;
}
......@@ -1212,12 +1198,21 @@ static int sk_psock_verdict_recv(struct sock *sk, struct sk_buff *skb)
static void sk_psock_verdict_data_ready(struct sock *sk)
{
struct socket *sock = sk->sk_socket;
int copied;
trace_sk_data_ready(sk);
if (unlikely(!sock || !sock->ops || !sock->ops->read_skb))
return;
sock->ops->read_skb(sk, sk_psock_verdict_recv);
copied = sock->ops->read_skb(sk, sk_psock_verdict_recv);
if (copied >= 0) {
struct sk_psock *psock;
rcu_read_lock();
psock = sk_psock(sk);
psock->saved_data_ready(sk);
rcu_read_unlock();
}
}
void sk_psock_start_verdict(struct sock *sk, struct sk_psock *psock)
......
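Several of the skmsg.c hunks above change sk_psock_backlog() to peek the head of ingress_skb rather than dequeue it, and to retry via delayed work instead of stashing a dequeued skb in work_state. A toy userspace sketch of that peek/retry/dequeue-on-success shape, with hypothetical names and the rescheduling reduced to a plain loop:

#include <errno.h>
#include <stdio.h>

struct item { int len; };

static struct item queue[] = { {10}, {20}, {30} };
static int head, tail = 3;

static struct item *peek(void) { return head < tail ? &queue[head] : NULL; }
static void dequeue(void)      { head++; }

/* stand-in for sk_psock_handle_skb(); the first attempt hits a transient -EAGAIN */
static int try_send(struct item *it)
{
	static int first_try = 1;

	if (first_try) {
		first_try = 0;
		return -EAGAIN;
	}
	return it->len;
}

int main(void)
{
	struct item *it;

retry:
	while ((it = peek())) {
		int ret = try_send(it);

		if (ret == -EAGAIN)
			goto retry;   /* real code: schedule_delayed_work(&psock->work, 1) and bail out */
		dequeue();            /* the head is only consumed once it was fully handled */
		printf("sent %d bytes\n", ret);
	}
	return 0;
}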
......@@ -1644,9 +1644,10 @@ void sock_map_close(struct sock *sk, long timeout)
rcu_read_unlock();
sk_psock_stop(psock);
release_sock(sk);
cancel_work_sync(&psock->work);
cancel_delayed_work_sync(&psock->work);
sk_psock_put(sk, psock);
}
/* Make sure we do not recurse. This is a bug.
* Leak the socket instead of crashing on a stack overflow.
*/
......
......@@ -1571,7 +1571,7 @@ static int tcp_peek_sndq(struct sock *sk, struct msghdr *msg, int len)
* calculation of whether or not we must ACK for the sake of
* a window update.
*/
static void __tcp_cleanup_rbuf(struct sock *sk, int copied)
void __tcp_cleanup_rbuf(struct sock *sk, int copied)
{
struct tcp_sock *tp = tcp_sk(sk);
bool time_to_ack = false;
......@@ -1773,7 +1773,6 @@ int tcp_read_skb(struct sock *sk, skb_read_actor_t recv_actor)
WARN_ON_ONCE(!skb_set_owner_sk_safe(skb, sk));
tcp_flags = TCP_SKB_CB(skb)->tcp_flags;
used = recv_actor(sk, skb);
consume_skb(skb);
if (used < 0) {
if (!copied)
copied = used;
......@@ -1787,14 +1786,6 @@ int tcp_read_skb(struct sock *sk, skb_read_actor_t recv_actor)
break;
}
}
WRITE_ONCE(tp->copied_seq, seq);
tcp_rcv_space_adjust(sk);
/* Clean up data we have read: This will do ACK frames. */
if (copied > 0)
__tcp_cleanup_rbuf(sk, copied);
return copied;
}
EXPORT_SYMBOL(tcp_read_skb);
......
......@@ -11,6 +11,24 @@
#include <net/inet_common.h>
#include <net/tls.h>
void tcp_eat_skb(struct sock *sk, struct sk_buff *skb)
{
struct tcp_sock *tcp;
int copied;
if (!skb || !skb->len || !sk_is_tcp(sk))
return;
if (skb_bpf_strparser(skb))
return;
tcp = tcp_sk(sk);
copied = tcp->copied_seq + skb->len;
WRITE_ONCE(tcp->copied_seq, copied);
tcp_rcv_space_adjust(sk);
__tcp_cleanup_rbuf(sk, skb->len);
}
static int bpf_tcp_ingress(struct sock *sk, struct sk_psock *psock,
struct sk_msg *msg, u32 apply_bytes, int flags)
{
......@@ -174,14 +192,34 @@ static int tcp_msg_wait_data(struct sock *sk, struct sk_psock *psock,
return ret;
}
static bool is_next_msg_fin(struct sk_psock *psock)
{
struct scatterlist *sge;
struct sk_msg *msg_rx;
int i;
msg_rx = sk_psock_peek_msg(psock);
i = msg_rx->sg.start;
sge = sk_msg_elem(msg_rx, i);
if (!sge->length) {
struct sk_buff *skb = msg_rx->skb;
if (skb && TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
return true;
}
return false;
}
static int tcp_bpf_recvmsg_parser(struct sock *sk,
struct msghdr *msg,
size_t len,
int flags,
int *addr_len)
{
struct tcp_sock *tcp = tcp_sk(sk);
u32 seq = tcp->copied_seq;
struct sk_psock *psock;
int copied;
int copied = 0;
if (unlikely(flags & MSG_ERRQUEUE))
return inet_recv_error(sk, msg, len, addr_len);
......@@ -194,8 +232,43 @@ static int tcp_bpf_recvmsg_parser(struct sock *sk,
return tcp_recvmsg(sk, msg, len, flags, addr_len);
lock_sock(sk);
/* We may have received data on the sk_receive_queue pre-accept and
* then we can not use read_skb in this context because we haven't
* assigned a sk_socket yet so have no link to the ops. The work-around
* is to check the sk_receive_queue and in these cases read skbs off
* queue again. The read_skb hook is not running at this point because
* of lock_sock so we avoid having multiple runners in read_skb.
*/
if (unlikely(!skb_queue_empty(&sk->sk_receive_queue))) {
tcp_data_ready(sk);
/* This handles the ENOMEM errors if we both receive data
* pre accept and are already under memory pressure. At least
* let user know to retry.
*/
if (unlikely(!skb_queue_empty(&sk->sk_receive_queue))) {
copied = -EAGAIN;
goto out;
}
}
msg_bytes_ready:
copied = sk_msg_recvmsg(sk, psock, msg, len, flags);
/* The typical case for EFAULT is the socket was gracefully
* shutdown with a FIN pkt. So check here the other case is
* some error on copy_page_to_iter which would be unexpected.
* On fin return correct return code to zero.
*/
if (copied == -EFAULT) {
bool is_fin = is_next_msg_fin(psock);
if (is_fin) {
copied = 0;
seq++;
goto out;
}
}
seq += copied;
if (!copied) {
long timeo;
int data;
......@@ -233,6 +306,10 @@ static int tcp_bpf_recvmsg_parser(struct sock *sk,
copied = -EAGAIN;
}
out:
WRITE_ONCE(tcp->copied_seq, seq);
tcp_rcv_space_adjust(sk);
if (copied > 0)
__tcp_cleanup_rbuf(sk, copied);
release_sock(sk);
sk_psock_put(sk, psock);
return copied;
......
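The tcp.c and tcp_bpf.c hunks above move the copied_seq and receive-buffer accounting out of tcp_read_skb() and into tcp_eat_skb() and the recvmsg path, so the counter only advances once data is actually consumed. Toy arithmetic (not kernel code) showing why this matters for FIONREAD, which on TCP reports rcv_nxt - copied_seq:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t rcv_nxt = 1000, copied_seq = 900;  /* 100 bytes sitting in the rx path */

	printf("FIONREAD before any read: %u\n", rcv_nxt - copied_seq);   /* 100 */

	/* old behaviour: tcp_read_skb() advanced copied_seq even though the data
	 * only moved onto the psock ingress queue and never reached userspace */
	uint32_t eager = copied_seq + 100;
	printf("FIONREAD with eager advance: %u\n", rcv_nxt - eager);     /* 0 */

	/* fixed behaviour: copied_seq advances from recvmsg (or from tcp_eat_skb()
	 * when the verdict drops/redirects the data), so FIONREAD matches what
	 * recv() can still return */
	return 0;
}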
......@@ -1818,7 +1818,7 @@ EXPORT_SYMBOL(__skb_recv_udp);
int udp_read_skb(struct sock *sk, skb_read_actor_t recv_actor)
{
struct sk_buff *skb;
int err, copied;
int err;
try_again:
skb = skb_recv_udp(sk, MSG_DONTWAIT, &err);
......@@ -1837,10 +1837,7 @@ int udp_read_skb(struct sock *sk, skb_read_actor_t recv_actor)
}
WARN_ON_ONCE(!skb_set_owner_sk_safe(skb, sk));
copied = recv_actor(sk, skb);
kfree_skb(skb);
return copied;
return recv_actor(sk, skb);
}
EXPORT_SYMBOL(udp_read_skb);
......
......@@ -2553,7 +2553,7 @@ static int unix_read_skb(struct sock *sk, skb_read_actor_t recv_actor)
{
struct unix_sock *u = unix_sk(sk);
struct sk_buff *skb;
int err, copied;
int err;
mutex_lock(&u->iolock);
skb = skb_recv_datagram(sk, MSG_DONTWAIT, &err);
......@@ -2561,10 +2561,7 @@ static int unix_read_skb(struct sock *sk, skb_read_actor_t recv_actor)
if (!skb)
return err;
copied = recv_actor(sk, skb);
kfree_skb(skb);
return copied;
return recv_actor(sk, skb);
}
/*
......
......@@ -1441,7 +1441,6 @@ int virtio_transport_read_skb(struct vsock_sock *vsk, skb_read_actor_t recv_acto
struct sock *sk = sk_vsock(vsk);
struct sk_buff *skb;
int off = 0;
int copied;
int err;
spin_lock_bh(&vvs->rx_lock);
......@@ -1454,9 +1453,7 @@ int virtio_transport_read_skb(struct vsock_sock *vsk, skb_read_actor_t recv_acto
if (!skb)
return err;
copied = recv_actor(sk, skb);
kfree_skb(skb);
return copied;
return recv_actor(sk, skb);
}
EXPORT_SYMBOL_GPL(virtio_transport_read_skb);
......
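The tcp_read_skb(), udp_read_skb(), unix_read_skb() and virtio_transport_read_skb() hunks all drop the caller-side kfree_skb()/consume_skb() because the recv_actor callback now owns the skb. A userspace sketch of that ownership hand-off, with hypothetical names:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* the callback now owns the buffer: it must queue it or free it itself */
typedef int (*read_actor_t)(char *buf, size_t len);

static int verdict_recv(char *buf, size_t len)
{
	printf("verdict on %zu bytes\n", len);
	free(buf);                     /* consumed by the callee, not by the caller */
	return (int)len;
}

static int read_skb(read_actor_t actor)
{
	char *buf = strdup("payload");

	if (!buf)
		return -1;
	/* old pattern: copied = actor(buf, len); free(buf); return copied;
	 * new pattern: hand the buffer over and return whatever the actor did */
	return actor(buf, strlen(buf));
}

int main(void)
{
	return read_skb(verdict_recv) > 0 ? 0 : 1;
}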
......@@ -498,7 +498,6 @@ int main(int argc, char **argv)
"Option -%c requires an argument.\n\n",
optopt);
case 'h':
__fallthrough;
default:
Usage();
return 0;
......
......@@ -197,7 +197,7 @@ $(OUTPUT)/urandom_read: urandom_read.c urandom_read_aux.c $(OUTPUT)/liburandom_r
$(OUTPUT)/sign-file: ../../../../scripts/sign-file.c
$(call msg,SIGN-FILE,,$@)
$(Q)$(CC) $(shell $(HOSTPKG_CONFIG)--cflags libcrypto 2> /dev/null) \
$(Q)$(CC) $(shell $(HOSTPKG_CONFIG) --cflags libcrypto 2> /dev/null) \
$< -o $@ \
$(shell $(HOSTPKG_CONFIG) --libs libcrypto 2> /dev/null || echo -lcrypto)
......
......@@ -2,6 +2,7 @@
// Copyright (c) 2020 Cloudflare
#include <error.h>
#include <netinet/tcp.h>
#include <sys/epoll.h>
#include "test_progs.h"
#include "test_skmsg_load_helpers.skel.h"
......@@ -9,8 +10,12 @@
#include "test_sockmap_invalid_update.skel.h"
#include "test_sockmap_skb_verdict_attach.skel.h"
#include "test_sockmap_progs_query.skel.h"
#include "test_sockmap_pass_prog.skel.h"
#include "test_sockmap_drop_prog.skel.h"
#include "bpf_iter_sockmap.skel.h"
#include "sockmap_helpers.h"
#define TCP_REPAIR 19 /* TCP sock is under repair right now */
#define TCP_REPAIR_ON 1
......@@ -350,6 +355,126 @@ static void test_sockmap_progs_query(enum bpf_attach_type attach_type)
test_sockmap_progs_query__destroy(skel);
}
#define MAX_EVENTS 10
static void test_sockmap_skb_verdict_shutdown(void)
{
struct epoll_event ev, events[MAX_EVENTS];
int n, err, map, verdict, s, c1, p1;
struct test_sockmap_pass_prog *skel;
int epollfd;
int zero = 0;
char b;
skel = test_sockmap_pass_prog__open_and_load();
if (!ASSERT_OK_PTR(skel, "open_and_load"))
return;
verdict = bpf_program__fd(skel->progs.prog_skb_verdict);
map = bpf_map__fd(skel->maps.sock_map_rx);
err = bpf_prog_attach(verdict, map, BPF_SK_SKB_STREAM_VERDICT, 0);
if (!ASSERT_OK(err, "bpf_prog_attach"))
goto out;
s = socket_loopback(AF_INET, SOCK_STREAM);
if (s < 0)
goto out;
err = create_pair(s, AF_INET, SOCK_STREAM, &c1, &p1);
if (err < 0)
goto out;
err = bpf_map_update_elem(map, &zero, &c1, BPF_NOEXIST);
if (err < 0)
goto out_close;
shutdown(p1, SHUT_WR);
ev.events = EPOLLIN;
ev.data.fd = c1;
epollfd = epoll_create1(0);
if (!ASSERT_GT(epollfd, -1, "epoll_create(0)"))
goto out_close;
err = epoll_ctl(epollfd, EPOLL_CTL_ADD, c1, &ev);
if (!ASSERT_OK(err, "epoll_ctl(EPOLL_CTL_ADD)"))
goto out_close;
err = epoll_wait(epollfd, events, MAX_EVENTS, -1);
if (!ASSERT_EQ(err, 1, "epoll_wait(fd)"))
goto out_close;
n = recv(c1, &b, 1, SOCK_NONBLOCK);
ASSERT_EQ(n, 0, "recv_timeout(fin)");
out_close:
close(c1);
close(p1);
out:
test_sockmap_pass_prog__destroy(skel);
}
static void test_sockmap_skb_verdict_fionread(bool pass_prog)
{
int expected, zero = 0, sent, recvd, avail;
int err, map, verdict, s, c0, c1, p0, p1;
struct test_sockmap_pass_prog *pass;
struct test_sockmap_drop_prog *drop;
char buf[256] = "0123456789";
if (pass_prog) {
pass = test_sockmap_pass_prog__open_and_load();
if (!ASSERT_OK_PTR(pass, "open_and_load"))
return;
verdict = bpf_program__fd(pass->progs.prog_skb_verdict);
map = bpf_map__fd(pass->maps.sock_map_rx);
expected = sizeof(buf);
} else {
drop = test_sockmap_drop_prog__open_and_load();
if (!ASSERT_OK_PTR(drop, "open_and_load"))
return;
verdict = bpf_program__fd(drop->progs.prog_skb_verdict);
map = bpf_map__fd(drop->maps.sock_map_rx);
/* On drop data is consumed immediately and copied_seq inc'd */
expected = 0;
}
err = bpf_prog_attach(verdict, map, BPF_SK_SKB_STREAM_VERDICT, 0);
if (!ASSERT_OK(err, "bpf_prog_attach"))
goto out;
s = socket_loopback(AF_INET, SOCK_STREAM);
if (!ASSERT_GT(s, -1, "socket_loopback(s)"))
goto out;
err = create_socket_pairs(s, AF_INET, SOCK_STREAM, &c0, &c1, &p0, &p1);
if (!ASSERT_OK(err, "create_socket_pairs(s)"))
goto out;
err = bpf_map_update_elem(map, &zero, &c1, BPF_NOEXIST);
if (!ASSERT_OK(err, "bpf_map_update_elem(c1)"))
goto out_close;
sent = xsend(p1, &buf, sizeof(buf), 0);
ASSERT_EQ(sent, sizeof(buf), "xsend(p0)");
err = ioctl(c1, FIONREAD, &avail);
ASSERT_OK(err, "ioctl(FIONREAD) error");
ASSERT_EQ(avail, expected, "ioctl(FIONREAD)");
/* On DROP test there will be no data to read */
if (pass_prog) {
recvd = recv_timeout(c1, &buf, sizeof(buf), SOCK_NONBLOCK, IO_TIMEOUT_SEC);
ASSERT_EQ(recvd, sizeof(buf), "recv_timeout(c0)");
}
out_close:
close(c0);
close(p0);
close(c1);
close(p1);
out:
if (pass_prog)
test_sockmap_pass_prog__destroy(pass);
else
test_sockmap_drop_prog__destroy(drop);
}
void test_sockmap_basic(void)
{
if (test__start_subtest("sockmap create_update_free"))
......@@ -384,4 +509,10 @@ void test_sockmap_basic(void)
test_sockmap_progs_query(BPF_SK_SKB_STREAM_VERDICT);
if (test__start_subtest("sockmap skb_verdict progs query"))
test_sockmap_progs_query(BPF_SK_SKB_VERDICT);
if (test__start_subtest("sockmap skb_verdict shutdown"))
test_sockmap_skb_verdict_shutdown();
if (test__start_subtest("sockmap skb_verdict fionread"))
test_sockmap_skb_verdict_fionread(true);
if (test__start_subtest("sockmap skb_verdict fionread on drop"))
test_sockmap_skb_verdict_fionread(false);
}
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>
struct {
__uint(type, BPF_MAP_TYPE_SOCKMAP);
__uint(max_entries, 20);
__type(key, int);
__type(value, int);
} sock_map_rx SEC(".maps");
struct {
__uint(type, BPF_MAP_TYPE_SOCKMAP);
__uint(max_entries, 20);
__type(key, int);
__type(value, int);
} sock_map_tx SEC(".maps");
struct {
__uint(type, BPF_MAP_TYPE_SOCKMAP);
__uint(max_entries, 20);
__type(key, int);
__type(value, int);
} sock_map_msg SEC(".maps");
SEC("sk_skb")
int prog_skb_verdict(struct __sk_buff *skb)
{
return SK_DROP;
}
char _license[] SEC("license") = "GPL";
......@@ -191,7 +191,7 @@ SEC("sockops")
int bpf_sockmap(struct bpf_sock_ops *skops)
{
__u32 lport, rport;
int op, err, ret;
int op, ret;
op = (int) skops->op;
......@@ -203,10 +203,10 @@ int bpf_sockmap(struct bpf_sock_ops *skops)
if (lport == 10000) {
ret = 1;
#ifdef SOCKMAP
err = bpf_sock_map_update(skops, &sock_map, &ret,
bpf_sock_map_update(skops, &sock_map, &ret,
BPF_NOEXIST);
#else
err = bpf_sock_hash_update(skops, &sock_map, &ret,
bpf_sock_hash_update(skops, &sock_map, &ret,
BPF_NOEXIST);
#endif
}
......@@ -218,10 +218,10 @@ int bpf_sockmap(struct bpf_sock_ops *skops)
if (bpf_ntohl(rport) == 10001) {
ret = 10;
#ifdef SOCKMAP
err = bpf_sock_map_update(skops, &sock_map, &ret,
bpf_sock_map_update(skops, &sock_map, &ret,
BPF_NOEXIST);
#else
err = bpf_sock_hash_update(skops, &sock_map, &ret,
bpf_sock_hash_update(skops, &sock_map, &ret,
BPF_NOEXIST);
#endif
}
......@@ -230,8 +230,6 @@ int bpf_sockmap(struct bpf_sock_ops *skops)
break;
}
__sink(err);
return 0;
}
......
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>
struct {
__uint(type, BPF_MAP_TYPE_SOCKMAP);
__uint(max_entries, 20);
__type(key, int);
__type(value, int);
} sock_map_rx SEC(".maps");
struct {
__uint(type, BPF_MAP_TYPE_SOCKMAP);
__uint(max_entries, 20);
__type(key, int);
__type(value, int);
} sock_map_tx SEC(".maps");
struct {
__uint(type, BPF_MAP_TYPE_SOCKMAP);
__uint(max_entries, 20);
__type(key, int);
__type(value, int);
} sock_map_msg SEC(".maps");
SEC("sk_skb")
int prog_skb_verdict(struct __sk_buff *skb)
{
return SK_PASS;
}
char _license[] SEC("license") = "GPL";