Commit b1d9fc41, authored by Daniel Borkmann, committed by David S. Miller

bpf: add napi_id read access to __sk_buff

Add napi_id access to __sk_buff for socket filter program types, tc
program types and other bpf_convert_ctx_access() users. Having access
to skb->napi_id is useful for per-RX-queue listener siloing, e.g.
in combination with SO_ATTACH_REUSEPORT_EBPF and when busy polling is
used, meaning SO_REUSEPORT enabled listeners can then select the
corresponding socket at SYN time already [1]. The skb is marked via
skb_mark_napi_id() early in the receive path (e.g., napi_gro_receive()).

Currently, sockets can only use SO_INCOMING_NAPI_ID from 6d433902
("net: Introduce SO_INCOMING_NAPI_ID") as a socket option to look up
the NAPI ID associated with the queue for steering, which requires a
prior sk_mark_napi_id() after the socket was looked up.

Semantics for the __sk_buff napi_id access are similar, meaning if
skb->napi_id is < MIN_NAPI_ID (e.g. outgoing packets using sender_cpu),
then an invalid napi_id of 0 is returned to the program, otherwise a
valid non-zero napi_id.

  [1] http://netdevconf.org/2.1/slides/apr6/dumazet-BUSY-POLLING-Netdev-2.1.pdf

Suggested-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent 73e64fa4
...@@ -603,6 +603,7 @@ struct __sk_buff { ...@@ -603,6 +603,7 @@ struct __sk_buff {
__u32 tc_classid; __u32 tc_classid;
__u32 data; __u32 data;
__u32 data_end; __u32 data_end;
__u32 napi_id;
}; };
struct bpf_tunnel_key { struct bpf_tunnel_key {
......
...@@ -53,6 +53,7 @@ ...@@ -53,6 +53,7 @@
#include <net/dst_metadata.h> #include <net/dst_metadata.h>
#include <net/dst.h> #include <net/dst.h>
#include <net/sock_reuseport.h> #include <net/sock_reuseport.h>
#include <net/busy_poll.h>
/** /**
* sk_filter_trim_cap - run a packet through a socket filter * sk_filter_trim_cap - run a packet through a socket filter
...@@ -3201,6 +3202,19 @@ static u32 bpf_convert_ctx_access(enum bpf_access_type type, ...@@ -3201,6 +3202,19 @@ static u32 bpf_convert_ctx_access(enum bpf_access_type type,
*insn++ = BPF_MOV64_REG(si->dst_reg, si->dst_reg); *insn++ = BPF_MOV64_REG(si->dst_reg, si->dst_reg);
else else
*insn++ = BPF_MOV64_IMM(si->dst_reg, 0); *insn++ = BPF_MOV64_IMM(si->dst_reg, 0);
#endif
break;
case offsetof(struct __sk_buff, napi_id):
#if defined(CONFIG_NET_RX_BUSY_POLL)
BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, napi_id) != 4);
*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
offsetof(struct sk_buff, napi_id));
*insn++ = BPF_JMP_IMM(BPF_JGE, si->dst_reg, MIN_NAPI_ID, 1);
*insn++ = BPF_MOV64_IMM(si->dst_reg, 0);
#else
*insn++ = BPF_MOV64_IMM(si->dst_reg, 0);
#endif #endif
break; break;
} }
......
...@@ -603,6 +603,7 @@ struct __sk_buff { ...@@ -603,6 +603,7 @@ struct __sk_buff {
__u32 tc_classid; __u32 tc_classid;
__u32 data; __u32 data;
__u32 data_end; __u32 data_end;
__u32 napi_id;
}; };
struct bpf_tunnel_key { struct bpf_tunnel_key {
......
...@@ -772,6 +772,9 @@ static struct bpf_test tests[] = { ...@@ -772,6 +772,9 @@ static struct bpf_test tests[] = {
BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
offsetof(struct __sk_buff, vlan_tci)), offsetof(struct __sk_buff, vlan_tci)),
BPF_JMP_IMM(BPF_JGE, BPF_REG_0, 0, 0), BPF_JMP_IMM(BPF_JGE, BPF_REG_0, 0, 0),
BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
offsetof(struct __sk_buff, napi_id)),
BPF_JMP_IMM(BPF_JGE, BPF_REG_0, 0, 0),
BPF_EXIT_INSN(), BPF_EXIT_INSN(),
}, },
.result = ACCEPT, .result = ACCEPT,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment