Commit 1fc5bdb2 authored by Alexei Starovoitov's avatar Alexei Starovoitov

Merge branch 'Split bpf_sock dst_port field'

Jakub Sitnicki says:

====================

This is a follow-up to discussion around the idea of making dst_port in struct
bpf_sock a 16-bit field that happened in [1].

v2:
- use an anonymous field for zero padding (Alexei)

v1:
- keep dst_field offset unchanged to prevent existing BPF program breakage
  (Martin)
- allow 8-bit loads from dst_port[0] and [1]
- add test coverage for the verifier and the context access converter

[1] https://lore.kernel.org/bpf/87sftbobys.fsf@cloudflare.com/
====================
Acked-by: default avatarMartin KaFai Lau <kafai@fb.com>
Signed-off-by: default avatarAlexei Starovoitov <ast@kernel.org>
parents b3dddab2 8f50f16f
......@@ -5574,7 +5574,8 @@ struct bpf_sock {
__u32 src_ip4;
__u32 src_ip6[4];
__u32 src_port; /* host byte order */
__u32 dst_port; /* network byte order */
__be16 dst_port; /* network byte order */
__u16 :16; /* zero padding */
__u32 dst_ip4;
__u32 dst_ip6[4];
__u32 state;
......
......@@ -8265,6 +8265,7 @@ bool bpf_sock_is_valid_access(int off, int size, enum bpf_access_type type,
struct bpf_insn_access_aux *info)
{
const int size_default = sizeof(__u32);
int field_size;
if (off < 0 || off >= sizeof(struct bpf_sock))
return false;
......@@ -8276,7 +8277,6 @@ bool bpf_sock_is_valid_access(int off, int size, enum bpf_access_type type,
case offsetof(struct bpf_sock, family):
case offsetof(struct bpf_sock, type):
case offsetof(struct bpf_sock, protocol):
case offsetof(struct bpf_sock, dst_port):
case offsetof(struct bpf_sock, src_port):
case offsetof(struct bpf_sock, rx_queue_mapping):
case bpf_ctx_range(struct bpf_sock, src_ip4):
......@@ -8285,6 +8285,14 @@ bool bpf_sock_is_valid_access(int off, int size, enum bpf_access_type type,
case bpf_ctx_range_till(struct bpf_sock, dst_ip6[0], dst_ip6[3]):
bpf_ctx_record_field_size(info, size_default);
return bpf_ctx_narrow_access_ok(off, size, size_default);
case bpf_ctx_range(struct bpf_sock, dst_port):
field_size = size == size_default ?
size_default : sizeof_field(struct bpf_sock, dst_port);
bpf_ctx_record_field_size(info, field_size);
return bpf_ctx_narrow_access_ok(off, size, field_size);
case offsetofend(struct bpf_sock, dst_port) ...
offsetof(struct bpf_sock, dst_ip4) - 1:
return false;
}
return size == size_default;
......
......@@ -5574,7 +5574,8 @@ struct bpf_sock {
__u32 src_ip4;
__u32 src_ip6[4];
__u32 src_port; /* host byte order */
__u32 dst_port; /* network byte order */
__be16 dst_port; /* network byte order */
__u16 :16; /* zero padding */
__u32 dst_ip4;
__u32 dst_ip6[4];
__u32 state;
......
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2019 Facebook */
#define _GNU_SOURCE
#include <netinet/in.h>
#include <arpa/inet.h>
#include <unistd.h>
#include <sched.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
......@@ -20,6 +22,7 @@
enum bpf_linum_array_idx {
EGRESS_LINUM_IDX,
INGRESS_LINUM_IDX,
READ_SK_DST_PORT_LINUM_IDX,
__NR_BPF_LINUM_ARRAY_IDX,
};
......@@ -42,8 +45,16 @@ static __u64 child_cg_id;
static int linum_map_fd;
static __u32 duration;
static __u32 egress_linum_idx = EGRESS_LINUM_IDX;
static __u32 ingress_linum_idx = INGRESS_LINUM_IDX;
static bool create_netns(void)
{
if (!ASSERT_OK(unshare(CLONE_NEWNET), "create netns"))
return false;
if (!ASSERT_OK(system("ip link set dev lo up"), "bring up lo"))
return false;
return true;
}
static void print_sk(const struct bpf_sock *sk, const char *prefix)
{
......@@ -91,19 +102,24 @@ static void check_result(void)
{
struct bpf_tcp_sock srv_tp, cli_tp, listen_tp;
struct bpf_sock srv_sk, cli_sk, listen_sk;
__u32 ingress_linum, egress_linum;
__u32 idx, ingress_linum, egress_linum, linum;
int err;
err = bpf_map_lookup_elem(linum_map_fd, &egress_linum_idx,
&egress_linum);
idx = EGRESS_LINUM_IDX;
err = bpf_map_lookup_elem(linum_map_fd, &idx, &egress_linum);
CHECK(err < 0, "bpf_map_lookup_elem(linum_map_fd)",
"err:%d errno:%d\n", err, errno);
err = bpf_map_lookup_elem(linum_map_fd, &ingress_linum_idx,
&ingress_linum);
idx = INGRESS_LINUM_IDX;
err = bpf_map_lookup_elem(linum_map_fd, &idx, &ingress_linum);
CHECK(err < 0, "bpf_map_lookup_elem(linum_map_fd)",
"err:%d errno:%d\n", err, errno);
idx = READ_SK_DST_PORT_LINUM_IDX;
err = bpf_map_lookup_elem(linum_map_fd, &idx, &linum);
ASSERT_OK(err, "bpf_map_lookup_elem(linum_map_fd, READ_SK_DST_PORT_IDX)");
ASSERT_EQ(linum, 0, "failure in read_sk_dst_port on line");
memcpy(&srv_sk, &skel->bss->srv_sk, sizeof(srv_sk));
memcpy(&srv_tp, &skel->bss->srv_tp, sizeof(srv_tp));
memcpy(&cli_sk, &skel->bss->cli_sk, sizeof(cli_sk));
......@@ -262,7 +278,7 @@ static void test(void)
char buf[DATA_LEN];
/* Prepare listen_fd */
listen_fd = start_server(AF_INET6, SOCK_STREAM, "::1", 0, 0);
listen_fd = start_server(AF_INET6, SOCK_STREAM, "::1", 0xcafe, 0);
/* start_server() has logged the error details */
if (CHECK_FAIL(listen_fd == -1))
goto done;
......@@ -330,8 +346,12 @@ static void test(void)
void serial_test_sock_fields(void)
{
struct bpf_link *egress_link = NULL, *ingress_link = NULL;
int parent_cg_fd = -1, child_cg_fd = -1;
struct bpf_link *link;
/* Use a dedicated netns to have a fixed listen port */
if (!create_netns())
return;
/* Create a cgroup, get fd, and join it */
parent_cg_fd = test__join_cgroup(PARENT_CGROUP);
......@@ -352,15 +372,20 @@ void serial_test_sock_fields(void)
if (CHECK(!skel, "test_sock_fields__open_and_load", "failed\n"))
goto done;
egress_link = bpf_program__attach_cgroup(skel->progs.egress_read_sock_fields,
child_cg_fd);
if (!ASSERT_OK_PTR(egress_link, "attach_cgroup(egress)"))
link = bpf_program__attach_cgroup(skel->progs.egress_read_sock_fields, child_cg_fd);
if (!ASSERT_OK_PTR(link, "attach_cgroup(egress_read_sock_fields)"))
goto done;
skel->links.egress_read_sock_fields = link;
link = bpf_program__attach_cgroup(skel->progs.ingress_read_sock_fields, child_cg_fd);
if (!ASSERT_OK_PTR(link, "attach_cgroup(ingress_read_sock_fields)"))
goto done;
skel->links.ingress_read_sock_fields = link;
ingress_link = bpf_program__attach_cgroup(skel->progs.ingress_read_sock_fields,
child_cg_fd);
if (!ASSERT_OK_PTR(ingress_link, "attach_cgroup(ingress)"))
link = bpf_program__attach_cgroup(skel->progs.read_sk_dst_port, child_cg_fd);
if (!ASSERT_OK_PTR(link, "attach_cgroup(read_sk_dst_port"))
goto done;
skel->links.read_sk_dst_port = link;
linum_map_fd = bpf_map__fd(skel->maps.linum_map);
sk_pkt_out_cnt_fd = bpf_map__fd(skel->maps.sk_pkt_out_cnt);
......@@ -369,8 +394,7 @@ void serial_test_sock_fields(void)
test();
done:
bpf_link__destroy(egress_link);
bpf_link__destroy(ingress_link);
test_sock_fields__detach(skel);
test_sock_fields__destroy(skel);
if (child_cg_fd >= 0)
close(child_cg_fd);
......
......@@ -12,6 +12,7 @@
enum bpf_linum_array_idx {
EGRESS_LINUM_IDX,
INGRESS_LINUM_IDX,
READ_SK_DST_PORT_LINUM_IDX,
__NR_BPF_LINUM_ARRAY_IDX,
};
......@@ -250,4 +251,44 @@ int ingress_read_sock_fields(struct __sk_buff *skb)
return CG_OK;
}
static __noinline bool sk_dst_port__load_word(struct bpf_sock *sk)
{
__u32 *word = (__u32 *)&sk->dst_port;
return word[0] == bpf_htonl(0xcafe0000);
}
static __noinline bool sk_dst_port__load_half(struct bpf_sock *sk)
{
__u16 *half = (__u16 *)&sk->dst_port;
return half[0] == bpf_htons(0xcafe);
}
static __noinline bool sk_dst_port__load_byte(struct bpf_sock *sk)
{
__u8 *byte = (__u8 *)&sk->dst_port;
return byte[0] == 0xca && byte[1] == 0xfe;
}
SEC("cgroup_skb/egress")
int read_sk_dst_port(struct __sk_buff *skb)
{
__u32 linum, linum_idx;
struct bpf_sock *sk;
linum_idx = READ_SK_DST_PORT_LINUM_IDX;
sk = skb->sk;
if (!sk)
RET_LOG();
if (!sk_dst_port__load_word(sk))
RET_LOG();
if (!sk_dst_port__load_half(sk))
RET_LOG();
if (!sk_dst_port__load_byte(sk))
RET_LOG();
return CG_OK;
}
char _license[] SEC("license") = "GPL";
......@@ -121,7 +121,25 @@
.result = ACCEPT,
},
{
"sk_fullsock(skb->sk): sk->dst_port [narrow load]",
"sk_fullsock(skb->sk): sk->dst_port [word load] (backward compatibility)",
.insns = {
BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)),
BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2),
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
BPF_EMIT_CALL(BPF_FUNC_sk_fullsock),
BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2),
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, offsetof(struct bpf_sock, dst_port)),
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
.result = ACCEPT,
},
{
"sk_fullsock(skb->sk): sk->dst_port [half load]",
.insns = {
BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)),
BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2),
......@@ -139,7 +157,64 @@
.result = ACCEPT,
},
{
"sk_fullsock(skb->sk): sk->dst_port [load 2nd byte]",
"sk_fullsock(skb->sk): sk->dst_port [half load] (invalid)",
.insns = {
BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)),
BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2),
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
BPF_EMIT_CALL(BPF_FUNC_sk_fullsock),
BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2),
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_0, offsetof(struct bpf_sock, dst_port) + 2),
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
.result = REJECT,
.errstr = "invalid sock access",
},
{
"sk_fullsock(skb->sk): sk->dst_port [byte load]",
.insns = {
BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)),
BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2),
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
BPF_EMIT_CALL(BPF_FUNC_sk_fullsock),
BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2),
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
BPF_LDX_MEM(BPF_B, BPF_REG_2, BPF_REG_0, offsetof(struct bpf_sock, dst_port)),
BPF_LDX_MEM(BPF_B, BPF_REG_2, BPF_REG_0, offsetof(struct bpf_sock, dst_port) + 1),
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
.result = ACCEPT,
},
{
"sk_fullsock(skb->sk): sk->dst_port [byte load] (invalid)",
.insns = {
BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)),
BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2),
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
BPF_EMIT_CALL(BPF_FUNC_sk_fullsock),
BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2),
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, offsetof(struct bpf_sock, dst_port) + 2),
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
.result = REJECT,
.errstr = "invalid sock access",
},
{
"sk_fullsock(skb->sk): past sk->dst_port [half load] (invalid)",
.insns = {
BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)),
BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2),
......@@ -149,7 +224,7 @@
BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2),
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, offsetof(struct bpf_sock, dst_port) + 1),
BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_0, offsetofend(struct bpf_sock, dst_port)),
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment