Commit 2fae6771 authored by Martin KaFai Lau

Merge branch 'cgroup/connect{4,6} programs for unprivileged ICMP ping'

YiFei Zhu says:

====================

Usually when a TCP/UDP connection is initiated, we can bind the socket
to a specific IP attached to an interface in a cgroup/connect hook.
But for pings, this is impossible, as the hook is not being called.

This series adds the invocation for cgroup/connect{4,6} programs to
unprivileged ICMP ping (i.e. ping sockets created with SOCK_DGRAM
IPPROTO_ICMP(V6) as opposed to SOCK_RAW). This also adds a test to
verify that the hooks are being called and invoking bpf_bind() from
within the hook actually binds the socket.

Patch 1 adds the invocation of the hook.
Patch 2 deduplicates write_sysctl in BPF test_progs.
Patch 3 adds the tests for this hook.

v1 -> v2:
* Added static to bindaddr_v6 in prog_tests/connect_ping.c
* Deduplicated much of the test logic in prog_tests/connect_ping.c
* Deduplicated write_sysctl() to test_progs.c

v2 -> v3:
* Renamed variable "obj" to "skel" for the BPF skeleton object in
  prog_tests/connect_ping.c

v3 -> v4:
* Fixed error path to destroy skel in prog_tests/connect_ping.c
====================
Signed-off-by: Martin KaFai Lau <martin.lau@kernel.org>
parents 665f5d35 58c449a9
...@@ -33,6 +33,7 @@ ...@@ -33,6 +33,7 @@
#include <linux/skbuff.h> #include <linux/skbuff.h>
#include <linux/proc_fs.h> #include <linux/proc_fs.h>
#include <linux/export.h> #include <linux/export.h>
#include <linux/bpf-cgroup.h>
#include <net/sock.h> #include <net/sock.h>
#include <net/ping.h> #include <net/ping.h>
#include <net/udp.h> #include <net/udp.h>
...@@ -295,6 +296,19 @@ void ping_close(struct sock *sk, long timeout) ...@@ -295,6 +296,19 @@ void ping_close(struct sock *sk, long timeout)
} }
EXPORT_SYMBOL_GPL(ping_close); EXPORT_SYMBOL_GPL(ping_close);
/* pre_connect handler for IPv4 ping sockets: validates the user-supplied
 * address length, then runs the cgroup inet4 connect BPF hook.
 *
 * Returns -EINVAL when addr_len is too short for a struct sockaddr_in,
 * otherwise whatever the BPF hook macro evaluates to.
 */
static int ping_pre_connect(struct sock *sk, struct sockaddr *uaddr,
			    int addr_len)
{
	/* Same length check as __ip4_datagram_connect(): it keeps the BPF
	 * program invoked below from touching bytes beyond the addr_len
	 * that the user specified.
	 */
	if (addr_len >= sizeof(struct sockaddr_in))
		return BPF_CGROUP_RUN_PROG_INET4_CONNECT_LOCK(sk, uaddr);

	return -EINVAL;
}
/* Checks the bind address and possibly modifies sk->sk_bound_dev_if. */ /* Checks the bind address and possibly modifies sk->sk_bound_dev_if. */
static int ping_check_bind_addr(struct sock *sk, struct inet_sock *isk, static int ping_check_bind_addr(struct sock *sk, struct inet_sock *isk,
struct sockaddr *uaddr, int addr_len) struct sockaddr *uaddr, int addr_len)
...@@ -1009,6 +1023,7 @@ struct proto ping_prot = { ...@@ -1009,6 +1023,7 @@ struct proto ping_prot = {
.owner = THIS_MODULE, .owner = THIS_MODULE,
.init = ping_init_sock, .init = ping_init_sock,
.close = ping_close, .close = ping_close,
.pre_connect = ping_pre_connect,
.connect = ip4_datagram_connect, .connect = ip4_datagram_connect,
.disconnect = __udp_disconnect, .disconnect = __udp_disconnect,
.setsockopt = ip_setsockopt, .setsockopt = ip_setsockopt,
......
...@@ -20,6 +20,7 @@ ...@@ -20,6 +20,7 @@
#include <net/udp.h> #include <net/udp.h>
#include <net/transp_v6.h> #include <net/transp_v6.h>
#include <linux/proc_fs.h> #include <linux/proc_fs.h>
#include <linux/bpf-cgroup.h>
#include <net/ping.h> #include <net/ping.h>
static void ping_v6_destroy(struct sock *sk) static void ping_v6_destroy(struct sock *sk)
...@@ -49,6 +50,20 @@ static int dummy_ipv6_chk_addr(struct net *net, const struct in6_addr *addr, ...@@ -49,6 +50,20 @@ static int dummy_ipv6_chk_addr(struct net *net, const struct in6_addr *addr,
return 0; return 0;
} }
/* pre_connect handler for IPv6 ping sockets: validates the user-supplied
 * address length, then runs the cgroup inet6 connect BPF hook.
 *
 * Returns -EINVAL when addr_len is below SIN6_LEN_RFC2133, otherwise
 * whatever the BPF hook macro evaluates to.
 */
static int ping_v6_pre_connect(struct sock *sk, struct sockaddr *uaddr,
			       int addr_len)
{
	/* Same length check as __ip6_datagram_connect(): it keeps the BPF
	 * program invoked below from touching bytes beyond the addr_len
	 * that the user specified.
	 */
	if (addr_len >= SIN6_LEN_RFC2133)
		return BPF_CGROUP_RUN_PROG_INET6_CONNECT_LOCK(sk, uaddr);

	return -EINVAL;
}
static int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) static int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
{ {
struct inet_sock *inet = inet_sk(sk); struct inet_sock *inet = inet_sk(sk);
...@@ -191,6 +206,7 @@ struct proto pingv6_prot = { ...@@ -191,6 +206,7 @@ struct proto pingv6_prot = {
.init = ping_init_sock, .init = ping_init_sock,
.close = ping_close, .close = ping_close,
.destroy = ping_v6_destroy, .destroy = ping_v6_destroy,
.pre_connect = ping_v6_pre_connect,
.connect = ip6_datagram_connect_v6_only, .connect = ip6_datagram_connect_v6_only,
.disconnect = __udp_disconnect, .disconnect = __udp_disconnect,
.setsockopt = ipv6_setsockopt, .setsockopt = ipv6_setsockopt,
......
...@@ -22,26 +22,6 @@ static __u32 duration; ...@@ -22,26 +22,6 @@ static __u32 duration;
#define PROG_PIN_FILE "/sys/fs/bpf/btf_skc_cls_ingress" #define PROG_PIN_FILE "/sys/fs/bpf/btf_skc_cls_ingress"
/* Write the string @value to the file at path @sysctl.
 *
 * Returns 0 when the whole string was written by a single write(), and -1
 * when the file cannot be opened or the write fails or is short. Both
 * failure cases are reported through CHECK().
 */
static int write_sysctl(const char *sysctl, const char *value)
{
	int fd, err, len, saved_errno;

	fd = open(sysctl, O_WRONLY);
	if (CHECK(fd == -1, "open sysctl", "open(%s): %s (%d)\n",
		  sysctl, strerror(errno), errno))
		return -1;

	len = strlen(value);
	err = write(fd, value, len);
	/* close() may overwrite errno, so capture the value left by write()
	 * before closing; otherwise the report below could name the wrong
	 * error.
	 */
	saved_errno = errno;
	close(fd);
	if (CHECK(err != len, "write sysctl",
		  "write(%s, %s, %d): err:%d %s (%d)\n",
		  sysctl, value, len, err, strerror(saved_errno), saved_errno))
		return -1;

	return 0;
}
static int prepare_netns(void) static int prepare_netns(void)
{ {
if (CHECK(unshare(CLONE_NEWNET), "create netns", if (CHECK(unshare(CLONE_NEWNET), "create netns",
......
// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright 2022 Google LLC.
*/
#define _GNU_SOURCE
#include <sys/mount.h>
#include "test_progs.h"
#include "cgroup_helpers.h"
#include "network_helpers.h"
#include "connect_ping.skel.h"
/* 2001:db8::1 */
#define BINDADDR_V6 { { { 0x20,0x01,0x0d,0xb8,0,0,0,0,0,0,0,0,0,0,0,1 } } }
static const struct in6_addr bindaddr_v6 = BINDADDR_V6;
/* Connect one unprivileged ping socket to loopback and verify the
 * cgroup/connect hooks.
 *
 * @cgroup_fd: cgroup file descriptor passed by the caller; not referenced
 *             in this function.
 * @skel:      loaded skeleton; its .bss is zeroed here and then carries
 *             do_bind, has_error and the invocation counters.
 * @family:    AF_INET or AF_INET6. Any other value fails the subtest
 *             instead of running with uninitialized address state.
 * @do_bind:   stored into skel->bss->do_bind; when nonzero the socket's
 *             local address is expected to be 1.1.1.1 / 2001:db8::1
 *             rather than loopback.
 */
static void subtest(int cgroup_fd, struct connect_ping *skel,
		    int family, int do_bind)
{
	struct sockaddr_in sa4 = {
		.sin_family = AF_INET,
		.sin_addr.s_addr = htonl(INADDR_LOOPBACK),
	};
	struct sockaddr_in6 sa6 = {
		.sin6_family = AF_INET6,
		.sin6_addr = IN6ADDR_LOOPBACK_INIT,
	};
	struct sockaddr *sa;
	socklen_t sa_len;
	int protocol;
	int sock_fd;

	switch (family) {
	case AF_INET:
		sa = (struct sockaddr *)&sa4;
		sa_len = sizeof(sa4);
		protocol = IPPROTO_ICMP;
		break;
	case AF_INET6:
		sa = (struct sockaddr *)&sa6;
		sa_len = sizeof(sa6);
		protocol = IPPROTO_ICMPV6;
		break;
	default:
		/* Only reached for a family that is neither AF_INET nor
		 * AF_INET6, so this assertion always fails: it records the
		 * unsupported value and we bail out before sa, sa_len and
		 * protocol could be used uninitialized.
		 */
		ASSERT_EQ(family, AF_INET6, "family");
		return;
	}

	/* Reset all counters/flags shared with the BPF programs. */
	memset(skel->bss, 0, sizeof(*skel->bss));
	skel->bss->do_bind = do_bind;

	sock_fd = socket(family, SOCK_DGRAM, protocol);
	if (!ASSERT_GE(sock_fd, 0, "sock-create"))
		return;

	if (!ASSERT_OK(connect(sock_fd, sa, sa_len), "connect"))
		goto close_sock;

	/* Exactly one hook of the matching family must have run. */
	if (!ASSERT_EQ(skel->bss->invocations_v4, family == AF_INET ? 1 : 0,
		       "invocations_v4"))
		goto close_sock;
	if (!ASSERT_EQ(skel->bss->invocations_v6, family == AF_INET6 ? 1 : 0,
		       "invocations_v6"))
		goto close_sock;
	if (!ASSERT_EQ(skel->bss->has_error, 0, "has_error"))
		goto close_sock;

	/* Reuses sa4/sa6 as the output buffer for the local address. */
	if (!ASSERT_OK(getsockname(sock_fd, sa, &sa_len),
		       "getsockname"))
		goto close_sock;

	switch (family) {
	case AF_INET:
		if (!ASSERT_EQ(sa4.sin_family, family, "sin_family"))
			goto close_sock;
		if (!ASSERT_EQ(sa4.sin_addr.s_addr,
			       htonl(do_bind ? 0x01010101 : INADDR_LOOPBACK),
			       "sin_addr"))
			goto close_sock;
		break;
	case AF_INET6:
		if (!ASSERT_EQ(sa6.sin6_family, AF_INET6, "sin6_family"))
			goto close_sock;
		if (!ASSERT_EQ(memcmp(&sa6.sin6_addr,
				      do_bind ? &bindaddr_v6 : &in6addr_loopback,
				      sizeof(sa6.sin6_addr)),
			       0, "sin6_addr"))
			goto close_sock;
		break;
	default:
		/* Unreachable: unsupported families returned above. */
		break;
	}

close_sock:
	close(sock_fd);
}
/* Test entry point: sets up a private network/mount namespace with ping
 * sockets enabled, attaches the connect4/connect6 programs to a fresh
 * cgroup, and runs the four subtests.
 */
void test_connect_ping(void)
{
	/* Each case connects a ping socket of the given family to localhost
	 * and asserts that only the hook of that family ran, exactly once.
	 * With do_bind == 0 the socket's bound address must be the original
	 * loopback address; with do_bind == 1 it must be the address the
	 * BPF program explicitly bound.
	 */
	static const struct {
		const char *name;
		int family;
		int do_bind;
	} cases[] = {
		{ "ipv4",      AF_INET,  0 },
		{ "ipv4-bind", AF_INET,  1 },
		{ "ipv6",      AF_INET6, 0 },
		{ "ipv6-bind", AF_INET6, 1 },
	};
	struct connect_ping *ping_skel;
	int cg_fd;
	size_t i;

	if (!ASSERT_OK(unshare(CLONE_NEWNET | CLONE_NEWNS), "unshare"))
		return;

	/* Make the original /sys mount private before mounting a new sysfs
	 * over it, so the overmount does not propagate to other mount
	 * namespaces.
	 */
	if (!ASSERT_OK(mount("none", "/sys", NULL, MS_PRIVATE, NULL),
		       "remount-private-sys"))
		return;

	if (!ASSERT_OK(mount("sysfs", "/sys", "sysfs", 0, NULL),
		       "mount-sys"))
		return;

	if (!ASSERT_OK(mount("bpffs", "/sys/fs/bpf", "bpf", 0, NULL),
		       "mount-bpf"))
		goto out_umount;

	/* Bring up loopback and add the addresses the BPF programs bind to. */
	if (!ASSERT_OK(system("ip link set dev lo up"), "lo-up"))
		goto out_umount;
	if (!ASSERT_OK(system("ip addr add 1.1.1.1 dev lo"), "lo-addr-v4"))
		goto out_umount;
	if (!ASSERT_OK(system("ip -6 addr add 2001:db8::1 dev lo"), "lo-addr-v6"))
		goto out_umount;

	if (write_sysctl("/proc/sys/net/ipv4/ping_group_range", "0 0"))
		goto out_umount;

	cg_fd = test__join_cgroup("/connect_ping");
	if (!ASSERT_GE(cg_fd, 0, "cg-create"))
		goto out_umount;

	ping_skel = connect_ping__open_and_load();
	if (!ASSERT_OK_PTR(ping_skel, "skel-load"))
		goto out_cgroup;

	ping_skel->links.connect_v4_prog =
		bpf_program__attach_cgroup(ping_skel->progs.connect_v4_prog,
					   cg_fd);
	if (!ASSERT_OK_PTR(ping_skel->links.connect_v4_prog, "cg-attach-v4"))
		goto out_skel;

	ping_skel->links.connect_v6_prog =
		bpf_program__attach_cgroup(ping_skel->progs.connect_v6_prog,
					   cg_fd);
	if (!ASSERT_OK_PTR(ping_skel->links.connect_v6_prog, "cg-attach-v6"))
		goto out_skel;

	for (i = 0; i < sizeof(cases) / sizeof(cases[0]); i++) {
		if (test__start_subtest(cases[i].name))
			subtest(cg_fd, ping_skel, cases[i].family,
				cases[i].do_bind);
	}

out_skel:
	connect_ping__destroy(ping_skel);
out_cgroup:
	close(cg_fd);
out_umount:
	umount2("/sys", MNT_DETACH);
}
...@@ -54,26 +54,6 @@ static int create_netns(void) ...@@ -54,26 +54,6 @@ static int create_netns(void)
return 0; return 0;
} }
/* Write the string @value to the file at path @sysctl.
 *
 * Returns 0 when the whole string was written by a single write(), and -1
 * when the file cannot be opened or the write fails or is short. Both
 * failure cases are reported through CHECK().
 */
static int write_sysctl(const char *sysctl, const char *value)
{
	int fd, err, len, saved_errno;

	fd = open(sysctl, O_WRONLY);
	if (CHECK(fd == -1, "open sysctl", "open(%s): %s (%d)\n",
		  sysctl, strerror(errno), errno))
		return -1;

	len = strlen(value);
	err = write(fd, value, len);
	/* close() may overwrite errno, so capture the value left by write()
	 * before closing; otherwise the report below could name the wrong
	 * error.
	 */
	saved_errno = errno;
	close(fd);
	if (CHECK(err != len, "write sysctl",
		  "write(%s, %s): err:%d %s (%d)\n",
		  sysctl, value, err, strerror(saved_errno), saved_errno))
		return -1;

	return 0;
}
static void print_hdr_stg(const struct hdr_stg *hdr_stg, const char *prefix) static void print_hdr_stg(const struct hdr_stg *hdr_stg, const char *prefix)
{ {
fprintf(stderr, "%s{active:%u, resend_syn:%u, syncookie:%u, fastopen:%u}\n", fprintf(stderr, "%s{active:%u, resend_syn:%u, syncookie:%u, fastopen:%u}\n",
......
// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright 2022 Google LLC.
*/
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>
#include <netinet/in.h>
#include <sys/socket.h>
/* 2001:db8::1 */
#define BINDADDR_V6 { { { 0x20,0x01,0x0d,0xb8,0,0,0,0,0,0,0,0,0,0,0,1 } } }
/* Read by both connect programs: when nonzero they call bpf_bind(). */
__u32 do_bind = 0;
/* Set to 1 by either program when its bpf_bind() call returns nonzero. */
__u32 has_error = 0;
/* Incremented atomically on every run of connect_v4_prog. */
__u32 invocations_v4 = 0;
/* Incremented atomically on every run of connect_v6_prog. */
__u32 invocations_v6 = 0;
/* cgroup/connect4 program: counts its invocation and, when do_bind is
 * set, binds the connecting socket to 1.1.1.1 (0x01010101). A failing
 * bpf_bind() is recorded in has_error. Always returns 1.
 */
SEC("cgroup/connect4")
int connect_v4_prog(struct bpf_sock_addr *ctx)
{
	struct sockaddr_in bind_addr = {
		.sin_family = AF_INET,
		.sin_addr.s_addr = bpf_htonl(0x01010101),
	};

	__sync_fetch_and_add(&invocations_v4, 1);

	if (!do_bind)
		return 1;

	if (bpf_bind(ctx, (struct sockaddr *)&bind_addr, sizeof(bind_addr)))
		has_error = 1;

	return 1;
}
/* cgroup/connect6 program: counts its invocation and, when do_bind is
 * set, binds the connecting socket to BINDADDR_V6 (2001:db8::1). A
 * failing bpf_bind() is recorded in has_error. Always returns 1.
 */
SEC("cgroup/connect6")
int connect_v6_prog(struct bpf_sock_addr *ctx)
{
	struct sockaddr_in6 bind_addr = {
		.sin6_family = AF_INET6,
		.sin6_addr = BINDADDR_V6,
	};

	__sync_fetch_and_add(&invocations_v6, 1);

	if (!do_bind)
		return 1;

	if (bpf_bind(ctx, (struct sockaddr *)&bind_addr, sizeof(bind_addr)))
		has_error = 1;

	return 1;
}
char _license[] SEC("license") = "GPL";
...@@ -943,6 +943,23 @@ int trigger_module_test_write(int write_sz) ...@@ -943,6 +943,23 @@ int trigger_module_test_write(int write_sz)
return 0; return 0;
} }
/* Write the string @value to the file at path @sysctl.
 *
 * Returns 0 when the whole string was written by a single write(), and -1
 * when the file cannot be opened or the write fails or is short. Both
 * failure cases are reported through the ASSERT_* macros.
 */
int write_sysctl(const char *sysctl, const char *value)
{
	int sysctl_fd, written, value_len;

	sysctl_fd = open(sysctl, O_WRONLY);
	if (!ASSERT_NEQ(sysctl_fd, -1, "open sysctl"))
		return -1;

	value_len = strlen(value);
	written = write(sysctl_fd, value, value_len);
	close(sysctl_fd);

	return ASSERT_EQ(written, value_len, "write sysctl") ? 0 : -1;
}
#define MAX_BACKTRACE_SZ 128 #define MAX_BACKTRACE_SZ 128
void crash_handler(int signum) void crash_handler(int signum)
{ {
......
...@@ -384,6 +384,7 @@ int extract_build_id(char *build_id, size_t size); ...@@ -384,6 +384,7 @@ int extract_build_id(char *build_id, size_t size);
int kern_sync_rcu(void); int kern_sync_rcu(void);
int trigger_module_test_read(int read_sz); int trigger_module_test_read(int read_sz);
int trigger_module_test_write(int write_sz); int trigger_module_test_write(int write_sz);
int write_sysctl(const char *sysctl, const char *value);
#ifdef __x86_64__ #ifdef __x86_64__
#define SYS_NANOSLEEP_KPROBE_NAME "__x64_sys_nanosleep" #define SYS_NANOSLEEP_KPROBE_NAME "__x64_sys_nanosleep"
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment