Commit 02bc2b64 authored by Alexei Starovoitov

Merge branch 'setsockopt-extra-mem'

Stanislav Fomichev says:

====================
The current setsockopt hook is limited to the size of the buffer that
the user supplied. Since we always allocate memory and copy the value
into kernel space, allocate just a little bit more in case the BPF
program needs to override the input data with a larger value.

The canonical example is the TCP_CONGESTION socket option, where the
input buffer is a string: if the user calls it with a short string,
the BPF program has no way of extending it.

The tests are extended with a TCP_CONGESTION use case.
====================
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
parents a98bf573 fd5ef31f
...@@ -964,7 +964,6 @@ static int sockopt_alloc_buf(struct bpf_sockopt_kern *ctx, int max_optlen) ...@@ -964,7 +964,6 @@ static int sockopt_alloc_buf(struct bpf_sockopt_kern *ctx, int max_optlen)
return -ENOMEM; return -ENOMEM;
ctx->optval_end = ctx->optval + max_optlen; ctx->optval_end = ctx->optval + max_optlen;
ctx->optlen = max_optlen;
return 0; return 0;
} }
...@@ -984,7 +983,7 @@ int __cgroup_bpf_run_filter_setsockopt(struct sock *sk, int *level, ...@@ -984,7 +983,7 @@ int __cgroup_bpf_run_filter_setsockopt(struct sock *sk, int *level,
.level = *level, .level = *level,
.optname = *optname, .optname = *optname,
}; };
int ret; int ret, max_optlen;
/* Opportunistic check to see whether we have any BPF program /* Opportunistic check to see whether we have any BPF program
* attached to the hook so we don't waste time allocating * attached to the hook so we don't waste time allocating
...@@ -994,10 +993,18 @@ int __cgroup_bpf_run_filter_setsockopt(struct sock *sk, int *level, ...@@ -994,10 +993,18 @@ int __cgroup_bpf_run_filter_setsockopt(struct sock *sk, int *level,
__cgroup_bpf_prog_array_is_empty(cgrp, BPF_CGROUP_SETSOCKOPT)) __cgroup_bpf_prog_array_is_empty(cgrp, BPF_CGROUP_SETSOCKOPT))
return 0; return 0;
ret = sockopt_alloc_buf(&ctx, *optlen); /* Allocate a bit more than the initial user buffer for
* BPF program. The canonical use case is overriding
* TCP_CONGESTION(nv) to TCP_CONGESTION(cubic).
*/
max_optlen = max_t(int, 16, *optlen);
ret = sockopt_alloc_buf(&ctx, max_optlen);
if (ret) if (ret)
return ret; return ret;
ctx.optlen = *optlen;
if (copy_from_user(ctx.optval, optval, *optlen) != 0) { if (copy_from_user(ctx.optval, optval, *optlen) != 0) {
ret = -EFAULT; ret = -EFAULT;
goto out; goto out;
...@@ -1016,7 +1023,7 @@ int __cgroup_bpf_run_filter_setsockopt(struct sock *sk, int *level, ...@@ -1016,7 +1023,7 @@ int __cgroup_bpf_run_filter_setsockopt(struct sock *sk, int *level,
if (ctx.optlen == -1) { if (ctx.optlen == -1) {
/* optlen set to -1, bypass kernel */ /* optlen set to -1, bypass kernel */
ret = 1; ret = 1;
} else if (ctx.optlen > *optlen || ctx.optlen < -1) { } else if (ctx.optlen > max_optlen || ctx.optlen < -1) {
/* optlen is out of bounds */ /* optlen is out of bounds */
ret = -EFAULT; ret = -EFAULT;
} else { } else {
...@@ -1063,6 +1070,8 @@ int __cgroup_bpf_run_filter_getsockopt(struct sock *sk, int level, ...@@ -1063,6 +1070,8 @@ int __cgroup_bpf_run_filter_getsockopt(struct sock *sk, int level,
if (ret) if (ret)
return ret; return ret;
ctx.optlen = max_optlen;
if (!retval) { if (!retval) {
/* If kernel getsockopt finished successfully, /* If kernel getsockopt finished successfully,
* copy whatever was returned to the user back * copy whatever was returned to the user back
......
// SPDX-License-Identifier: GPL-2.0 // SPDX-License-Identifier: GPL-2.0
#include <string.h>
#include <netinet/in.h> #include <netinet/in.h>
#include <netinet/tcp.h>
#include <linux/bpf.h> #include <linux/bpf.h>
#include "bpf_helpers.h" #include "bpf_helpers.h"
...@@ -42,6 +44,14 @@ int _getsockopt(struct bpf_sockopt *ctx) ...@@ -42,6 +44,14 @@ int _getsockopt(struct bpf_sockopt *ctx)
return 1; return 1;
} }
if (ctx->level == SOL_TCP && ctx->optname == TCP_CONGESTION) {
/* Not interested in SOL_TCP:TCP_CONGESTION;
* let next BPF program in the cgroup chain or kernel
* handle it.
*/
return 1;
}
if (ctx->level != SOL_CUSTOM) if (ctx->level != SOL_CUSTOM)
return 0; /* EPERM, deny everything except custom level */ return 0; /* EPERM, deny everything except custom level */
...@@ -91,6 +101,18 @@ int _setsockopt(struct bpf_sockopt *ctx) ...@@ -91,6 +101,18 @@ int _setsockopt(struct bpf_sockopt *ctx)
return 1; return 1;
} }
if (ctx->level == SOL_TCP && ctx->optname == TCP_CONGESTION) {
/* Always use cubic */
if (optval + 5 > optval_end)
return 0; /* EPERM, bounds check */
memcpy(optval, "cubic", 5);
ctx->optlen = 5;
return 1;
}
if (ctx->level != SOL_CUSTOM) if (ctx->level != SOL_CUSTOM)
return 0; /* EPERM, deny everything except custom level */ return 0; /* EPERM, deny everything except custom level */
......
...@@ -6,6 +6,7 @@ ...@@ -6,6 +6,7 @@
#include <sys/types.h> #include <sys/types.h>
#include <sys/socket.h> #include <sys/socket.h>
#include <netinet/in.h> #include <netinet/in.h>
#include <netinet/tcp.h>
#include <linux/filter.h> #include <linux/filter.h>
#include <bpf/bpf.h> #include <bpf/bpf.h>
...@@ -25,6 +26,7 @@ static int getsetsockopt(void) ...@@ -25,6 +26,7 @@ static int getsetsockopt(void)
union { union {
char u8[4]; char u8[4];
__u32 u32; __u32 u32;
char cc[16]; /* TCP_CA_NAME_MAX */
} buf = {}; } buf = {};
socklen_t optlen; socklen_t optlen;
...@@ -115,6 +117,29 @@ static int getsetsockopt(void) ...@@ -115,6 +117,29 @@ static int getsetsockopt(void)
goto err; goto err;
} }
/* TCP_CONGESTION can extend the string */
strcpy(buf.cc, "nv");
err = setsockopt(fd, SOL_TCP, TCP_CONGESTION, &buf, strlen("nv"));
if (err) {
log_err("Failed to call setsockopt(TCP_CONGESTION)");
goto err;
}
optlen = sizeof(buf.cc);
err = getsockopt(fd, SOL_TCP, TCP_CONGESTION, &buf, &optlen);
if (err) {
log_err("Failed to call getsockopt(TCP_CONGESTION)");
goto err;
}
if (strcmp(buf.cc, "cubic") != 0) {
log_err("Unexpected getsockopt(TCP_CONGESTION) %s != %s",
buf.cc, "cubic");
goto err;
}
close(fd); close(fd);
return 0; return 0;
err: err:
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment