Commit f9bcf968, authored by Dmitry Yakunin, committed by Alexei Starovoitov

bpf: Add SO_KEEPALIVE and related options to bpf_setsockopt

This patch adds support for the SO_KEEPALIVE flag and related TCP options
to the bpf_setsockopt() routine. This is helpful if we want to enable or tune
TCP keepalive for applications which don't do it in their userspace code.

v3:
  - update kernel-doc in uapi (Nikita Vetoshkin <nekto0n@yandex-team.ru>)

v4:
  - update kernel-doc in tools too (Alexei Starovoitov)
  - add test to selftests (Alexei Starovoitov)
Signed-off-by: Dmitry Yakunin <zeil@yandex-team.ru>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Link: https://lore.kernel.org/bpf/20200620153052.9439-3-zeil@yandex-team.ru
parent aad4a0a9
...@@ -1621,10 +1621,13 @@ union bpf_attr { ...@@ -1621,10 +1621,13 @@ union bpf_attr {
* *
* * **SOL_SOCKET**, which supports the following *optname*\ s: * * **SOL_SOCKET**, which supports the following *optname*\ s:
* **SO_RCVBUF**, **SO_SNDBUF**, **SO_MAX_PACING_RATE**, * **SO_RCVBUF**, **SO_SNDBUF**, **SO_MAX_PACING_RATE**,
* **SO_PRIORITY**, **SO_RCVLOWAT**, **SO_MARK**. * **SO_PRIORITY**, **SO_RCVLOWAT**, **SO_MARK**,
* **SO_BINDTODEVICE**, **SO_KEEPALIVE**.
* * **IPPROTO_TCP**, which supports the following *optname*\ s: * * **IPPROTO_TCP**, which supports the following *optname*\ s:
* **TCP_CONGESTION**, **TCP_BPF_IW**, * **TCP_CONGESTION**, **TCP_BPF_IW**,
* **TCP_BPF_SNDCWND_CLAMP**. * **TCP_BPF_SNDCWND_CLAMP**, **TCP_SAVE_SYN**,
* **TCP_KEEPIDLE**, **TCP_KEEPINTVL**, **TCP_KEEPCNT**,
* **TCP_SYNCNT**, **TCP_USER_TIMEOUT**.
* * **IPPROTO_IP**, which supports *optname* **IP_TOS**. * * **IPPROTO_IP**, which supports *optname* **IP_TOS**.
* * **IPPROTO_IPV6**, which supports *optname* **IPV6_TCLASS**. * * **IPPROTO_IPV6**, which supports *optname* **IPV6_TCLASS**.
* Return * Return
......
...@@ -4289,10 +4289,10 @@ static int _bpf_setsockopt(struct sock *sk, int level, int optname, ...@@ -4289,10 +4289,10 @@ static int _bpf_setsockopt(struct sock *sk, int level, int optname,
char *optval, int optlen, u32 flags) char *optval, int optlen, u32 flags)
{ {
char devname[IFNAMSIZ]; char devname[IFNAMSIZ];
int val, valbool;
struct net *net; struct net *net;
int ifindex; int ifindex;
int ret = 0; int ret = 0;
int val;
if (!sk_fullsock(sk)) if (!sk_fullsock(sk))
return -EINVAL; return -EINVAL;
...@@ -4303,6 +4303,7 @@ static int _bpf_setsockopt(struct sock *sk, int level, int optname, ...@@ -4303,6 +4303,7 @@ static int _bpf_setsockopt(struct sock *sk, int level, int optname,
if (optlen != sizeof(int) && optname != SO_BINDTODEVICE) if (optlen != sizeof(int) && optname != SO_BINDTODEVICE)
return -EINVAL; return -EINVAL;
val = *((int *)optval); val = *((int *)optval);
valbool = val ? 1 : 0;
/* Only some socketops are supported */ /* Only some socketops are supported */
switch (optname) { switch (optname) {
...@@ -4361,6 +4362,11 @@ static int _bpf_setsockopt(struct sock *sk, int level, int optname, ...@@ -4361,6 +4362,11 @@ static int _bpf_setsockopt(struct sock *sk, int level, int optname,
} }
ret = sock_bindtoindex(sk, ifindex, false); ret = sock_bindtoindex(sk, ifindex, false);
break; break;
case SO_KEEPALIVE:
if (sk->sk_prot->keepalive)
sk->sk_prot->keepalive(sk, valbool);
sock_valbool_flag(sk, SOCK_KEEPOPEN, valbool);
break;
default: default:
ret = -EINVAL; ret = -EINVAL;
} }
...@@ -4421,6 +4427,7 @@ static int _bpf_setsockopt(struct sock *sk, int level, int optname, ...@@ -4421,6 +4427,7 @@ static int _bpf_setsockopt(struct sock *sk, int level, int optname,
ret = tcp_set_congestion_control(sk, name, false, ret = tcp_set_congestion_control(sk, name, false,
reinit, true); reinit, true);
} else { } else {
struct inet_connection_sock *icsk = inet_csk(sk);
struct tcp_sock *tp = tcp_sk(sk); struct tcp_sock *tp = tcp_sk(sk);
if (optlen != sizeof(int)) if (optlen != sizeof(int))
...@@ -4449,6 +4456,33 @@ static int _bpf_setsockopt(struct sock *sk, int level, int optname, ...@@ -4449,6 +4456,33 @@ static int _bpf_setsockopt(struct sock *sk, int level, int optname,
else else
tp->save_syn = val; tp->save_syn = val;
break; break;
case TCP_KEEPIDLE:
ret = tcp_sock_set_keepidle_locked(sk, val);
break;
case TCP_KEEPINTVL:
if (val < 1 || val > MAX_TCP_KEEPINTVL)
ret = -EINVAL;
else
tp->keepalive_intvl = val * HZ;
break;
case TCP_KEEPCNT:
if (val < 1 || val > MAX_TCP_KEEPCNT)
ret = -EINVAL;
else
tp->keepalive_probes = val;
break;
case TCP_SYNCNT:
if (val < 1 || val > MAX_TCP_SYNCNT)
ret = -EINVAL;
else
icsk->icsk_syn_retries = val;
break;
case TCP_USER_TIMEOUT:
if (val < 0)
ret = -EINVAL;
else
icsk->icsk_user_timeout = val;
break;
default: default:
ret = -EINVAL; ret = -EINVAL;
} }
......
...@@ -1621,10 +1621,13 @@ union bpf_attr { ...@@ -1621,10 +1621,13 @@ union bpf_attr {
* *
* * **SOL_SOCKET**, which supports the following *optname*\ s: * * **SOL_SOCKET**, which supports the following *optname*\ s:
* **SO_RCVBUF**, **SO_SNDBUF**, **SO_MAX_PACING_RATE**, * **SO_RCVBUF**, **SO_SNDBUF**, **SO_MAX_PACING_RATE**,
* **SO_PRIORITY**, **SO_RCVLOWAT**, **SO_MARK**. * **SO_PRIORITY**, **SO_RCVLOWAT**, **SO_MARK**,
* **SO_BINDTODEVICE**, **SO_KEEPALIVE**.
* * **IPPROTO_TCP**, which supports the following *optname*\ s: * * **IPPROTO_TCP**, which supports the following *optname*\ s:
* **TCP_CONGESTION**, **TCP_BPF_IW**, * **TCP_CONGESTION**, **TCP_BPF_IW**,
* **TCP_BPF_SNDCWND_CLAMP**. * **TCP_BPF_SNDCWND_CLAMP**, **TCP_SAVE_SYN**,
* **TCP_KEEPIDLE**, **TCP_KEEPINTVL**, **TCP_KEEPCNT**,
* **TCP_SYNCNT**, **TCP_USER_TIMEOUT**.
* * **IPPROTO_IP**, which supports *optname* **IP_TOS**. * * **IPPROTO_IP**, which supports *optname* **IP_TOS**.
* * **IPPROTO_IPV6**, which supports *optname* **IPV6_TCLASS**. * * **IPPROTO_IPV6**, which supports *optname* **IPV6_TCLASS**.
* Return * Return
......
...@@ -104,6 +104,30 @@ static __inline int bind_to_device(struct bpf_sock_addr *ctx) ...@@ -104,6 +104,30 @@ static __inline int bind_to_device(struct bpf_sock_addr *ctx)
return 0; return 0;
} }
/*
 * Exercise the keepalive-related options of bpf_setsockopt() on @ctx.
 *
 * SO_KEEPALIVE is switched on first; when ctx->type is SOCK_STREAM the
 * TCP-level options are then each set to 1, in a fixed order; finally
 * SO_KEEPALIVE is switched back off.
 *
 * Returns 0 when every bpf_setsockopt() call returned 0, and 1 as soon as
 * any call returns non-zero (the remaining calls are not attempted).
 */
static __inline int set_keepalive(struct bpf_sock_addr *ctx)
{
	int enable = 1;
	int disable = 0;

	if (bpf_setsockopt(ctx, SOL_SOCKET, SO_KEEPALIVE, &enable, sizeof(enable)) != 0)
		return 1;

	/*
	 * The TCP options are only tried on stream sockets. Short-circuit
	 * evaluation keeps the calls in order and stops at the first failure.
	 */
	if (ctx->type == SOCK_STREAM &&
	    (bpf_setsockopt(ctx, SOL_TCP, TCP_KEEPIDLE, &enable, sizeof(enable)) != 0 ||
	     bpf_setsockopt(ctx, SOL_TCP, TCP_KEEPINTVL, &enable, sizeof(enable)) != 0 ||
	     bpf_setsockopt(ctx, SOL_TCP, TCP_KEEPCNT, &enable, sizeof(enable)) != 0 ||
	     bpf_setsockopt(ctx, SOL_TCP, TCP_SYNCNT, &enable, sizeof(enable)) != 0 ||
	     bpf_setsockopt(ctx, SOL_TCP, TCP_USER_TIMEOUT, &enable, sizeof(enable)) != 0))
		return 1;

	/* Turn keepalive back off before returning. */
	if (bpf_setsockopt(ctx, SOL_SOCKET, SO_KEEPALIVE, &disable, sizeof(disable)) != 0)
		return 1;

	return 0;
}
SEC("cgroup/connect4") SEC("cgroup/connect4")
int connect_v4_prog(struct bpf_sock_addr *ctx) int connect_v4_prog(struct bpf_sock_addr *ctx)
{ {
...@@ -121,6 +145,9 @@ int connect_v4_prog(struct bpf_sock_addr *ctx) ...@@ -121,6 +145,9 @@ int connect_v4_prog(struct bpf_sock_addr *ctx)
if (bind_to_device(ctx)) if (bind_to_device(ctx))
return 0; return 0;
if (set_keepalive(ctx))
return 0;
if (ctx->type != SOCK_STREAM && ctx->type != SOCK_DGRAM) if (ctx->type != SOCK_STREAM && ctx->type != SOCK_DGRAM)
return 0; return 0;
else if (ctx->type == SOCK_STREAM) else if (ctx->type == SOCK_STREAM)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment