Commit 26859240 authored by Akhmat Karakotov, committed by David S. Miller

txhash: Add socket option to control TX hash rethink behavior

Add the SO_TXREHASH socket option to control hash rethink behavior per socket.
When default mode is set, sockets disable rehash at initialization and use
sysctl option when entering listen state. setsockopt() overrides default
behavior.
Signed-off-by: Akhmat Karakotov <hmukos@yandex-team.ru>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent e187013a
...@@ -133,6 +133,8 @@ ...@@ -133,6 +133,8 @@
#define SO_RESERVE_MEM 73 #define SO_RESERVE_MEM 73
#define SO_TXREHASH 74
#if !defined(__KERNEL__) #if !defined(__KERNEL__)
#if __BITS_PER_LONG == 64 #if __BITS_PER_LONG == 64
......
...@@ -144,6 +144,8 @@ ...@@ -144,6 +144,8 @@
#define SO_RESERVE_MEM 73 #define SO_RESERVE_MEM 73
#define SO_TXREHASH 74
#if !defined(__KERNEL__) #if !defined(__KERNEL__)
#if __BITS_PER_LONG == 64 #if __BITS_PER_LONG == 64
......
...@@ -125,6 +125,8 @@ ...@@ -125,6 +125,8 @@
#define SO_RESERVE_MEM 0x4047 #define SO_RESERVE_MEM 0x4047
#define SO_TXREHASH 0x4048
#if !defined(__KERNEL__) #if !defined(__KERNEL__)
#if __BITS_PER_LONG == 64 #if __BITS_PER_LONG == 64
......
...@@ -126,6 +126,8 @@ ...@@ -126,6 +126,8 @@
#define SO_RESERVE_MEM 0x0052 #define SO_RESERVE_MEM 0x0052
#define SO_TXREHASH 0x0053
#if !defined(__KERNEL__) #if !defined(__KERNEL__)
......
...@@ -316,6 +316,7 @@ struct sk_filter; ...@@ -316,6 +316,7 @@ struct sk_filter;
* @sk_rcvtimeo: %SO_RCVTIMEO setting * @sk_rcvtimeo: %SO_RCVTIMEO setting
* @sk_sndtimeo: %SO_SNDTIMEO setting * @sk_sndtimeo: %SO_SNDTIMEO setting
* @sk_txhash: computed flow hash for use on transmit * @sk_txhash: computed flow hash for use on transmit
* @sk_txrehash: enable TX hash rethink
* @sk_filter: socket filtering instructions * @sk_filter: socket filtering instructions
* @sk_timer: sock cleanup timer * @sk_timer: sock cleanup timer
* @sk_stamp: time stamp of last packet received * @sk_stamp: time stamp of last packet received
...@@ -491,6 +492,7 @@ struct sock { ...@@ -491,6 +492,7 @@ struct sock {
u32 sk_ack_backlog; u32 sk_ack_backlog;
u32 sk_max_ack_backlog; u32 sk_max_ack_backlog;
kuid_t sk_uid; kuid_t sk_uid;
u8 sk_txrehash;
#ifdef CONFIG_NET_RX_BUSY_POLL #ifdef CONFIG_NET_RX_BUSY_POLL
u8 sk_prefer_busy_poll; u8 sk_prefer_busy_poll;
u16 sk_busy_poll_budget; u16 sk_busy_poll_budget;
...@@ -2066,18 +2068,10 @@ static inline void sk_set_txhash(struct sock *sk) ...@@ -2066,18 +2068,10 @@ static inline void sk_set_txhash(struct sock *sk)
static inline bool sk_rethink_txhash(struct sock *sk) static inline bool sk_rethink_txhash(struct sock *sk)
{ {
u8 rehash; if (sk->sk_txhash && sk->sk_txrehash == SOCK_TXREHASH_ENABLED) {
if (!sk->sk_txhash)
return false;
rehash = READ_ONCE(sock_net(sk)->core.sysctl_txrehash);
if (rehash) {
sk_set_txhash(sk); sk_set_txhash(sk);
return true; return true;
} }
return false; return false;
} }
......
...@@ -128,6 +128,8 @@ ...@@ -128,6 +128,8 @@
#define SO_RESERVE_MEM 73 #define SO_RESERVE_MEM 73
#define SO_TXREHASH 74
#if !defined(__KERNEL__) #if !defined(__KERNEL__)
#if __BITS_PER_LONG == 64 || (defined(__x86_64__) && defined(__ILP32__)) #if __BITS_PER_LONG == 64 || (defined(__x86_64__) && defined(__ILP32__))
......
...@@ -31,6 +31,7 @@ struct __kernel_sockaddr_storage { ...@@ -31,6 +31,7 @@ struct __kernel_sockaddr_storage {
#define SOCK_BUF_LOCK_MASK (SOCK_SNDBUF_LOCK | SOCK_RCVBUF_LOCK) #define SOCK_BUF_LOCK_MASK (SOCK_SNDBUF_LOCK | SOCK_RCVBUF_LOCK)
#define SOCK_TXREHASH_DEFAULT ((u8)-1)
#define SOCK_TXREHASH_DISABLED 0 #define SOCK_TXREHASH_DISABLED 0
#define SOCK_TXREHASH_ENABLED 1 #define SOCK_TXREHASH_ENABLED 1
......
...@@ -1447,6 +1447,14 @@ int sock_setsockopt(struct socket *sock, int level, int optname, ...@@ -1447,6 +1447,14 @@ int sock_setsockopt(struct socket *sock, int level, int optname,
break; break;
} }
case SO_TXREHASH:
if (val < -1 || val > 1) {
ret = -EINVAL;
break;
}
sk->sk_txrehash = (u8)val;
break;
default: default:
ret = -ENOPROTOOPT; ret = -ENOPROTOOPT;
break; break;
...@@ -1834,6 +1842,10 @@ int sock_getsockopt(struct socket *sock, int level, int optname, ...@@ -1834,6 +1842,10 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
v.val = sk->sk_reserved_mem; v.val = sk->sk_reserved_mem;
break; break;
case SO_TXREHASH:
v.val = sk->sk_txrehash;
break;
default: default:
/* We implement the SO_SNDLOWAT etc to not be settable /* We implement the SO_SNDLOWAT etc to not be settable
* (1003.1g 7). * (1003.1g 7).
...@@ -3279,6 +3291,7 @@ void sock_init_data(struct socket *sock, struct sock *sk) ...@@ -3279,6 +3291,7 @@ void sock_init_data(struct socket *sock, struct sock *sk)
sk->sk_pacing_rate = ~0UL; sk->sk_pacing_rate = ~0UL;
WRITE_ONCE(sk->sk_pacing_shift, 10); WRITE_ONCE(sk->sk_pacing_shift, 10);
sk->sk_incoming_cpu = -1; sk->sk_incoming_cpu = -1;
sk->sk_txrehash = SOCK_TXREHASH_DEFAULT;
sk_rx_queue_clear(sk); sk_rx_queue_clear(sk);
/* /*
......
...@@ -1046,6 +1046,9 @@ int inet_csk_listen_start(struct sock *sk) ...@@ -1046,6 +1046,9 @@ int inet_csk_listen_start(struct sock *sk)
sk->sk_ack_backlog = 0; sk->sk_ack_backlog = 0;
inet_csk_delack_init(sk); inet_csk_delack_init(sk);
if (sk->sk_txrehash == SOCK_TXREHASH_DEFAULT)
sk->sk_txrehash = READ_ONCE(sock_net(sk)->core.sysctl_txrehash);
/* There is race window here: we announce ourselves listening, /* There is race window here: we announce ourselves listening,
* but this transition is still not validated by get_port(). * but this transition is still not validated by get_port().
* It is OK, because this socket enters to hash table only * It is OK, because this socket enters to hash table only
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment