Commit 562b1fdf authored by Haiyang Zhang, committed by Jakub Kicinski

tcp: Set pingpong threshold via sysctl

TCP pingpong threshold is 1 by default. But some applications, like SQL DB,
may prefer a higher pingpong threshold to activate delayed acks in quick
ack mode for better performance.

The pingpong threshold and related code were changed to 3 in the year
2019 in:
  commit 4a41f453 ("tcp: change pingpong threshold to 3")
And reverted to 1 in the year 2022 in:
  commit 4d8f24ee ("Revert "tcp: change pingpong threshold to 3"")

There is no single value that fits all applications.
Add net.ipv4.tcp_pingpong_thresh sysctl tunable, so it can be tuned for
optimal performance based on the application needs.
Signed-off-by: Haiyang Zhang <haiyangz@microsoft.com>
Reviewed-by: Simon Horman <horms@kernel.org>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Acked-by: Neal Cardwell <ncardwell@google.com>
Reviewed-by: Kuniyuki Iwashima <kuniyu@amazon.com>
Link: https://lore.kernel.org/r/1697056244-21888-1-git-send-email-haiyangz@microsoft.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
parent 39d08b91
...@@ -1183,6 +1183,19 @@ tcp_plb_cong_thresh - INTEGER ...@@ -1183,6 +1183,19 @@ tcp_plb_cong_thresh - INTEGER
Default: 128 Default: 128
tcp_pingpong_thresh - INTEGER
The number of estimated data replies sent for estimated incoming data
requests that must happen before TCP considers that a connection is a
"ping-pong" (request-response) connection for which delayed
acknowledgments can provide benefits.
This threshold is 1 by default, but some applications may need a higher
threshold for optimal performance.
Possible Values: 1 - 255
Default: 1
UDP variables UDP variables
============= =============
......
...@@ -328,11 +328,10 @@ void inet_csk_update_fastreuse(struct inet_bind_bucket *tb, ...@@ -328,11 +328,10 @@ void inet_csk_update_fastreuse(struct inet_bind_bucket *tb,
struct dst_entry *inet_csk_update_pmtu(struct sock *sk, u32 mtu); struct dst_entry *inet_csk_update_pmtu(struct sock *sk, u32 mtu);
#define TCP_PINGPONG_THRESH 1
static inline void inet_csk_enter_pingpong_mode(struct sock *sk) static inline void inet_csk_enter_pingpong_mode(struct sock *sk)
{ {
inet_csk(sk)->icsk_ack.pingpong = TCP_PINGPONG_THRESH; inet_csk(sk)->icsk_ack.pingpong =
READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_pingpong_thresh);
} }
static inline void inet_csk_exit_pingpong_mode(struct sock *sk) static inline void inet_csk_exit_pingpong_mode(struct sock *sk)
...@@ -342,7 +341,16 @@ static inline void inet_csk_exit_pingpong_mode(struct sock *sk) ...@@ -342,7 +341,16 @@ static inline void inet_csk_exit_pingpong_mode(struct sock *sk)
static inline bool inet_csk_in_pingpong_mode(struct sock *sk) static inline bool inet_csk_in_pingpong_mode(struct sock *sk)
{ {
return inet_csk(sk)->icsk_ack.pingpong >= TCP_PINGPONG_THRESH; return inet_csk(sk)->icsk_ack.pingpong >=
READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_pingpong_thresh);
}
static inline void inet_csk_inc_pingpong_cnt(struct sock *sk)
{
struct inet_connection_sock *icsk = inet_csk(sk);
if (icsk->icsk_ack.pingpong < U8_MAX)
icsk->icsk_ack.pingpong++;
} }
static inline bool inet_csk_has_ulp(const struct sock *sk) static inline bool inet_csk_has_ulp(const struct sock *sk)
......
...@@ -133,6 +133,8 @@ struct netns_ipv4 { ...@@ -133,6 +133,8 @@ struct netns_ipv4 {
u8 sysctl_tcp_migrate_req; u8 sysctl_tcp_migrate_req;
u8 sysctl_tcp_comp_sack_nr; u8 sysctl_tcp_comp_sack_nr;
u8 sysctl_tcp_backlog_ack_defer; u8 sysctl_tcp_backlog_ack_defer;
u8 sysctl_tcp_pingpong_thresh;
int sysctl_tcp_reordering; int sysctl_tcp_reordering;
u8 sysctl_tcp_retries1; u8 sysctl_tcp_retries1;
u8 sysctl_tcp_retries2; u8 sysctl_tcp_retries2;
......
...@@ -1498,6 +1498,14 @@ static struct ctl_table ipv4_net_table[] = { ...@@ -1498,6 +1498,14 @@ static struct ctl_table ipv4_net_table[] = {
.extra1 = SYSCTL_ZERO, .extra1 = SYSCTL_ZERO,
.extra2 = SYSCTL_ONE, .extra2 = SYSCTL_ONE,
}, },
{
.procname = "tcp_pingpong_thresh",
.data = &init_net.ipv4.sysctl_tcp_pingpong_thresh,
.maxlen = sizeof(u8),
.mode = 0644,
.proc_handler = proc_dou8vec_minmax,
.extra1 = SYSCTL_ONE,
},
{ } { }
}; };
......
...@@ -3288,6 +3288,8 @@ static int __net_init tcp_sk_init(struct net *net) ...@@ -3288,6 +3288,8 @@ static int __net_init tcp_sk_init(struct net *net)
net->ipv4.sysctl_tcp_syn_linear_timeouts = 4; net->ipv4.sysctl_tcp_syn_linear_timeouts = 4;
net->ipv4.sysctl_tcp_shrink_window = 0; net->ipv4.sysctl_tcp_shrink_window = 0;
net->ipv4.sysctl_tcp_pingpong_thresh = 1;
return 0; return 0;
} }
......
...@@ -170,10 +170,10 @@ static void tcp_event_data_sent(struct tcp_sock *tp, ...@@ -170,10 +170,10 @@ static void tcp_event_data_sent(struct tcp_sock *tp,
tp->lsndtime = now; tp->lsndtime = now;
/* If it is a reply for ato after last received /* If it is a reply for ato after last received
* packet, enter pingpong mode. * packet, increase pingpong count.
*/ */
if ((u32)(now - icsk->icsk_ack.lrcvtime) < icsk->icsk_ack.ato) if ((u32)(now - icsk->icsk_ack.lrcvtime) < icsk->icsk_ack.ato)
inet_csk_enter_pingpong_mode(sk); inet_csk_inc_pingpong_cnt(sk);
} }
/* Account for an ACK we sent. */ /* Account for an ACK we sent. */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment