Commit 54751f4d authored by David S. Miller

Merge branch 'tcp-rto-min-us'

Kevin Yang says:

====================
tcp: add sysctl_tcp_rto_min_us

Add a sysctl knob that allows the user to specify a default
rto_min at socket init time.

After this patch series, rto_min will have multiple sources:
route option has the highest precedence, followed by the
TCP_BPF_RTO_MIN socket option, followed by this new
tcp_rto_min_us sysctl.

v3:
    fix typo, simplify min/max_t to min/max

v2:
    fit line width to 80 column.

v2: https://lore.kernel.org/netdev/20240530153436.2202800-1-yyd@google.com/
v1: https://lore.kernel.org/netdev/20240528171320.1332292-1-yyd@google.com/
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents 69e0b33a f086edef
...@@ -1196,6 +1196,19 @@ tcp_pingpong_thresh - INTEGER ...@@ -1196,6 +1196,19 @@ tcp_pingpong_thresh - INTEGER
Default: 1 Default: 1
tcp_rto_min_us - INTEGER
Minimal TCP retransmission timeout (in microseconds). Note that the
rto_min route option has the highest precedence for configuring this
setting, followed by the TCP_BPF_RTO_MIN socket option, followed by
this tcp_rto_min_us sysctl.
The recommended practice is to use a value less than or equal to 200000
microseconds.
Possible Values: 1 - INT_MAX
Default: 200000
UDP variables UDP variables
============= =============
......
...@@ -170,6 +170,7 @@ struct netns_ipv4 { ...@@ -170,6 +170,7 @@ struct netns_ipv4 {
u8 sysctl_tcp_sack; u8 sysctl_tcp_sack;
u8 sysctl_tcp_window_scaling; u8 sysctl_tcp_window_scaling;
u8 sysctl_tcp_timestamps; u8 sysctl_tcp_timestamps;
int sysctl_tcp_rto_min_us;
u8 sysctl_tcp_recovery; u8 sysctl_tcp_recovery;
u8 sysctl_tcp_thin_linear_timeouts; u8 sysctl_tcp_thin_linear_timeouts;
u8 sysctl_tcp_slow_start_after_idle; u8 sysctl_tcp_slow_start_after_idle;
......
...@@ -1503,6 +1503,14 @@ static struct ctl_table ipv4_net_table[] = { ...@@ -1503,6 +1503,14 @@ static struct ctl_table ipv4_net_table[] = {
.proc_handler = proc_dou8vec_minmax, .proc_handler = proc_dou8vec_minmax,
.extra1 = SYSCTL_ONE, .extra1 = SYSCTL_ONE,
}, },
{
.procname = "tcp_rto_min_us",
.data = &init_net.ipv4.sysctl_tcp_rto_min_us,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
.extra1 = SYSCTL_ONE,
},
}; };
static __net_init int ipv4_sysctl_init_net(struct net *net) static __net_init int ipv4_sysctl_init_net(struct net *net)
......
...@@ -420,6 +420,7 @@ void tcp_init_sock(struct sock *sk) ...@@ -420,6 +420,7 @@ void tcp_init_sock(struct sock *sk)
{ {
struct inet_connection_sock *icsk = inet_csk(sk); struct inet_connection_sock *icsk = inet_csk(sk);
struct tcp_sock *tp = tcp_sk(sk); struct tcp_sock *tp = tcp_sk(sk);
int rto_min_us;
tp->out_of_order_queue = RB_ROOT; tp->out_of_order_queue = RB_ROOT;
sk->tcp_rtx_queue = RB_ROOT; sk->tcp_rtx_queue = RB_ROOT;
...@@ -428,7 +429,8 @@ void tcp_init_sock(struct sock *sk) ...@@ -428,7 +429,8 @@ void tcp_init_sock(struct sock *sk)
INIT_LIST_HEAD(&tp->tsorted_sent_queue); INIT_LIST_HEAD(&tp->tsorted_sent_queue);
icsk->icsk_rto = TCP_TIMEOUT_INIT; icsk->icsk_rto = TCP_TIMEOUT_INIT;
icsk->icsk_rto_min = TCP_RTO_MIN; rto_min_us = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rto_min_us);
icsk->icsk_rto_min = usecs_to_jiffies(rto_min_us);
icsk->icsk_delack_max = TCP_DELACK_MAX; icsk->icsk_delack_max = TCP_DELACK_MAX;
tp->mdev_us = jiffies_to_usecs(TCP_TIMEOUT_INIT); tp->mdev_us = jiffies_to_usecs(TCP_TIMEOUT_INIT);
minmax_reset(&tp->rtt_min, tcp_jiffies32, ~0U); minmax_reset(&tp->rtt_min, tcp_jiffies32, ~0U);
......
...@@ -3502,6 +3502,7 @@ static int __net_init tcp_sk_init(struct net *net) ...@@ -3502,6 +3502,7 @@ static int __net_init tcp_sk_init(struct net *net)
net->ipv4.sysctl_tcp_shrink_window = 0; net->ipv4.sysctl_tcp_shrink_window = 0;
net->ipv4.sysctl_tcp_pingpong_thresh = 1; net->ipv4.sysctl_tcp_pingpong_thresh = 1;
net->ipv4.sysctl_tcp_rto_min_us = jiffies_to_usecs(TCP_RTO_MIN);
return 0; return 0;
} }
......
...@@ -4163,16 +4163,9 @@ EXPORT_SYMBOL(tcp_connect); ...@@ -4163,16 +4163,9 @@ EXPORT_SYMBOL(tcp_connect);
u32 tcp_delack_max(const struct sock *sk) u32 tcp_delack_max(const struct sock *sk)
{ {
const struct dst_entry *dst = __sk_dst_get(sk); u32 delack_from_rto_min = max(tcp_rto_min(sk), 2) - 1;
u32 delack_max = inet_csk(sk)->icsk_delack_max;
if (dst && dst_metric_locked(dst, RTAX_RTO_MIN)) {
u32 rto_min = dst_metric_rtt(dst, RTAX_RTO_MIN);
u32 delack_from_rto_min = max_t(int, 1, rto_min - 1);
delack_max = min_t(u32, delack_max, delack_from_rto_min); return min(inet_csk(sk)->icsk_delack_max, delack_from_rto_min);
}
return delack_max;
} }
/* Send out a delayed ack, the caller does the policy checking /* Send out a delayed ack, the caller does the policy checking
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment