Commit a2cbb160 authored by Subash Abhinov Kasiviswanathan, committed by David S. Miller

tcp: Update window clamping condition

This patch is based on the discussions between Neal Cardwell and
Eric Dumazet in the link
https://lore.kernel.org/netdev/20240726204105.1466841-1-quic_subashab@quicinc.com/

It was correctly pointed out that tp->window_clamp would not be
updated in cases where net.ipv4.tcp_moderate_rcvbuf=0 or if
(copied <= tp->rcvq_space.space). While most setups are expected
to leave the sysctl enabled, the latter condition can still keep
the update from ever being reached, depending on the TCP receive
queue size and the pattern of arriving data.

The updated check should be hit only on the initial MSS update from
TCP_MIN_MSS to the measured MSS value, and subsequently only if the
MSS is updated to a larger value.

Fixes: 05f76b2d ("tcp: Adjust clamping window for applications specifying SO_RCVBUF")
Signed-off-by: Sean Tranchetti <quic_stranche@quicinc.com>
Signed-off-by: Subash Abhinov Kasiviswanathan <quic_subashab@quicinc.com>
Acked-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent 655111b8
...@@ -238,9 +238,14 @@ static void tcp_measure_rcv_mss(struct sock *sk, const struct sk_buff *skb) ...@@ -238,9 +238,14 @@ static void tcp_measure_rcv_mss(struct sock *sk, const struct sk_buff *skb)
*/ */
if (unlikely(len != icsk->icsk_ack.rcv_mss)) { if (unlikely(len != icsk->icsk_ack.rcv_mss)) {
u64 val = (u64)skb->len << TCP_RMEM_TO_WIN_SCALE; u64 val = (u64)skb->len << TCP_RMEM_TO_WIN_SCALE;
u8 old_ratio = tcp_sk(sk)->scaling_ratio;
do_div(val, skb->truesize); do_div(val, skb->truesize);
tcp_sk(sk)->scaling_ratio = val ? val : 1; tcp_sk(sk)->scaling_ratio = val ? val : 1;
if (old_ratio != tcp_sk(sk)->scaling_ratio)
WRITE_ONCE(tcp_sk(sk)->window_clamp,
tcp_win_from_space(sk, sk->sk_rcvbuf));
} }
icsk->icsk_ack.rcv_mss = min_t(unsigned int, len, icsk->icsk_ack.rcv_mss = min_t(unsigned int, len,
tcp_sk(sk)->advmss); tcp_sk(sk)->advmss);
...@@ -754,7 +759,8 @@ void tcp_rcv_space_adjust(struct sock *sk) ...@@ -754,7 +759,8 @@ void tcp_rcv_space_adjust(struct sock *sk)
* <prev RTT . ><current RTT .. ><next RTT .... > * <prev RTT . ><current RTT .. ><next RTT .... >
*/ */
if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_moderate_rcvbuf)) { if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_moderate_rcvbuf) &&
!(sk->sk_userlocks & SOCK_RCVBUF_LOCK)) {
u64 rcvwin, grow; u64 rcvwin, grow;
int rcvbuf; int rcvbuf;
...@@ -770,22 +776,12 @@ void tcp_rcv_space_adjust(struct sock *sk) ...@@ -770,22 +776,12 @@ void tcp_rcv_space_adjust(struct sock *sk)
rcvbuf = min_t(u64, tcp_space_from_win(sk, rcvwin), rcvbuf = min_t(u64, tcp_space_from_win(sk, rcvwin),
READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rmem[2])); READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rmem[2]));
if (!(sk->sk_userlocks & SOCK_RCVBUF_LOCK)) { if (rcvbuf > sk->sk_rcvbuf) {
if (rcvbuf > sk->sk_rcvbuf) { WRITE_ONCE(sk->sk_rcvbuf, rcvbuf);
WRITE_ONCE(sk->sk_rcvbuf, rcvbuf);
/* Make the window clamp follow along. */
WRITE_ONCE(tp->window_clamp,
tcp_win_from_space(sk, rcvbuf));
}
} else {
/* Make the window clamp follow along while being bounded
* by SO_RCVBUF.
*/
int clamp = tcp_win_from_space(sk, min(rcvbuf, sk->sk_rcvbuf));
if (clamp > tp->window_clamp) /* Make the window clamp follow along. */
WRITE_ONCE(tp->window_clamp, clamp); WRITE_ONCE(tp->window_clamp,
tcp_win_from_space(sk, rcvbuf));
} }
} }
tp->rcvq_space.space = copied; tp->rcvq_space.space = copied;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment