Commit 86de5921 authored by Eric Dumazet's avatar Eric Dumazet Committed by David S. Miller

tcp: defer SACK compression after DupThresh

Jean-Louis reported a TCP regression and bisected to recent SACK
compression.

After a loss episode (receiver not able to keep up and dropping
packets because its backlog is full), linux TCP stack is sending
a single SACK (DUPACK).

Sender waits a full RTO timer before recovering losses.

While RFC 6675 says in section 5, "Algorithm Details",

   (2) If DupAcks < DupThresh but IsLost (HighACK + 1) returns true --
       indicating at least three segments have arrived above the current
       cumulative acknowledgment point, which is taken to indicate loss
       -- go to step (4).
...
   (4) Invoke fast retransmit and enter loss recovery as follows:

there are old TCP stacks not implementing this strategy, and
still counting the dupacks before starting fast retransmit.

While these stacks probably perform poorly when receivers implement
LRO/GRO, we should be a little more gentle to them.

This patch makes sure we do not enable SACK compression unless
3 dupacks have been sent since last rcv_nxt update.

Ideally we should even rearm the timer to send one or two
more DUPACK if no more packets are coming, but that will
be work aiming for linux-4.21.

Many thanks to Jean-Louis for bisecting the issue, providing
packet captures and testing this patch.

Fixes: 5d9f4262 ("tcp: add SACK compression")
Reported-by: default avatarJean-Louis Dupond <jean-louis@dupond.be>
Tested-by: default avatarJean-Louis Dupond <jean-louis@dupond.be>
Signed-off-by: default avatarEric Dumazet <edumazet@google.com>
Acked-by: default avatarNeal Cardwell <ncardwell@google.com>
Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parent b5dd186d
...@@ -196,6 +196,7 @@ struct tcp_sock { ...@@ -196,6 +196,7 @@ struct tcp_sock {
u32 rcv_tstamp; /* timestamp of last received ACK (for keepalives) */ u32 rcv_tstamp; /* timestamp of last received ACK (for keepalives) */
u32 lsndtime; /* timestamp of last sent data packet (for restart window) */ u32 lsndtime; /* timestamp of last sent data packet (for restart window) */
u32 last_oow_ack_time; /* timestamp of last out-of-window ACK */ u32 last_oow_ack_time; /* timestamp of last out-of-window ACK */
u32 compressed_ack_rcv_nxt;
u32 tsoffset; /* timestamp offset */ u32 tsoffset; /* timestamp offset */
......
...@@ -4268,7 +4268,7 @@ static void tcp_sack_new_ofo_skb(struct sock *sk, u32 seq, u32 end_seq) ...@@ -4268,7 +4268,7 @@ static void tcp_sack_new_ofo_skb(struct sock *sk, u32 seq, u32 end_seq)
* If the sack array is full, forget about the last one. * If the sack array is full, forget about the last one.
*/ */
if (this_sack >= TCP_NUM_SACKS) { if (this_sack >= TCP_NUM_SACKS) {
if (tp->compressed_ack) if (tp->compressed_ack > TCP_FASTRETRANS_THRESH)
tcp_send_ack(sk); tcp_send_ack(sk);
this_sack--; this_sack--;
tp->rx_opt.num_sacks--; tp->rx_opt.num_sacks--;
...@@ -5189,7 +5189,17 @@ static void __tcp_ack_snd_check(struct sock *sk, int ofo_possible) ...@@ -5189,7 +5189,17 @@ static void __tcp_ack_snd_check(struct sock *sk, int ofo_possible)
if (!tcp_is_sack(tp) || if (!tcp_is_sack(tp) ||
tp->compressed_ack >= sock_net(sk)->ipv4.sysctl_tcp_comp_sack_nr) tp->compressed_ack >= sock_net(sk)->ipv4.sysctl_tcp_comp_sack_nr)
goto send_now; goto send_now;
tp->compressed_ack++;
if (tp->compressed_ack_rcv_nxt != tp->rcv_nxt) {
tp->compressed_ack_rcv_nxt = tp->rcv_nxt;
if (tp->compressed_ack > TCP_FASTRETRANS_THRESH)
NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPACKCOMPRESSED,
tp->compressed_ack - TCP_FASTRETRANS_THRESH);
tp->compressed_ack = 0;
}
if (++tp->compressed_ack <= TCP_FASTRETRANS_THRESH)
goto send_now;
if (hrtimer_is_queued(&tp->compressed_ack_timer)) if (hrtimer_is_queued(&tp->compressed_ack_timer))
return; return;
......
...@@ -180,10 +180,10 @@ static inline void tcp_event_ack_sent(struct sock *sk, unsigned int pkts, ...@@ -180,10 +180,10 @@ static inline void tcp_event_ack_sent(struct sock *sk, unsigned int pkts,
{ {
struct tcp_sock *tp = tcp_sk(sk); struct tcp_sock *tp = tcp_sk(sk);
if (unlikely(tp->compressed_ack)) { if (unlikely(tp->compressed_ack > TCP_FASTRETRANS_THRESH)) {
NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPACKCOMPRESSED, NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPACKCOMPRESSED,
tp->compressed_ack); tp->compressed_ack - TCP_FASTRETRANS_THRESH);
tp->compressed_ack = 0; tp->compressed_ack = TCP_FASTRETRANS_THRESH;
if (hrtimer_try_to_cancel(&tp->compressed_ack_timer) == 1) if (hrtimer_try_to_cancel(&tp->compressed_ack_timer) == 1)
__sock_put(sk); __sock_put(sk);
} }
......
...@@ -740,7 +740,7 @@ static enum hrtimer_restart tcp_compressed_ack_kick(struct hrtimer *timer) ...@@ -740,7 +740,7 @@ static enum hrtimer_restart tcp_compressed_ack_kick(struct hrtimer *timer)
bh_lock_sock(sk); bh_lock_sock(sk);
if (!sock_owned_by_user(sk)) { if (!sock_owned_by_user(sk)) {
if (tp->compressed_ack) if (tp->compressed_ack > TCP_FASTRETRANS_THRESH)
tcp_send_ack(sk); tcp_send_ack(sk);
} else { } else {
if (!test_and_set_bit(TCP_DELACK_TIMER_DEFERRED, if (!test_and_set_bit(TCP_DELACK_TIMER_DEFERRED,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment