Commit 9b5cbf46 authored by David S. Miller

Merge branch 'tcp-exponential-backoff-in-tcp_send_ack'

Eric Dumazet says:

====================
tcp: exponential backoff in tcp_send_ack()

We had outages caused by repeated skb allocation failures in tcp_send_ack().

It is time to add exponential backoff to reduce the number of attempts.
Before doing so, the first patch removes icsk_ack.blocked to make
room for a new field (icsk_ack.retry).
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents 20c168be a37c2134
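Before the diff, a standalone sketch may help show the retry policy in isolation. The following userspace C program is hypothetical, not kernel code: DELACK_MAX_MS, RTO_MAX_MS, struct ack_state and next_ack_delay() are illustrative names, with values mirroring the kernel's TCP_DELACK_MAX (200 ms) and TCP_RTO_MAX (120 s). It reproduces the capped exponential backoff that the second patch adds to __tcp_send_ack().

/* Standalone sketch of the capped exponential backoff applied when
 * skb allocation fails in __tcp_send_ack(). Hypothetical demo, not
 * kernel code; names mirror the patch for readability.
 */
#include <stdio.h>

#define DELACK_MAX_MS	200UL		/* TCP_DELACK_MAX: HZ/5 jiffies, i.e. 200 ms */
#define RTO_MAX_MS	120000UL	/* TCP_RTO_MAX: 120 seconds */

struct ack_state {
	unsigned char retry;		/* mirrors icsk_ack.retry (__u8) */
};

/* Return the delay before the next ACK attempt, doubling it per
 * failure; the counter stops advancing once the cap is reached.
 */
static unsigned long next_ack_delay(struct ack_state *ack)
{
	unsigned long delay = DELACK_MAX_MS << ack->retry;

	if (delay < RTO_MAX_MS)
		ack->retry++;
	/* The kernel gets the same clamp from inet_csk_reset_xmit_timer()'s
	 * max_when argument; here we clamp explicitly. */
	return delay < RTO_MAX_MS ? delay : RTO_MAX_MS;
}

int main(void)
{
	struct ack_state ack = { .retry = 0 };

	/* Simulate eleven consecutive allocation failures. */
	for (int i = 0; i < 11; i++)
		printf("attempt %2d: retry after %lu ms\n",
		       i + 1, next_ack_delay(&ack));
	return 0;
}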
--- a/include/net/inet_connection_sock.h
+++ b/include/net/inet_connection_sock.h
@@ -110,7 +110,7 @@ struct inet_connection_sock {
 	__u8		  pending;	 /* ACK is pending			   */
 	__u8		  quick;	 /* Scheduled number of quick acks	   */
 	__u8		  pingpong;	 /* The session is interactive		   */
-	__u8		  blocked;	 /* Delayed ACK was blocked by socket lock */
+	__u8		  retry;	 /* Number of attempts			   */
 	__u32		  ato;		 /* Predicted tick of soft clock	   */
 	unsigned long	  timeout;	 /* Currently scheduled timeout		   */
 	__u32		  lrcvtime;	 /* timestamp of last received data packet */
@@ -198,7 +198,8 @@ static inline void inet_csk_clear_xmit_timer(struct sock *sk, const int what)
 		sk_stop_timer(sk, &icsk->icsk_retransmit_timer);
 #endif
 	} else if (what == ICSK_TIME_DACK) {
-		icsk->icsk_ack.blocked = icsk->icsk_ack.pending = 0;
+		icsk->icsk_ack.pending = 0;
+		icsk->icsk_ack.retry = 0;
 #ifdef INET_CSK_CLEAR_TIMERS
 		sk_stop_timer(sk, &icsk->icsk_delack_timer);
 #endif
--- a/net/dccp/timer.c
+++ b/net/dccp/timer.c
@@ -176,7 +176,6 @@ static void dccp_delack_timer(struct timer_list *t)
 	bh_lock_sock(sk);
 	if (sock_owned_by_user(sk)) {
 		/* Try again later. */
-		icsk->icsk_ack.blocked = 1;
 		__NET_INC_STATS(sock_net(sk), LINUX_MIB_DELAYEDACKLOCKED);
 		sk_reset_timer(sk, &icsk->icsk_delack_timer,
 			       jiffies + TCP_DELACK_MIN);
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -564,7 +564,7 @@ void inet_csk_clear_xmit_timers(struct sock *sk)
 {
 	struct inet_connection_sock *icsk = inet_csk(sk);

-	icsk->icsk_pending = icsk->icsk_ack.pending = icsk->icsk_ack.blocked = 0;
+	icsk->icsk_pending = icsk->icsk_ack.pending = 0;

 	sk_stop_timer(sk, &icsk->icsk_retransmit_timer);
 	sk_stop_timer(sk, &icsk->icsk_delack_timer);
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1538,10 +1538,8 @@ void tcp_cleanup_rbuf(struct sock *sk, int copied)
 	if (inet_csk_ack_scheduled(sk)) {
 		const struct inet_connection_sock *icsk = inet_csk(sk);
-		   /* Delayed ACKs frequently hit locked sockets during bulk
-		    * receive. */
-		if (icsk->icsk_ack.blocked ||
-		    /* Once-per-two-segments ACK was not sent by tcp_input.c */
+
+		if (/* Once-per-two-segments ACK was not sent by tcp_input.c */
 		    tp->rcv_nxt - tp->rcv_wup > icsk->icsk_ack.rcv_mss ||
 		    /*
 		     * If this read emptied read buffer, we send ACK, if
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -3911,11 +3911,8 @@ void tcp_send_delayed_ack(struct sock *sk)
 	/* Use new timeout only if there wasn't a older one earlier. */
 	if (icsk->icsk_ack.pending & ICSK_ACK_TIMER) {
-		/* If delack timer was blocked or is about to expire,
-		 * send ACK now.
-		 */
-		if (icsk->icsk_ack.blocked ||
-		    time_before_eq(icsk->icsk_ack.timeout, jiffies + (ato >> 2))) {
+		/* If delack timer is about to expire, send ACK now. */
+		if (time_before_eq(icsk->icsk_ack.timeout, jiffies + (ato >> 2))) {
 			tcp_send_ack(sk);
 			return;
 		}
@@ -3944,10 +3941,15 @@ void __tcp_send_ack(struct sock *sk, u32 rcv_nxt)
 	buff = alloc_skb(MAX_TCP_HEADER,
 			 sk_gfp_mask(sk, GFP_ATOMIC | __GFP_NOWARN));
 	if (unlikely(!buff)) {
+		struct inet_connection_sock *icsk = inet_csk(sk);
+		unsigned long delay;
+
+		delay = TCP_DELACK_MAX << icsk->icsk_ack.retry;
+		if (delay < TCP_RTO_MAX)
+			icsk->icsk_ack.retry++;
 		inet_csk_schedule_ack(sk);
-		inet_csk(sk)->icsk_ack.ato = TCP_ATO_MIN;
-		inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
-					  TCP_DELACK_MAX, TCP_RTO_MAX);
+		icsk->icsk_ack.ato = TCP_ATO_MIN;
+		inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, delay, TCP_RTO_MAX);
 		return;
 	}
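Assuming the usual HZ-independent values (TCP_DELACK_MAX is 200 ms, TCP_RTO_MAX is 120 s), consecutive allocation failures now retry after 200 ms, 400 ms, 800 ms, ..., 51.2 s, 102.4 s; the next computed delay (204.8 s) exceeds TCP_RTO_MAX, so icsk_ack.retry stops incrementing and inet_csk_reset_xmit_timer() clamps the timeout to the 120 s cap. Previously every failure rearmed the timer at a flat TCP_DELACK_MAX, which is what produced the repeated attempts described in the cover letter.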
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -331,7 +331,6 @@ static void tcp_delack_timer(struct timer_list *t)
 	if (!sock_owned_by_user(sk)) {
 		tcp_delack_timer_handler(sk);
 	} else {
-		icsk->icsk_ack.blocked = 1;
 		__NET_INC_STATS(sock_net(sk), LINUX_MIB_DELAYEDACKLOCKED);
 		/* deleguate our work to tcp_release_cb() */
 		if (!test_and_set_bit(TCP_DELACK_TIMER_DEFERRED, &sk->sk_tsq_flags))