Commit 03f45c88 authored by Eric Dumazet's avatar Eric Dumazet Committed by David S. Miller

tcp: avoid extra wakeups for SO_RCVLOWAT users

SO_RCVLOWAT is properly handled in tcp_poll(), so that POLLIN is only
generated when enough bytes are available in receive queue, after
David change (commit c7004482 "tcp: Respect SO_RCVLOWAT in tcp_poll().")

But TCP still calls sk->sk_data_ready() for each chunk added in receive
queue, meaning thread is awaken, and goes back to sleep shortly after.

Tested:

tcp_mmap test program, receiving 32768 MB of data with SO_RCVLOWAT set to 512KB

-> Should get ~2 wakeups (c-switches) per MB, regardless of how many
(tiny or big) packets were received.

High speed (mostly full size GRO packets)

received 32768 MB (100 % mmap'ed) in 8.03112 s, 34.2266 Gbit,
  cpu usage user:0.037 sys:1.404, 43.9758 usec per MB, 65497 c-switches

received 32768 MB (99.9954 % mmap'ed) in 7.98453 s, 34.4263 Gbit,
  cpu usage user:0.03 sys:1.422, 44.3115 usec per MB, 65485 c-switches

Low speed (sender is ratelimited and sends 1-MSS at a time, so GRO is not helping)

received 22474.5 MB (100 % mmap'ed) in 6015.35 s, 0.0313414 Gbit,
  cpu usage user:0.05 sys:1.586, 72.7952 usec per MB, 44950 c-switches
Signed-off-by: default avatarEric Dumazet <edumazet@google.com>
Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parent 796f82ea
...@@ -403,6 +403,7 @@ void tcp_syn_ack_timeout(const struct request_sock *req); ...@@ -403,6 +403,7 @@ void tcp_syn_ack_timeout(const struct request_sock *req);
int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock, int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock,
int flags, int *addr_len); int flags, int *addr_len);
int tcp_set_rcvlowat(struct sock *sk, int val); int tcp_set_rcvlowat(struct sock *sk, int val);
void tcp_data_ready(struct sock *sk);
void tcp_parse_options(const struct net *net, const struct sk_buff *skb, void tcp_parse_options(const struct net *net, const struct sk_buff *skb,
struct tcp_options_received *opt_rx, struct tcp_options_received *opt_rx,
int estab, struct tcp_fastopen_cookie *foc); int estab, struct tcp_fastopen_cookie *foc);
......
...@@ -1705,6 +1705,10 @@ EXPORT_SYMBOL(tcp_peek_len); ...@@ -1705,6 +1705,10 @@ EXPORT_SYMBOL(tcp_peek_len);
int tcp_set_rcvlowat(struct sock *sk, int val) int tcp_set_rcvlowat(struct sock *sk, int val)
{ {
sk->sk_rcvlowat = val ? : 1; sk->sk_rcvlowat = val ? : 1;
/* Check if we need to signal EPOLLIN right now */
tcp_data_ready(sk);
if (sk->sk_userlocks & SOCK_RCVBUF_LOCK) if (sk->sk_userlocks & SOCK_RCVBUF_LOCK)
return 0; return 0;
......
...@@ -4576,6 +4576,17 @@ int tcp_send_rcvq(struct sock *sk, struct msghdr *msg, size_t size) ...@@ -4576,6 +4576,17 @@ int tcp_send_rcvq(struct sock *sk, struct msghdr *msg, size_t size)
} }
void tcp_data_ready(struct sock *sk)
{
const struct tcp_sock *tp = tcp_sk(sk);
int avail = tp->rcv_nxt - tp->copied_seq;
if (avail < sk->sk_rcvlowat && !sock_flag(sk, SOCK_DONE))
return;
sk->sk_data_ready(sk);
}
static void tcp_data_queue(struct sock *sk, struct sk_buff *skb) static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
{ {
struct tcp_sock *tp = tcp_sk(sk); struct tcp_sock *tp = tcp_sk(sk);
...@@ -4633,7 +4644,7 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb) ...@@ -4633,7 +4644,7 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
if (eaten > 0) if (eaten > 0)
kfree_skb_partial(skb, fragstolen); kfree_skb_partial(skb, fragstolen);
if (!sock_flag(sk, SOCK_DEAD)) if (!sock_flag(sk, SOCK_DEAD))
sk->sk_data_ready(sk); tcp_data_ready(sk);
return; return;
} }
...@@ -5434,7 +5445,7 @@ void tcp_rcv_established(struct sock *sk, struct sk_buff *skb, ...@@ -5434,7 +5445,7 @@ void tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
no_ack: no_ack:
if (eaten) if (eaten)
kfree_skb_partial(skb, fragstolen); kfree_skb_partial(skb, fragstolen);
sk->sk_data_ready(sk); tcp_data_ready(sk);
return; return;
} }
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment