Commit 2cd81161, authored by Arjun Roy, committed by Jakub Kicinski

net-tcp: Introduce tcp_recvmsg_locked().

Refactor tcp_recvmsg() by splitting it into locked and unlocked
portions. Callers already holding the socket lock and not using
ERRQUEUE/cmsg/busy polling can simply call tcp_recvmsg_locked().
This is in preparation for a short-circuit copy performed by
TCP receive zerocopy for small (< PAGE_SIZE, or otherwise requested
by the user) reads.
Signed-off-by: Arjun Roy <arjunroy@google.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Soheil Hassas Yeganeh <soheil@google.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
parent 18fb76ed
...@@ -2080,36 +2080,28 @@ static int tcp_inq_hint(struct sock *sk) ...@@ -2080,36 +2080,28 @@ static int tcp_inq_hint(struct sock *sk)
* Probably, code can be easily improved even more. * Probably, code can be easily improved even more.
*/ */
int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock, static int tcp_recvmsg_locked(struct sock *sk, struct msghdr *msg, size_t len,
int flags, int *addr_len) int nonblock, int flags,
struct scm_timestamping_internal *tss,
int *cmsg_flags)
{ {
struct tcp_sock *tp = tcp_sk(sk); struct tcp_sock *tp = tcp_sk(sk);
int copied = 0; int copied = 0;
u32 peek_seq; u32 peek_seq;
u32 *seq; u32 *seq;
unsigned long used; unsigned long used;
int err, inq; int err;
int target; /* Read at least this many bytes */ int target; /* Read at least this many bytes */
long timeo; long timeo;
struct sk_buff *skb, *last; struct sk_buff *skb, *last;
u32 urg_hole = 0; u32 urg_hole = 0;
struct scm_timestamping_internal tss;
int cmsg_flags;
if (unlikely(flags & MSG_ERRQUEUE))
return inet_recv_error(sk, msg, len, addr_len);
if (sk_can_busy_loop(sk) && skb_queue_empty_lockless(&sk->sk_receive_queue) &&
(sk->sk_state == TCP_ESTABLISHED))
sk_busy_loop(sk, nonblock);
lock_sock(sk);
err = -ENOTCONN; err = -ENOTCONN;
if (sk->sk_state == TCP_LISTEN) if (sk->sk_state == TCP_LISTEN)
goto out; goto out;
cmsg_flags = tp->recvmsg_inq ? 1 : 0; if (tp->recvmsg_inq)
*cmsg_flags = 1;
timeo = sock_rcvtimeo(sk, nonblock); timeo = sock_rcvtimeo(sk, nonblock);
/* Urgent data needs to be handled specially. */ /* Urgent data needs to be handled specially. */
...@@ -2289,8 +2281,8 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock, ...@@ -2289,8 +2281,8 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock,
} }
if (TCP_SKB_CB(skb)->has_rxtstamp) { if (TCP_SKB_CB(skb)->has_rxtstamp) {
tcp_update_recv_tstamps(skb, &tss); tcp_update_recv_tstamps(skb, tss);
cmsg_flags |= 2; *cmsg_flags |= 2;
} }
if (used + offset < skb->len) if (used + offset < skb->len)
...@@ -2316,22 +2308,9 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock, ...@@ -2316,22 +2308,9 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock,
/* Clean up data we have read: This will do ACK frames. */ /* Clean up data we have read: This will do ACK frames. */
tcp_cleanup_rbuf(sk, copied); tcp_cleanup_rbuf(sk, copied);
release_sock(sk);
if (cmsg_flags) {
if (cmsg_flags & 2)
tcp_recv_timestamp(msg, sk, &tss);
if (cmsg_flags & 1) {
inq = tcp_inq_hint(sk);
put_cmsg(msg, SOL_TCP, TCP_CM_INQ, sizeof(inq), &inq);
}
}
return copied; return copied;
out: out:
release_sock(sk);
return err; return err;
recv_urg: recv_urg:
...@@ -2342,6 +2321,36 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock, ...@@ -2342,6 +2321,36 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock,
err = tcp_peek_sndq(sk, msg, len); err = tcp_peek_sndq(sk, msg, len);
goto out; goto out;
} }
/*
 * tcp_recvmsg - receive entry point that takes the socket lock itself.
 *
 * MSG_ERRQUEUE requests are handed straight to inet_recv_error(). Otherwise
 * sk_busy_loop() may be invoked first (only when sk_can_busy_loop() allows
 * it, the receive queue looks empty and the state is TCP_ESTABLISHED), and
 * then tcp_recvmsg_locked() runs between lock_sock() and release_sock().
 *
 * Control messages are emitted after the lock has been dropped, and only
 * when the locked helper returned a non-negative value:
 *   flag value 2 -> tcp_recv_timestamp() with the collected timestamps
 *   flag value 1 -> a TCP_CM_INQ cmsg carrying tcp_inq_hint()
 *
 * Returns whatever tcp_recvmsg_locked() (or inet_recv_error()) returned.
 */
int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock,
		int flags, int *addr_len)
{
	struct scm_timestamping_internal tss;
	int pending_cmsgs = 0;
	int inq_hint;
	int result;

	if (unlikely(flags & MSG_ERRQUEUE))
		return inet_recv_error(sk, msg, len, addr_len);

	if (sk_can_busy_loop(sk) &&
	    skb_queue_empty_lockless(&sk->sk_receive_queue) &&
	    sk->sk_state == TCP_ESTABLISHED)
		sk_busy_loop(sk, nonblock);

	lock_sock(sk);
	result = tcp_recvmsg_locked(sk, msg, len, nonblock, flags, &tss,
				    &pending_cmsgs);
	release_sock(sk);

	/* Nothing to report on failure or when no cmsg was requested. */
	if (result < 0 || !pending_cmsgs)
		return result;

	/* tss is only read here, i.e. when the helper raised flag 2. */
	if (pending_cmsgs & 2)
		tcp_recv_timestamp(msg, sk, &tss);

	if (pending_cmsgs & 1) {
		inq_hint = tcp_inq_hint(sk);
		put_cmsg(msg, SOL_TCP, TCP_CM_INQ, sizeof(inq_hint), &inq_hint);
	}

	return result;
}
EXPORT_SYMBOL(tcp_recvmsg); EXPORT_SYMBOL(tcp_recvmsg);
void tcp_set_state(struct sock *sk, int state) void tcp_set_state(struct sock *sk, int state)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment