Commit 6f01fd6e authored by Eric Dumazet, committed by David S. Miller

af_unix: fix EPOLLET regression for stream sockets

Commit 0884d7aa (AF_UNIX: Fix poll blocking problem when reading from
a stream socket) introduced a regression for epoll() in Edge Triggered
mode (EPOLLET).

The appropriate fix is to use skb_peek()/skb_unlink() instead of
skb_dequeue(), and only call skb_unlink() when the skb is fully consumed.

This removes the need to requeue a partial skb at the head of
sk_receive_queue, as well as the extra sk->sk_data_ready() calls that
caused the regression.

This is safe because once skb is given to sk_receive_queue, it is not
modified by a writer, and readers are serialized by u->readlock mutex.

This also reduces the number of spinlock acquisitions for small reads or
MSG_PEEK users, so it should improve overall performance.

Reported-by: Nick Mathewson <nickm@freehaven.net>
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Cc: Alexey Moiseytsev <himeraster@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent 5b35e1e6
...@@ -1918,7 +1918,7 @@ static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock, ...@@ -1918,7 +1918,7 @@ static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock,
struct sk_buff *skb; struct sk_buff *skb;
unix_state_lock(sk); unix_state_lock(sk);
skb = skb_dequeue(&sk->sk_receive_queue); skb = skb_peek(&sk->sk_receive_queue);
if (skb == NULL) { if (skb == NULL) {
unix_sk(sk)->recursion_level = 0; unix_sk(sk)->recursion_level = 0;
if (copied >= target) if (copied >= target)
...@@ -1958,11 +1958,8 @@ static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock, ...@@ -1958,11 +1958,8 @@ static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock,
if (check_creds) { if (check_creds) {
/* Never glue messages from different writers */ /* Never glue messages from different writers */
if ((UNIXCB(skb).pid != siocb->scm->pid) || if ((UNIXCB(skb).pid != siocb->scm->pid) ||
(UNIXCB(skb).cred != siocb->scm->cred)) { (UNIXCB(skb).cred != siocb->scm->cred))
skb_queue_head(&sk->sk_receive_queue, skb);
sk->sk_data_ready(sk, skb->len);
break; break;
}
} else { } else {
/* Copy credentials */ /* Copy credentials */
scm_set_cred(siocb->scm, UNIXCB(skb).pid, UNIXCB(skb).cred); scm_set_cred(siocb->scm, UNIXCB(skb).pid, UNIXCB(skb).cred);
...@@ -1977,8 +1974,6 @@ static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock, ...@@ -1977,8 +1974,6 @@ static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock,
chunk = min_t(unsigned int, skb->len, size); chunk = min_t(unsigned int, skb->len, size);
if (memcpy_toiovec(msg->msg_iov, skb->data, chunk)) { if (memcpy_toiovec(msg->msg_iov, skb->data, chunk)) {
skb_queue_head(&sk->sk_receive_queue, skb);
sk->sk_data_ready(sk, skb->len);
if (copied == 0) if (copied == 0)
copied = -EFAULT; copied = -EFAULT;
break; break;
...@@ -1993,13 +1988,10 @@ static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock, ...@@ -1993,13 +1988,10 @@ static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock,
if (UNIXCB(skb).fp) if (UNIXCB(skb).fp)
unix_detach_fds(siocb->scm, skb); unix_detach_fds(siocb->scm, skb);
/* put the skb back if we didn't use it up.. */ if (skb->len)
if (skb->len) {
skb_queue_head(&sk->sk_receive_queue, skb);
sk->sk_data_ready(sk, skb->len);
break; break;
}
skb_unlink(skb, &sk->sk_receive_queue);
consume_skb(skb); consume_skb(skb);
if (siocb->scm->fp) if (siocb->scm->fp)
...@@ -2010,9 +2002,6 @@ static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock, ...@@ -2010,9 +2002,6 @@ static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock,
if (UNIXCB(skb).fp) if (UNIXCB(skb).fp)
siocb->scm->fp = scm_fp_dup(UNIXCB(skb).fp); siocb->scm->fp = scm_fp_dup(UNIXCB(skb).fp);
/* put message back and return */
skb_queue_head(&sk->sk_receive_queue, skb);
sk->sk_data_ready(sk, skb->len);
break; break;
} }
} while (size); } while (size);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment