Commit 6e1ce3c3, authored by Linus Torvalds, committed by David S. Miller

af_unix: split 'u->readlock' into two: 'iolock' and 'bindlock'

Right now we use the 'readlock' both for protecting some of the af_unix
IO path and for making the bind be single-threaded.

The two are independent, but using the same lock makes for a nasty
deadlock due to ordering with regard to filesystem locking.  The bind
locking would want to nest outside the VFS pathname locking, but the IO
locking wants to nest inside some of those same locks.

We tried to fix this earlier with commit c845acb3 ("af_unix: Fix
splice-bind deadlock") which moved the readlock inside the vfs locks,
but that caused problems with overlayfs that will then call back into
filesystem routines that take the lock in the wrong order anyway.

Splitting the locks means that we can go back to having the bind lock be
the outermost lock, and we don't have any deadlocks with lock ordering.
Acked-by: Rainer Weikusat <rweikusat@cyberadapt.com>
Acked-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Acked-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent 38f7bd94
...@@ -52,7 +52,7 @@ struct unix_sock { ...@@ -52,7 +52,7 @@ struct unix_sock {
struct sock sk; struct sock sk;
struct unix_address *addr; struct unix_address *addr;
struct path path; struct path path;
struct mutex readlock; struct mutex iolock, bindlock;
struct sock *peer; struct sock *peer;
struct list_head link; struct list_head link;
atomic_long_t inflight; atomic_long_t inflight;
......
...@@ -661,11 +661,11 @@ static int unix_set_peek_off(struct sock *sk, int val) ...@@ -661,11 +661,11 @@ static int unix_set_peek_off(struct sock *sk, int val)
{ {
struct unix_sock *u = unix_sk(sk); struct unix_sock *u = unix_sk(sk);
if (mutex_lock_interruptible(&u->readlock)) if (mutex_lock_interruptible(&u->iolock))
return -EINTR; return -EINTR;
sk->sk_peek_off = val; sk->sk_peek_off = val;
mutex_unlock(&u->readlock); mutex_unlock(&u->iolock);
return 0; return 0;
} }
...@@ -779,7 +779,8 @@ static struct sock *unix_create1(struct net *net, struct socket *sock, int kern) ...@@ -779,7 +779,8 @@ static struct sock *unix_create1(struct net *net, struct socket *sock, int kern)
spin_lock_init(&u->lock); spin_lock_init(&u->lock);
atomic_long_set(&u->inflight, 0); atomic_long_set(&u->inflight, 0);
INIT_LIST_HEAD(&u->link); INIT_LIST_HEAD(&u->link);
mutex_init(&u->readlock); /* single task reading lock */ mutex_init(&u->iolock); /* single task reading lock */
mutex_init(&u->bindlock); /* single task binding lock */
init_waitqueue_head(&u->peer_wait); init_waitqueue_head(&u->peer_wait);
init_waitqueue_func_entry(&u->peer_wake, unix_dgram_peer_wake_relay); init_waitqueue_func_entry(&u->peer_wake, unix_dgram_peer_wake_relay);
unix_insert_socket(unix_sockets_unbound(sk), sk); unix_insert_socket(unix_sockets_unbound(sk), sk);
...@@ -848,7 +849,7 @@ static int unix_autobind(struct socket *sock) ...@@ -848,7 +849,7 @@ static int unix_autobind(struct socket *sock)
int err; int err;
unsigned int retries = 0; unsigned int retries = 0;
err = mutex_lock_interruptible(&u->readlock); err = mutex_lock_interruptible(&u->bindlock);
if (err) if (err)
return err; return err;
...@@ -895,7 +896,7 @@ static int unix_autobind(struct socket *sock) ...@@ -895,7 +896,7 @@ static int unix_autobind(struct socket *sock)
spin_unlock(&unix_table_lock); spin_unlock(&unix_table_lock);
err = 0; err = 0;
out: mutex_unlock(&u->readlock); out: mutex_unlock(&u->bindlock);
return err; return err;
} }
...@@ -1009,7 +1010,7 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) ...@@ -1009,7 +1010,7 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
goto out; goto out;
addr_len = err; addr_len = err;
err = mutex_lock_interruptible(&u->readlock); err = mutex_lock_interruptible(&u->bindlock);
if (err) if (err)
goto out; goto out;
...@@ -1063,7 +1064,7 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) ...@@ -1063,7 +1064,7 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
out_unlock: out_unlock:
spin_unlock(&unix_table_lock); spin_unlock(&unix_table_lock);
out_up: out_up:
mutex_unlock(&u->readlock); mutex_unlock(&u->bindlock);
out: out:
return err; return err;
} }
...@@ -1955,17 +1956,17 @@ static ssize_t unix_stream_sendpage(struct socket *socket, struct page *page, ...@@ -1955,17 +1956,17 @@ static ssize_t unix_stream_sendpage(struct socket *socket, struct page *page,
if (false) { if (false) {
alloc_skb: alloc_skb:
unix_state_unlock(other); unix_state_unlock(other);
mutex_unlock(&unix_sk(other)->readlock); mutex_unlock(&unix_sk(other)->iolock);
newskb = sock_alloc_send_pskb(sk, 0, 0, flags & MSG_DONTWAIT, newskb = sock_alloc_send_pskb(sk, 0, 0, flags & MSG_DONTWAIT,
&err, 0); &err, 0);
if (!newskb) if (!newskb)
goto err; goto err;
} }
/* we must acquire readlock as we modify already present /* we must acquire iolock as we modify already present
* skbs in the sk_receive_queue and mess with skb->len * skbs in the sk_receive_queue and mess with skb->len
*/ */
err = mutex_lock_interruptible(&unix_sk(other)->readlock); err = mutex_lock_interruptible(&unix_sk(other)->iolock);
if (err) { if (err) {
err = flags & MSG_DONTWAIT ? -EAGAIN : -ERESTARTSYS; err = flags & MSG_DONTWAIT ? -EAGAIN : -ERESTARTSYS;
goto err; goto err;
...@@ -2032,7 +2033,7 @@ static ssize_t unix_stream_sendpage(struct socket *socket, struct page *page, ...@@ -2032,7 +2033,7 @@ static ssize_t unix_stream_sendpage(struct socket *socket, struct page *page,
} }
unix_state_unlock(other); unix_state_unlock(other);
mutex_unlock(&unix_sk(other)->readlock); mutex_unlock(&unix_sk(other)->iolock);
other->sk_data_ready(other); other->sk_data_ready(other);
scm_destroy(&scm); scm_destroy(&scm);
...@@ -2041,7 +2042,7 @@ static ssize_t unix_stream_sendpage(struct socket *socket, struct page *page, ...@@ -2041,7 +2042,7 @@ static ssize_t unix_stream_sendpage(struct socket *socket, struct page *page,
err_state_unlock: err_state_unlock:
unix_state_unlock(other); unix_state_unlock(other);
err_unlock: err_unlock:
mutex_unlock(&unix_sk(other)->readlock); mutex_unlock(&unix_sk(other)->iolock);
err: err:
kfree_skb(newskb); kfree_skb(newskb);
if (send_sigpipe && !(flags & MSG_NOSIGNAL)) if (send_sigpipe && !(flags & MSG_NOSIGNAL))
...@@ -2109,7 +2110,7 @@ static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg, ...@@ -2109,7 +2110,7 @@ static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg,
timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
do { do {
mutex_lock(&u->readlock); mutex_lock(&u->iolock);
skip = sk_peek_offset(sk, flags); skip = sk_peek_offset(sk, flags);
skb = __skb_try_recv_datagram(sk, flags, &peeked, &skip, &err, skb = __skb_try_recv_datagram(sk, flags, &peeked, &skip, &err,
...@@ -2117,14 +2118,14 @@ static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg, ...@@ -2117,14 +2118,14 @@ static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg,
if (skb) if (skb)
break; break;
mutex_unlock(&u->readlock); mutex_unlock(&u->iolock);
if (err != -EAGAIN) if (err != -EAGAIN)
break; break;
} while (timeo && } while (timeo &&
!__skb_wait_for_more_packets(sk, &err, &timeo, last)); !__skb_wait_for_more_packets(sk, &err, &timeo, last));
if (!skb) { /* implies readlock unlocked */ if (!skb) { /* implies iolock unlocked */
unix_state_lock(sk); unix_state_lock(sk);
/* Signal EOF on disconnected non-blocking SEQPACKET socket. */ /* Signal EOF on disconnected non-blocking SEQPACKET socket. */
if (sk->sk_type == SOCK_SEQPACKET && err == -EAGAIN && if (sk->sk_type == SOCK_SEQPACKET && err == -EAGAIN &&
...@@ -2189,7 +2190,7 @@ static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg, ...@@ -2189,7 +2190,7 @@ static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg,
out_free: out_free:
skb_free_datagram(sk, skb); skb_free_datagram(sk, skb);
mutex_unlock(&u->readlock); mutex_unlock(&u->iolock);
out: out:
return err; return err;
} }
...@@ -2284,7 +2285,7 @@ static int unix_stream_read_generic(struct unix_stream_read_state *state) ...@@ -2284,7 +2285,7 @@ static int unix_stream_read_generic(struct unix_stream_read_state *state)
/* Lock the socket to prevent queue disordering /* Lock the socket to prevent queue disordering
* while sleeps in memcpy_tomsg * while sleeps in memcpy_tomsg
*/ */
mutex_lock(&u->readlock); mutex_lock(&u->iolock);
if (flags & MSG_PEEK) if (flags & MSG_PEEK)
skip = sk_peek_offset(sk, flags); skip = sk_peek_offset(sk, flags);
...@@ -2326,7 +2327,7 @@ static int unix_stream_read_generic(struct unix_stream_read_state *state) ...@@ -2326,7 +2327,7 @@ static int unix_stream_read_generic(struct unix_stream_read_state *state)
break; break;
} }
mutex_unlock(&u->readlock); mutex_unlock(&u->iolock);
timeo = unix_stream_data_wait(sk, timeo, last, timeo = unix_stream_data_wait(sk, timeo, last,
last_len); last_len);
...@@ -2337,7 +2338,7 @@ static int unix_stream_read_generic(struct unix_stream_read_state *state) ...@@ -2337,7 +2338,7 @@ static int unix_stream_read_generic(struct unix_stream_read_state *state)
goto out; goto out;
} }
mutex_lock(&u->readlock); mutex_lock(&u->iolock);
goto redo; goto redo;
unlock: unlock:
unix_state_unlock(sk); unix_state_unlock(sk);
...@@ -2440,7 +2441,7 @@ static int unix_stream_read_generic(struct unix_stream_read_state *state) ...@@ -2440,7 +2441,7 @@ static int unix_stream_read_generic(struct unix_stream_read_state *state)
} }
} while (size); } while (size);
mutex_unlock(&u->readlock); mutex_unlock(&u->iolock);
if (state->msg) if (state->msg)
scm_recv(sock, state->msg, &scm, flags); scm_recv(sock, state->msg, &scm, flags);
else else
...@@ -2481,9 +2482,9 @@ static ssize_t skb_unix_socket_splice(struct sock *sk, ...@@ -2481,9 +2482,9 @@ static ssize_t skb_unix_socket_splice(struct sock *sk,
int ret; int ret;
struct unix_sock *u = unix_sk(sk); struct unix_sock *u = unix_sk(sk);
mutex_unlock(&u->readlock); mutex_unlock(&u->iolock);
ret = splice_to_pipe(pipe, spd); ret = splice_to_pipe(pipe, spd);
mutex_lock(&u->readlock); mutex_lock(&u->iolock);
return ret; return ret;
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment