Commit e1d09c2c authored by Kuniyuki Iwashima, committed by Jakub Kicinski

af_unix: Fix data races around sk->sk_shutdown.

KCSAN found a data race around sk->sk_shutdown where unix_release_sock()
and unix_shutdown() update it under unix_state_lock(), OTOH unix_poll()
and unix_dgram_poll() read it locklessly.

We need to annotate the writes and reads with WRITE_ONCE() and READ_ONCE().

BUG: KCSAN: data-race in unix_poll / unix_release_sock

write to 0xffff88800d0f8aec of 1 bytes by task 264 on cpu 0:
 unix_release_sock+0x75c/0x910 net/unix/af_unix.c:631
 unix_release+0x59/0x80 net/unix/af_unix.c:1042
 __sock_release+0x7d/0x170 net/socket.c:653
 sock_close+0x19/0x30 net/socket.c:1397
 __fput+0x179/0x5e0 fs/file_table.c:321
 ____fput+0x15/0x20 fs/file_table.c:349
 task_work_run+0x116/0x1a0 kernel/task_work.c:179
 resume_user_mode_work include/linux/resume_user_mode.h:49 [inline]
 exit_to_user_mode_loop kernel/entry/common.c:171 [inline]
 exit_to_user_mode_prepare+0x174/0x180 kernel/entry/common.c:204
 __syscall_exit_to_user_mode_work kernel/entry/common.c:286 [inline]
 syscall_exit_to_user_mode+0x1a/0x30 kernel/entry/common.c:297
 do_syscall_64+0x4b/0x90 arch/x86/entry/common.c:86
 entry_SYSCALL_64_after_hwframe+0x72/0xdc

read to 0xffff88800d0f8aec of 1 bytes by task 222 on cpu 1:
 unix_poll+0xa3/0x2a0 net/unix/af_unix.c:3170
 sock_poll+0xcf/0x2b0 net/socket.c:1385
 vfs_poll include/linux/poll.h:88 [inline]
 ep_item_poll.isra.0+0x78/0xc0 fs/eventpoll.c:855
 ep_send_events fs/eventpoll.c:1694 [inline]
 ep_poll fs/eventpoll.c:1823 [inline]
 do_epoll_wait+0x6c4/0xea0 fs/eventpoll.c:2258
 __do_sys_epoll_wait fs/eventpoll.c:2270 [inline]
 __se_sys_epoll_wait fs/eventpoll.c:2265 [inline]
 __x64_sys_epoll_wait+0xcc/0x190 fs/eventpoll.c:2265
 do_syscall_x64 arch/x86/entry/common.c:50 [inline]
 do_syscall_64+0x3b/0x90 arch/x86/entry/common.c:80
 entry_SYSCALL_64_after_hwframe+0x72/0xdc

value changed: 0x00 -> 0x03

Reported by Kernel Concurrency Sanitizer on:
CPU: 1 PID: 222 Comm: dbus-broker Not tainted 6.3.0-rc7-02330-gca6270c12e20 #2
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.16.0-0-gd239552ce722-prebuilt.qemu.org 04/01/2014

Fixes: 3c73419c ("af_unix: fix 'poll for write'/ connected DGRAM sockets")
Fixes: 1da177e4 ("Linux-2.6.12-rc2")
Reported-by: syzbot <syzkaller@googlegroups.com>
Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: Michal Kubiak <michal.kubiak@intel.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
parent 679ed006
...@@ -603,7 +603,7 @@ static void unix_release_sock(struct sock *sk, int embrion) ...@@ -603,7 +603,7 @@ static void unix_release_sock(struct sock *sk, int embrion)
/* Clear state */ /* Clear state */
unix_state_lock(sk); unix_state_lock(sk);
sock_orphan(sk); sock_orphan(sk);
sk->sk_shutdown = SHUTDOWN_MASK; WRITE_ONCE(sk->sk_shutdown, SHUTDOWN_MASK);
path = u->path; path = u->path;
u->path.dentry = NULL; u->path.dentry = NULL;
u->path.mnt = NULL; u->path.mnt = NULL;
...@@ -628,7 +628,7 @@ static void unix_release_sock(struct sock *sk, int embrion) ...@@ -628,7 +628,7 @@ static void unix_release_sock(struct sock *sk, int embrion)
if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) { if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) {
unix_state_lock(skpair); unix_state_lock(skpair);
/* No more writes */ /* No more writes */
skpair->sk_shutdown = SHUTDOWN_MASK; WRITE_ONCE(skpair->sk_shutdown, SHUTDOWN_MASK);
if (!skb_queue_empty(&sk->sk_receive_queue) || embrion) if (!skb_queue_empty(&sk->sk_receive_queue) || embrion)
WRITE_ONCE(skpair->sk_err, ECONNRESET); WRITE_ONCE(skpair->sk_err, ECONNRESET);
unix_state_unlock(skpair); unix_state_unlock(skpair);
...@@ -3008,7 +3008,7 @@ static int unix_shutdown(struct socket *sock, int mode) ...@@ -3008,7 +3008,7 @@ static int unix_shutdown(struct socket *sock, int mode)
++mode; ++mode;
unix_state_lock(sk); unix_state_lock(sk);
sk->sk_shutdown |= mode; WRITE_ONCE(sk->sk_shutdown, sk->sk_shutdown | mode);
other = unix_peer(sk); other = unix_peer(sk);
if (other) if (other)
sock_hold(other); sock_hold(other);
...@@ -3028,7 +3028,7 @@ static int unix_shutdown(struct socket *sock, int mode) ...@@ -3028,7 +3028,7 @@ static int unix_shutdown(struct socket *sock, int mode)
if (mode&SEND_SHUTDOWN) if (mode&SEND_SHUTDOWN)
peer_mode |= RCV_SHUTDOWN; peer_mode |= RCV_SHUTDOWN;
unix_state_lock(other); unix_state_lock(other);
other->sk_shutdown |= peer_mode; WRITE_ONCE(other->sk_shutdown, other->sk_shutdown | peer_mode);
unix_state_unlock(other); unix_state_unlock(other);
other->sk_state_change(other); other->sk_state_change(other);
if (peer_mode == SHUTDOWN_MASK) if (peer_mode == SHUTDOWN_MASK)
...@@ -3160,16 +3160,18 @@ static __poll_t unix_poll(struct file *file, struct socket *sock, poll_table *wa ...@@ -3160,16 +3160,18 @@ static __poll_t unix_poll(struct file *file, struct socket *sock, poll_table *wa
{ {
struct sock *sk = sock->sk; struct sock *sk = sock->sk;
__poll_t mask; __poll_t mask;
u8 shutdown;
sock_poll_wait(file, sock, wait); sock_poll_wait(file, sock, wait);
mask = 0; mask = 0;
shutdown = READ_ONCE(sk->sk_shutdown);
/* exceptional events? */ /* exceptional events? */
if (READ_ONCE(sk->sk_err)) if (READ_ONCE(sk->sk_err))
mask |= EPOLLERR; mask |= EPOLLERR;
if (sk->sk_shutdown == SHUTDOWN_MASK) if (shutdown == SHUTDOWN_MASK)
mask |= EPOLLHUP; mask |= EPOLLHUP;
if (sk->sk_shutdown & RCV_SHUTDOWN) if (shutdown & RCV_SHUTDOWN)
mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM; mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM;
/* readable? */ /* readable? */
...@@ -3203,9 +3205,11 @@ static __poll_t unix_dgram_poll(struct file *file, struct socket *sock, ...@@ -3203,9 +3205,11 @@ static __poll_t unix_dgram_poll(struct file *file, struct socket *sock,
struct sock *sk = sock->sk, *other; struct sock *sk = sock->sk, *other;
unsigned int writable; unsigned int writable;
__poll_t mask; __poll_t mask;
u8 shutdown;
sock_poll_wait(file, sock, wait); sock_poll_wait(file, sock, wait);
mask = 0; mask = 0;
shutdown = READ_ONCE(sk->sk_shutdown);
/* exceptional events? */ /* exceptional events? */
if (READ_ONCE(sk->sk_err) || if (READ_ONCE(sk->sk_err) ||
...@@ -3213,9 +3217,9 @@ static __poll_t unix_dgram_poll(struct file *file, struct socket *sock, ...@@ -3213,9 +3217,9 @@ static __poll_t unix_dgram_poll(struct file *file, struct socket *sock,
mask |= EPOLLERR | mask |= EPOLLERR |
(sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? EPOLLPRI : 0); (sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? EPOLLPRI : 0);
if (sk->sk_shutdown & RCV_SHUTDOWN) if (shutdown & RCV_SHUTDOWN)
mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM; mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM;
if (sk->sk_shutdown == SHUTDOWN_MASK) if (shutdown == SHUTDOWN_MASK)
mask |= EPOLLHUP; mask |= EPOLLHUP;
/* readable? */ /* readable? */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment