Commit 1ded5e5a authored by Eric Dumazet's avatar Eric Dumazet Committed by Jakub Kicinski

net: annotate data-races around sock->ops

IPV6_ADDRFORM socket option is evil, because it can change sock->ops
while other threads might read it. Same issue for sk->sk_family
being set to AF_INET.

Adding READ_ONCE() over sock->ops reads is needed for sockets
that might be impacted by IPV6_ADDRFORM.

Note that mptcp_is_tcpsk() can also overwrite sock->ops.

Adding annotations for all sk->sk_family reads will require
more patches :/

BUG: KCSAN: data-race in ____sys_sendmsg / do_ipv6_setsockopt

write to 0xffff888109f24ca0 of 8 bytes by task 4470 on cpu 0:
do_ipv6_setsockopt+0x2c5e/0x2ce0 net/ipv6/ipv6_sockglue.c:491
ipv6_setsockopt+0x57/0x130 net/ipv6/ipv6_sockglue.c:1012
udpv6_setsockopt+0x95/0xa0 net/ipv6/udp.c:1690
sock_common_setsockopt+0x61/0x70 net/core/sock.c:3663
__sys_setsockopt+0x1c3/0x230 net/socket.c:2273
__do_sys_setsockopt net/socket.c:2284 [inline]
__se_sys_setsockopt net/socket.c:2281 [inline]
__x64_sys_setsockopt+0x66/0x80 net/socket.c:2281
do_syscall_x64 arch/x86/entry/common.c:50 [inline]
do_syscall_64+0x41/0xc0 arch/x86/entry/common.c:80
entry_SYSCALL_64_after_hwframe+0x63/0xcd

read to 0xffff888109f24ca0 of 8 bytes by task 4469 on cpu 1:
sock_sendmsg_nosec net/socket.c:724 [inline]
sock_sendmsg net/socket.c:747 [inline]
____sys_sendmsg+0x349/0x4c0 net/socket.c:2503
___sys_sendmsg net/socket.c:2557 [inline]
__sys_sendmmsg+0x263/0x500 net/socket.c:2643
__do_sys_sendmmsg net/socket.c:2672 [inline]
__se_sys_sendmmsg net/socket.c:2669 [inline]
__x64_sys_sendmmsg+0x57/0x60 net/socket.c:2669
do_syscall_x64 arch/x86/entry/common.c:50 [inline]
do_syscall_64+0x41/0xc0 arch/x86/entry/common.c:80
entry_SYSCALL_64_after_hwframe+0x63/0xcd

value changed: 0xffffffff850e32b8 -> 0xffffffff850da890

Reported by Kernel Concurrency Sanitizer on:
CPU: 1 PID: 4469 Comm: syz-executor.1 Not tainted 6.4.0-rc5-syzkaller-00313-g4c605260 #0
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 05/25/2023
Reported-by: default avatarsyzbot <syzkaller@googlegroups.com>
Signed-off-by: default avatarEric Dumazet <edumazet@google.com>
Reviewed-by: default avatarKuniyuki Iwashima <kuniyu@amazon.com>
Link: https://lore.kernel.org/r/20230808135809.2300241-1-edumazet@google.comSigned-off-by: default avatarJakub Kicinski <kuba@kernel.org>
parent e05a53ab
...@@ -123,7 +123,7 @@ struct socket { ...@@ -123,7 +123,7 @@ struct socket {
struct file *file; struct file *file;
struct sock *sk; struct sock *sk;
const struct proto_ops *ops; const struct proto_ops *ops; /* Might change with IPV6_ADDRFORM or MPTCP. */
struct socket_wq wq; struct socket_wq wq;
}; };
......
...@@ -1019,7 +1019,7 @@ p9_fd_create_tcp(struct p9_client *client, const char *addr, char *args) ...@@ -1019,7 +1019,7 @@ p9_fd_create_tcp(struct p9_client *client, const char *addr, char *args)
} }
} }
err = csocket->ops->connect(csocket, err = READ_ONCE(csocket->ops)->connect(csocket,
(struct sockaddr *)&sin_server, (struct sockaddr *)&sin_server,
sizeof(struct sockaddr_in), 0); sizeof(struct sockaddr_in), 0);
if (err < 0) { if (err < 0) {
...@@ -1060,7 +1060,7 @@ p9_fd_create_unix(struct p9_client *client, const char *addr, char *args) ...@@ -1060,7 +1060,7 @@ p9_fd_create_unix(struct p9_client *client, const char *addr, char *args)
return err; return err;
} }
err = csocket->ops->connect(csocket, (struct sockaddr *)&sun_server, err = READ_ONCE(csocket->ops)->connect(csocket, (struct sockaddr *)&sun_server,
sizeof(struct sockaddr_un) - 1, 0); sizeof(struct sockaddr_un) - 1, 0);
if (err < 0) { if (err < 0) {
pr_err("%s (%d): problem connecting socket: %s: %d\n", pr_err("%s (%d): problem connecting socket: %s: %d\n",
......
...@@ -130,6 +130,7 @@ EXPORT_SYMBOL(__scm_destroy); ...@@ -130,6 +130,7 @@ EXPORT_SYMBOL(__scm_destroy);
int __scm_send(struct socket *sock, struct msghdr *msg, struct scm_cookie *p) int __scm_send(struct socket *sock, struct msghdr *msg, struct scm_cookie *p)
{ {
const struct proto_ops *ops = READ_ONCE(sock->ops);
struct cmsghdr *cmsg; struct cmsghdr *cmsg;
int err; int err;
...@@ -153,7 +154,7 @@ int __scm_send(struct socket *sock, struct msghdr *msg, struct scm_cookie *p) ...@@ -153,7 +154,7 @@ int __scm_send(struct socket *sock, struct msghdr *msg, struct scm_cookie *p)
switch (cmsg->cmsg_type) switch (cmsg->cmsg_type)
{ {
case SCM_RIGHTS: case SCM_RIGHTS:
if (!sock->ops || sock->ops->family != PF_UNIX) if (!ops || ops->family != PF_UNIX)
goto error; goto error;
err=scm_fp_copy(cmsg, &p->fp); err=scm_fp_copy(cmsg, &p->fp);
if (err<0) if (err<0)
......
...@@ -1198,13 +1198,17 @@ static int sk_psock_verdict_recv(struct sock *sk, struct sk_buff *skb) ...@@ -1198,13 +1198,17 @@ static int sk_psock_verdict_recv(struct sock *sk, struct sk_buff *skb)
static void sk_psock_verdict_data_ready(struct sock *sk) static void sk_psock_verdict_data_ready(struct sock *sk)
{ {
struct socket *sock = sk->sk_socket; struct socket *sock = sk->sk_socket;
const struct proto_ops *ops;
int copied; int copied;
trace_sk_data_ready(sk); trace_sk_data_ready(sk);
if (unlikely(!sock || !sock->ops || !sock->ops->read_skb)) if (unlikely(!sock))
return; return;
copied = sock->ops->read_skb(sk, sk_psock_verdict_recv); ops = READ_ONCE(sock->ops);
if (!ops || !ops->read_skb)
return;
copied = ops->read_skb(sk, sk_psock_verdict_recv);
if (copied >= 0) { if (copied >= 0) {
struct sk_psock *psock; struct sk_psock *psock;
......
...@@ -1277,14 +1277,19 @@ int sk_setsockopt(struct sock *sk, int level, int optname, ...@@ -1277,14 +1277,19 @@ int sk_setsockopt(struct sock *sk, int level, int optname,
break; break;
case SO_RCVLOWAT: case SO_RCVLOWAT:
{
int (*set_rcvlowat)(struct sock *sk, int val) = NULL;
if (val < 0) if (val < 0)
val = INT_MAX; val = INT_MAX;
if (sock && sock->ops->set_rcvlowat) if (sock)
ret = sock->ops->set_rcvlowat(sk, val); set_rcvlowat = READ_ONCE(sock->ops)->set_rcvlowat;
if (set_rcvlowat)
ret = set_rcvlowat(sk, val);
else else
WRITE_ONCE(sk->sk_rcvlowat, val ? : 1); WRITE_ONCE(sk->sk_rcvlowat, val ? : 1);
break; break;
}
case SO_RCVTIMEO_OLD: case SO_RCVTIMEO_OLD:
case SO_RCVTIMEO_NEW: case SO_RCVTIMEO_NEW:
ret = sock_set_timeout(&sk->sk_rcvtimeo, optval, ret = sock_set_timeout(&sk->sk_rcvtimeo, optval,
...@@ -1379,11 +1384,16 @@ int sk_setsockopt(struct sock *sk, int level, int optname, ...@@ -1379,11 +1384,16 @@ int sk_setsockopt(struct sock *sk, int level, int optname,
break; break;
case SO_PEEK_OFF: case SO_PEEK_OFF:
if (sock->ops->set_peek_off) {
ret = sock->ops->set_peek_off(sk, val); int (*set_peek_off)(struct sock *sk, int val);
set_peek_off = READ_ONCE(sock->ops)->set_peek_off;
if (set_peek_off)
ret = set_peek_off(sk, val);
else else
ret = -EOPNOTSUPP; ret = -EOPNOTSUPP;
break; break;
}
case SO_NOFCS: case SO_NOFCS:
sock_valbool_flag(sk, SOCK_NOFCS, valbool); sock_valbool_flag(sk, SOCK_NOFCS, valbool);
...@@ -1816,7 +1826,7 @@ int sk_getsockopt(struct sock *sk, int level, int optname, ...@@ -1816,7 +1826,7 @@ int sk_getsockopt(struct sock *sk, int level, int optname,
{ {
struct sockaddr_storage address; struct sockaddr_storage address;
lv = sock->ops->getname(sock, (struct sockaddr *)&address, 2); lv = READ_ONCE(sock->ops)->getname(sock, (struct sockaddr *)&address, 2);
if (lv < 0) if (lv < 0)
return -ENOTCONN; return -ENOTCONN;
if (lv < len) if (lv < len)
...@@ -1858,7 +1868,7 @@ int sk_getsockopt(struct sock *sk, int level, int optname, ...@@ -1858,7 +1868,7 @@ int sk_getsockopt(struct sock *sk, int level, int optname,
break; break;
case SO_PEEK_OFF: case SO_PEEK_OFF:
if (!sock->ops->set_peek_off) if (!READ_ONCE(sock->ops)->set_peek_off)
return -EOPNOTSUPP; return -EOPNOTSUPP;
v.val = READ_ONCE(sk->sk_peek_off); v.val = READ_ONCE(sk->sk_peek_off);
......
...@@ -474,8 +474,8 @@ int do_ipv6_setsockopt(struct sock *sk, int level, int optname, ...@@ -474,8 +474,8 @@ int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
WRITE_ONCE(sk->sk_prot, &tcp_prot); WRITE_ONCE(sk->sk_prot, &tcp_prot);
/* Paired with READ_ONCE() in tcp_(get|set)sockopt() */ /* Paired with READ_ONCE() in tcp_(get|set)sockopt() */
WRITE_ONCE(icsk->icsk_af_ops, &ipv4_specific); WRITE_ONCE(icsk->icsk_af_ops, &ipv4_specific);
sk->sk_socket->ops = &inet_stream_ops; WRITE_ONCE(sk->sk_socket->ops, &inet_stream_ops);
sk->sk_family = PF_INET; WRITE_ONCE(sk->sk_family, PF_INET);
tcp_sync_mss(sk, icsk->icsk_pmtu_cookie); tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
} else { } else {
struct proto *prot = &udp_prot; struct proto *prot = &udp_prot;
...@@ -488,8 +488,8 @@ int do_ipv6_setsockopt(struct sock *sk, int level, int optname, ...@@ -488,8 +488,8 @@ int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
/* Paired with READ_ONCE(sk->sk_prot) in inet6_dgram_ops */ /* Paired with READ_ONCE(sk->sk_prot) in inet6_dgram_ops */
WRITE_ONCE(sk->sk_prot, prot); WRITE_ONCE(sk->sk_prot, prot);
sk->sk_socket->ops = &inet_dgram_ops; WRITE_ONCE(sk->sk_socket->ops, &inet_dgram_ops);
sk->sk_family = PF_INET; WRITE_ONCE(sk->sk_family, PF_INET);
} }
/* Disable all options not to allocate memory anymore, /* Disable all options not to allocate memory anymore,
......
...@@ -67,11 +67,11 @@ static bool mptcp_is_tcpsk(struct sock *sk) ...@@ -67,11 +67,11 @@ static bool mptcp_is_tcpsk(struct sock *sk)
* Hand the socket over to tcp so all further socket ops * Hand the socket over to tcp so all further socket ops
* bypass mptcp. * bypass mptcp.
*/ */
sock->ops = &inet_stream_ops; WRITE_ONCE(sock->ops, &inet_stream_ops);
return true; return true;
#if IS_ENABLED(CONFIG_MPTCP_IPV6) #if IS_ENABLED(CONFIG_MPTCP_IPV6)
} else if (unlikely(sk->sk_prot == &tcpv6_prot)) { } else if (unlikely(sk->sk_prot == &tcpv6_prot)) {
sock->ops = &inet6_stream_ops; WRITE_ONCE(sock->ops, &inet6_stream_ops);
return true; return true;
#endif #endif
} }
...@@ -3683,7 +3683,7 @@ static int mptcp_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) ...@@ -3683,7 +3683,7 @@ static int mptcp_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
goto unlock; goto unlock;
} }
err = ssock->ops->bind(ssock, uaddr, addr_len); err = READ_ONCE(ssock->ops)->bind(ssock, uaddr, addr_len);
if (!err) if (!err)
mptcp_copy_inaddrs(sock->sk, ssock->sk); mptcp_copy_inaddrs(sock->sk, ssock->sk);
...@@ -3717,7 +3717,7 @@ static int mptcp_listen(struct socket *sock, int backlog) ...@@ -3717,7 +3717,7 @@ static int mptcp_listen(struct socket *sock, int backlog)
inet_sk_state_store(sk, TCP_LISTEN); inet_sk_state_store(sk, TCP_LISTEN);
sock_set_flag(sk, SOCK_RCU_FREE); sock_set_flag(sk, SOCK_RCU_FREE);
err = ssock->ops->listen(ssock, backlog); err = READ_ONCE(ssock->ops)->listen(ssock, backlog);
inet_sk_state_store(sk, inet_sk_state_load(ssock->sk)); inet_sk_state_store(sk, inet_sk_state_load(ssock->sk));
if (!err) { if (!err) {
sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
......
This diff is collapsed.
...@@ -29,10 +29,11 @@ struct sock *unix_get_socket(struct file *filp) ...@@ -29,10 +29,11 @@ struct sock *unix_get_socket(struct file *filp)
/* Socket ? */ /* Socket ? */
if (S_ISSOCK(inode->i_mode) && !(filp->f_mode & FMODE_PATH)) { if (S_ISSOCK(inode->i_mode) && !(filp->f_mode & FMODE_PATH)) {
struct socket *sock = SOCKET_I(inode); struct socket *sock = SOCKET_I(inode);
const struct proto_ops *ops = READ_ONCE(sock->ops);
struct sock *s = sock->sk; struct sock *s = sock->sk;
/* PF_UNIX ? */ /* PF_UNIX ? */
if (s && sock->ops && sock->ops->family == PF_UNIX) if (s && ops && ops->family == PF_UNIX)
u_sock = s; u_sock = s;
} else { } else {
/* Could be an io_uring instance */ /* Could be an io_uring instance */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment