Commit f410cbea authored by Eric Dumazet, committed by Jakub Kicinski

tcp: annotate data-races around tp->window_clamp

tp->window_clamp can be read locklessly; add READ_ONCE()
and WRITE_ONCE() annotations.
Signed-off-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: Jason Xing <kerneljasonxing@gmail.com>
Link: https://lore.kernel.org/r/20240404114231.2195171-1-edumazet@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
parent 571faefe
...@@ -462,7 +462,8 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb) ...@@ -462,7 +462,8 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb)
} }
/* Try to redo what tcp_v4_send_synack did. */ /* Try to redo what tcp_v4_send_synack did. */
req->rsk_window_clamp = tp->window_clamp ? :dst_metric(&rt->dst, RTAX_WINDOW); req->rsk_window_clamp = READ_ONCE(tp->window_clamp) ? :
dst_metric(&rt->dst, RTAX_WINDOW);
/* limit the window selection if the user enforce a smaller rx buffer */ /* limit the window selection if the user enforce a smaller rx buffer */
full_space = tcp_full_space(sk); full_space = tcp_full_space(sk);
if (sk->sk_userlocks & SOCK_RCVBUF_LOCK && if (sk->sk_userlocks & SOCK_RCVBUF_LOCK &&
......
...@@ -1721,7 +1721,7 @@ int tcp_set_rcvlowat(struct sock *sk, int val) ...@@ -1721,7 +1721,7 @@ int tcp_set_rcvlowat(struct sock *sk, int val)
space = tcp_space_from_win(sk, val); space = tcp_space_from_win(sk, val);
if (space > sk->sk_rcvbuf) { if (space > sk->sk_rcvbuf) {
WRITE_ONCE(sk->sk_rcvbuf, space); WRITE_ONCE(sk->sk_rcvbuf, space);
tcp_sk(sk)->window_clamp = val; WRITE_ONCE(tcp_sk(sk)->window_clamp, val);
} }
return 0; return 0;
} }
...@@ -3379,7 +3379,7 @@ int tcp_set_window_clamp(struct sock *sk, int val) ...@@ -3379,7 +3379,7 @@ int tcp_set_window_clamp(struct sock *sk, int val)
if (!val) { if (!val) {
if (sk->sk_state != TCP_CLOSE) if (sk->sk_state != TCP_CLOSE)
return -EINVAL; return -EINVAL;
tp->window_clamp = 0; WRITE_ONCE(tp->window_clamp, 0);
} else { } else {
u32 new_rcv_ssthresh, old_window_clamp = tp->window_clamp; u32 new_rcv_ssthresh, old_window_clamp = tp->window_clamp;
u32 new_window_clamp = val < SOCK_MIN_RCVBUF / 2 ? u32 new_window_clamp = val < SOCK_MIN_RCVBUF / 2 ?
...@@ -3388,7 +3388,7 @@ int tcp_set_window_clamp(struct sock *sk, int val) ...@@ -3388,7 +3388,7 @@ int tcp_set_window_clamp(struct sock *sk, int val)
if (new_window_clamp == old_window_clamp) if (new_window_clamp == old_window_clamp)
return 0; return 0;
tp->window_clamp = new_window_clamp; WRITE_ONCE(tp->window_clamp, new_window_clamp);
if (new_window_clamp < old_window_clamp) { if (new_window_clamp < old_window_clamp) {
/* need to apply the reserved mem provisioning only /* need to apply the reserved mem provisioning only
* when shrinking the window clamp * when shrinking the window clamp
...@@ -4057,7 +4057,7 @@ int do_tcp_getsockopt(struct sock *sk, int level, ...@@ -4057,7 +4057,7 @@ int do_tcp_getsockopt(struct sock *sk, int level,
TCP_RTO_MAX / HZ); TCP_RTO_MAX / HZ);
break; break;
case TCP_WINDOW_CLAMP: case TCP_WINDOW_CLAMP:
val = tp->window_clamp; val = READ_ONCE(tp->window_clamp);
break; break;
case TCP_INFO: { case TCP_INFO: {
struct tcp_info info; struct tcp_info info;
......
...@@ -563,19 +563,20 @@ static void tcp_init_buffer_space(struct sock *sk) ...@@ -563,19 +563,20 @@ static void tcp_init_buffer_space(struct sock *sk)
maxwin = tcp_full_space(sk); maxwin = tcp_full_space(sk);
if (tp->window_clamp >= maxwin) { if (tp->window_clamp >= maxwin) {
tp->window_clamp = maxwin; WRITE_ONCE(tp->window_clamp, maxwin);
if (tcp_app_win && maxwin > 4 * tp->advmss) if (tcp_app_win && maxwin > 4 * tp->advmss)
tp->window_clamp = max(maxwin - WRITE_ONCE(tp->window_clamp,
(maxwin >> tcp_app_win), max(maxwin - (maxwin >> tcp_app_win),
4 * tp->advmss); 4 * tp->advmss));
} }
/* Force reservation of one segment. */ /* Force reservation of one segment. */
if (tcp_app_win && if (tcp_app_win &&
tp->window_clamp > 2 * tp->advmss && tp->window_clamp > 2 * tp->advmss &&
tp->window_clamp + tp->advmss > maxwin) tp->window_clamp + tp->advmss > maxwin)
tp->window_clamp = max(2 * tp->advmss, maxwin - tp->advmss); WRITE_ONCE(tp->window_clamp,
max(2 * tp->advmss, maxwin - tp->advmss));
tp->rcv_ssthresh = min(tp->rcv_ssthresh, tp->window_clamp); tp->rcv_ssthresh = min(tp->rcv_ssthresh, tp->window_clamp);
tp->snd_cwnd_stamp = tcp_jiffies32; tp->snd_cwnd_stamp = tcp_jiffies32;
...@@ -773,7 +774,8 @@ void tcp_rcv_space_adjust(struct sock *sk) ...@@ -773,7 +774,8 @@ void tcp_rcv_space_adjust(struct sock *sk)
WRITE_ONCE(sk->sk_rcvbuf, rcvbuf); WRITE_ONCE(sk->sk_rcvbuf, rcvbuf);
/* Make the window clamp follow along. */ /* Make the window clamp follow along. */
tp->window_clamp = tcp_win_from_space(sk, rcvbuf); WRITE_ONCE(tp->window_clamp,
tcp_win_from_space(sk, rcvbuf));
} }
} }
tp->rcvq_space.space = copied; tp->rcvq_space.space = copied;
...@@ -6426,7 +6428,8 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, ...@@ -6426,7 +6428,8 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
if (!tp->rx_opt.wscale_ok) { if (!tp->rx_opt.wscale_ok) {
tp->rx_opt.snd_wscale = tp->rx_opt.rcv_wscale = 0; tp->rx_opt.snd_wscale = tp->rx_opt.rcv_wscale = 0;
tp->window_clamp = min(tp->window_clamp, 65535U); WRITE_ONCE(tp->window_clamp,
min(tp->window_clamp, 65535U));
} }
if (tp->rx_opt.saw_tstamp) { if (tp->rx_opt.saw_tstamp) {
......
...@@ -203,16 +203,17 @@ static inline void tcp_event_ack_sent(struct sock *sk, u32 rcv_nxt) ...@@ -203,16 +203,17 @@ static inline void tcp_event_ack_sent(struct sock *sk, u32 rcv_nxt)
* This MUST be enforced by all callers. * This MUST be enforced by all callers.
*/ */
void tcp_select_initial_window(const struct sock *sk, int __space, __u32 mss, void tcp_select_initial_window(const struct sock *sk, int __space, __u32 mss,
__u32 *rcv_wnd, __u32 *window_clamp, __u32 *rcv_wnd, __u32 *__window_clamp,
int wscale_ok, __u8 *rcv_wscale, int wscale_ok, __u8 *rcv_wscale,
__u32 init_rcv_wnd) __u32 init_rcv_wnd)
{ {
unsigned int space = (__space < 0 ? 0 : __space); unsigned int space = (__space < 0 ? 0 : __space);
u32 window_clamp = READ_ONCE(*__window_clamp);
/* If no clamp set the clamp to the max possible scaled window */ /* If no clamp set the clamp to the max possible scaled window */
if (*window_clamp == 0) if (window_clamp == 0)
(*window_clamp) = (U16_MAX << TCP_MAX_WSCALE); window_clamp = (U16_MAX << TCP_MAX_WSCALE);
space = min(*window_clamp, space); space = min(window_clamp, space);
/* Quantize space offering to a multiple of mss if possible. */ /* Quantize space offering to a multiple of mss if possible. */
if (space > mss) if (space > mss)
...@@ -239,12 +240,13 @@ void tcp_select_initial_window(const struct sock *sk, int __space, __u32 mss, ...@@ -239,12 +240,13 @@ void tcp_select_initial_window(const struct sock *sk, int __space, __u32 mss,
/* Set window scaling on max possible window */ /* Set window scaling on max possible window */
space = max_t(u32, space, READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rmem[2])); space = max_t(u32, space, READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rmem[2]));
space = max_t(u32, space, READ_ONCE(sysctl_rmem_max)); space = max_t(u32, space, READ_ONCE(sysctl_rmem_max));
space = min_t(u32, space, *window_clamp); space = min_t(u32, space, window_clamp);
*rcv_wscale = clamp_t(int, ilog2(space) - 15, *rcv_wscale = clamp_t(int, ilog2(space) - 15,
0, TCP_MAX_WSCALE); 0, TCP_MAX_WSCALE);
} }
/* Set the clamp no higher than max representable value */ /* Set the clamp no higher than max representable value */
(*window_clamp) = min_t(__u32, U16_MAX << (*rcv_wscale), *window_clamp); WRITE_ONCE(*__window_clamp,
min_t(__u32, U16_MAX << (*rcv_wscale), window_clamp));
} }
EXPORT_SYMBOL(tcp_select_initial_window); EXPORT_SYMBOL(tcp_select_initial_window);
...@@ -3855,7 +3857,7 @@ static void tcp_connect_init(struct sock *sk) ...@@ -3855,7 +3857,7 @@ static void tcp_connect_init(struct sock *sk)
tcp_ca_dst_init(sk, dst); tcp_ca_dst_init(sk, dst);
if (!tp->window_clamp) if (!tp->window_clamp)
tp->window_clamp = dst_metric(dst, RTAX_WINDOW); WRITE_ONCE(tp->window_clamp, dst_metric(dst, RTAX_WINDOW));
tp->advmss = tcp_mss_clamp(tp, dst_metric_advmss(dst)); tp->advmss = tcp_mss_clamp(tp, dst_metric_advmss(dst));
tcp_initialize_rcv_mss(sk); tcp_initialize_rcv_mss(sk);
...@@ -3863,7 +3865,7 @@ static void tcp_connect_init(struct sock *sk) ...@@ -3863,7 +3865,7 @@ static void tcp_connect_init(struct sock *sk)
/* limit the window selection if the user enforce a smaller rx buffer */ /* limit the window selection if the user enforce a smaller rx buffer */
if (sk->sk_userlocks & SOCK_RCVBUF_LOCK && if (sk->sk_userlocks & SOCK_RCVBUF_LOCK &&
(tp->window_clamp > tcp_full_space(sk) || tp->window_clamp == 0)) (tp->window_clamp > tcp_full_space(sk) || tp->window_clamp == 0))
tp->window_clamp = tcp_full_space(sk); WRITE_ONCE(tp->window_clamp, tcp_full_space(sk));
rcv_wnd = tcp_rwnd_init_bpf(sk); rcv_wnd = tcp_rwnd_init_bpf(sk);
if (rcv_wnd == 0) if (rcv_wnd == 0)
......
...@@ -246,7 +246,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) ...@@ -246,7 +246,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
} }
} }
req->rsk_window_clamp = tp->window_clamp ? :dst_metric(dst, RTAX_WINDOW); req->rsk_window_clamp = READ_ONCE(tp->window_clamp) ? :dst_metric(dst, RTAX_WINDOW);
/* limit the window selection if the user enforce a smaller rx buffer */ /* limit the window selection if the user enforce a smaller rx buffer */
full_space = tcp_full_space(sk); full_space = tcp_full_space(sk);
if (sk->sk_userlocks & SOCK_RCVBUF_LOCK && if (sk->sk_userlocks & SOCK_RCVBUF_LOCK &&
......
...@@ -2056,7 +2056,7 @@ static void mptcp_rcv_space_adjust(struct mptcp_sock *msk, int copied) ...@@ -2056,7 +2056,7 @@ static void mptcp_rcv_space_adjust(struct mptcp_sock *msk, int copied)
ssk = mptcp_subflow_tcp_sock(subflow); ssk = mptcp_subflow_tcp_sock(subflow);
slow = lock_sock_fast(ssk); slow = lock_sock_fast(ssk);
WRITE_ONCE(ssk->sk_rcvbuf, rcvbuf); WRITE_ONCE(ssk->sk_rcvbuf, rcvbuf);
tcp_sk(ssk)->window_clamp = window_clamp; WRITE_ONCE(tcp_sk(ssk)->window_clamp, window_clamp);
tcp_cleanup_rbuf(ssk, 1); tcp_cleanup_rbuf(ssk, 1);
unlock_sock_fast(ssk, slow); unlock_sock_fast(ssk, slow);
} }
......
...@@ -1523,7 +1523,7 @@ int mptcp_set_rcvlowat(struct sock *sk, int val) ...@@ -1523,7 +1523,7 @@ int mptcp_set_rcvlowat(struct sock *sk, int val)
slow = lock_sock_fast(ssk); slow = lock_sock_fast(ssk);
WRITE_ONCE(ssk->sk_rcvbuf, space); WRITE_ONCE(ssk->sk_rcvbuf, space);
tcp_sk(ssk)->window_clamp = val; WRITE_ONCE(tcp_sk(ssk)->window_clamp, val);
unlock_sock_fast(ssk, slow); unlock_sock_fast(ssk, slow);
} }
return 0; return 0;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment