Commit 9af0620d authored by David S. Miller

Merge branch 'net-sysctl-races-part-6'

Kuniyuki Iwashima says:

====================
sysctl: Fix data-races around ipv4_net_table (Round 6, Final).

This series fixes data-races around 11 knobs after tcp_pacing_ss_ratio in
ipv4_net_table, and this is the final round for ipv4_net_table.

While at it, other data-races around these related knobs are fixed.

  - decnet_mem
  - decnet_rmem
  - tipc_rmem

There are still 58 tables possibly missing some fixes under net/.

  $ grep -rnE "struct ctl_table.*?\[\] =" net/ | wc -l
  60
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents 3e7d18b9 96b9bd8c
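
For reference, the pattern every hunk below applies: these sysctl knobs can be
rewritten at any time through /proc/sys, so lockless readers are annotated with
READ_ONCE() to keep the compiler from tearing or re-loading the value mid-use.
A minimal sketch of the before/after shape, using a hypothetical knob and
helper for illustration:

  /* Before: a plain load of a knob that a writer in the sysctl handler
   * may change concurrently; the compiler may tear or repeat the read.
   */
  if (net->ipv4.sysctl_example_knob == 2)
          send_notification();

  /* After: a single annotated load; every later check sees one snapshot. */
  u8 knob = READ_ONCE(net->ipv4.sysctl_example_knob);

  if (knob == 2)
          send_notification();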
@@ -2843,18 +2843,18 @@ static inline int sk_get_wmem0(const struct sock *sk, const struct proto *proto)
 {
 	/* Does this proto have per netns sysctl_wmem ? */
 	if (proto->sysctl_wmem_offset)
-		return *(int *)((void *)sock_net(sk) + proto->sysctl_wmem_offset);
+		return READ_ONCE(*(int *)((void *)sock_net(sk) + proto->sysctl_wmem_offset));
 
-	return *proto->sysctl_wmem;
+	return READ_ONCE(*proto->sysctl_wmem);
 }
 
 static inline int sk_get_rmem0(const struct sock *sk, const struct proto *proto)
 {
 	/* Does this proto have per netns sysctl_rmem ? */
 	if (proto->sysctl_rmem_offset)
-		return *(int *)((void *)sock_net(sk) + proto->sysctl_rmem_offset);
+		return READ_ONCE(*(int *)((void *)sock_net(sk) + proto->sysctl_rmem_offset));
 
-	return *proto->sysctl_rmem;
+	return READ_ONCE(*proto->sysctl_rmem);
 }
 
 /* Default TCP Small queue budget is ~1 ms of data (1sec >> 10)
...
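For context on the helpers above: protocols with per-netns buffer limits point
sysctl_wmem_offset/sysctl_rmem_offset at their field inside struct net, so one
accessor serves both the per-netns and the global arrays. TCP, for instance,
wires this up roughly as follows in its struct proto (not part of this diff):

  .sysctl_wmem_offset = offsetof(struct net, ipv4.sysctl_tcp_wmem),
  .sysctl_rmem_offset = offsetof(struct net, ipv4.sysctl_tcp_rmem),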
@@ -480,8 +480,8 @@ static struct sock *dn_alloc_sock(struct net *net, struct socket *sock, gfp_t gf
 	sk->sk_family      = PF_DECnet;
 	sk->sk_protocol    = 0;
 	sk->sk_allocation  = gfp;
-	sk->sk_sndbuf      = sysctl_decnet_wmem[1];
-	sk->sk_rcvbuf      = sysctl_decnet_rmem[1];
+	sk->sk_sndbuf      = READ_ONCE(sysctl_decnet_wmem[1]);
+	sk->sk_rcvbuf      = READ_ONCE(sysctl_decnet_rmem[1]);
 
 	/* Initialization of DECnet Session Control Port */
 	scp = DN_SK(sk);
...
@@ -1042,6 +1042,7 @@ fib_find_matching_alias(struct net *net, const struct fib_rt_info *fri)
 
 void fib_alias_hw_flags_set(struct net *net, const struct fib_rt_info *fri)
 {
+	u8 fib_notify_on_flag_change;
 	struct fib_alias *fa_match;
 	struct sk_buff *skb;
 	int err;
@@ -1063,14 +1064,16 @@ void fib_alias_hw_flags_set(struct net *net, const struct fib_rt_info *fri)
 	WRITE_ONCE(fa_match->offload, fri->offload);
 	WRITE_ONCE(fa_match->trap, fri->trap);
 
+	fib_notify_on_flag_change = READ_ONCE(net->ipv4.sysctl_fib_notify_on_flag_change);
+
 	/* 2 means send notifications only if offload_failed was changed. */
-	if (net->ipv4.sysctl_fib_notify_on_flag_change == 2 &&
+	if (fib_notify_on_flag_change == 2 &&
 	    READ_ONCE(fa_match->offload_failed) == fri->offload_failed)
 		goto out;
 
 	WRITE_ONCE(fa_match->offload_failed, fri->offload_failed);
 
-	if (!net->ipv4.sysctl_fib_notify_on_flag_change)
+	if (!fib_notify_on_flag_change)
 		goto out;
 
 	skb = nlmsg_new(fib_nlmsg_size(fa_match->fa_info), GFP_ATOMIC);
...
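The shape of the fix above is worth noting: the knob is loaded once into a
local, and both tests check that copy. With two separate plain reads, a
concurrent writer could make the value look like 2 at the first test and 0 at
the second, skipping both the early return and the notification; the single
READ_ONCE() gives the whole function one consistent snapshot.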
@@ -452,8 +452,8 @@ void tcp_init_sock(struct sock *sk)
 
 	icsk->icsk_sync_mss = tcp_sync_mss;
 
-	WRITE_ONCE(sk->sk_sndbuf, sock_net(sk)->ipv4.sysctl_tcp_wmem[1]);
-	WRITE_ONCE(sk->sk_rcvbuf, sock_net(sk)->ipv4.sysctl_tcp_rmem[1]);
+	WRITE_ONCE(sk->sk_sndbuf, READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_wmem[1]));
+	WRITE_ONCE(sk->sk_rcvbuf, READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rmem[1]));
 
 	sk_sockets_allocated_inc(sk);
 }
@@ -1724,7 +1724,7 @@ int tcp_set_rcvlowat(struct sock *sk, int val)
 	if (sk->sk_userlocks & SOCK_RCVBUF_LOCK)
 		cap = sk->sk_rcvbuf >> 1;
 	else
-		cap = sock_net(sk)->ipv4.sysctl_tcp_rmem[2] >> 1;
+		cap = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rmem[2]) >> 1;
 
 	val = min(val, cap);
 	WRITE_ONCE(sk->sk_rcvlowat, val ? : 1);
...
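(The nesting in tcp_init_sock() above is deliberate: READ_ONCE() annotates the
lockless load of the sysctl, while the pre-existing WRITE_ONCE() annotates the
store to sk_sndbuf/sk_rcvbuf, which other paths read without the socket lock.)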
@@ -426,7 +426,7 @@ static void tcp_sndbuf_expand(struct sock *sk)
 
 	if (sk->sk_sndbuf < sndmem)
 		WRITE_ONCE(sk->sk_sndbuf,
-			   min(sndmem, sock_net(sk)->ipv4.sysctl_tcp_wmem[2]));
+			   min(sndmem, READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_wmem[2])));
 }
 
 /* 2. Tuning advertised window (window_clamp, rcv_ssthresh)
@@ -461,7 +461,7 @@ static int __tcp_grow_window(const struct sock *sk, const struct sk_buff *skb,
 	struct tcp_sock *tp = tcp_sk(sk);
 	/* Optimize this! */
 	int truesize = tcp_win_from_space(sk, skbtruesize) >> 1;
-	int window = tcp_win_from_space(sk, sock_net(sk)->ipv4.sysctl_tcp_rmem[2]) >> 1;
+	int window = tcp_win_from_space(sk, READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rmem[2])) >> 1;
 
 	while (tp->rcv_ssthresh <= window) {
 		if (truesize <= skb->len)
@@ -574,16 +574,17 @@ static void tcp_clamp_window(struct sock *sk)
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct inet_connection_sock *icsk = inet_csk(sk);
 	struct net *net = sock_net(sk);
+	int rmem2;
 
 	icsk->icsk_ack.quick = 0;
+	rmem2 = READ_ONCE(net->ipv4.sysctl_tcp_rmem[2]);
 
-	if (sk->sk_rcvbuf < net->ipv4.sysctl_tcp_rmem[2] &&
+	if (sk->sk_rcvbuf < rmem2 &&
 	    !(sk->sk_userlocks & SOCK_RCVBUF_LOCK) &&
 	    !tcp_under_memory_pressure(sk) &&
 	    sk_memory_allocated(sk) < sk_prot_mem_limits(sk, 0)) {
 		WRITE_ONCE(sk->sk_rcvbuf,
-			   min(atomic_read(&sk->sk_rmem_alloc),
-			       net->ipv4.sysctl_tcp_rmem[2]));
+			   min(atomic_read(&sk->sk_rmem_alloc), rmem2));
 	}
 	if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf)
 		tp->rcv_ssthresh = min(tp->window_clamp, 2U * tp->advmss);
@@ -745,7 +746,7 @@ void tcp_rcv_space_adjust(struct sock *sk)
 		do_div(rcvwin, tp->advmss);
 		rcvbuf = min_t(u64, rcvwin * rcvmem,
-			       sock_net(sk)->ipv4.sysctl_tcp_rmem[2]);
+			       READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rmem[2]));
 		if (rcvbuf > sk->sk_rcvbuf) {
 			WRITE_ONCE(sk->sk_rcvbuf, rcvbuf);
@@ -910,9 +911,9 @@ static void tcp_update_pacing_rate(struct sock *sk)
 	 * end of slow start and should slow down.
 	 */
 	if (tcp_snd_cwnd(tp) < tp->snd_ssthresh / 2)
-		rate *= sock_net(sk)->ipv4.sysctl_tcp_pacing_ss_ratio;
+		rate *= READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_pacing_ss_ratio);
 	else
-		rate *= sock_net(sk)->ipv4.sysctl_tcp_pacing_ca_ratio;
+		rate *= READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_pacing_ca_ratio);
 
 	rate *= max(tcp_snd_cwnd(tp), tp->packets_out);
@@ -5520,7 +5521,7 @@ static void __tcp_ack_snd_check(struct sock *sk, int ofo_possible)
 	}
 	if (!tcp_is_sack(tp) ||
-	    tp->compressed_ack >= sock_net(sk)->ipv4.sysctl_tcp_comp_sack_nr)
+	    tp->compressed_ack >= READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_comp_sack_nr))
 		goto send_now;
 
 	if (tp->compressed_ack_rcv_nxt != tp->rcv_nxt) {
@@ -5541,11 +5542,12 @@ static void __tcp_ack_snd_check(struct sock *sk, int ofo_possible)
 	if (tp->srtt_us && tp->srtt_us < rtt)
 		rtt = tp->srtt_us;
 
-	delay = min_t(unsigned long, sock_net(sk)->ipv4.sysctl_tcp_comp_sack_delay_ns,
+	delay = min_t(unsigned long,
+		      READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_comp_sack_delay_ns),
 		      rtt * (NSEC_PER_USEC >> 3)/20);
 	sock_hold(sk);
 	hrtimer_start_range_ns(&tp->compressed_ack_timer, ns_to_ktime(delay),
-			       sock_net(sk)->ipv4.sysctl_tcp_comp_sack_slack_ns,
+			       READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_comp_sack_slack_ns),
 			       HRTIMER_MODE_REL_PINNED_SOFT);
 }
...
@@ -1006,7 +1006,7 @@ static int tcp_v4_send_synack(const struct sock *sk, struct dst_entry *dst,
 	if (skb) {
 		__tcp_v4_send_check(skb, ireq->ir_loc_addr, ireq->ir_rmt_addr);
 
-		tos = sock_net(sk)->ipv4.sysctl_tcp_reflect_tos ?
+		tos = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos) ?
 				(tcp_rsk(req)->syn_tos & ~INET_ECN_MASK) |
 				(inet_sk(sk)->tos & INET_ECN_MASK) :
 				inet_sk(sk)->tos;
@@ -1526,7 +1526,7 @@ struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
 	/* Set ToS of the new socket based upon the value of incoming SYN.
 	 * ECT bits are set later in tcp_init_transfer().
 	 */
-	if (sock_net(sk)->ipv4.sysctl_tcp_reflect_tos)
+	if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos))
 		newinet->tos = tcp_rsk(req)->syn_tos & ~INET_ECN_MASK;
 
 	if (!dst) {
...
@@ -238,7 +238,7 @@ void tcp_select_initial_window(const struct sock *sk, int __space, __u32 mss,
 	*rcv_wscale = 0;
 	if (wscale_ok) {
 		/* Set window scaling on max possible window */
-		space = max_t(u32, space, sock_net(sk)->ipv4.sysctl_tcp_rmem[2]);
+		space = max_t(u32, space, READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rmem[2]));
 		space = max_t(u32, space, sysctl_rmem_max);
 		space = min_t(u32, space, *window_clamp);
 		*rcv_wscale = clamp_t(int, ilog2(space) - 15,
...
@@ -546,7 +546,7 @@ static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst,
 		if (np->repflow && ireq->pktopts)
 			fl6->flowlabel = ip6_flowlabel(ipv6_hdr(ireq->pktopts));
 
-		tclass = sock_net(sk)->ipv4.sysctl_tcp_reflect_tos ?
+		tclass = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos) ?
 				(tcp_rsk(req)->syn_tos & ~INET_ECN_MASK) |
 				(np->tclass & INET_ECN_MASK) :
 				np->tclass;
@@ -1314,7 +1314,7 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *
 	/* Set ToS of the new socket based upon the value of incoming SYN.
 	 * ECT bits are set later in tcp_init_transfer().
 	 */
-	if (sock_net(sk)->ipv4.sysctl_tcp_reflect_tos)
+	if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos))
 		newnp->tclass = tcp_rsk(req)->syn_tos & ~INET_ECN_MASK;
 
 	/* Clone native IPv6 options from listening socket (if any)
...
@@ -1926,7 +1926,7 @@ static void mptcp_rcv_space_adjust(struct mptcp_sock *msk, int copied)
 		do_div(rcvwin, advmss);
 		rcvbuf = min_t(u64, rcvwin * rcvmem,
-			       sock_net(sk)->ipv4.sysctl_tcp_rmem[2]);
+			       READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rmem[2]));
 
 		if (rcvbuf > sk->sk_rcvbuf) {
 			u32 window_clamp;
@@ -2669,8 +2669,8 @@ static int mptcp_init_sock(struct sock *sk)
 	mptcp_ca_reset(sk);
 
 	sk_sockets_allocated_inc(sk);
-	sk->sk_rcvbuf = sock_net(sk)->ipv4.sysctl_tcp_rmem[1];
-	sk->sk_sndbuf = sock_net(sk)->ipv4.sysctl_tcp_wmem[1];
+	sk->sk_rcvbuf = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rmem[1]);
+	sk->sk_sndbuf = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_wmem[1]);
 
 	return 0;
 }
...
@@ -517,7 +517,7 @@ static int tipc_sk_create(struct net *net, struct socket *sock,
 	timer_setup(&sk->sk_timer, tipc_sk_timeout, 0);
 	sk->sk_shutdown = 0;
 	sk->sk_backlog_rcv = tipc_sk_backlog_rcv;
-	sk->sk_rcvbuf = sysctl_tipc_rmem[1];
+	sk->sk_rcvbuf = READ_ONCE(sysctl_tipc_rmem[1]);
 	sk->sk_data_ready = tipc_data_ready;
 	sk->sk_write_space = tipc_write_space;
 	sk->sk_destruct = tipc_sock_destruct;
...
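The concurrent writer all of these annotations guard against is an ordinary
runtime sysctl update, for example (illustrative values):

  # sysctl -w net.ipv4.tcp_rmem="4096 131072 6291456"
  # sysctl -w net.ipv4.fib_notify_on_flag_change=2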