Commit 010b64f7 authored by David S. Miller

Merge branch 'tcp-move-14-sysctls-to-namespaces'

Eric Dumazet says:

====================
tcp: move 14 sysctls to namespaces

Ideally all TCP sysctls should be per netns.
This patch series takes care of 14 sysctls.
More to come later.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents 4dc12ffe af9b69a7
...@@ -128,6 +128,20 @@ struct netns_ipv4 { ...@@ -128,6 +128,20 @@ struct netns_ipv4 {
int sysctl_tcp_sack; int sysctl_tcp_sack;
int sysctl_tcp_window_scaling; int sysctl_tcp_window_scaling;
int sysctl_tcp_timestamps; int sysctl_tcp_timestamps;
int sysctl_tcp_early_retrans;
int sysctl_tcp_recovery;
int sysctl_tcp_thin_linear_timeouts;
int sysctl_tcp_slow_start_after_idle;
int sysctl_tcp_retrans_collapse;
int sysctl_tcp_stdurg;
int sysctl_tcp_rfc1337;
int sysctl_tcp_abort_on_overflow;
int sysctl_tcp_fack;
int sysctl_tcp_max_reordering;
int sysctl_tcp_dsack;
int sysctl_tcp_app_win;
int sysctl_tcp_adv_win_scale;
int sysctl_tcp_frto;
struct inet_timewait_death_row tcp_death_row; struct inet_timewait_death_row tcp_death_row;
int sysctl_max_syn_backlog; int sysctl_max_syn_backlog;
int sysctl_tcp_fastopen; int sysctl_tcp_fastopen;
......
...@@ -243,30 +243,15 @@ void tcp_time_wait(struct sock *sk, int state, int timeo); ...@@ -243,30 +243,15 @@ void tcp_time_wait(struct sock *sk, int state, int timeo);
/* sysctl variables for tcp */ /* sysctl variables for tcp */
extern int sysctl_tcp_retrans_collapse;
extern int sysctl_tcp_stdurg;
extern int sysctl_tcp_rfc1337;
extern int sysctl_tcp_abort_on_overflow;
extern int sysctl_tcp_max_orphans; extern int sysctl_tcp_max_orphans;
extern int sysctl_tcp_fack;
extern int sysctl_tcp_reordering;
extern int sysctl_tcp_max_reordering;
extern int sysctl_tcp_dsack;
extern long sysctl_tcp_mem[3]; extern long sysctl_tcp_mem[3];
extern int sysctl_tcp_wmem[3]; extern int sysctl_tcp_wmem[3];
extern int sysctl_tcp_rmem[3]; extern int sysctl_tcp_rmem[3];
extern int sysctl_tcp_app_win;
extern int sysctl_tcp_adv_win_scale;
extern int sysctl_tcp_frto;
extern int sysctl_tcp_nometrics_save; extern int sysctl_tcp_nometrics_save;
extern int sysctl_tcp_moderate_rcvbuf; extern int sysctl_tcp_moderate_rcvbuf;
extern int sysctl_tcp_tso_win_divisor; extern int sysctl_tcp_tso_win_divisor;
extern int sysctl_tcp_workaround_signed_windows; extern int sysctl_tcp_workaround_signed_windows;
extern int sysctl_tcp_slow_start_after_idle;
extern int sysctl_tcp_thin_linear_timeouts;
extern int sysctl_tcp_thin_dupack;
extern int sysctl_tcp_early_retrans;
extern int sysctl_tcp_recovery;
#define TCP_RACK_LOSS_DETECTION 0x1 /* Use RACK to detect losses */ #define TCP_RACK_LOSS_DETECTION 0x1 /* Use RACK to detect losses */
extern int sysctl_tcp_limit_output_bytes; extern int sysctl_tcp_limit_output_bytes;
...@@ -1311,7 +1296,7 @@ static inline void tcp_slow_start_after_idle_check(struct sock *sk) ...@@ -1311,7 +1296,7 @@ static inline void tcp_slow_start_after_idle_check(struct sock *sk)
struct tcp_sock *tp = tcp_sk(sk); struct tcp_sock *tp = tcp_sk(sk);
s32 delta; s32 delta;
if (!sysctl_tcp_slow_start_after_idle || tp->packets_out || if (!sock_net(sk)->ipv4.sysctl_tcp_slow_start_after_idle || tp->packets_out ||
ca_ops->cong_control) ca_ops->cong_control)
return; return;
delta = tcp_jiffies32 - tp->lsndtime; delta = tcp_jiffies32 - tp->lsndtime;
...@@ -1324,9 +1309,9 @@ void tcp_select_initial_window(int __space, __u32 mss, __u32 *rcv_wnd, ...@@ -1324,9 +1309,9 @@ void tcp_select_initial_window(int __space, __u32 mss, __u32 *rcv_wnd,
__u32 *window_clamp, int wscale_ok, __u32 *window_clamp, int wscale_ok,
__u8 *rcv_wscale, __u32 init_rcv_wnd); __u8 *rcv_wscale, __u32 init_rcv_wnd);
static inline int tcp_win_from_space(int space) static inline int tcp_win_from_space(const struct sock *sk, int space)
{ {
int tcp_adv_win_scale = sysctl_tcp_adv_win_scale; int tcp_adv_win_scale = sock_net(sk)->ipv4.sysctl_tcp_adv_win_scale;
return tcp_adv_win_scale <= 0 ? return tcp_adv_win_scale <= 0 ?
(space>>(-tcp_adv_win_scale)) : (space>>(-tcp_adv_win_scale)) :
...@@ -1336,13 +1321,13 @@ static inline int tcp_win_from_space(int space) ...@@ -1336,13 +1321,13 @@ static inline int tcp_win_from_space(int space)
/* Note: caller must be prepared to deal with negative returns */ /* Note: caller must be prepared to deal with negative returns */
static inline int tcp_space(const struct sock *sk) static inline int tcp_space(const struct sock *sk)
{ {
return tcp_win_from_space(sk->sk_rcvbuf - return tcp_win_from_space(sk, sk->sk_rcvbuf -
atomic_read(&sk->sk_rmem_alloc)); atomic_read(&sk->sk_rmem_alloc));
} }
static inline int tcp_full_space(const struct sock *sk) static inline int tcp_full_space(const struct sock *sk)
{ {
return tcp_win_from_space(sk->sk_rcvbuf); return tcp_win_from_space(sk, sk->sk_rcvbuf);
} }
extern void tcp_openreq_init_rwin(struct request_sock *req, extern void tcp_openreq_init_rwin(struct request_sock *req,
......
...@@ -386,13 +386,6 @@ static int proc_tcp_available_ulp(struct ctl_table *ctl, ...@@ -386,13 +386,6 @@ static int proc_tcp_available_ulp(struct ctl_table *ctl,
} }
static struct ctl_table ipv4_table[] = { static struct ctl_table ipv4_table[] = {
{
.procname = "tcp_retrans_collapse",
.data = &sysctl_tcp_retrans_collapse,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec
},
{ {
.procname = "tcp_max_orphans", .procname = "tcp_max_orphans",
.data = &sysctl_tcp_max_orphans, .data = &sysctl_tcp_max_orphans,
...@@ -400,27 +393,6 @@ static struct ctl_table ipv4_table[] = { ...@@ -400,27 +393,6 @@ static struct ctl_table ipv4_table[] = {
.mode = 0644, .mode = 0644,
.proc_handler = proc_dointvec .proc_handler = proc_dointvec
}, },
{
.procname = "tcp_abort_on_overflow",
.data = &sysctl_tcp_abort_on_overflow,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec
},
{
.procname = "tcp_stdurg",
.data = &sysctl_tcp_stdurg,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec
},
{
.procname = "tcp_rfc1337",
.data = &sysctl_tcp_rfc1337,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec
},
{ {
.procname = "inet_peer_threshold", .procname = "inet_peer_threshold",
.data = &inet_peer_threshold, .data = &inet_peer_threshold,
...@@ -442,34 +414,6 @@ static struct ctl_table ipv4_table[] = { ...@@ -442,34 +414,6 @@ static struct ctl_table ipv4_table[] = {
.mode = 0644, .mode = 0644,
.proc_handler = proc_dointvec_jiffies, .proc_handler = proc_dointvec_jiffies,
}, },
{
.procname = "tcp_fack",
.data = &sysctl_tcp_fack,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec
},
{
.procname = "tcp_recovery",
.data = &sysctl_tcp_recovery,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec,
},
{
.procname = "tcp_max_reordering",
.data = &sysctl_tcp_max_reordering,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec
},
{
.procname = "tcp_dsack",
.data = &sysctl_tcp_dsack,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec
},
{ {
.procname = "tcp_mem", .procname = "tcp_mem",
.maxlen = sizeof(sysctl_tcp_mem), .maxlen = sizeof(sysctl_tcp_mem),
...@@ -493,29 +437,6 @@ static struct ctl_table ipv4_table[] = { ...@@ -493,29 +437,6 @@ static struct ctl_table ipv4_table[] = {
.proc_handler = proc_dointvec_minmax, .proc_handler = proc_dointvec_minmax,
.extra1 = &one, .extra1 = &one,
}, },
{
.procname = "tcp_app_win",
.data = &sysctl_tcp_app_win,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec
},
{
.procname = "tcp_adv_win_scale",
.data = &sysctl_tcp_adv_win_scale,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
.extra1 = &tcp_adv_win_scale_min,
.extra2 = &tcp_adv_win_scale_max,
},
{
.procname = "tcp_frto",
.data = &sysctl_tcp_frto,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec
},
{ {
.procname = "tcp_min_rtt_wlen", .procname = "tcp_min_rtt_wlen",
.data = &sysctl_tcp_min_rtt_wlen, .data = &sysctl_tcp_min_rtt_wlen,
...@@ -578,13 +499,6 @@ static struct ctl_table ipv4_table[] = { ...@@ -578,13 +499,6 @@ static struct ctl_table ipv4_table[] = {
.mode = 0644, .mode = 0644,
.proc_handler = proc_dointvec .proc_handler = proc_dointvec
}, },
{
.procname = "tcp_slow_start_after_idle",
.data = &sysctl_tcp_slow_start_after_idle,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec
},
#ifdef CONFIG_NETLABEL #ifdef CONFIG_NETLABEL
{ {
.procname = "cipso_cache_enable", .procname = "cipso_cache_enable",
...@@ -627,22 +541,6 @@ static struct ctl_table ipv4_table[] = { ...@@ -627,22 +541,6 @@ static struct ctl_table ipv4_table[] = {
.mode = 0644, .mode = 0644,
.proc_handler = proc_allowed_congestion_control, .proc_handler = proc_allowed_congestion_control,
}, },
{
.procname = "tcp_thin_linear_timeouts",
.data = &sysctl_tcp_thin_linear_timeouts,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec
},
{
.procname = "tcp_early_retrans",
.data = &sysctl_tcp_early_retrans,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
.extra1 = &zero,
.extra2 = &four,
},
{ {
.procname = "tcp_min_tso_segs", .procname = "tcp_min_tso_segs",
.data = &sysctl_tcp_min_tso_segs, .data = &sysctl_tcp_min_tso_segs,
...@@ -1145,6 +1043,108 @@ static struct ctl_table ipv4_net_table[] = { ...@@ -1145,6 +1043,108 @@ static struct ctl_table ipv4_net_table[] = {
.mode = 0644, .mode = 0644,
.proc_handler = proc_dointvec .proc_handler = proc_dointvec
}, },
{
.procname = "tcp_early_retrans",
.data = &init_net.ipv4.sysctl_tcp_early_retrans,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
.extra1 = &zero,
.extra2 = &four,
},
{
.procname = "tcp_recovery",
.data = &init_net.ipv4.sysctl_tcp_recovery,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec,
},
{
.procname = "tcp_thin_linear_timeouts",
.data = &init_net.ipv4.sysctl_tcp_thin_linear_timeouts,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec
},
{
.procname = "tcp_slow_start_after_idle",
.data = &init_net.ipv4.sysctl_tcp_slow_start_after_idle,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec
},
{
.procname = "tcp_retrans_collapse",
.data = &init_net.ipv4.sysctl_tcp_retrans_collapse,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec
},
{
.procname = "tcp_stdurg",
.data = &init_net.ipv4.sysctl_tcp_stdurg,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec
},
{
.procname = "tcp_rfc1337",
.data = &init_net.ipv4.sysctl_tcp_rfc1337,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec
},
{
.procname = "tcp_abort_on_overflow",
.data = &init_net.ipv4.sysctl_tcp_abort_on_overflow,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec
},
{
.procname = "tcp_fack",
.data = &init_net.ipv4.sysctl_tcp_fack,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec
},
{
.procname = "tcp_max_reordering",
.data = &init_net.ipv4.sysctl_tcp_max_reordering,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec
},
{
.procname = "tcp_dsack",
.data = &init_net.ipv4.sysctl_tcp_dsack,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec
},
{
.procname = "tcp_app_win",
.data = &init_net.ipv4.sysctl_tcp_app_win,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec
},
{
.procname = "tcp_adv_win_scale",
.data = &init_net.ipv4.sysctl_tcp_adv_win_scale,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
.extra1 = &tcp_adv_win_scale_min,
.extra2 = &tcp_adv_win_scale_max,
},
{
.procname = "tcp_frto",
.data = &init_net.ipv4.sysctl_tcp_frto,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec
},
{ } { }
}; };
......
...@@ -2517,7 +2517,7 @@ static int tcp_repair_options_est(struct sock *sk, ...@@ -2517,7 +2517,7 @@ static int tcp_repair_options_est(struct sock *sk,
return -EINVAL; return -EINVAL;
tp->rx_opt.sack_ok |= TCP_SACK_SEEN; tp->rx_opt.sack_ok |= TCP_SACK_SEEN;
if (sysctl_tcp_fack) if (sock_net(sk)->ipv4.sysctl_tcp_fack)
tcp_enable_fack(tp); tcp_enable_fack(tp);
break; break;
case TCPOPT_TIMESTAMP: case TCPOPT_TIMESTAMP:
......
...@@ -79,23 +79,12 @@ ...@@ -79,23 +79,12 @@
#include <linux/unaligned/access_ok.h> #include <linux/unaligned/access_ok.h>
#include <linux/static_key.h> #include <linux/static_key.h>
int sysctl_tcp_fack __read_mostly;
int sysctl_tcp_max_reordering __read_mostly = 300;
int sysctl_tcp_dsack __read_mostly = 1;
int sysctl_tcp_app_win __read_mostly = 31;
int sysctl_tcp_adv_win_scale __read_mostly = 1;
EXPORT_SYMBOL(sysctl_tcp_adv_win_scale);
/* rfc5961 challenge ack rate limiting */ /* rfc5961 challenge ack rate limiting */
int sysctl_tcp_challenge_ack_limit = 1000; int sysctl_tcp_challenge_ack_limit = 1000;
int sysctl_tcp_stdurg __read_mostly;
int sysctl_tcp_rfc1337 __read_mostly;
int sysctl_tcp_max_orphans __read_mostly = NR_FILE; int sysctl_tcp_max_orphans __read_mostly = NR_FILE;
int sysctl_tcp_frto __read_mostly = 2;
int sysctl_tcp_min_rtt_wlen __read_mostly = 300; int sysctl_tcp_min_rtt_wlen __read_mostly = 300;
int sysctl_tcp_moderate_rcvbuf __read_mostly = 1; int sysctl_tcp_moderate_rcvbuf __read_mostly = 1;
int sysctl_tcp_early_retrans __read_mostly = 3;
int sysctl_tcp_invalid_ratelimit __read_mostly = HZ/2; int sysctl_tcp_invalid_ratelimit __read_mostly = HZ/2;
#define FLAG_DATA 0x01 /* Incoming frame contained data. */ #define FLAG_DATA 0x01 /* Incoming frame contained data. */
...@@ -370,8 +359,8 @@ static int __tcp_grow_window(const struct sock *sk, const struct sk_buff *skb) ...@@ -370,8 +359,8 @@ static int __tcp_grow_window(const struct sock *sk, const struct sk_buff *skb)
{ {
struct tcp_sock *tp = tcp_sk(sk); struct tcp_sock *tp = tcp_sk(sk);
/* Optimize this! */ /* Optimize this! */
int truesize = tcp_win_from_space(skb->truesize) >> 1; int truesize = tcp_win_from_space(sk, skb->truesize) >> 1;
int window = tcp_win_from_space(sysctl_tcp_rmem[2]) >> 1; int window = tcp_win_from_space(sk, sysctl_tcp_rmem[2]) >> 1;
while (tp->rcv_ssthresh <= window) { while (tp->rcv_ssthresh <= window) {
if (truesize <= skb->len) if (truesize <= skb->len)
...@@ -396,7 +385,7 @@ static void tcp_grow_window(struct sock *sk, const struct sk_buff *skb) ...@@ -396,7 +385,7 @@ static void tcp_grow_window(struct sock *sk, const struct sk_buff *skb)
/* Check #2. Increase window, if skb with such overhead /* Check #2. Increase window, if skb with such overhead
* will fit to rcvbuf in future. * will fit to rcvbuf in future.
*/ */
if (tcp_win_from_space(skb->truesize) <= skb->len) if (tcp_win_from_space(sk, skb->truesize) <= skb->len)
incr = 2 * tp->advmss; incr = 2 * tp->advmss;
else else
incr = __tcp_grow_window(sk, skb); incr = __tcp_grow_window(sk, skb);
...@@ -434,6 +423,7 @@ static void tcp_fixup_rcvbuf(struct sock *sk) ...@@ -434,6 +423,7 @@ static void tcp_fixup_rcvbuf(struct sock *sk)
*/ */
void tcp_init_buffer_space(struct sock *sk) void tcp_init_buffer_space(struct sock *sk)
{ {
int tcp_app_win = sock_net(sk)->ipv4.sysctl_tcp_app_win;
struct tcp_sock *tp = tcp_sk(sk); struct tcp_sock *tp = tcp_sk(sk);
int maxwin; int maxwin;
...@@ -452,14 +442,14 @@ void tcp_init_buffer_space(struct sock *sk) ...@@ -452,14 +442,14 @@ void tcp_init_buffer_space(struct sock *sk)
if (tp->window_clamp >= maxwin) { if (tp->window_clamp >= maxwin) {
tp->window_clamp = maxwin; tp->window_clamp = maxwin;
if (sysctl_tcp_app_win && maxwin > 4 * tp->advmss) if (tcp_app_win && maxwin > 4 * tp->advmss)
tp->window_clamp = max(maxwin - tp->window_clamp = max(maxwin -
(maxwin >> sysctl_tcp_app_win), (maxwin >> tcp_app_win),
4 * tp->advmss); 4 * tp->advmss);
} }
/* Force reservation of one segment. */ /* Force reservation of one segment. */
if (sysctl_tcp_app_win && if (tcp_app_win &&
tp->window_clamp > 2 * tp->advmss && tp->window_clamp > 2 * tp->advmss &&
tp->window_clamp + tp->advmss > maxwin) tp->window_clamp + tp->advmss > maxwin)
tp->window_clamp = max(2 * tp->advmss, maxwin - tp->advmss); tp->window_clamp = max(2 * tp->advmss, maxwin - tp->advmss);
...@@ -636,7 +626,7 @@ void tcp_rcv_space_adjust(struct sock *sk) ...@@ -636,7 +626,7 @@ void tcp_rcv_space_adjust(struct sock *sk)
} }
rcvmem = SKB_TRUESIZE(tp->advmss + MAX_TCP_HEADER); rcvmem = SKB_TRUESIZE(tp->advmss + MAX_TCP_HEADER);
while (tcp_win_from_space(rcvmem) < tp->advmss) while (tcp_win_from_space(sk, rcvmem) < tp->advmss)
rcvmem += 128; rcvmem += 128;
rcvbuf = min(rcvwin / tp->advmss * rcvmem, sysctl_tcp_rmem[2]); rcvbuf = min(rcvwin / tp->advmss * rcvmem, sysctl_tcp_rmem[2]);
...@@ -893,7 +883,7 @@ static void tcp_update_reordering(struct sock *sk, const int metric, ...@@ -893,7 +883,7 @@ static void tcp_update_reordering(struct sock *sk, const int metric,
return; return;
if (metric > tp->reordering) { if (metric > tp->reordering) {
tp->reordering = min(sysctl_tcp_max_reordering, metric); tp->reordering = min(sock_net(sk)->ipv4.sysctl_tcp_max_reordering, metric);
#if FASTRETRANS_DEBUG > 1 #if FASTRETRANS_DEBUG > 1
pr_debug("Disorder%d %d %u f%u s%u rr%d\n", pr_debug("Disorder%d %d %u f%u s%u rr%d\n",
...@@ -2035,7 +2025,7 @@ void tcp_enter_loss(struct sock *sk) ...@@ -2035,7 +2025,7 @@ void tcp_enter_loss(struct sock *sk)
* falsely raise the receive window, which results in repeated * falsely raise the receive window, which results in repeated
* timeouts and stop-and-go behavior. * timeouts and stop-and-go behavior.
*/ */
tp->frto = sysctl_tcp_frto && tp->frto = net->ipv4.sysctl_tcp_frto &&
(new_recovery || icsk->icsk_retransmits) && (new_recovery || icsk->icsk_retransmits) &&
!inet_csk(sk)->icsk_mtup.probe_size; !inet_csk(sk)->icsk_mtup.probe_size;
} }
...@@ -2789,7 +2779,7 @@ static void tcp_rack_identify_loss(struct sock *sk, int *ack_flag) ...@@ -2789,7 +2779,7 @@ static void tcp_rack_identify_loss(struct sock *sk, int *ack_flag)
struct tcp_sock *tp = tcp_sk(sk); struct tcp_sock *tp = tcp_sk(sk);
/* Use RACK to detect loss */ /* Use RACK to detect loss */
if (sysctl_tcp_recovery & TCP_RACK_LOSS_DETECTION) { if (sock_net(sk)->ipv4.sysctl_tcp_recovery & TCP_RACK_LOSS_DETECTION) {
u32 prior_retrans = tp->retrans_out; u32 prior_retrans = tp->retrans_out;
tcp_rack_mark_lost(sk); tcp_rack_mark_lost(sk);
...@@ -4155,7 +4145,7 @@ static void tcp_dsack_set(struct sock *sk, u32 seq, u32 end_seq) ...@@ -4155,7 +4145,7 @@ static void tcp_dsack_set(struct sock *sk, u32 seq, u32 end_seq)
{ {
struct tcp_sock *tp = tcp_sk(sk); struct tcp_sock *tp = tcp_sk(sk);
if (tcp_is_sack(tp) && sysctl_tcp_dsack) { if (tcp_is_sack(tp) && sock_net(sk)->ipv4.sysctl_tcp_dsack) {
int mib_idx; int mib_idx;
if (before(seq, tp->rcv_nxt)) if (before(seq, tp->rcv_nxt))
...@@ -4190,7 +4180,7 @@ static void tcp_send_dupack(struct sock *sk, const struct sk_buff *skb) ...@@ -4190,7 +4180,7 @@ static void tcp_send_dupack(struct sock *sk, const struct sk_buff *skb)
NET_INC_STATS(sock_net(sk), LINUX_MIB_DELAYEDACKLOST); NET_INC_STATS(sock_net(sk), LINUX_MIB_DELAYEDACKLOST);
tcp_enter_quickack_mode(sk); tcp_enter_quickack_mode(sk);
if (tcp_is_sack(tp) && sysctl_tcp_dsack) { if (tcp_is_sack(tp) && sock_net(sk)->ipv4.sysctl_tcp_dsack) {
u32 end_seq = TCP_SKB_CB(skb)->end_seq; u32 end_seq = TCP_SKB_CB(skb)->end_seq;
if (after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt)) if (after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt))
...@@ -4815,7 +4805,7 @@ tcp_collapse(struct sock *sk, struct sk_buff_head *list, struct rb_root *root, ...@@ -4815,7 +4805,7 @@ tcp_collapse(struct sock *sk, struct sk_buff_head *list, struct rb_root *root,
* overlaps to the next one. * overlaps to the next one.
*/ */
if (!(TCP_SKB_CB(skb)->tcp_flags & (TCPHDR_SYN | TCPHDR_FIN)) && if (!(TCP_SKB_CB(skb)->tcp_flags & (TCPHDR_SYN | TCPHDR_FIN)) &&
(tcp_win_from_space(skb->truesize) > skb->len || (tcp_win_from_space(sk, skb->truesize) > skb->len ||
before(TCP_SKB_CB(skb)->seq, start))) { before(TCP_SKB_CB(skb)->seq, start))) {
end_of_skbs = false; end_of_skbs = false;
break; break;
...@@ -5124,7 +5114,7 @@ static void tcp_check_urg(struct sock *sk, const struct tcphdr *th) ...@@ -5124,7 +5114,7 @@ static void tcp_check_urg(struct sock *sk, const struct tcphdr *th)
struct tcp_sock *tp = tcp_sk(sk); struct tcp_sock *tp = tcp_sk(sk);
u32 ptr = ntohs(th->urg_ptr); u32 ptr = ntohs(th->urg_ptr);
if (ptr && !sysctl_tcp_stdurg) if (ptr && !sock_net(sk)->ipv4.sysctl_tcp_stdurg)
ptr--; ptr--;
ptr += ntohl(th->seq); ptr += ntohl(th->seq);
...@@ -5723,7 +5713,7 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, ...@@ -5723,7 +5713,7 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
tp->tcp_header_len = sizeof(struct tcphdr); tp->tcp_header_len = sizeof(struct tcphdr);
} }
if (tcp_is_sack(tp) && sysctl_tcp_fack) if (tcp_is_sack(tp) && sock_net(sk)->ipv4.sysctl_tcp_fack)
tcp_enable_fack(tp); tcp_enable_fack(tp);
tcp_sync_mss(sk, icsk->icsk_pmtu_cookie); tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
......
...@@ -2484,6 +2484,15 @@ static int __net_init tcp_sk_init(struct net *net) ...@@ -2484,6 +2484,15 @@ static int __net_init tcp_sk_init(struct net *net)
net->ipv4.sysctl_tcp_sack = 1; net->ipv4.sysctl_tcp_sack = 1;
net->ipv4.sysctl_tcp_window_scaling = 1; net->ipv4.sysctl_tcp_window_scaling = 1;
net->ipv4.sysctl_tcp_timestamps = 1; net->ipv4.sysctl_tcp_timestamps = 1;
net->ipv4.sysctl_tcp_early_retrans = 3;
net->ipv4.sysctl_tcp_recovery = TCP_RACK_LOSS_DETECTION;
net->ipv4.sysctl_tcp_slow_start_after_idle = 1; /* By default, RFC2861 behavior. */
net->ipv4.sysctl_tcp_retrans_collapse = 1;
net->ipv4.sysctl_tcp_max_reordering = 300;
net->ipv4.sysctl_tcp_dsack = 1;
net->ipv4.sysctl_tcp_app_win = 31;
net->ipv4.sysctl_tcp_adv_win_scale = 1;
net->ipv4.sysctl_tcp_frto = 2;
net->ipv4.sysctl_tcp_fastopen = TFO_CLIENT_ENABLE; net->ipv4.sysctl_tcp_fastopen = TFO_CLIENT_ENABLE;
spin_lock_init(&net->ipv4.tcp_fastopen_ctx_lock); spin_lock_init(&net->ipv4.tcp_fastopen_ctx_lock);
......
...@@ -29,8 +29,6 @@ ...@@ -29,8 +29,6 @@
#include <net/xfrm.h> #include <net/xfrm.h>
#include <net/busy_poll.h> #include <net/busy_poll.h>
int sysctl_tcp_abort_on_overflow __read_mostly;
static bool tcp_in_window(u32 seq, u32 end_seq, u32 s_win, u32 e_win) static bool tcp_in_window(u32 seq, u32 end_seq, u32 s_win, u32 e_win)
{ {
if (seq == s_win) if (seq == s_win)
...@@ -181,7 +179,7 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb, ...@@ -181,7 +179,7 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb,
* Oh well... nobody has a sufficient solution to this * Oh well... nobody has a sufficient solution to this
* protocol bug yet. * protocol bug yet.
*/ */
if (sysctl_tcp_rfc1337 == 0) { if (twsk_net(tw)->ipv4.sysctl_tcp_rfc1337 == 0) {
kill: kill:
inet_twsk_deschedule_put(tw); inet_twsk_deschedule_put(tw);
return TCP_TW_SUCCESS; return TCP_TW_SUCCESS;
...@@ -512,7 +510,7 @@ struct sock *tcp_create_openreq_child(const struct sock *sk, ...@@ -512,7 +510,7 @@ struct sock *tcp_create_openreq_child(const struct sock *sk,
newtp->rx_opt.tstamp_ok = ireq->tstamp_ok; newtp->rx_opt.tstamp_ok = ireq->tstamp_ok;
if ((newtp->rx_opt.sack_ok = ireq->sack_ok) != 0) { if ((newtp->rx_opt.sack_ok = ireq->sack_ok) != 0) {
if (sysctl_tcp_fack) if (sock_net(sk)->ipv4.sysctl_tcp_fack)
tcp_enable_fack(newtp); tcp_enable_fack(newtp);
} }
newtp->window_clamp = req->rsk_window_clamp; newtp->window_clamp = req->rsk_window_clamp;
...@@ -783,7 +781,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, ...@@ -783,7 +781,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
return inet_csk_complete_hashdance(sk, child, req, own_req); return inet_csk_complete_hashdance(sk, child, req, own_req);
listen_overflow: listen_overflow:
if (!sysctl_tcp_abort_on_overflow) { if (!sock_net(sk)->ipv4.sysctl_tcp_abort_on_overflow) {
inet_rsk(req)->acked = 1; inet_rsk(req)->acked = 1;
return NULL; return NULL;
} }
......
...@@ -45,9 +45,6 @@ ...@@ -45,9 +45,6 @@
#include <trace/events/tcp.h> #include <trace/events/tcp.h>
/* People can turn this off for buggy TCP's found in printers etc. */
int sysctl_tcp_retrans_collapse __read_mostly = 1;
/* People can turn this on to work with those rare, broken TCPs that /* People can turn this on to work with those rare, broken TCPs that
* interpret the window field as a signed quantity. * interpret the window field as a signed quantity.
*/ */
...@@ -62,9 +59,6 @@ int sysctl_tcp_limit_output_bytes __read_mostly = 262144; ...@@ -62,9 +59,6 @@ int sysctl_tcp_limit_output_bytes __read_mostly = 262144;
*/ */
int sysctl_tcp_tso_win_divisor __read_mostly = 3; int sysctl_tcp_tso_win_divisor __read_mostly = 3;
/* By default, RFC2861 behavior. */
int sysctl_tcp_slow_start_after_idle __read_mostly = 1;
static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
int push_one, gfp_t gfp); int push_one, gfp_t gfp);
...@@ -1690,7 +1684,7 @@ static void tcp_cwnd_validate(struct sock *sk, bool is_cwnd_limited) ...@@ -1690,7 +1684,7 @@ static void tcp_cwnd_validate(struct sock *sk, bool is_cwnd_limited)
if (tp->packets_out > tp->snd_cwnd_used) if (tp->packets_out > tp->snd_cwnd_used)
tp->snd_cwnd_used = tp->packets_out; tp->snd_cwnd_used = tp->packets_out;
if (sysctl_tcp_slow_start_after_idle && if (sock_net(sk)->ipv4.sysctl_tcp_slow_start_after_idle &&
(s32)(tcp_jiffies32 - tp->snd_cwnd_stamp) >= inet_csk(sk)->icsk_rto && (s32)(tcp_jiffies32 - tp->snd_cwnd_stamp) >= inet_csk(sk)->icsk_rto &&
!ca_ops->cong_control) !ca_ops->cong_control)
tcp_cwnd_application_limited(sk); tcp_cwnd_application_limited(sk);
...@@ -2435,6 +2429,7 @@ bool tcp_schedule_loss_probe(struct sock *sk) ...@@ -2435,6 +2429,7 @@ bool tcp_schedule_loss_probe(struct sock *sk)
struct inet_connection_sock *icsk = inet_csk(sk); struct inet_connection_sock *icsk = inet_csk(sk);
struct tcp_sock *tp = tcp_sk(sk); struct tcp_sock *tp = tcp_sk(sk);
u32 timeout, rto_delta_us; u32 timeout, rto_delta_us;
int early_retrans;
/* Don't do any loss probe on a Fast Open connection before 3WHS /* Don't do any loss probe on a Fast Open connection before 3WHS
* finishes. * finishes.
...@@ -2442,10 +2437,11 @@ bool tcp_schedule_loss_probe(struct sock *sk) ...@@ -2442,10 +2437,11 @@ bool tcp_schedule_loss_probe(struct sock *sk)
if (tp->fastopen_rsk) if (tp->fastopen_rsk)
return false; return false;
early_retrans = sock_net(sk)->ipv4.sysctl_tcp_early_retrans;
/* Schedule a loss probe in 2*RTT for SACK capable connections /* Schedule a loss probe in 2*RTT for SACK capable connections
* in Open state, that are either limited by cwnd or application. * in Open state, that are either limited by cwnd or application.
*/ */
if ((sysctl_tcp_early_retrans != 3 && sysctl_tcp_early_retrans != 4) || if ((early_retrans != 3 && early_retrans != 4) ||
!tp->packets_out || !tcp_is_sack(tp) || !tp->packets_out || !tcp_is_sack(tp) ||
icsk->icsk_ca_state != TCP_CA_Open) icsk->icsk_ca_state != TCP_CA_Open)
return false; return false;
...@@ -2805,7 +2801,7 @@ static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *to, ...@@ -2805,7 +2801,7 @@ static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *to,
struct sk_buff *skb = to, *tmp; struct sk_buff *skb = to, *tmp;
bool first = true; bool first = true;
if (!sysctl_tcp_retrans_collapse) if (!sock_net(sk)->ipv4.sysctl_tcp_retrans_collapse)
return; return;
if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN) if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN)
return; return;
......
#include <linux/tcp.h> #include <linux/tcp.h>
#include <net/tcp.h> #include <net/tcp.h>
int sysctl_tcp_recovery __read_mostly = TCP_RACK_LOSS_DETECTION;
static void tcp_rack_mark_skb_lost(struct sock *sk, struct sk_buff *skb) static void tcp_rack_mark_skb_lost(struct sock *sk, struct sk_buff *skb)
{ {
struct tcp_sock *tp = tcp_sk(sk); struct tcp_sock *tp = tcp_sk(sk);
......
...@@ -22,8 +22,6 @@ ...@@ -22,8 +22,6 @@
#include <linux/gfp.h> #include <linux/gfp.h>
#include <net/tcp.h> #include <net/tcp.h>
int sysctl_tcp_thin_linear_timeouts __read_mostly;
/** /**
* tcp_write_err() - close socket and save error info * tcp_write_err() - close socket and save error info
* @sk: The socket the error has appeared on. * @sk: The socket the error has appeared on.
...@@ -522,7 +520,7 @@ void tcp_retransmit_timer(struct sock *sk) ...@@ -522,7 +520,7 @@ void tcp_retransmit_timer(struct sock *sk)
* linear-timeout retransmissions into a black hole * linear-timeout retransmissions into a black hole
*/ */
if (sk->sk_state == TCP_ESTABLISHED && if (sk->sk_state == TCP_ESTABLISHED &&
(tp->thin_lto || sysctl_tcp_thin_linear_timeouts) && (tp->thin_lto || net->ipv4.sysctl_tcp_thin_linear_timeouts) &&
tcp_stream_is_thin(tp) && tcp_stream_is_thin(tp) &&
icsk->icsk_retransmits <= TCP_THIN_LINEAR_RETRIES) { icsk->icsk_retransmits <= TCP_THIN_LINEAR_RETRIES) {
icsk->icsk_backoff = 0; icsk->icsk_backoff = 0;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment