Commit e69f145d authored by Stephen Hemminger's avatar Stephen Hemminger Committed by David S. Miller

[TCP]: Automatically compute tcp_default_win_scale.

This patch gets rid of the tcp_default_win_scale sysctl and instead
computes the optimum maximum window scale.  It just means one less
thing to have to tune.  I also moved the code out of the inline because
it gets called three places and isn't in the critical path.

As a side effect, it will cause a smaller window scale for many people
since the default tcp_rmem fits in a win_scale of 2.  This allows for
finer grain windows (good), but may mask some of the problems with bad
implementations we have already seen (bad).
Signed-off-by: Stephen Hemminger <shemminger@osdl.org>
Signed-off-by: David S. Miller <davem@redhat.com>
parent 288ce357
...@@ -611,7 +611,6 @@ extern int sysctl_tcp_nometrics_save; ...@@ -611,7 +611,6 @@ extern int sysctl_tcp_nometrics_save;
extern int sysctl_tcp_bic; extern int sysctl_tcp_bic;
extern int sysctl_tcp_bic_fast_convergence; extern int sysctl_tcp_bic_fast_convergence;
extern int sysctl_tcp_bic_low_window; extern int sysctl_tcp_bic_low_window;
extern int sysctl_tcp_default_win_scale;
extern int sysctl_tcp_moderate_rcvbuf; extern int sysctl_tcp_moderate_rcvbuf;
extern atomic_t tcp_memory_allocated; extern atomic_t tcp_memory_allocated;
...@@ -1690,68 +1689,10 @@ static inline void tcp_syn_build_options(__u32 *ptr, int mss, int ts, int sack, ...@@ -1690,68 +1689,10 @@ static inline void tcp_syn_build_options(__u32 *ptr, int mss, int ts, int sack,
*ptr++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_WINDOW << 16) | (TCPOLEN_WINDOW << 8) | (wscale)); *ptr++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_WINDOW << 16) | (TCPOLEN_WINDOW << 8) | (wscale));
} }
/* Determine a window scaling and initial window to offer. /* Determine a window scaling and initial window to offer. */
* Based on the assumption that the given amount of space extern void tcp_select_initial_window(int __space, __u32 mss,
* will be offered. Store the results in the tp structure. __u32 *rcv_wnd, __u32 *window_clamp,
* NOTE: for smooth operation initial space offering should int wscale_ok, __u8 *rcv_wscale);
* be a multiple of mss if possible. We assume here that mss >= 1.
* This MUST be enforced by all callers.
*/
static inline void tcp_select_initial_window(int __space, __u32 mss,
__u32 *rcv_wnd,
__u32 *window_clamp,
int wscale_ok,
__u8 *rcv_wscale)
{
unsigned int space = (__space < 0 ? 0 : __space);
/* If no clamp set the clamp to the max possible scaled window */
if (*window_clamp == 0)
(*window_clamp) = (65535 << 14);
space = min(*window_clamp, space);
/* Quantize space offering to a multiple of mss if possible. */
if (space > mss)
space = (space / mss) * mss;
/* NOTE: offering an initial window larger than 32767
* will break some buggy TCP stacks. We try to be nice.
* If we are not window scaling, then this truncates
* our initial window offering to 32k. There should also
* be a sysctl option to stop being nice.
*/
(*rcv_wnd) = min(space, MAX_TCP_WINDOW);
(*rcv_wscale) = 0;
if (wscale_ok) {
/* See RFC1323 for an explanation of the limit to 14 */
while (space > 65535 && (*rcv_wscale) < 14) {
space >>= 1;
(*rcv_wscale)++;
}
if (*rcv_wscale && sysctl_tcp_app_win && space>=mss &&
space - max((space>>sysctl_tcp_app_win), mss>>*rcv_wscale) < 65536/2)
(*rcv_wscale)--;
*rcv_wscale = max((__u8)sysctl_tcp_default_win_scale,
*rcv_wscale);
}
/* Set initial window to value enough for senders,
* following RFC1414. Senders, not following this RFC,
* will be satisfied with 2.
*/
if (mss > (1<<*rcv_wscale)) {
int init_cwnd = 4;
if (mss > 1460*3)
init_cwnd = 2;
else if (mss > 1460)
init_cwnd = 3;
if (*rcv_wnd > init_cwnd*mss)
*rcv_wnd = init_cwnd*mss;
}
/* Set the clamp no higher than max representable value */
(*window_clamp) = min(65535U << (*rcv_wscale), *window_clamp);
}
static inline int tcp_win_from_space(int space) static inline int tcp_win_from_space(int space)
{ {
...@@ -1761,13 +1702,13 @@ static inline int tcp_win_from_space(int space) ...@@ -1761,13 +1702,13 @@ static inline int tcp_win_from_space(int space)
} }
/* Note: caller must be prepared to deal with negative returns */ /* Note: caller must be prepared to deal with negative returns */
static inline int tcp_space(struct sock *sk) static inline int tcp_space(const struct sock *sk)
{ {
return tcp_win_from_space(sk->sk_rcvbuf - return tcp_win_from_space(sk->sk_rcvbuf -
atomic_read(&sk->sk_rmem_alloc)); atomic_read(&sk->sk_rmem_alloc));
} }
static inline int tcp_full_space( struct sock *sk) static inline int tcp_full_space(const struct sock *sk)
{ {
return tcp_win_from_space(sk->sk_rcvbuf); return tcp_win_from_space(sk->sk_rcvbuf);
} }
......
...@@ -666,14 +666,6 @@ ctl_table ipv4_table[] = { ...@@ -666,14 +666,6 @@ ctl_table ipv4_table[] = {
.mode = 0644, .mode = 0644,
.proc_handler = &proc_dointvec, .proc_handler = &proc_dointvec,
}, },
{
.ctl_name = NET_TCP_DEFAULT_WIN_SCALE,
.procname = "tcp_default_win_scale",
.data = &sysctl_tcp_default_win_scale,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = &proc_dointvec,
},
{ {
.ctl_name = NET_TCP_MODERATE_RCVBUF, .ctl_name = NET_TCP_MODERATE_RCVBUF,
.procname = "tcp_moderate_rcvbuf", .procname = "tcp_moderate_rcvbuf",
......
...@@ -276,8 +276,6 @@ kmem_cache_t *tcp_timewait_cachep; ...@@ -276,8 +276,6 @@ kmem_cache_t *tcp_timewait_cachep;
atomic_t tcp_orphan_count = ATOMIC_INIT(0); atomic_t tcp_orphan_count = ATOMIC_INIT(0);
int sysctl_tcp_default_win_scale = 7;
int sysctl_tcp_mem[3]; int sysctl_tcp_mem[3];
int sysctl_tcp_wmem[3] = { 4 * 1024, 16 * 1024, 128 * 1024 }; int sysctl_tcp_wmem[3] = { 4 * 1024, 16 * 1024, 128 * 1024 };
int sysctl_tcp_rmem[3] = { 4 * 1024, 87380, 87380 * 2 }; int sysctl_tcp_rmem[3] = { 4 * 1024, 87380, 87380 * 2 };
......
...@@ -143,6 +143,65 @@ static __inline__ void tcp_event_ack_sent(struct sock *sk) ...@@ -143,6 +143,65 @@ static __inline__ void tcp_event_ack_sent(struct sock *sk)
tcp_clear_xmit_timer(sk, TCP_TIME_DACK); tcp_clear_xmit_timer(sk, TCP_TIME_DACK);
} }
/* Pick the receive window scale and the initial receive window to
 * advertise, on the assumption that __space bytes of receive space
 * will be offered.
 *
 * Results are handed back through the pointer arguments:
 *   *rcv_wnd      - initial window to offer.
 *   *window_clamp - upper bound on the window; a value of 0 on entry
 *                   means "no clamp" and is replaced by the largest
 *                   possible scaled window.  On return it never
 *                   exceeds what the chosen scale can represent.
 *   *rcv_wscale   - window scale shift; left at 0 unless wscale_ok
 *                   is non-zero.
 *
 * NOTE: for smooth operation the initial offering should be a
 * multiple of mss where possible.  mss >= 1 is assumed here and
 * MUST be enforced by all callers.
 */
void tcp_select_initial_window(int __space, __u32 mss,
			       __u32 *rcv_wnd, __u32 *window_clamp,
			       int wscale_ok, __u8 *rcv_wscale)
{
	unsigned int avail = 0;
	__u32 ceiling;
	__u8 shift = 0;

	/* A negative space budget is treated as no space at all. */
	if (__space > 0)
		avail = __space;

	/* No clamp supplied: fall back to the max possible scaled window. */
	if (!*window_clamp)
		*window_clamp = (65535 << 14);
	if (avail > *window_clamp)
		avail = *window_clamp;

	/* Round the offering down to a whole number of segments. */
	if (avail > mss)
		avail -= avail % mss;

	/* NOTE: an initial window above 32767 breaks some buggy TCP
	 * stacks, so be nice and cap the offer at MAX_TCP_WINDOW.
	 * Without window scaling this truncates the initial offering
	 * to 32k.  There should also be a sysctl option to stop being
	 * nice.
	 */
	if (avail < MAX_TCP_WINDOW)
		*rcv_wnd = avail;
	else
		*rcv_wnd = MAX_TCP_WINDOW;

	if (wscale_ok) {
		/* Derive the scale from the max possible window, i.e. the
		 * larger of sysctl_tcp_rmem[2] and sysctl_rmem_max, not
		 * from the space offered right now.  See RFC1323 for an
		 * explanation of the limit to 14.
		 */
		unsigned int limit = max_t(u32, sysctl_tcp_rmem[2],
					   sysctl_rmem_max);

		for (; limit > 65535 && shift < 14; limit >>= 1)
			shift++;
	}
	*rcv_wscale = shift;

	/* Trim the initial window to what a sender's initial congestion
	 * window needs: 4 segments, or 3 / 2 segments for progressively
	 * larger mss.  Senders that use a smaller initial window will be
	 * satisfied with 2.
	 * NOTE(review): this comment used to cite RFC1414; RFC 2414
	 * (initial window) looks like the intended reference - confirm.
	 */
	if (mss > (1 << shift)) {
		int init_cwnd = (mss > 1460 * 3) ? 2 :
				(mss > 1460) ? 3 : 4;

		if (*rcv_wnd > init_cwnd * mss)
			*rcv_wnd = init_cwnd * mss;
	}

	/* Keep the clamp no higher than the max representable value. */
	ceiling = 65535U << shift;
	if (*window_clamp > ceiling)
		*window_clamp = ceiling;
}
/* Chose a new window to advertise, update state in tcp_opt for the /* Chose a new window to advertise, update state in tcp_opt for the
* socket, and return result with RFC1323 scaling applied. The return * socket, and return result with RFC1323 scaling applied. The return
* value can be stuffed directly into th->window for an outgoing * value can be stuffed directly into th->window for an outgoing
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment