Commit 1828dda1 authored by David S. Miller

Merge nuts.davemloft.net:/disk1/BK/network-2.6

into nuts.davemloft.net:/disk1/BK/net-2.6
parents e743bea2 c5dc7491
@@ -340,6 +340,11 @@ tcp_bic_fast_convergence - BOOLEAN
 	more rapidly.
 	Default: 1
+
+tcp_default_win_scale - INTEGER
+	Sets the minimum window scale TCP will negotiate for on all
+	connections.
+	Default: 7
 ip_local_port_range - 2 INTEGERS
 	Defines the local port range that is used by TCP and UDP to
 	choose the local port. The first number is the first, the
......
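For illustration only (not part of this patch): the tcp_default_win_scale entry documented above is registered in ipv4_table later in this merge, so it should be adjustable at runtime through procfs. A minimal user-space sketch, assuming the usual /proc/sys/net/ipv4/ mapping for the "tcp_default_win_scale" procname:

	#include <stdio.h>

	int main(void)
	{
		/* Path assumed from the ipv4_table .procname added below. */
		FILE *f = fopen("/proc/sys/net/ipv4/tcp_default_win_scale", "w");

		if (!f) {
			perror("fopen");
			return 1;
		}
		fprintf(f, "2\n");	/* request a smaller minimum window scale */
		fclose(f);
		return 0;
	}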
@@ -68,19 +68,20 @@ static int crypt(struct crypto_tfm *tfm,
 	for(;;) {
 		u8 *src_p, *dst_p;
+		int in_place;
 		scatterwalk_map(&walk_in, 0);
 		scatterwalk_map(&walk_out, 1);
 		src_p = scatterwalk_whichbuf(&walk_in, bsize, tmp_src);
 		dst_p = scatterwalk_whichbuf(&walk_out, bsize, tmp_dst);
+		in_place = scatterwalk_samebuf(&walk_in, &walk_out,
+					       src_p, dst_p);
 		nbytes -= bsize;
 		scatterwalk_copychunks(src_p, &walk_in, bsize, 0);
-		prfn(tfm, dst_p, src_p, crfn, enc, info,
-		     scatterwalk_samebuf(&walk_in, &walk_out,
-					 src_p, dst_p));
+		prfn(tfm, dst_p, src_p, crfn, enc, info, in_place);
 		scatterwalk_done(&walk_in, 0, nbytes);
......
@@ -38,6 +38,7 @@ static inline int scatterwalk_samebuf(struct scatter_walk *walk_in,
 					      void *src_p, void *dst_p)
 {
 	return walk_in->page == walk_out->page &&
+	       walk_in->offset == walk_out->offset &&
 	       walk_in->data == src_p && walk_out->data == dst_p;
 }
......
@@ -348,7 +348,7 @@ enum
 	IFA_MULTICAST
 };
-#define IFA_MAX IFA_CACHEINFO
+#define IFA_MAX IFA_MULTICAST
 /* ifa_flags */
......
@@ -336,6 +336,7 @@ enum
 	NET_TCP_BIC=102,
 	NET_TCP_BIC_FAST_CONVERGENCE=103,
 	NET_TCP_BIC_LOW_WINDOW=104,
+	NET_TCP_DEFAULT_WIN_SCALE=105,
 };
 enum {
......
@@ -376,6 +376,20 @@ struct tcp_opt {
 	unsigned long last_synq_overflow;
+
+	/* Receiver side RTT estimation */
+	struct {
+		__u32	rtt;
+		__u32	seq;
+		__u32	time;
+	} rcv_rtt_est;
+
+	/* Receiver queue space */
+	struct {
+		int	space;
+		__u32	seq;
+		__u32	time;
+	} rcvq_space;
 	/* TCP Westwood structure */
 	struct {
 		__u32    bw_ns_est;        /* first bandwidth estimation..not too smoothed 8) */
......
@@ -610,6 +610,7 @@ extern int sysctl_tcp_nometrics_save;
 extern int sysctl_tcp_bic;
 extern int sysctl_tcp_bic_fast_convergence;
 extern int sysctl_tcp_bic_low_window;
+extern int sysctl_tcp_default_win_scale;
 extern atomic_t tcp_memory_allocated;
 extern atomic_t tcp_sockets_allocated;
@@ -800,6 +801,8 @@ extern int tcp_rcv_established(struct sock *sk,
 					       struct tcphdr *th,
 					       unsigned len);
+
+extern void tcp_rcv_space_adjust(struct sock *sk);
 enum tcp_ack_state_t
 {
 	TCP_ACK_SCHED = 1,
@@ -1751,6 +1754,9 @@ static inline void tcp_select_initial_window(int __space, __u32 mss,
 		if (*rcv_wscale && sysctl_tcp_app_win && space>=mss &&
 		    space - max((space>>sysctl_tcp_app_win), mss>>*rcv_wscale) < 65536/2)
 			(*rcv_wscale)--;
+
+		*rcv_wscale = max((__u8)sysctl_tcp_default_win_scale,
+				  *rcv_wscale);
 	}
 	/* Set initial window to value enough for senders,
......
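For illustration only (not part of the patch): the max() added above makes sysctl_tcp_default_win_scale a floor on the receiver's window scale, which in turn bounds the largest window that can be advertised. A stand-alone sketch of that clamp with assumed inputs:

	#include <stdio.h>

	int main(void)
	{
		/* Assumed inputs: a buffer-derived scale below the sysctl floor. */
		unsigned int computed_wscale = 2;	/* what the buffer math alone would pick */
		unsigned int default_wscale  = 7;	/* stand-in for sysctl_tcp_default_win_scale */
		unsigned int rcv_wscale = computed_wscale > default_wscale ?
					  computed_wscale : default_wscale;

		/* The 16-bit window field is interpreted shifted left by the scale. */
		printf("scale %u -> max advertised window %lu bytes\n",
		       rcv_wscale, 65535UL << rcv_wscale);
		return 0;
	}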
@@ -665,6 +665,14 @@ ctl_table ipv4_table[] = {
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec,
 	},
+	{
+		.ctl_name	= NET_TCP_DEFAULT_WIN_SCALE,
+		.procname	= "tcp_default_win_scale",
+		.data		= &sysctl_tcp_default_win_scale,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+	},
 	{ .ctl_name = 0 }
 };
......
@@ -276,6 +276,8 @@ kmem_cache_t *tcp_timewait_cachep;
 atomic_t tcp_orphan_count = ATOMIC_INIT(0);
+int sysctl_tcp_default_win_scale = 7;
+
 int sysctl_tcp_mem[3];
 int sysctl_tcp_wmem[3] = { 4 * 1024, 16 * 1024, 128 * 1024 };
 int sysctl_tcp_rmem[3] = { 4 * 1024, 87380, 87380 * 2 };
@@ -1480,6 +1482,9 @@ int tcp_read_sock(struct sock *sk, read_descriptor_t *desc,
 			break;
 	}
 	tp->copied_seq = seq;
+
+	tcp_rcv_space_adjust(sk);
+
 	/* Clean up data we have read: This will do ACK frames. */
 	if (copied)
 		cleanup_rbuf(sk, copied);
@@ -1740,6 +1745,8 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 		copied += used;
 		len -= used;
+
+		tcp_rcv_space_adjust(sk);
 skip_copy:
 		if (tp->urg_data && after(tp->copied_seq, tp->urg_seq)) {
 			tp->urg_data = 0;
@@ -2675,10 +2682,6 @@ void __init tcp_init(void)
 	sysctl_tcp_mem[0] = 768 << order;
 	sysctl_tcp_mem[1] = 1024 << order;
 	sysctl_tcp_mem[2] = 1536 << order;
-	if (sysctl_tcp_mem[2] - sysctl_tcp_mem[1] > 512)
-		sysctl_tcp_mem[1] = sysctl_tcp_mem[2] - 512;
-	if (sysctl_tcp_mem[1] - sysctl_tcp_mem[0] > 512)
-		sysctl_tcp_mem[0] = sysctl_tcp_mem[1] - 512;
 	if (order < 3) {
 		sysctl_tcp_wmem[2] = 64 * 1024;
......
@@ -305,6 +305,8 @@ static void tcp_init_buffer_space(struct sock *sk)
 	if (!(sk->sk_userlocks & SOCK_SNDBUF_LOCK))
 		tcp_fixup_sndbuf(sk);
+
+	tp->rcvq_space.space = tp->rcv_wnd;
 	maxwin = tcp_full_space(sk);
 	if (tp->window_clamp >= maxwin) {
@@ -364,6 +366,120 @@ static void tcp_clamp_window(struct sock *sk, struct tcp_opt *tp)
 	}
 }
+
+/* Receiver "autotuning" code.
+ *
+ * The algorithm for RTT estimation w/o timestamps is based on
+ * Dynamic Right-Sizing (DRS) by Wu Feng and Mike Fisk of LANL.
+ * <http://www.lanl.gov/radiant/website/pubs/drs/lacsi2001.ps>
+ *
+ * More detail on this code can be found at
+ * <http://www.psc.edu/~jheffner/senior_thesis.ps>,
+ * though this reference is out of date. A new paper
+ * is pending.
+ */
+static void tcp_rcv_rtt_update(struct tcp_opt *tp, u32 sample, int win_dep)
+{
+	u32 new_sample = tp->rcv_rtt_est.rtt;
+	long m = sample;
+
+	if (m == 0)
+		m = 1;
+
+	if (new_sample != 0) {
+		/* If we sample in larger samples in the non-timestamp
+		 * case, we could grossly overestimate the RTT especially
+		 * with chatty applications or bulk transfer apps which
+		 * are stalled on filesystem I/O.
+		 *
+		 * Also, since we are only going for a minimum in the
+		 * non-timestamp case, we do not smooth things out
+		 * else with timestamps disabled convergence takes too
+		 * long.
+		 */
+		if (!win_dep) {
+			m -= (new_sample >> 3);
+			new_sample += m;
+		} else if (m < new_sample)
+			new_sample = m << 3;
+	} else {
+		/* No previous measure. */
+		new_sample = m << 3;
+	}
+
+	if (tp->rcv_rtt_est.rtt != new_sample)
+		tp->rcv_rtt_est.rtt = new_sample;
+}
+
+static inline void tcp_rcv_rtt_measure(struct tcp_opt *tp)
+{
+	if (tp->rcv_rtt_est.time == 0)
+		goto new_measure;
+	if (before(tp->rcv_nxt, tp->rcv_rtt_est.seq))
+		return;
+	tcp_rcv_rtt_update(tp,
+			   jiffies - tp->rcv_rtt_est.time,
+			   1);
+
+new_measure:
+	tp->rcv_rtt_est.seq = tp->rcv_nxt + tp->rcv_wnd;
+	tp->rcv_rtt_est.time = tcp_time_stamp;
+}
+
+static inline void tcp_rcv_rtt_measure_ts(struct tcp_opt *tp, struct sk_buff *skb)
+{
+	if (tp->rcv_tsecr &&
+	    (TCP_SKB_CB(skb)->end_seq -
+	     TCP_SKB_CB(skb)->seq >= tp->ack.rcv_mss))
+		tcp_rcv_rtt_update(tp, tcp_time_stamp - tp->rcv_tsecr, 0);
+}
+
+/*
+ * This function should be called every time data is copied to user space.
+ * It calculates the appropriate TCP receive buffer space.
+ */
+void tcp_rcv_space_adjust(struct sock *sk)
+{
+	struct tcp_opt *tp = tcp_sk(sk);
+	int time;
+	int space;
+
+	if (tp->rcvq_space.time == 0)
+		goto new_measure;
+
+	time = tcp_time_stamp - tp->rcvq_space.time;
+	if (time < (tp->rcv_rtt_est.rtt >> 3) ||
+	    tp->rcv_rtt_est.rtt == 0)
+		return;
+
+	space = 2 * (tp->copied_seq - tp->rcvq_space.seq);
+
+	space = max(tp->rcvq_space.space, space);
+
+	if (tp->rcvq_space.space != space) {
+		int rcvmem;
+
+		tp->rcvq_space.space = space;
+
+		/* Receive space grows, normalize in order to
+		 * take into account packet headers and sk_buff
+		 * structure overhead.
+		 */
+		space /= tp->advmss;
+		if (!space)
+			space = 1;
+		rcvmem = (tp->advmss + MAX_TCP_HEADER +
+			  16 + sizeof(struct sk_buff));
+		space *= rcvmem;
+		space = min(space, sysctl_tcp_rmem[2]);
+		if (space > sk->sk_rcvbuf)
+			sk->sk_rcvbuf = space;
+	}
+
+new_measure:
+	tp->rcvq_space.seq = tp->copied_seq;
+	tp->rcvq_space.time = tcp_time_stamp;
+}
+
 /* There is something which you must keep in mind when you analyze the
  * behavior of the tp->ato delayed ack timeout interval. When a
  * connection starts up, we want to ack as quickly as possible. The
@@ -382,6 +498,8 @@ static void tcp_event_data_recv(struct sock *sk, struct tcp_opt *tp, struct sk_b
 	tcp_measure_rcv_mss(tp, skb);
+	tcp_rcv_rtt_measure(tp);
+
 	now = tcp_time_stamp;
 	if (!tp->ack.ato) {
@@ -3318,6 +3436,7 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
 			tp->ucopy.len -= chunk;
 			tp->copied_seq += chunk;
 			eaten = (chunk == skb->len && !th->fin);
+			tcp_rcv_space_adjust(sk);
 		}
 		local_bh_disable();
 	}
@@ -3918,6 +4037,7 @@ static int tcp_copy_to_iovec(struct sock *sk, struct sk_buff *skb, int hlen)
 	if (!err) {
 		tp->ucopy.len -= chunk;
 		tp->copied_seq += chunk;
+		tcp_rcv_space_adjust(sk);
 	}
 	local_bh_disable();
@@ -4045,6 +4165,9 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
 		    (sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED) &&
 		    tp->rcv_nxt == tp->rcv_wup)
 			tcp_store_ts_recent(tp);
+
+		tcp_rcv_rtt_measure_ts(tp, skb);
+
 		/* We know that such packets are checksummed
 		 * on entry.
 		 */
@@ -4076,6 +4199,8 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
 			    tp->rcv_nxt == tp->rcv_wup)
 				tcp_store_ts_recent(tp);
+			tcp_rcv_rtt_measure_ts(tp, skb);
+
 			__skb_pull(skb, tcp_header_len);
 			tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
 			NET_INC_STATS_BH(TCPHPHitsToUser);
@@ -4095,6 +4220,8 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
 			    tp->rcv_nxt == tp->rcv_wup)
 				tcp_store_ts_recent(tp);
+			tcp_rcv_rtt_measure_ts(tp, skb);
+
 			if ((int)skb->truesize > sk->sk_forward_alloc)
 				goto step5;
@@ -4191,6 +4318,8 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
 	if(th->ack)
 		tcp_ack(sk, skb, FLAG_SLOWPATH);
+	tcp_rcv_rtt_measure_ts(tp, skb);
+
 	/* Process urgent data. */
 	tcp_urg(sk, skb, th);
......
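For intuition only (not part of the patch): the buffer-sizing arithmetic in the tcp_rcv_space_adjust() hunk above can be exercised stand-alone. The constants below are assumed example values, not the kernel's MAX_TCP_HEADER, sizeof(struct sk_buff) or sysctl_tcp_rmem[2].

	#include <stdio.h>

	int main(void)
	{
		/* Assumed example inputs, not values read from a running kernel. */
		int copied_per_rtt = 120000;	/* bytes copied to user space in one RTT */
		int advmss = 1460;		/* advertised MSS */
		int overhead = 1460 + 320 + 16 + 240;	/* stand-in for advmss + MAX_TCP_HEADER + 16 + sizeof(struct sk_buff) */
		int rmem_max = 174760;		/* stand-in for sysctl_tcp_rmem[2] */

		/* Mirror the sizing steps: double the per-RTT demand, convert to
		 * packets, then back to bytes including per-packet overhead. */
		int space = 2 * copied_per_rtt;
		int pkts = space / advmss;

		if (!pkts)
			pkts = 1;
		space = pkts * overhead;
		if (space > rmem_max)
			space = rmem_max;

		printf("suggested sk_rcvbuf: %d bytes\n", space);
		return 0;
	}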
@@ -554,6 +554,8 @@ static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
 	if (mtu < dst_pmtu(dst) && rt6->rt6i_dst.plen == 128) {
 		rt6->rt6i_flags |= RTF_MODIFIED;
+		if (mtu < IPV6_MIN_MTU)
+			mtu = IPV6_MIN_MTU;
 		dst->metrics[RTAX_MTU-1] = mtu;
 	}
 }
......
@@ -236,10 +236,10 @@ static void xfrm6_update_pmtu(struct dst_entry *dst, u32 mtu)
 {
 	struct dst_entry *path = dst->path;
-	if (mtu >= 1280 && mtu < dst_pmtu(dst))
-		return;
-	path->ops->update_pmtu(path, mtu);
+	if (mtu >= IPV6_MIN_MTU && mtu < dst_pmtu(dst))
+		path->ops->update_pmtu(path, mtu);
+	return;
 }
......
@@ -271,7 +271,7 @@ struct sctp_association *sctp_association_init(struct sctp_association *asoc,
 	asoc->need_ecne = 0;
-	asoc->assoc_id = (sctp_assoc_t)-1;
+	asoc->assoc_id = (sctp_assoc_t)-1L;
 	/* Assume that peer would support both address types unless we are
 	 * told otherwise.
@@ -374,9 +374,9 @@ static void sctp_association_destroy(struct sctp_association *asoc)
 	sctp_endpoint_put(asoc->ep);
 	sock_put(asoc->base.sk);
-	if ((int)asoc->assoc_id != -1) {
+	if ((long)asoc->assoc_id != -1L) {
 		spin_lock_bh(&sctp_assocs_id_lock);
-		idr_remove(&sctp_assocs_id, (int)asoc->assoc_id);
+		idr_remove(&sctp_assocs_id, (long)asoc->assoc_id);
 		spin_unlock_bh(&sctp_assocs_id_lock);
 	}
......