Commit 1828dda1 authored by David S. Miller

Merge nuts.davemloft.net:/disk1/BK/network-2.6

into nuts.davemloft.net:/disk1/BK/net-2.6
parents e743bea2 c5dc7491
......@@ -340,6 +340,11 @@ tcp_bic_fast_convergence - BOOLEAN
more rapidly.
Default: 1
tcp_default_win_scale - INTEGER
Sets the minimum window scale TCP will negotiate for on all
connections.
Default: 7
ip_local_port_range - 2 INTEGERS
Defines the local port range that is used by TCP and UDP to
choose the local port. The first number is the first, the
......
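As a rough illustration of what a minimum window scale of 7 means in practice (a standalone sketch with assumed values, not kernel code): the 16-bit window field in the TCP header is interpreted as shifted left by the negotiated scale, so a scale of 7 gives a window granularity of 128 bytes and a maximum advertisable window of about 8 MB.

#include <stdio.h>

int main(void)
{
	unsigned int wscale = 7;		/* assumed tcp_default_win_scale value */
	unsigned int field_max = 65535;		/* largest value of the 16-bit window field */

	/* The receiver advertises (window >> wscale) and the peer reconstructs
	 * it as (field << wscale), so the granularity is 1 << wscale. */
	printf("granularity: %u bytes\n", 1u << wscale);		/* 128 */
	printf("max window:  %u bytes\n", field_max << wscale);	/* 8388480, ~8 MB */
	return 0;
}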
......@@ -68,19 +68,20 @@ static int crypt(struct crypto_tfm *tfm,
for(;;) {
u8 *src_p, *dst_p;
int in_place;
scatterwalk_map(&walk_in, 0);
scatterwalk_map(&walk_out, 1);
src_p = scatterwalk_whichbuf(&walk_in, bsize, tmp_src);
dst_p = scatterwalk_whichbuf(&walk_out, bsize, tmp_dst);
in_place = scatterwalk_samebuf(&walk_in, &walk_out,
src_p, dst_p);
nbytes -= bsize;
scatterwalk_copychunks(src_p, &walk_in, bsize, 0);
prfn(tfm, dst_p, src_p, crfn, enc, info,
scatterwalk_samebuf(&walk_in, &walk_out,
src_p, dst_p));
prfn(tfm, dst_p, src_p, crfn, enc, info, in_place);
scatterwalk_done(&walk_in, 0, nbytes);
......
......@@ -38,6 +38,7 @@ static inline int scatterwalk_samebuf(struct scatter_walk *walk_in,
void *src_p, void *dst_p)
{
return walk_in->page == walk_out->page &&
walk_in->offset == walk_out->offset &&
walk_in->data == src_p && walk_out->data == dst_p;
}
......
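A minimal standalone sketch (simplified types, not the kernel's scatter_walk API) of why the added offset comparison matters: two walk positions can land on the same page but at different offsets, and treating that as an in-place operation would corrupt data. The data == src_p / dst_p checks additionally confirm that neither chunk was bounced through a temporary buffer.

#include <stdbool.h>
#include <stddef.h>

struct walk_pos {
	const void *page;	/* page backing the current chunk */
	size_t offset;		/* offset of the chunk within that page */
	void *data;		/* mapped address handed to the cipher */
};

/* In-place only if source and destination are the same page at the same
 * offset, and both chunks were handed out directly rather than copied
 * into a temporary (bounce) buffer. */
static bool same_buf(const struct walk_pos *in, const struct walk_pos *out,
		     const void *src_p, const void *dst_p)
{
	return in->page == out->page &&
	       in->offset == out->offset &&
	       in->data == src_p && out->data == dst_p;
}

int main(void)
{
	char page[4096];
	struct walk_pos in  = { page, 0,   page };
	struct walk_pos out = { page, 512, page + 512 };

	/* Same page, different offsets: must NOT be treated as in-place. */
	return same_buf(&in, &out, in.data, out.data) ? 1 : 0;
}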
......@@ -348,7 +348,7 @@ enum
IFA_MULTICAST
};
#define IFA_MAX IFA_CACHEINFO
#define IFA_MAX IFA_MULTICAST
/* ifa_flags */
......
......@@ -336,6 +336,7 @@ enum
NET_TCP_BIC=102,
NET_TCP_BIC_FAST_CONVERGENCE=103,
NET_TCP_BIC_LOW_WINDOW=104,
NET_TCP_DEFAULT_WIN_SCALE=105,
};
enum {
......
......@@ -376,6 +376,20 @@ struct tcp_opt {
unsigned long last_synq_overflow;
/* Receiver side RTT estimation */
struct {
__u32 rtt;
__u32 seq;
__u32 time;
} rcv_rtt_est;
/* Receiver queue space */
struct {
int space;
__u32 seq;
__u32 time;
} rcvq_space;
/* TCP Westwood structure */
struct {
__u32 bw_ns_est; /* first bandwidth estimation..not too smoothed 8) */
......
......@@ -610,6 +610,7 @@ extern int sysctl_tcp_nometrics_save;
extern int sysctl_tcp_bic;
extern int sysctl_tcp_bic_fast_convergence;
extern int sysctl_tcp_bic_low_window;
extern int sysctl_tcp_default_win_scale;
extern atomic_t tcp_memory_allocated;
extern atomic_t tcp_sockets_allocated;
......@@ -800,6 +801,8 @@ extern int tcp_rcv_established(struct sock *sk,
struct tcphdr *th,
unsigned len);
extern void tcp_rcv_space_adjust(struct sock *sk);
enum tcp_ack_state_t
{
TCP_ACK_SCHED = 1,
......@@ -1751,6 +1754,9 @@ static inline void tcp_select_initial_window(int __space, __u32 mss,
if (*rcv_wscale && sysctl_tcp_app_win && space>=mss &&
space - max((space>>sysctl_tcp_app_win), mss>>*rcv_wscale) < 65536/2)
(*rcv_wscale)--;
*rcv_wscale = max((__u8)sysctl_tcp_default_win_scale,
*rcv_wscale);
}
/* Set initial window to value enough for senders,
......
......@@ -665,6 +665,14 @@ ctl_table ipv4_table[] = {
.mode = 0644,
.proc_handler = &proc_dointvec,
},
{
.ctl_name = NET_TCP_DEFAULT_WIN_SCALE,
.procname = "tcp_default_win_scale",
.data = &sysctl_tcp_default_win_scale,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = &proc_dointvec,
},
{ .ctl_name = 0 }
};
......
......@@ -276,6 +276,8 @@ kmem_cache_t *tcp_timewait_cachep;
atomic_t tcp_orphan_count = ATOMIC_INIT(0);
int sysctl_tcp_default_win_scale = 7;
int sysctl_tcp_mem[3];
int sysctl_tcp_wmem[3] = { 4 * 1024, 16 * 1024, 128 * 1024 };
int sysctl_tcp_rmem[3] = { 4 * 1024, 87380, 87380 * 2 };
......@@ -1480,6 +1482,9 @@ int tcp_read_sock(struct sock *sk, read_descriptor_t *desc,
break;
}
tp->copied_seq = seq;
tcp_rcv_space_adjust(sk);
/* Clean up data we have read: This will do ACK frames. */
if (copied)
cleanup_rbuf(sk, copied);
......@@ -1740,6 +1745,8 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
copied += used;
len -= used;
tcp_rcv_space_adjust(sk);
skip_copy:
if (tp->urg_data && after(tp->copied_seq, tp->urg_seq)) {
tp->urg_data = 0;
......@@ -2675,10 +2682,6 @@ void __init tcp_init(void)
sysctl_tcp_mem[0] = 768 << order;
sysctl_tcp_mem[1] = 1024 << order;
sysctl_tcp_mem[2] = 1536 << order;
if (sysctl_tcp_mem[2] - sysctl_tcp_mem[1] > 512)
sysctl_tcp_mem[1] = sysctl_tcp_mem[2] - 512;
if (sysctl_tcp_mem[1] - sysctl_tcp_mem[0] > 512)
sysctl_tcp_mem[0] = sysctl_tcp_mem[1] - 512;
if (order < 3) {
sysctl_tcp_wmem[2] = 64 * 1024;
......
......@@ -305,6 +305,8 @@ static void tcp_init_buffer_space(struct sock *sk)
if (!(sk->sk_userlocks & SOCK_SNDBUF_LOCK))
tcp_fixup_sndbuf(sk);
tp->rcvq_space.space = tp->rcv_wnd;
maxwin = tcp_full_space(sk);
if (tp->window_clamp >= maxwin) {
......@@ -364,6 +366,120 @@ static void tcp_clamp_window(struct sock *sk, struct tcp_opt *tp)
}
}
/* Receiver "autotuning" code.
*
* The algorithm for RTT estimation w/o timestamps is based on
* Dynamic Right-Sizing (DRS) by Wu Feng and Mike Fisk of LANL.
* <http://www.lanl.gov/radiant/website/pubs/drs/lacsi2001.ps>
*
* More detail on this code can be found at
* <http://www.psc.edu/~jheffner/senior_thesis.ps>,
* though this reference is out of date. A new paper
* is pending.
*/
static void tcp_rcv_rtt_update(struct tcp_opt *tp, u32 sample, int win_dep)
{
u32 new_sample = tp->rcv_rtt_est.rtt;
long m = sample;
if (m == 0)
m = 1;
if (new_sample != 0) {
/* If we take larger samples in the non-timestamp
* case, we could grossly overestimate the RTT especially
* with chatty applications or bulk transfer apps which
* are stalled on filesystem I/O.
*
* Also, since we are only going for a minimum in the
* non-timestamp case, we do not smooth things out
* else with timestamps disabled convergence takes too
* long.
*/
if (!win_dep) {
m -= (new_sample >> 3);
new_sample += m;
} else if (m < new_sample)
new_sample = m << 3;
} else {
/* No previous measure. */
new_sample = m << 3;
}
if (tp->rcv_rtt_est.rtt != new_sample)
tp->rcv_rtt_est.rtt = new_sample;
}
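To make the fixed-point bookkeeping above concrete (a standalone sketch with made-up sample values, not kernel code): the estimate is stored scaled by 8, timestamp-based samples are folded in with a gain of 1/8, and window-based samples are only ever allowed to pull the estimate down.

#include <stdio.h>

/* est holds 8 * rtt.  A new timestamp-based sample m is folded in as
 * est += m - est/8, i.e. the unscaled estimate becomes 7/8 of the old
 * value plus 1/8 of the sample. */
static unsigned int ewma8(unsigned int est, unsigned int m)
{
	if (m == 0)
		m = 1;
	if (est == 0)
		return m << 3;		/* first sample seeds the estimate */
	return est + m - (est >> 3);
}

int main(void)
{
	unsigned int est = 0;
	unsigned int samples[] = { 40, 42, 38, 41 };	/* jiffies, made up */

	for (unsigned int i = 0; i < 4; i++) {
		est = ewma8(est, samples[i]);
		printf("sample %u -> estimated rtt %u jiffies\n",
		       samples[i], est >> 3);
	}
	return 0;
}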
static inline void tcp_rcv_rtt_measure(struct tcp_opt *tp)
{
if (tp->rcv_rtt_est.time == 0)
goto new_measure;
if (before(tp->rcv_nxt, tp->rcv_rtt_est.seq))
return;
tcp_rcv_rtt_update(tp,
jiffies - tp->rcv_rtt_est.time,
1);
new_measure:
tp->rcv_rtt_est.seq = tp->rcv_nxt + tp->rcv_wnd;
tp->rcv_rtt_est.time = tcp_time_stamp;
}
static inline void tcp_rcv_rtt_measure_ts(struct tcp_opt *tp, struct sk_buff *skb)
{
if (tp->rcv_tsecr &&
(TCP_SKB_CB(skb)->end_seq -
TCP_SKB_CB(skb)->seq >= tp->ack.rcv_mss))
tcp_rcv_rtt_update(tp, tcp_time_stamp - tp->rcv_tsecr, 0);
}
/*
* This function should be called every time data is copied to user space.
* It calculates the appropriate TCP receive buffer space.
*/
void tcp_rcv_space_adjust(struct sock *sk)
{
struct tcp_opt *tp = tcp_sk(sk);
int time;
int space;
if (tp->rcvq_space.time == 0)
goto new_measure;
time = tcp_time_stamp - tp->rcvq_space.time;
if (time < (tp->rcv_rtt_est.rtt >> 3) ||
tp->rcv_rtt_est.rtt == 0)
return;
space = 2 * (tp->copied_seq - tp->rcvq_space.seq);
space = max(tp->rcvq_space.space, space);
if (tp->rcvq_space.space != space) {
int rcvmem;
tp->rcvq_space.space = space;
/* Receive space grows, normalize in order to
* take into account packet headers and sk_buff
* structure overhead.
*/
space /= tp->advmss;
if (!space)
space = 1;
rcvmem = (tp->advmss + MAX_TCP_HEADER +
16 + sizeof(struct sk_buff));
space *= rcvmem;
space = min(space, sysctl_tcp_rmem[2]);
if (space > sk->sk_rcvbuf)
sk->sk_rcvbuf = space;
}
new_measure:
tp->rcvq_space.seq = tp->copied_seq;
tp->rcvq_space.time = tcp_time_stamp;
}
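To put rough numbers to the sizing arithmetic above (a standalone sketch with assumed values, not kernel code): if the application drained 64 KB from the socket over the last measured RTT, the target is space for twice that amount, with each MSS-sized chunk inflated by per-packet header and sk_buff overhead, and the result capped at sysctl_tcp_rmem[2].

#include <stdio.h>

int main(void)
{
	int copied   = 64 * 1024;	/* bytes copied to the app in one RTT (assumed) */
	int advmss   = 1460;		/* advertised MSS (assumed) */
	int overhead = 320;		/* stand-in for MAX_TCP_HEADER + 16 + sizeof(struct sk_buff) */
	int rmem_max = 87380 * 2;	/* sysctl_tcp_rmem[2] default from this patch */

	int space  = 2 * copied;	/* room for two RTTs worth of data */
	int chunks = space / advmss;	/* number of MSS-sized segments */
	if (!chunks)
		chunks = 1;
	space = chunks * (advmss + overhead);	/* per-segment cost includes overhead */
	if (space > rmem_max)
		space = rmem_max;

	printf("new sk_rcvbuf target: %d bytes\n", space);
	return 0;
}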
/* There is something which you must keep in mind when you analyze the
* behavior of the tp->ato delayed ack timeout interval. When a
* connection starts up, we want to ack as quickly as possible. The
......@@ -382,6 +498,8 @@ static void tcp_event_data_recv(struct sock *sk, struct tcp_opt *tp, struct sk_b
tcp_measure_rcv_mss(tp, skb);
tcp_rcv_rtt_measure(tp);
now = tcp_time_stamp;
if (!tp->ack.ato) {
......@@ -3318,6 +3436,7 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
tp->ucopy.len -= chunk;
tp->copied_seq += chunk;
eaten = (chunk == skb->len && !th->fin);
tcp_rcv_space_adjust(sk);
}
local_bh_disable();
}
......@@ -3918,6 +4037,7 @@ static int tcp_copy_to_iovec(struct sock *sk, struct sk_buff *skb, int hlen)
if (!err) {
tp->ucopy.len -= chunk;
tp->copied_seq += chunk;
tcp_rcv_space_adjust(sk);
}
local_bh_disable();
......@@ -4045,6 +4165,9 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
(sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED) &&
tp->rcv_nxt == tp->rcv_wup)
tcp_store_ts_recent(tp);
tcp_rcv_rtt_measure_ts(tp, skb);
/* We know that such packets are checksummed
* on entry.
*/
......@@ -4076,6 +4199,8 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
tp->rcv_nxt == tp->rcv_wup)
tcp_store_ts_recent(tp);
tcp_rcv_rtt_measure_ts(tp, skb);
__skb_pull(skb, tcp_header_len);
tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
NET_INC_STATS_BH(TCPHPHitsToUser);
......@@ -4095,6 +4220,8 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
tp->rcv_nxt == tp->rcv_wup)
tcp_store_ts_recent(tp);
tcp_rcv_rtt_measure_ts(tp, skb);
if ((int)skb->truesize > sk->sk_forward_alloc)
goto step5;
......@@ -4191,6 +4318,8 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
if(th->ack)
tcp_ack(sk, skb, FLAG_SLOWPATH);
tcp_rcv_rtt_measure_ts(tp, skb);
/* Process urgent data. */
tcp_urg(sk, skb, th);
......
......@@ -554,6 +554,8 @@ static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
if (mtu < dst_pmtu(dst) && rt6->rt6i_dst.plen == 128) {
rt6->rt6i_flags |= RTF_MODIFIED;
if (mtu < IPV6_MIN_MTU)
mtu = IPV6_MIN_MTU;
dst->metrics[RTAX_MTU-1] = mtu;
}
}
......
......@@ -236,10 +236,10 @@ static void xfrm6_update_pmtu(struct dst_entry *dst, u32 mtu)
{
struct dst_entry *path = dst->path;
if (mtu >= 1280 && mtu < dst_pmtu(dst))
return;
path->ops->update_pmtu(path, mtu);
if (mtu >= IPV6_MIN_MTU && mtu < dst_pmtu(dst))
path->ops->update_pmtu(path, mtu);
return;
}
struct dst_ops xfrm6_dst_ops = {
......
......@@ -271,7 +271,7 @@ struct sctp_association *sctp_association_init(struct sctp_association *asoc,
asoc->need_ecne = 0;
asoc->assoc_id = (sctp_assoc_t)-1;
asoc->assoc_id = (sctp_assoc_t)-1L;
/* Assume that peer would support both address types unless we are
* told otherwise.
......@@ -374,9 +374,9 @@ static void sctp_association_destroy(struct sctp_association *asoc)
sctp_endpoint_put(asoc->ep);
sock_put(asoc->base.sk);
if ((int)asoc->assoc_id != -1) {
if ((long)asoc->assoc_id != -1L) {
spin_lock_bh(&sctp_assocs_id_lock);
idr_remove(&sctp_assocs_id, (int)asoc->assoc_id);
idr_remove(&sctp_assocs_id, (long)asoc->assoc_id);
spin_unlock_bh(&sctp_assocs_id_lock);
}
......
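The assoc_id casts are widened from int to long; a minimal standalone sketch (generic C, not the SCTP type definitions, assuming an LP64 target where long is 64 bits and sctp_assoc_t is wider than int, as the widened casts suggest) of why truncating such a handle to int before comparing it with -1 is unsafe:

#include <stdio.h>

int main(void)
{
	/* Hypothetical 64-bit handle whose low 32 bits happen to be all
	 * ones.  It is not the -1 sentinel. */
	long id = 0x00000001ffffffffL;

	/* On common two's-complement targets the int cast keeps only the
	 * low 32 bits, which compare equal to -1. */
	if ((int)id == -1)
		printf("int cast: falsely matches the -1 sentinel\n");
	if ((long)id == -1L)
		printf("long cast: matches -1\n");
	else
		printf("long cast: correctly distinct from -1\n");
	return 0;
}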