Commit 2a1ca0d0 authored by David S. Miller, committed by Dmitry Torokhov

[TCP]: Add receiver side RTT estimation.

Based largely upon work by John Heffner.
parent cf8cd3d1
@@ -376,6 +376,13 @@ struct tcp_opt {
	unsigned long last_synq_overflow;

	/* Receiver side RTT estimation */
	struct {
		__u32	rtt;
		__u32	seq;
		__u32	time;
	} rcv_rtt_est;

	/* TCP Westwood structure */
	struct {
		__u32    bw_ns_est;        /* first bandwidth estimation..not too smoothed 8) */
...
@@ -364,6 +364,73 @@ static void tcp_clamp_window(struct sock *sk, struct tcp_opt *tp)
	}
}

/* Receiver "autotuning" code.
*
* The algorithm for RTT estimation w/o timestamps is based on
* Dynamic Right-Sizing (DRS) by Wu Feng and Mike Fisk of LANL.
* <http://www.lanl.gov/radiant/website/pubs/drs/lacsi2001.ps>
*
* More detail on this code can be found at
* <http://www.psc.edu/~jheffner/senior_thesis.ps>,
* though this reference is out of date. A new paper
* is pending.
*/
static void tcp_rcv_rtt_update(struct tcp_opt *tp, u32 sample, int win_dep)
{
	u32 new_sample = tp->rcv_rtt_est.rtt;
	long m = sample;

	if (m == 0)
		m = 1;

	if (new_sample != 0) {
		/* If we sample in larger samples in the non-timestamp
		 * case, we could grossly overestimate the RTT especially
		 * with chatty applications or bulk transfer apps which
		 * are stalled on filesystem I/O.
		 *
		 * Also, since we are only going for a minimum in the
		 * non-timestamp case, we do not smooth things out
		 * else with timestamps disabled convergence takes too
		 * long.
		 */
		if (!win_dep) {
			m -= (new_sample >> 3);
			new_sample += m;
		} else if (m < new_sample)
			new_sample = m << 3;
	} else {
		/* No previous measure. */
		new_sample = m << 3;
	}

	if (tp->rcv_rtt_est.rtt != new_sample)
		tp->rcv_rtt_est.rtt = new_sample;
}
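A note on the fixed-point arithmetic above: rcv_rtt_est.rtt is kept scaled by 8 (left-shifted by 3), the same convention the sender side uses for srtt, so the pair of statements m -= (new_sample >> 3); new_sample += m; is an exponentially weighted moving average with gain 1/8, i.e. rtt <- (7/8)*rtt + (1/8)*sample. In the window-dependent case (win_dep != 0) no smoothing is applied; per the comment above, only a smaller sample is meant to replace the estimate. The sketch below models just the smoothed branch in ordinary userspace C; the names are illustrative and not part of this patch.

        #include <stdio.h>

        /* Illustrative model (not kernel code) of the smoothed update used when
         * win_dep == 0.  rtt8 holds eight times the current estimate, mirroring
         * rcv_rtt_est.rtt / new_sample above.
         */
        static unsigned int rcv_rtt_ewma(unsigned int rtt8, long sample)
        {
                long m = sample ? sample : 1;   /* the kernel never feeds a zero sample */

                if (rtt8 == 0)                  /* no previous measure: seed the estimate */
                        return (unsigned int)(m << 3);

                m -= (long)(rtt8 >> 3);         /* error term: sample minus current estimate */
                return (unsigned int)((long)rtt8 + m); /* rtt8 += sample - rtt8/8 */
        }

        int main(void)
        {
                unsigned int rtt8 = 0;

                rtt8 = rcv_rtt_ewma(rtt8, 100); /* seed: estimate is 100 */
                rtt8 = rcv_rtt_ewma(rtt8, 180); /* move 1/8 of the way toward 180 */
                printf("estimate = %u\n", rtt8 >> 3);   /* prints 110 */
                return 0;
        }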
static inline void tcp_rcv_rtt_measure(struct tcp_opt *tp)
{
	if (tp->rcv_rtt_est.time == 0)
		goto new_measure;
	if (before(tp->rcv_nxt, tp->rcv_rtt_est.seq))
		return;
	tcp_rcv_rtt_update(tp,
			   jiffies - tp->rcv_rtt_est.time,
			   1);

new_measure:
	tp->rcv_rtt_est.seq = tp->rcv_nxt + tp->rcv_wnd;
	tp->rcv_rtt_est.time = tcp_time_stamp;
}
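Without timestamps the receiver gets at most one sample per receive window: tcp_rcv_rtt_measure() records the current time together with a sequence mark one full window beyond rcv_nxt, and once rcv_nxt has advanced past that mark a whole window of data has been received, so the elapsed time serves as a window-dependent RTT sample (the DRS idea referenced above). A small standalone walk-through of that bookkeeping, with made-up sequence numbers and tick values (not part of the patch):

        #include <stdio.h>

        /* Hypothetical userspace model of the per-window sampling done by
         * tcp_rcv_rtt_measure(); struct and function names are illustrative.
         */
        struct est { unsigned int seq, time; };

        static void measure(struct est *e, unsigned int rcv_nxt,
                            unsigned int rcv_wnd, unsigned int now)
        {
                if (e->time != 0) {
                        /* Equivalent of before(rcv_nxt, e->seq): still inside
                         * the window that was marked, so no sample yet.
                         */
                        if ((int)(rcv_nxt - e->seq) < 0)
                                return;
                        printf("window crossed: sample = %u ticks\n", now - e->time);
                }
                /* Start (or restart) a measurement one full window ahead. */
                e->seq = rcv_nxt + rcv_wnd;
                e->time = now;
        }

        int main(void)
        {
                struct est e = { 0, 0 };

                measure(&e, 1000, 4000, 10);    /* arm: sample completes at seq 5000 */
                measure(&e, 3000, 4000, 14);    /* below 5000, nothing recorded yet */
                measure(&e, 5200, 4000, 22);    /* crossed 5000: prints a 12-tick sample */
                return 0;
        }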
static inline void tcp_rcv_rtt_measure_ts(struct tcp_opt *tp, struct sk_buff *skb)
{
	if (tp->rcv_tsecr &&
	    (TCP_SKB_CB(skb)->end_seq -
	     TCP_SKB_CB(skb)->seq >= tp->ack.rcv_mss))
		tcp_rcv_rtt_update(tp, tcp_time_stamp - tp->rcv_tsecr, 0);
}
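With timestamps the sampling is per segment rather than per window: tp->rcv_tsecr echoes a tcp_time_stamp value this host sent earlier, so tcp_time_stamp - tp->rcv_tsecr is one round trip as observed by the receiver, and segments shorter than the measured receive MSS are skipped so that pure ACKs and tinygrams do not feed the estimator. As a made-up example: if a segment we sent carried tsval 500, the peer's next full-sized data segment echoes tsecr = 500, and the local clock now reads 512, the sample passed to tcp_rcv_rtt_update() is 512 - 500 = 12 ticks, smoothed with win_dep == 0.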
/* There is something which you must keep in mind when you analyze the
 * behavior of the tp->ato delayed ack timeout interval.  When a
 * connection starts up, we want to ack as quickly as possible.  The
@@ -382,6 +449,8 @@ static void tcp_event_data_recv(struct sock *sk, struct tcp_opt *tp, struct sk_b
	tcp_measure_rcv_mss(tp, skb);

	tcp_rcv_rtt_measure(tp);

	now = tcp_time_stamp;

	if (!tp->ack.ato) {
@@ -4045,6 +4114,9 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
			    (sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED) &&
			    tp->rcv_nxt == tp->rcv_wup)
				tcp_store_ts_recent(tp);

			tcp_rcv_rtt_measure_ts(tp, skb);

			/* We know that such packets are checksummed
			 * on entry.
			 */
@@ -4076,6 +4148,8 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
				    tp->rcv_nxt == tp->rcv_wup)
					tcp_store_ts_recent(tp);

				tcp_rcv_rtt_measure_ts(tp, skb);

				__skb_pull(skb, tcp_header_len);
				tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
				NET_INC_STATS_BH(TCPHPHitsToUser);
@@ -4095,6 +4169,8 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
			    tp->rcv_nxt == tp->rcv_wup)
				tcp_store_ts_recent(tp);

			tcp_rcv_rtt_measure_ts(tp, skb);

			if ((int)skb->truesize > sk->sk_forward_alloc)
				goto step5;
@@ -4191,6 +4267,8 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
	if(th->ack)
		tcp_ack(sk, skb, FLAG_SLOWPATH);

	tcp_rcv_rtt_measure_ts(tp, skb);

	/* Process urgent data. */
	tcp_urg(sk, skb, th);
...