Commit 2a1ca0d0 authored by David S. Miller, committed by Dmitry Torokhov

[TCP]: Add receiver side RTT estimation.

Based largely upon work by John Heffner.
parent cf8cd3d1
@@ -376,6 +376,13 @@ struct tcp_opt {
	unsigned long last_synq_overflow;

	/* Receiver side RTT estimation */
	struct {
		__u32	rtt;
		__u32	seq;
		__u32	time;
	} rcv_rtt_est;

	/* TCP Westwood structure */
	struct {
		__u32    bw_ns_est;        /* first bandwidth estimation..not too smoothed 8) */
...
@@ -364,6 +364,73 @@ static void tcp_clamp_window(struct sock *sk, struct tcp_opt *tp)
	}
}

/* Receiver "autotuning" code.
*
* The algorithm for RTT estimation w/o timestamps is based on
* Dynamic Right-Sizing (DRS) by Wu Feng and Mike Fisk of LANL.
* <http://www.lanl.gov/radiant/website/pubs/drs/lacsi2001.ps>
*
* More detail on this code can be found at
* <http://www.psc.edu/~jheffner/senior_thesis.ps>,
* though this reference is out of date. A new paper
* is pending.
*/
static void tcp_rcv_rtt_update(struct tcp_opt *tp, u32 sample, int win_dep)
{
	u32 new_sample = tp->rcv_rtt_est.rtt;
	long m = sample;

	if (m == 0)
		m = 1;

	if (new_sample != 0) {
		/* If we sample in larger samples in the non-timestamp
		 * case, we could grossly overestimate the RTT especially
		 * with chatty applications or bulk transfer apps which
		 * are stalled on filesystem I/O.
		 *
		 * Also, since we are only going for a minimum in the
		 * non-timestamp case, we do not smooth things out
		 * else with timestamps disabled convergence takes too
		 * long.
		 */
		if (!win_dep) {
			m -= (new_sample >> 3);
			new_sample += m;
		} else if (m < new_sample)
			new_sample = m << 3;
	} else {
		/* No previous measure. */
		new_sample = m << 3;
	}

	if (tp->rcv_rtt_est.rtt != new_sample)
		tp->rcv_rtt_est.rtt = new_sample;
}
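A note on the fixed-point arithmetic above: rcv_rtt_est.rtt is kept scaled by 8 (left-shifted by 3), the same convention the sender side uses for srtt, so the pair of statements m -= (new_sample >> 3); new_sample += m; is an exponentially weighted moving average with gain 1/8, i.e. rtt <- (7/8)*rtt + (1/8)*sample. In the window-dependent case (win_dep != 0) no smoothing is applied; per the comment above, only a smaller sample is meant to replace the estimate. The sketch below models just the smoothed branch in ordinary userspace C; the names are illustrative and not part of this patch.

        #include <stdio.h>

        /* Illustrative model (not kernel code) of the smoothed update used when
         * win_dep == 0.  rtt8 holds eight times the current estimate, mirroring
         * rcv_rtt_est.rtt / new_sample above.
         */
        static unsigned int rcv_rtt_ewma(unsigned int rtt8, long sample)
        {
                long m = sample ? sample : 1;   /* the kernel never feeds a zero sample */

                if (rtt8 == 0)                  /* no previous measure: seed the estimate */
                        return (unsigned int)(m << 3);

                m -= (long)(rtt8 >> 3);         /* error term: sample minus current estimate */
                return (unsigned int)((long)rtt8 + m); /* rtt8 += sample - rtt8/8 */
        }

        int main(void)
        {
                unsigned int rtt8 = 0;

                rtt8 = rcv_rtt_ewma(rtt8, 100); /* seed: estimate is 100 */
                rtt8 = rcv_rtt_ewma(rtt8, 180); /* move 1/8 of the way toward 180 */
                printf("estimate = %u\n", rtt8 >> 3);   /* prints 110 */
                return 0;
        }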
static inline void tcp_rcv_rtt_measure(struct tcp_opt *tp)
{
	if (tp->rcv_rtt_est.time == 0)
		goto new_measure;
	if (before(tp->rcv_nxt, tp->rcv_rtt_est.seq))
		return;
	tcp_rcv_rtt_update(tp,
			   jiffies - tp->rcv_rtt_est.time,
			   1);

new_measure:
	tp->rcv_rtt_est.seq = tp->rcv_nxt + tp->rcv_wnd;
	tp->rcv_rtt_est.time = tcp_time_stamp;
}
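Without timestamps the receiver gets at most one sample per receive window: tcp_rcv_rtt_measure() records the current time together with a sequence mark one full window beyond rcv_nxt, and once rcv_nxt has advanced past that mark a whole window of data has been received, so the elapsed time serves as a window-dependent RTT sample (the DRS idea referenced above). A small standalone walk-through of that bookkeeping, with made-up sequence numbers and tick values (not part of the patch):

        #include <stdio.h>

        /* Hypothetical userspace model of the per-window sampling done by
         * tcp_rcv_rtt_measure(); struct and function names are illustrative.
         */
        struct est { unsigned int seq, time; };

        static void measure(struct est *e, unsigned int rcv_nxt,
                            unsigned int rcv_wnd, unsigned int now)
        {
                if (e->time != 0) {
                        /* Equivalent of before(rcv_nxt, e->seq): still inside
                         * the window that was marked, so no sample yet.
                         */
                        if ((int)(rcv_nxt - e->seq) < 0)
                                return;
                        printf("window crossed: sample = %u ticks\n", now - e->time);
                }
                /* Start (or restart) a measurement one full window ahead. */
                e->seq = rcv_nxt + rcv_wnd;
                e->time = now;
        }

        int main(void)
        {
                struct est e = { 0, 0 };

                measure(&e, 1000, 4000, 10);    /* arm: sample completes at seq 5000 */
                measure(&e, 3000, 4000, 14);    /* below 5000, nothing recorded yet */
                measure(&e, 5200, 4000, 22);    /* crossed 5000: prints a 12-tick sample */
                return 0;
        }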
static inline void tcp_rcv_rtt_measure_ts(struct tcp_opt *tp, struct sk_buff *skb)
{
	if (tp->rcv_tsecr &&
	    (TCP_SKB_CB(skb)->end_seq -
	     TCP_SKB_CB(skb)->seq >= tp->ack.rcv_mss))
		tcp_rcv_rtt_update(tp, tcp_time_stamp - tp->rcv_tsecr, 0);
}
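With timestamps the sampling is per segment rather than per window: tp->rcv_tsecr echoes a tcp_time_stamp value this host sent earlier, so tcp_time_stamp - tp->rcv_tsecr is one round trip as observed by the receiver, and segments shorter than the measured receive MSS are skipped so that pure ACKs and tinygrams do not feed the estimator. As a made-up example: if a segment we sent carried tsval 500, the peer's next full-sized data segment echoes tsecr = 500, and the local clock now reads 512, the sample passed to tcp_rcv_rtt_update() is 512 - 500 = 12 ticks, smoothed with win_dep == 0.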
/* There is something which you must keep in mind when you analyze the
 * behavior of the tp->ato delayed ack timeout interval.  When a
 * connection starts up, we want to ack as quickly as possible.  The
@@ -382,6 +449,8 @@ static void tcp_event_data_recv(struct sock *sk, struct tcp_opt *tp, struct sk_b
	tcp_measure_rcv_mss(tp, skb);

	tcp_rcv_rtt_measure(tp);

	now = tcp_time_stamp;

	if (!tp->ack.ato) {
@@ -4045,6 +4114,9 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
			    (sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED) &&
			    tp->rcv_nxt == tp->rcv_wup)
				tcp_store_ts_recent(tp);

			tcp_rcv_rtt_measure_ts(tp, skb);

			/* We know that such packets are checksummed
			 * on entry.
			 */
@@ -4076,6 +4148,8 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
				    tp->rcv_nxt == tp->rcv_wup)
					tcp_store_ts_recent(tp);

				tcp_rcv_rtt_measure_ts(tp, skb);

				__skb_pull(skb, tcp_header_len);
				tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
				NET_INC_STATS_BH(TCPHPHitsToUser);
@@ -4095,6 +4169,8 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
			    tp->rcv_nxt == tp->rcv_wup)
				tcp_store_ts_recent(tp);

			tcp_rcv_rtt_measure_ts(tp, skb);

			if ((int)skb->truesize > sk->sk_forward_alloc)
				goto step5;
@@ -4191,6 +4267,8 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
	if(th->ack)
		tcp_ack(sk, skb, FLAG_SLOWPATH);

	tcp_rcv_rtt_measure_ts(tp, skb);

	/* Process urgent data. */
	tcp_urg(sk, skb, th);
...