Commit cff04e2d authored by Eric Dumazet's avatar Eric Dumazet Committed by David S. Miller

tcp_cubic: switch bictcp_clock() to usec resolution

Current 1ms clock feeds ca->round_start, ca->delay_min,
ca->last_ack.

This is quite problematic for data-center flows, where delay_min
is way below 1 ms.

This means Hystart Train detection triggers every time jiffies value
is updated, since "((s32)(now - ca->round_start) > ca->delay_min >> 4)"
expression becomes true.

This kind of random behavior can be solved by reusing the existing
usec timestamp that TCP keeps in tp->tcp_mstamp

Note that a followup patch will tweak things a bit, because
during slow start, GRO aggregation on receivers naturally
increases the RTT as TSO packets gradually come to ~64KB size.

To recap, right after this patch CUBIC Hystart train detection
is more aggressive, since short RTT flows might exit slow start at
cwnd = 20, instead of being possibly unbounded.

Following patch will address this problem.
Signed-off-by: default avatarEric Dumazet <edumazet@google.com>
Acked-by: default avatarNeal Cardwell <ncardwell@google.com>
Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parent 35821fc2
...@@ -40,8 +40,8 @@ ...@@ -40,8 +40,8 @@
/* Number of delay samples for detecting the increase of delay */ /* Number of delay samples for detecting the increase of delay */
#define HYSTART_MIN_SAMPLES 8 #define HYSTART_MIN_SAMPLES 8
#define HYSTART_DELAY_MIN (4U<<3) #define HYSTART_DELAY_MIN (4000U) /* 4 ms */
#define HYSTART_DELAY_MAX (16U<<3) #define HYSTART_DELAY_MAX (16000U) /* 16 ms */
#define HYSTART_DELAY_THRESH(x) clamp(x, HYSTART_DELAY_MIN, HYSTART_DELAY_MAX) #define HYSTART_DELAY_THRESH(x) clamp(x, HYSTART_DELAY_MIN, HYSTART_DELAY_MAX)
static int fast_convergence __read_mostly = 1; static int fast_convergence __read_mostly = 1;
...@@ -53,7 +53,7 @@ static int tcp_friendliness __read_mostly = 1; ...@@ -53,7 +53,7 @@ static int tcp_friendliness __read_mostly = 1;
static int hystart __read_mostly = 1; static int hystart __read_mostly = 1;
static int hystart_detect __read_mostly = HYSTART_ACK_TRAIN | HYSTART_DELAY; static int hystart_detect __read_mostly = HYSTART_ACK_TRAIN | HYSTART_DELAY;
static int hystart_low_window __read_mostly = 16; static int hystart_low_window __read_mostly = 16;
static int hystart_ack_delta __read_mostly = 2; static int hystart_ack_delta_us __read_mostly = 2000;
static u32 cube_rtt_scale __read_mostly; static u32 cube_rtt_scale __read_mostly;
static u32 beta_scale __read_mostly; static u32 beta_scale __read_mostly;
...@@ -77,8 +77,8 @@ MODULE_PARM_DESC(hystart_detect, "hybrid slow start detection mechanisms" ...@@ -77,8 +77,8 @@ MODULE_PARM_DESC(hystart_detect, "hybrid slow start detection mechanisms"
" 1: packet-train 2: delay 3: both packet-train and delay"); " 1: packet-train 2: delay 3: both packet-train and delay");
module_param(hystart_low_window, int, 0644); module_param(hystart_low_window, int, 0644);
MODULE_PARM_DESC(hystart_low_window, "lower bound cwnd for hybrid slow start"); MODULE_PARM_DESC(hystart_low_window, "lower bound cwnd for hybrid slow start");
module_param(hystart_ack_delta, int, 0644); module_param(hystart_ack_delta_us, int, 0644);
MODULE_PARM_DESC(hystart_ack_delta, "spacing between ack's indicating train (msecs)"); MODULE_PARM_DESC(hystart_ack_delta_us, "spacing between ack's indicating train (usecs)");
/* BIC TCP Parameters */ /* BIC TCP Parameters */
struct bictcp { struct bictcp {
...@@ -89,7 +89,7 @@ struct bictcp { ...@@ -89,7 +89,7 @@ struct bictcp {
u32 bic_origin_point;/* origin point of bic function */ u32 bic_origin_point;/* origin point of bic function */
u32 bic_K; /* time to origin point u32 bic_K; /* time to origin point
from the beginning of the current epoch */ from the beginning of the current epoch */
u32 delay_min; /* min delay (msec << 3) */ u32 delay_min; /* min delay (usec) */
u32 epoch_start; /* beginning of an epoch */ u32 epoch_start; /* beginning of an epoch */
u32 ack_cnt; /* number of acks */ u32 ack_cnt; /* number of acks */
u32 tcp_cwnd; /* estimated tcp cwnd */ u32 tcp_cwnd; /* estimated tcp cwnd */
...@@ -117,13 +117,9 @@ static inline void bictcp_reset(struct bictcp *ca) ...@@ -117,13 +117,9 @@ static inline void bictcp_reset(struct bictcp *ca)
ca->found = 0; ca->found = 0;
} }
static inline u32 bictcp_clock(void) static inline u32 bictcp_clock_us(const struct sock *sk)
{ {
#if HZ < 1000 return tcp_sk(sk)->tcp_mstamp;
return ktime_to_ms(ktime_get_real());
#else
return jiffies_to_msecs(jiffies);
#endif
} }
static inline void bictcp_hystart_reset(struct sock *sk) static inline void bictcp_hystart_reset(struct sock *sk)
...@@ -131,7 +127,7 @@ static inline void bictcp_hystart_reset(struct sock *sk) ...@@ -131,7 +127,7 @@ static inline void bictcp_hystart_reset(struct sock *sk)
struct tcp_sock *tp = tcp_sk(sk); struct tcp_sock *tp = tcp_sk(sk);
struct bictcp *ca = inet_csk_ca(sk); struct bictcp *ca = inet_csk_ca(sk);
ca->round_start = ca->last_ack = bictcp_clock(); ca->round_start = ca->last_ack = bictcp_clock_us(sk);
ca->end_seq = tp->snd_nxt; ca->end_seq = tp->snd_nxt;
ca->curr_rtt = ~0U; ca->curr_rtt = ~0U;
ca->sample_cnt = 0; ca->sample_cnt = 0;
...@@ -276,7 +272,7 @@ static inline void bictcp_update(struct bictcp *ca, u32 cwnd, u32 acked) ...@@ -276,7 +272,7 @@ static inline void bictcp_update(struct bictcp *ca, u32 cwnd, u32 acked)
*/ */
t = (s32)(tcp_jiffies32 - ca->epoch_start); t = (s32)(tcp_jiffies32 - ca->epoch_start);
t += msecs_to_jiffies(ca->delay_min >> 3); t += usecs_to_jiffies(ca->delay_min);
/* change the unit from HZ to bictcp_HZ */ /* change the unit from HZ to bictcp_HZ */
t <<= BICTCP_HZ; t <<= BICTCP_HZ;
do_div(t, HZ); do_div(t, HZ);
...@@ -382,12 +378,12 @@ static void hystart_update(struct sock *sk, u32 delay) ...@@ -382,12 +378,12 @@ static void hystart_update(struct sock *sk, u32 delay)
struct bictcp *ca = inet_csk_ca(sk); struct bictcp *ca = inet_csk_ca(sk);
if (hystart_detect & HYSTART_ACK_TRAIN) { if (hystart_detect & HYSTART_ACK_TRAIN) {
u32 now = bictcp_clock(); u32 now = bictcp_clock_us(sk);
/* first detection parameter - ack-train detection */ /* first detection parameter - ack-train detection */
if ((s32)(now - ca->last_ack) <= hystart_ack_delta) { if ((s32)(now - ca->last_ack) <= hystart_ack_delta_us) {
ca->last_ack = now; ca->last_ack = now;
if ((s32)(now - ca->round_start) > ca->delay_min >> 4) { if ((s32)(now - ca->round_start) > ca->delay_min >> 1) {
ca->found = 1; ca->found = 1;
NET_INC_STATS(sock_net(sk), NET_INC_STATS(sock_net(sk),
LINUX_MIB_TCPHYSTARTTRAINDETECT); LINUX_MIB_TCPHYSTARTTRAINDETECT);
...@@ -421,9 +417,6 @@ static void hystart_update(struct sock *sk, u32 delay) ...@@ -421,9 +417,6 @@ static void hystart_update(struct sock *sk, u32 delay)
} }
} }
/* Track delayed acknowledgment ratio using sliding window
* ratio = (15*ratio + sample) / 16
*/
static void bictcp_acked(struct sock *sk, const struct ack_sample *sample) static void bictcp_acked(struct sock *sk, const struct ack_sample *sample)
{ {
const struct tcp_sock *tp = tcp_sk(sk); const struct tcp_sock *tp = tcp_sk(sk);
...@@ -438,7 +431,7 @@ static void bictcp_acked(struct sock *sk, const struct ack_sample *sample) ...@@ -438,7 +431,7 @@ static void bictcp_acked(struct sock *sk, const struct ack_sample *sample)
if (ca->epoch_start && (s32)(tcp_jiffies32 - ca->epoch_start) < HZ) if (ca->epoch_start && (s32)(tcp_jiffies32 - ca->epoch_start) < HZ)
return; return;
delay = (sample->rtt_us << 3) / USEC_PER_MSEC; delay = sample->rtt_us;
if (delay == 0) if (delay == 0)
delay = 1; delay = 1;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment