Commit fab9593d authored by David S. Miller

Merge branch 'tcp-add-4-new-stats'

Wei Wang says:

====================
tcp: add 4 new stats

This patch series adds 3 RFC4898 stats:
1. tcpEStatsPerfHCDataOctetsOut
2. tcpEStatsPerfOctetsRetrans
3. tcpEStatsStackDSACKDups
and an additional stat to record the number of data packet reordering
events seen:
4. tcp_reord_seen

Together with the existing stats, applications can use them to measure
the retransmission rate in bytes, exclude spurious retransmissions
reflected by DSACK, and keep track of reordering events on live
connections.
Byte-based loss stats are particularly useful on networks where paths
have different MTUs. Google servers have been using these stats for
many years to instrument transport and network performance.

Note: The first patch is a refactor to add a helper to calculate
opt_stats size in order to make later changes cleaner.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents 53dd9652 7ec65372
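
The usage pattern the cover letter describes boils down to reading the
enlarged struct tcp_info. A minimal userspace sketch, assuming kernel
headers that already carry the four new tcpi_* fields; the helper name
and error handling here are illustrative only:

/* Sketch: compute the byte-based retransmission rate from the new
 * tcp_info fields. Assumes <linux/tcp.h> from a kernel with this series.
 */
#include <stdio.h>
#include <string.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <linux/tcp.h>

static void print_tcp_loss_stats(int fd)
{
	struct tcp_info ti;
	socklen_t len = sizeof(ti);

	memset(&ti, 0, sizeof(ti));
	if (getsockopt(fd, IPPROTO_TCP, TCP_INFO, &ti, &len) < 0)
		return;

	/* The kernel reports how many bytes it filled in; older kernels
	 * return a shorter struct that lacks the new fields.
	 */
	if (len < sizeof(ti))
		return;

	printf("bytes_sent=%llu bytes_retrans=%llu dsack_dups=%u reord_seen=%u\n",
	       (unsigned long long)ti.tcpi_bytes_sent,
	       (unsigned long long)ti.tcpi_bytes_retrans,
	       ti.tcpi_dsack_dups, ti.tcpi_reord_seen);
	if (ti.tcpi_bytes_sent)
		printf("retrans rate: %.4f%%\n",
		       100.0 * ti.tcpi_bytes_retrans / ti.tcpi_bytes_sent);
}

Checking the returned optlen before touching the new fields is the usual
guard against running on a kernel that predates this series.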
include/linux/tcp.h:
@@ -181,10 +181,16 @@ struct tcp_sock {
 	u32	data_segs_out;	/* RFC4898 tcpEStatsPerfDataSegsOut
 				 * total number of data segments sent.
 				 */
+	u64	bytes_sent;	/* RFC4898 tcpEStatsPerfHCDataOctetsOut
+				 * total number of data bytes sent.
+				 */
 	u64	bytes_acked;	/* RFC4898 tcpEStatsAppHCThruOctetsAcked
 				 * sum(delta(snd_una)), or how many bytes
 				 * were acked.
 				 */
+	u32	dsack_dups;	/* RFC4898 tcpEStatsStackDSACKDups
+				 * total number of DSACK blocks received
+				 */
 	u32	snd_una;	/* First byte we want an ack for */
 	u32	snd_sml;	/* Last byte of the most recently transmitted small packet */
 	u32	rcv_tstamp;	/* timestamp of last received ACK (for keepalives) */
@@ -214,8 +220,7 @@ struct tcp_sock {
 #define TCP_RACK_RECOVERY_THRESH 16
 		u8 reo_wnd_persist:5,	/* No. of recovery since last adj */
 		   dsack_seen:1,	/* Whether DSACK seen after last adj */
-		   advanced:1,		/* mstamp advanced since last lost marking */
-		   reord:1;		/* reordering detected */
+		   advanced:1;		/* mstamp advanced since last lost marking */
 	} rack;
 	u16	advmss;		/* Advertised MSS */
 	u8	compressed_ack;
@@ -261,6 +266,7 @@ struct tcp_sock {
 	u8	ecn_flags;	/* ECN status bits. */
 	u8	keepalive_probes; /* num of allowed keep alive probes */
 	u32	reordering;	/* Packet reordering metric. */
+	u32	reord_seen;	/* number of data packet reordering events */
 	u32	snd_up;		/* Urgent pointer */

 	/*
@@ -330,6 +336,9 @@ struct tcp_sock {
 				 * the first SYN. */
 	u32	undo_marker;	/* snd_una upon a new recovery episode. */
 	int	undo_retrans;	/* number of undoable retransmissions. */
+	u64	bytes_retrans;	/* RFC4898 tcpEStatsPerfOctetsRetrans
+				 * Total data bytes retransmitted
+				 */
 	u32	total_retrans;	/* Total retransmits for entire connection */
 	u32	urg_seq;	/* Seq of received urgent pointer */
......
include/uapi/linux/tcp.h:
@@ -235,6 +235,11 @@ struct tcp_info {
 	__u32	tcpi_delivered;
 	__u32	tcpi_delivered_ce;
+
+	__u64	tcpi_bytes_sent;    /* RFC4898 tcpEStatsPerfHCDataOctetsOut */
+	__u64	tcpi_bytes_retrans; /* RFC4898 tcpEStatsPerfOctetsRetrans */
+	__u32	tcpi_dsack_dups;    /* RFC4898 tcpEStatsStackDSACKDups */
+	__u32	tcpi_reord_seen;    /* reordering events seen */
 };

 /* netlink attributes types for SCM_TIMESTAMPING_OPT_STATS */
@@ -257,7 +262,10 @@ enum {
 	TCP_NLA_SND_SSTHRESH,	/* Slow start size threshold */
 	TCP_NLA_DELIVERED,	/* Data pkts delivered incl. out-of-order */
 	TCP_NLA_DELIVERED_CE,	/* Like above but only ones w/ CE marks */
+	TCP_NLA_BYTES_SENT,	/* Data bytes sent including retransmission */
+	TCP_NLA_BYTES_RETRANS,	/* Data bytes retransmitted */
+	TCP_NLA_DSACK_DUPS,	/* DSACK blocks received */
+	TCP_NLA_REORD_SEEN,	/* reordering events seen */
 };

 /* for TCP_MD5SIG socket option */
......
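
The same counters are also exported per transmission through
SCM_TIMESTAMPING_OPT_STATS, where each value arrives as a netlink
attribute typed by the enum above. A sketch of how a collector might
walk that attribute blob; it assumes buf/len were taken from
CMSG_DATA() of an SCM_TIMESTAMPING_OPT_STATS control message read off
the socket's error queue, and parse_opt_stats() is a hypothetical
helper, not an API from this series:

/* Sketch: walk an SCM_TIMESTAMPING_OPT_STATS payload and pick out the
 * new attributes. TCP_NLA_PAD alignment attributes emitted by
 * nla_put_u64_64bit() are skipped like any other unknown type.
 */
#include <stdio.h>
#include <string.h>
#include <linux/netlink.h>
#include <linux/tcp.h>

static void parse_opt_stats(const void *buf, size_t len)
{
	const struct nlattr *nla = buf;

	while (len >= NLA_HDRLEN &&
	       nla->nla_len >= NLA_HDRLEN && nla->nla_len <= len) {
		const void *data = (const char *)nla + NLA_HDRLEN;
		size_t step = NLA_ALIGN(nla->nla_len);
		__u64 v64;
		__u32 v32;

		switch (nla->nla_type) {
		case TCP_NLA_BYTES_SENT:
		case TCP_NLA_BYTES_RETRANS:
			if (nla->nla_len < NLA_HDRLEN + sizeof(v64))
				break;
			memcpy(&v64, data, sizeof(v64));
			printf("%s=%llu\n",
			       nla->nla_type == TCP_NLA_BYTES_SENT ?
			       "bytes_sent" : "bytes_retrans",
			       (unsigned long long)v64);
			break;
		case TCP_NLA_DSACK_DUPS:
		case TCP_NLA_REORD_SEEN:
			if (nla->nla_len < NLA_HDRLEN + sizeof(v32))
				break;
			memcpy(&v32, data, sizeof(v32));
			printf("%s=%u\n",
			       nla->nla_type == TCP_NLA_DSACK_DUPS ?
			       "dsack_dups" : "reord_seen", v32);
			break;
		}

		if (step >= len)
			break;	/* last (possibly unpadded) attribute */
		len -= step;
		nla = (const struct nlattr *)((const char *)nla + step);
	}
}
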
net/ipv4/tcp.c:
@@ -2594,6 +2594,10 @@ int tcp_disconnect(struct sock *sk, int flags)
 	sk->sk_rx_dst = NULL;
 	tcp_saved_syn_free(tp);
 	tp->compressed_ack = 0;
+	tp->bytes_sent = 0;
+	tp->bytes_retrans = 0;
+	tp->dsack_dups = 0;
+	tp->reord_seen = 0;

 	/* Clean up fastopen related fields */
 	tcp_free_fastopen_req(tp);
@@ -3201,10 +3205,41 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info)
 	info->tcpi_delivery_rate = rate64;
 	info->tcpi_delivered = tp->delivered;
 	info->tcpi_delivered_ce = tp->delivered_ce;
+	info->tcpi_bytes_sent = tp->bytes_sent;
+	info->tcpi_bytes_retrans = tp->bytes_retrans;
+	info->tcpi_dsack_dups = tp->dsack_dups;
+	info->tcpi_reord_seen = tp->reord_seen;
 	unlock_sock_fast(sk, slow);
 }
 EXPORT_SYMBOL_GPL(tcp_get_info);

+static size_t tcp_opt_stats_get_size(void)
+{
+	return
+		nla_total_size_64bit(sizeof(u64)) + /* TCP_NLA_BUSY */
+		nla_total_size_64bit(sizeof(u64)) + /* TCP_NLA_RWND_LIMITED */
+		nla_total_size_64bit(sizeof(u64)) + /* TCP_NLA_SNDBUF_LIMITED */
+		nla_total_size_64bit(sizeof(u64)) + /* TCP_NLA_DATA_SEGS_OUT */
+		nla_total_size_64bit(sizeof(u64)) + /* TCP_NLA_TOTAL_RETRANS */
+		nla_total_size_64bit(sizeof(u64)) + /* TCP_NLA_PACING_RATE */
+		nla_total_size_64bit(sizeof(u64)) + /* TCP_NLA_DELIVERY_RATE */
+		nla_total_size(sizeof(u32)) + /* TCP_NLA_SND_CWND */
+		nla_total_size(sizeof(u32)) + /* TCP_NLA_REORDERING */
+		nla_total_size(sizeof(u32)) + /* TCP_NLA_MIN_RTT */
+		nla_total_size(sizeof(u8)) + /* TCP_NLA_RECUR_RETRANS */
+		nla_total_size(sizeof(u8)) + /* TCP_NLA_DELIVERY_RATE_APP_LMT */
+		nla_total_size(sizeof(u32)) + /* TCP_NLA_SNDQ_SIZE */
+		nla_total_size(sizeof(u8)) + /* TCP_NLA_CA_STATE */
+		nla_total_size(sizeof(u32)) + /* TCP_NLA_SND_SSTHRESH */
+		nla_total_size(sizeof(u32)) + /* TCP_NLA_DELIVERED */
+		nla_total_size(sizeof(u32)) + /* TCP_NLA_DELIVERED_CE */
+		nla_total_size_64bit(sizeof(u64)) + /* TCP_NLA_BYTES_SENT */
+		nla_total_size_64bit(sizeof(u64)) + /* TCP_NLA_BYTES_RETRANS */
+		nla_total_size(sizeof(u32)) + /* TCP_NLA_DSACK_DUPS */
+		nla_total_size(sizeof(u32)) + /* TCP_NLA_REORD_SEEN */
+		0;
+}
+
 struct sk_buff *tcp_get_timestamping_opt_stats(const struct sock *sk)
 {
 	const struct tcp_sock *tp = tcp_sk(sk);
@@ -3213,9 +3248,7 @@ struct sk_buff *tcp_get_timestamping_opt_stats(const struct sock *sk)
 	u64 rate64;
 	u32 rate;

-	stats = alloc_skb(7 * nla_total_size_64bit(sizeof(u64)) +
-			  7 * nla_total_size(sizeof(u32)) +
-			  3 * nla_total_size(sizeof(u8)), GFP_ATOMIC);
+	stats = alloc_skb(tcp_opt_stats_get_size(), GFP_ATOMIC);
 	if (!stats)
 		return NULL;
@@ -3251,6 +3284,13 @@ struct sk_buff *tcp_get_timestamping_opt_stats(const struct sock *sk)
 	nla_put_u32(stats, TCP_NLA_SNDQ_SIZE, tp->write_seq - tp->snd_una);
 	nla_put_u8(stats, TCP_NLA_CA_STATE, inet_csk(sk)->icsk_ca_state);

+	nla_put_u64_64bit(stats, TCP_NLA_BYTES_SENT, tp->bytes_sent,
+			  TCP_NLA_PAD);
+	nla_put_u64_64bit(stats, TCP_NLA_BYTES_RETRANS, tp->bytes_retrans,
+			  TCP_NLA_PAD);
+	nla_put_u32(stats, TCP_NLA_DSACK_DUPS, tp->dsack_dups);
+	nla_put_u32(stats, TCP_NLA_REORD_SEEN, tp->reord_seen);
+
 	return stats;
 }
......
net/ipv4/tcp_input.c:
@@ -874,6 +874,7 @@ static void tcp_dsack_seen(struct tcp_sock *tp)
 {
 	tp->rx_opt.sack_ok |= TCP_DSACK_SEEN;
 	tp->rack.dsack_seen = 1;
+	tp->dsack_dups++;
 }

 /* It's reordering when higher sequence was delivered (i.e. sacked) before
@@ -905,8 +906,8 @@ static void tcp_check_sack_reordering(struct sock *sk, const u32 low_seq,
 			       sock_net(sk)->ipv4.sysctl_tcp_max_reordering);
 	}

-	tp->rack.reord = 1;
 	/* This exciting event is worth to be remembered. 8) */
+	tp->reord_seen++;
 	NET_INC_STATS(sock_net(sk),
 		      ts ? LINUX_MIB_TCPTSREORDER : LINUX_MIB_TCPSACKREORDER);
 }
@@ -1870,6 +1871,7 @@ static void tcp_check_reno_reordering(struct sock *sk, const int addend)
 	tp->reordering = min_t(u32, tp->packets_out + addend,
 			       sock_net(sk)->ipv4.sysctl_tcp_max_reordering);
+	tp->reord_seen++;
 	NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPRENOREORDER);
 }
......
net/ipv4/tcp_output.c:
@@ -1136,6 +1136,7 @@ static int __tcp_transmit_skb(struct sock *sk, struct sk_buff *skb,
 	if (skb->len != tcp_header_size) {
 		tcp_event_data_sent(tp, sk);
 		tp->data_segs_out += tcp_skb_pcount(skb);
+		tp->bytes_sent += skb->len - tcp_header_size;
 		tcp_internal_pacing(sk, skb);
 	}
@@ -2870,6 +2871,7 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs)
 	if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN)
 		__NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPSYNRETRANS);
 	tp->total_retrans += segs;
+	tp->bytes_retrans += skb->len;

 	/* make sure skb->data is aligned on arches that require it
 	 * and check if ack-trimming & collapsing extended the headroom
......
net/ipv4/tcp_recovery.c:
@@ -25,7 +25,7 @@ static u32 tcp_rack_reo_wnd(const struct sock *sk)
 {
 	struct tcp_sock *tp = tcp_sk(sk);

-	if (!tp->rack.reord) {
+	if (!tp->reord_seen) {
 		/* If reordering has not been observed, be aggressive during
 		 * the recovery or starting the recovery by DUPACK threshold.
 		 */
......