Commit b0da8537 authored by Michael Chan's avatar Michael Chan Committed by David S. Miller

[NET]: Add ECN support for TSO

In the current TSO implementation, NETIF_F_TSO and ECN cannot be
turned on together in a TCP connection.  The problem is that most
hardware that supports TSO does not handle CWR correctly if it is set
in the TSO packet.  Correct handling requires CWR to be set in the
first packet only if it is set in the TSO header.

This patch adds the ability to turn on NETIF_F_TSO and ECN using
GSO if necessary to handle TSO packets with CWR set.  Hardware
that handles CWR correctly can turn on NETIF_F_TSO_ECN in the dev->
features flag.

All TSO packets with CWR set will have the SKB_GSO_TCPV4_ECN set.  If
the output device does not have the NETIF_F_TSO_ECN feature set, GSO
will split the packet up correctly with CWR only set in the first
segment.

With help from Herbert Xu <herbert@gondor.apana.org.au>.

Since ECN can always be enabled with TSO, the SOCK_NO_LARGESEND sock
flag is completely removed.
Signed-off-by: default avatarMichael Chan <mchan@broadcom.com>
Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parent 877ce7c1
...@@ -316,6 +316,7 @@ struct net_device ...@@ -316,6 +316,7 @@ struct net_device
#define NETIF_F_TSO (SKB_GSO_TCPV4 << NETIF_F_GSO_SHIFT) #define NETIF_F_TSO (SKB_GSO_TCPV4 << NETIF_F_GSO_SHIFT)
#define NETIF_F_UFO (SKB_GSO_UDPV4 << NETIF_F_GSO_SHIFT) #define NETIF_F_UFO (SKB_GSO_UDPV4 << NETIF_F_GSO_SHIFT)
#define NETIF_F_GSO_ROBUST (SKB_GSO_DODGY << NETIF_F_GSO_SHIFT) #define NETIF_F_GSO_ROBUST (SKB_GSO_DODGY << NETIF_F_GSO_SHIFT)
#define NETIF_F_TSO_ECN (SKB_GSO_TCPV4_ECN << NETIF_F_GSO_SHIFT)
#define NETIF_F_GEN_CSUM (NETIF_F_NO_CSUM | NETIF_F_HW_CSUM) #define NETIF_F_GEN_CSUM (NETIF_F_NO_CSUM | NETIF_F_HW_CSUM)
#define NETIF_F_ALL_CSUM (NETIF_F_IP_CSUM | NETIF_F_GEN_CSUM) #define NETIF_F_ALL_CSUM (NETIF_F_IP_CSUM | NETIF_F_GEN_CSUM)
......
...@@ -175,6 +175,9 @@ enum { ...@@ -175,6 +175,9 @@ enum {
/* This indicates the skb is from an untrusted source. */ /* This indicates the skb is from an untrusted source. */
SKB_GSO_DODGY = 1 << 2, SKB_GSO_DODGY = 1 << 2,
/* This indicates the tcp segment has CWR set. */
SKB_GSO_TCPV4_ECN = 1 << 3,
}; };
/** /**
......
...@@ -383,7 +383,6 @@ enum sock_flags { ...@@ -383,7 +383,6 @@ enum sock_flags {
SOCK_USE_WRITE_QUEUE, /* whether to call sk->sk_write_space in sock_wfree */ SOCK_USE_WRITE_QUEUE, /* whether to call sk->sk_write_space in sock_wfree */
SOCK_DBG, /* %SO_DEBUG setting */ SOCK_DBG, /* %SO_DEBUG setting */
SOCK_RCVTSTAMP, /* %SO_TIMESTAMP setting */ SOCK_RCVTSTAMP, /* %SO_TIMESTAMP setting */
SOCK_NO_LARGESEND, /* whether to sent large segments or not */
SOCK_LOCALROUTE, /* route locally only, %SO_DONTROUTE setting */ SOCK_LOCALROUTE, /* route locally only, %SO_DONTROUTE setting */
SOCK_QUEUE_SHRUNK, /* write queue has been shrunk recently */ SOCK_QUEUE_SHRUNK, /* write queue has been shrunk recently */
}; };
...@@ -1033,7 +1032,7 @@ static inline void sk_setup_caps(struct sock *sk, struct dst_entry *dst) ...@@ -1033,7 +1032,7 @@ static inline void sk_setup_caps(struct sock *sk, struct dst_entry *dst)
if (sk->sk_route_caps & NETIF_F_GSO) if (sk->sk_route_caps & NETIF_F_GSO)
sk->sk_route_caps |= NETIF_F_TSO; sk->sk_route_caps |= NETIF_F_TSO;
if (sk->sk_route_caps & NETIF_F_TSO) { if (sk->sk_route_caps & NETIF_F_TSO) {
if (sock_flag(sk, SOCK_NO_LARGESEND) || dst->header_len) if (dst->header_len)
sk->sk_route_caps &= ~NETIF_F_TSO; sk->sk_route_caps &= ~NETIF_F_TSO;
else else
sk->sk_route_caps |= NETIF_F_SG | NETIF_F_HW_CSUM; sk->sk_route_caps |= NETIF_F_SG | NETIF_F_HW_CSUM;
......
...@@ -31,10 +31,9 @@ static inline void TCP_ECN_send_syn(struct sock *sk, struct tcp_sock *tp, ...@@ -31,10 +31,9 @@ static inline void TCP_ECN_send_syn(struct sock *sk, struct tcp_sock *tp,
struct sk_buff *skb) struct sk_buff *skb)
{ {
tp->ecn_flags = 0; tp->ecn_flags = 0;
if (sysctl_tcp_ecn && !(sk->sk_route_caps & NETIF_F_TSO)) { if (sysctl_tcp_ecn) {
TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_ECE|TCPCB_FLAG_CWR; TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_ECE|TCPCB_FLAG_CWR;
tp->ecn_flags = TCP_ECN_OK; tp->ecn_flags = TCP_ECN_OK;
sock_set_flag(sk, SOCK_NO_LARGESEND);
} }
} }
...@@ -56,6 +55,9 @@ static inline void TCP_ECN_send(struct sock *sk, struct tcp_sock *tp, ...@@ -56,6 +55,9 @@ static inline void TCP_ECN_send(struct sock *sk, struct tcp_sock *tp,
if (tp->ecn_flags&TCP_ECN_QUEUE_CWR) { if (tp->ecn_flags&TCP_ECN_QUEUE_CWR) {
tp->ecn_flags &= ~TCP_ECN_QUEUE_CWR; tp->ecn_flags &= ~TCP_ECN_QUEUE_CWR;
skb->h.th->cwr = 1; skb->h.th->cwr = 1;
if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4)
skb_shinfo(skb)->gso_type |=
SKB_GSO_TCPV4_ECN;
} }
} else { } else {
/* ACK or retransmitted segment: clear ECT|CE */ /* ACK or retransmitted segment: clear ECT|CE */
......
...@@ -4178,8 +4178,6 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, ...@@ -4178,8 +4178,6 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
*/ */
TCP_ECN_rcv_synack(tp, th); TCP_ECN_rcv_synack(tp, th);
if (tp->ecn_flags&TCP_ECN_OK)
sock_set_flag(sk, SOCK_NO_LARGESEND);
tp->snd_wl1 = TCP_SKB_CB(skb)->seq; tp->snd_wl1 = TCP_SKB_CB(skb)->seq;
tcp_ack(sk, skb, FLAG_SLOWPATH); tcp_ack(sk, skb, FLAG_SLOWPATH);
...@@ -4322,8 +4320,6 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, ...@@ -4322,8 +4320,6 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
tp->max_window = tp->snd_wnd; tp->max_window = tp->snd_wnd;
TCP_ECN_rcv_syn(tp, th); TCP_ECN_rcv_syn(tp, th);
if (tp->ecn_flags&TCP_ECN_OK)
sock_set_flag(sk, SOCK_NO_LARGESEND);
tcp_mtup_init(sk); tcp_mtup_init(sk);
tcp_sync_mss(sk, icsk->icsk_pmtu_cookie); tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
......
...@@ -440,8 +440,6 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, ...@@ -440,8 +440,6 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req,
newicsk->icsk_ack.last_seg_size = skb->len - newtp->tcp_header_len; newicsk->icsk_ack.last_seg_size = skb->len - newtp->tcp_header_len;
newtp->rx_opt.mss_clamp = req->mss; newtp->rx_opt.mss_clamp = req->mss;
TCP_ECN_openreq_child(newtp, req); TCP_ECN_openreq_child(newtp, req);
if (newtp->ecn_flags&TCP_ECN_OK)
sock_set_flag(newsk, SOCK_NO_LARGESEND);
TCP_INC_STATS_BH(TCP_MIB_PASSIVEOPENS); TCP_INC_STATS_BH(TCP_MIB_PASSIVEOPENS);
} }
......
...@@ -2044,8 +2044,6 @@ struct sk_buff * tcp_make_synack(struct sock *sk, struct dst_entry *dst, ...@@ -2044,8 +2044,6 @@ struct sk_buff * tcp_make_synack(struct sock *sk, struct dst_entry *dst,
memset(th, 0, sizeof(struct tcphdr)); memset(th, 0, sizeof(struct tcphdr));
th->syn = 1; th->syn = 1;
th->ack = 1; th->ack = 1;
if (dst->dev->features&NETIF_F_TSO)
ireq->ecn_ok = 0;
TCP_ECN_make_synack(req, th); TCP_ECN_make_synack(req, th);
th->source = inet_sk(sk)->sport; th->source = inet_sk(sk)->sport;
th->dest = ireq->rmt_port; th->dest = ireq->rmt_port;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment