Commit a9e050f4 authored by Eric Dumazet's avatar Eric Dumazet Committed by David S. Miller

net: tcp: GRO should be ECN friendly

While doing TCP ECN tests, I discovered GRO was reordering packets if it
receives one packet with CE set, while previous packets in same NAPI run
have ECT(0) for the same flow :

09:25:25.857620 IP (tos 0x2,ECT(0), ttl 64, id 27893, offset 0, flags
[DF], proto TCP (6), length 4396)
    172.30.42.19.54550 > 172.30.42.13.44139: Flags [.], seq
233801:238145, ack 1, win 115, options [nop,nop,TS val 3397779 ecr
1990627], length 4344

09:25:25.857626 IP (tos 0x3,CE, ttl 64, id 27892, offset 0, flags [DF],
proto TCP (6), length 1500)
    172.30.42.19.54550 > 172.30.42.13.44139: Flags [.], seq
232353:233801, ack 1, win 115, options [nop,nop,TS val 3397779 ecr
1990627], length 1448

09:25:25.857638 IP (tos 0x0, ttl 64, id 34581, offset 0, flags [DF],
proto TCP (6), length 64)
    172.30.42.13.44139 > 172.30.42.19.54550: Flags [.], cksum 0xac8f
(incorrect -> 0xca69), ack 232353, win 1271, options [nop,nop,TS val
1990627 ecr 3397779,nop,nop,sack 1 {233801:238145}], length 0

We have two problems here :

1) GRO reorders packets

  If NIC gave packet1, then packet2, which happen to be from "different
flows"  GRO feeds stack with packet2, then packet1. I have yet to
understand how to solve this problem.

2) GRO is not ECN friendly

Delivering packets out of order makes TCP stack not as fast as it could
be.

In this patch I suggest we make the tos test not part of the 'same_flow'
determination, but part of the 'should flush' logic
Signed-off-by: default avatarEric Dumazet <edumazet@google.com>
Cc: Herbert Xu <herbert@gondor.apana.org.au>
Acked-by: default avatarHerbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parent 14a19680
...@@ -1364,7 +1364,7 @@ static struct sk_buff **inet_gro_receive(struct sk_buff **head, ...@@ -1364,7 +1364,7 @@ static struct sk_buff **inet_gro_receive(struct sk_buff **head,
if (*(u8 *)iph != 0x45) if (*(u8 *)iph != 0x45)
goto out_unlock; goto out_unlock;
if (unlikely(ip_fast_csum((u8 *)iph, iph->ihl))) if (unlikely(ip_fast_csum((u8 *)iph, 5)))
goto out_unlock; goto out_unlock;
id = ntohl(*(__be32 *)&iph->id); id = ntohl(*(__be32 *)&iph->id);
...@@ -1380,7 +1380,6 @@ static struct sk_buff **inet_gro_receive(struct sk_buff **head, ...@@ -1380,7 +1380,6 @@ static struct sk_buff **inet_gro_receive(struct sk_buff **head,
iph2 = ip_hdr(p); iph2 = ip_hdr(p);
if ((iph->protocol ^ iph2->protocol) | if ((iph->protocol ^ iph2->protocol) |
(iph->tos ^ iph2->tos) |
((__force u32)iph->saddr ^ (__force u32)iph2->saddr) | ((__force u32)iph->saddr ^ (__force u32)iph2->saddr) |
((__force u32)iph->daddr ^ (__force u32)iph2->daddr)) { ((__force u32)iph->daddr ^ (__force u32)iph2->daddr)) {
NAPI_GRO_CB(p)->same_flow = 0; NAPI_GRO_CB(p)->same_flow = 0;
...@@ -1390,6 +1389,7 @@ static struct sk_buff **inet_gro_receive(struct sk_buff **head, ...@@ -1390,6 +1389,7 @@ static struct sk_buff **inet_gro_receive(struct sk_buff **head,
/* All fields must match except length and checksum. */ /* All fields must match except length and checksum. */
NAPI_GRO_CB(p)->flush |= NAPI_GRO_CB(p)->flush |=
(iph->ttl ^ iph2->ttl) | (iph->ttl ^ iph2->ttl) |
(iph->tos ^ iph2->tos) |
((u16)(ntohs(iph2->id) + NAPI_GRO_CB(p)->count) ^ id); ((u16)(ntohs(iph2->id) + NAPI_GRO_CB(p)->count) ^ id);
NAPI_GRO_CB(p)->flush |= flush; NAPI_GRO_CB(p)->flush |= flush;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment