Commit 4b9b1cdf authored by Nikolay Aleksandrov's avatar Nikolay Aleksandrov Committed by David S. Miller

net: fix wrong mac_len calculation for vlans

After 1e785f48 ("net: Start with correct mac_len in
skb_network_protocol") skb->mac_len is used as a start of the
calculation in skb_network_protocol() but that is not always correct. If
skb->protocol == 8021Q/AD, usually the vlan header is already inserted
in the skb (i.e. vlan reorder hdr == 0). Usually when the packet enters
dev_hard_xmit it has mac_len == 0 so we take 2 bytes from the
destination mac address (skb->data + VLAN_HLEN) as a type in
skb_network_protocol() and return vlan_depth == 4. In the case where TSO is
off, then the mac_len is set but it's == 18 (ETH_HLEN + VLAN_HLEN), so
skb_network_protocol() returns a type from inside the packet and
offset == 22. Also make vlan_depth unsigned as suggested before.
As suggested by Eric Dumazet, move the while() loop in the if() so we
can avoid additional testing in fast path.

Here are few netperf tests + debug printk's to illustrate:
cat netperf.tso-on.reorder-on.bugged
- Vlan -> device (reorder on, default, this case is okay)
MIGRATED TCP STREAM TEST from 0.0.0.0 (0.0.0.0) port 0 AF_INET to
192.168.3.1 () port 0 AF_INET
Recv   Send    Send
Socket Socket  Message  Elapsed
Size   Size    Size     Time     Throughput
bytes  bytes   bytes    secs.    10^6bits/sec

 87380  16384  16384    10.00    7111.54
[   81.605435] skb->len 65226 skb->gso_size 1448 skb->proto 0x800
skb->mac_len 0 vlan_depth 0 type 0x800

- Vlan -> device (reorder off, bad)
cat netperf.tso-on.reorder-off.bugged
MIGRATED TCP STREAM TEST from 0.0.0.0 (0.0.0.0) port 0 AF_INET to
192.168.3.1 () port 0 AF_INET
Recv   Send    Send
Socket Socket  Message  Elapsed
Size   Size    Size     Time     Throughput
bytes  bytes   bytes    secs.    10^6bits/sec

 87380  16384  16384    10.00     241.35
[  204.578332] skb->len 1518 skb->gso_size 0 skb->proto 0x8100
skb->mac_len 0 vlan_depth 4 type 0x5301
0x5301 are the last two bytes of the destination mac.

And if we stop TSO, we may get even the following:
[   83.343156] skb->len 2966 skb->gso_size 1448 skb->proto 0x8100
skb->mac_len 18 vlan_depth 22 type 0xb84
Because mac_len already accounts for VLAN_HLEN.

After the fix:
cat netperf.tso-on.reorder-off.fixed
MIGRATED TCP STREAM TEST from 0.0.0.0 (0.0.0.0) port 0 AF_INET to
192.168.3.1 () port 0 AF_INET
Recv   Send    Send
Socket Socket  Message  Elapsed
Size   Size    Size     Time     Throughput
bytes  bytes   bytes    secs.    10^6bits/sec

 87380  16384  16384    10.01    5001.46
[   81.888489] skb->len 65230 skb->gso_size 1448 skb->proto 0x8100
skb->mac_len 0 vlan_depth 18 type 0x800

CC: Vlad Yasevich <vyasevic@redhat.com>
CC: Eric Dumazet <eric.dumazet@gmail.com>
CC: Daniel Borkman <dborkman@redhat.com>
CC: David S. Miller <davem@davemloft.net>

Fixes:1e785f48 ("net: Start with correct mac_len in
skb_network_protocol")
Signed-off-by: default avatarNikolay Aleksandrov <nikolay@redhat.com>
Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parent 6ce995c6
...@@ -2283,8 +2283,8 @@ EXPORT_SYMBOL(skb_checksum_help); ...@@ -2283,8 +2283,8 @@ EXPORT_SYMBOL(skb_checksum_help);
__be16 skb_network_protocol(struct sk_buff *skb, int *depth) __be16 skb_network_protocol(struct sk_buff *skb, int *depth)
{ {
unsigned int vlan_depth = skb->mac_len;
__be16 type = skb->protocol; __be16 type = skb->protocol;
int vlan_depth = skb->mac_len;
/* Tunnel gso handlers can set protocol to ethernet. */ /* Tunnel gso handlers can set protocol to ethernet. */
if (type == htons(ETH_P_TEB)) { if (type == htons(ETH_P_TEB)) {
...@@ -2297,15 +2297,30 @@ __be16 skb_network_protocol(struct sk_buff *skb, int *depth) ...@@ -2297,15 +2297,30 @@ __be16 skb_network_protocol(struct sk_buff *skb, int *depth)
type = eth->h_proto; type = eth->h_proto;
} }
while (type == htons(ETH_P_8021Q) || type == htons(ETH_P_8021AD)) { /* if skb->protocol is 802.1Q/AD then the header should already be
struct vlan_hdr *vh; * present at mac_len - VLAN_HLEN (if mac_len > 0), or at
* ETH_HLEN otherwise
if (unlikely(!pskb_may_pull(skb, vlan_depth + VLAN_HLEN))) */
return 0; if (type == htons(ETH_P_8021Q) || type == htons(ETH_P_8021AD)) {
if (vlan_depth) {
vh = (struct vlan_hdr *)(skb->data + vlan_depth); if (unlikely(WARN_ON(vlan_depth < VLAN_HLEN)))
type = vh->h_vlan_encapsulated_proto; return 0;
vlan_depth += VLAN_HLEN; vlan_depth -= VLAN_HLEN;
} else {
vlan_depth = ETH_HLEN;
}
do {
struct vlan_hdr *vh;
if (unlikely(!pskb_may_pull(skb,
vlan_depth + VLAN_HLEN)))
return 0;
vh = (struct vlan_hdr *)(skb->data + vlan_depth);
type = vh->h_vlan_encapsulated_proto;
vlan_depth += VLAN_HLEN;
} while (type == htons(ETH_P_8021Q) ||
type == htons(ETH_P_8021AD));
} }
*depth = vlan_depth; *depth = vlan_depth;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment