Commit cef401de authored by Eric Dumazet's avatar Eric Dumazet Committed by David S. Miller

net: fix possible wrong checksum generation

Pravin Shelar mentioned that GSO could potentially generate
wrong TX checksum if skb has fragments that are overwritten
by the user between the checksum computation and transmit.

He suggested to linearize skbs but this extra copy can be
avoided for normal tcp skbs cooked by tcp_sendmsg().

This patch introduces a new SKB_GSO_SHARED_FRAG flag, set
in skb_shinfo(skb)->gso_type if at least one frag can be
modified by the user.

Typical sources of such possible overwrites are {vm}splice(),
sendfile(), and macvtap/tun/virtio_net drivers.

Tested:

$ netperf -H 7.7.8.84
MIGRATED TCP STREAM TEST from 0.0.0.0 (0.0.0.0) port 0 AF_INET to
7.7.8.84 () port 0 AF_INET
Recv   Send    Send
Socket Socket  Message  Elapsed
Size   Size    Size     Time     Throughput
bytes  bytes   bytes    secs.    10^6bits/sec

 87380  16384  16384    10.00    3959.52

$ netperf -H 7.7.8.84 -t TCP_SENDFILE
TCP SENDFILE TEST from 0.0.0.0 (0.0.0.0) port 0 AF_INET to 7.7.8.84 ()
port 0 AF_INET
Recv   Send    Send
Socket Socket  Message  Elapsed
Size   Size    Size     Time     Throughput
bytes  bytes   bytes    secs.    10^6bits/sec

 87380  16384  16384    10.00    3216.80

Performance of the SENDFILE is impacted by the extra allocation and
copy, and because we use order-0 pages, while the TCP_STREAM uses
bigger pages.
Reported-by: default avatarPravin Shelar <pshelar@nicira.com>
Signed-off-by: default avatarEric Dumazet <edumazet@google.com>
Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parent 61550022
...@@ -543,6 +543,7 @@ static int zerocopy_sg_from_iovec(struct sk_buff *skb, const struct iovec *from, ...@@ -543,6 +543,7 @@ static int zerocopy_sg_from_iovec(struct sk_buff *skb, const struct iovec *from,
skb->data_len += len; skb->data_len += len;
skb->len += len; skb->len += len;
skb->truesize += truesize; skb->truesize += truesize;
skb_shinfo(skb)->gso_type |= SKB_GSO_SHARED_FRAG;
atomic_add(truesize, &skb->sk->sk_wmem_alloc); atomic_add(truesize, &skb->sk->sk_wmem_alloc);
while (len) { while (len) {
int off = base & ~PAGE_MASK; int off = base & ~PAGE_MASK;
...@@ -598,7 +599,7 @@ static int macvtap_skb_from_vnet_hdr(struct sk_buff *skb, ...@@ -598,7 +599,7 @@ static int macvtap_skb_from_vnet_hdr(struct sk_buff *skb,
if (vnet_hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE) { if (vnet_hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE) {
skb_shinfo(skb)->gso_size = vnet_hdr->gso_size; skb_shinfo(skb)->gso_size = vnet_hdr->gso_size;
skb_shinfo(skb)->gso_type = gso_type; skb_shinfo(skb)->gso_type |= gso_type;
/* Header must be checked, and gso_segs computed. */ /* Header must be checked, and gso_segs computed. */
skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY; skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY;
......
...@@ -1005,6 +1005,7 @@ static int zerocopy_sg_from_iovec(struct sk_buff *skb, const struct iovec *from, ...@@ -1005,6 +1005,7 @@ static int zerocopy_sg_from_iovec(struct sk_buff *skb, const struct iovec *from,
skb->data_len += len; skb->data_len += len;
skb->len += len; skb->len += len;
skb->truesize += truesize; skb->truesize += truesize;
skb_shinfo(skb)->gso_type |= SKB_GSO_SHARED_FRAG;
atomic_add(truesize, &skb->sk->sk_wmem_alloc); atomic_add(truesize, &skb->sk->sk_wmem_alloc);
while (len) { while (len) {
int off = base & ~PAGE_MASK; int off = base & ~PAGE_MASK;
...@@ -1150,16 +1151,18 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile, ...@@ -1150,16 +1151,18 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile,
} }
if (gso.gso_type != VIRTIO_NET_HDR_GSO_NONE) { if (gso.gso_type != VIRTIO_NET_HDR_GSO_NONE) {
unsigned short gso_type = 0;
pr_debug("GSO!\n"); pr_debug("GSO!\n");
switch (gso.gso_type & ~VIRTIO_NET_HDR_GSO_ECN) { switch (gso.gso_type & ~VIRTIO_NET_HDR_GSO_ECN) {
case VIRTIO_NET_HDR_GSO_TCPV4: case VIRTIO_NET_HDR_GSO_TCPV4:
skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4; gso_type = SKB_GSO_TCPV4;
break; break;
case VIRTIO_NET_HDR_GSO_TCPV6: case VIRTIO_NET_HDR_GSO_TCPV6:
skb_shinfo(skb)->gso_type = SKB_GSO_TCPV6; gso_type = SKB_GSO_TCPV6;
break; break;
case VIRTIO_NET_HDR_GSO_UDP: case VIRTIO_NET_HDR_GSO_UDP:
skb_shinfo(skb)->gso_type = SKB_GSO_UDP; gso_type = SKB_GSO_UDP;
break; break;
default: default:
tun->dev->stats.rx_frame_errors++; tun->dev->stats.rx_frame_errors++;
...@@ -1168,9 +1171,10 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile, ...@@ -1168,9 +1171,10 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile,
} }
if (gso.gso_type & VIRTIO_NET_HDR_GSO_ECN) if (gso.gso_type & VIRTIO_NET_HDR_GSO_ECN)
skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_ECN; gso_type |= SKB_GSO_TCP_ECN;
skb_shinfo(skb)->gso_size = gso.gso_size; skb_shinfo(skb)->gso_size = gso.gso_size;
skb_shinfo(skb)->gso_type |= gso_type;
if (skb_shinfo(skb)->gso_size == 0) { if (skb_shinfo(skb)->gso_size == 0) {
tun->dev->stats.rx_frame_errors++; tun->dev->stats.rx_frame_errors++;
kfree_skb(skb); kfree_skb(skb);
......
...@@ -220,6 +220,7 @@ static void set_skb_frag(struct sk_buff *skb, struct page *page, ...@@ -220,6 +220,7 @@ static void set_skb_frag(struct sk_buff *skb, struct page *page,
skb->len += size; skb->len += size;
skb->truesize += PAGE_SIZE; skb->truesize += PAGE_SIZE;
skb_shinfo(skb)->nr_frags++; skb_shinfo(skb)->nr_frags++;
skb_shinfo(skb)->gso_type |= SKB_GSO_SHARED_FRAG;
*len -= size; *len -= size;
} }
...@@ -379,16 +380,18 @@ static void receive_buf(struct receive_queue *rq, void *buf, unsigned int len) ...@@ -379,16 +380,18 @@ static void receive_buf(struct receive_queue *rq, void *buf, unsigned int len)
ntohs(skb->protocol), skb->len, skb->pkt_type); ntohs(skb->protocol), skb->len, skb->pkt_type);
if (hdr->hdr.gso_type != VIRTIO_NET_HDR_GSO_NONE) { if (hdr->hdr.gso_type != VIRTIO_NET_HDR_GSO_NONE) {
unsigned short gso_type = 0;
pr_debug("GSO!\n"); pr_debug("GSO!\n");
switch (hdr->hdr.gso_type & ~VIRTIO_NET_HDR_GSO_ECN) { switch (hdr->hdr.gso_type & ~VIRTIO_NET_HDR_GSO_ECN) {
case VIRTIO_NET_HDR_GSO_TCPV4: case VIRTIO_NET_HDR_GSO_TCPV4:
skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4; gso_type = SKB_GSO_TCPV4;
break; break;
case VIRTIO_NET_HDR_GSO_UDP: case VIRTIO_NET_HDR_GSO_UDP:
skb_shinfo(skb)->gso_type = SKB_GSO_UDP; gso_type = SKB_GSO_UDP;
break; break;
case VIRTIO_NET_HDR_GSO_TCPV6: case VIRTIO_NET_HDR_GSO_TCPV6:
skb_shinfo(skb)->gso_type = SKB_GSO_TCPV6; gso_type = SKB_GSO_TCPV6;
break; break;
default: default:
net_warn_ratelimited("%s: bad gso type %u.\n", net_warn_ratelimited("%s: bad gso type %u.\n",
...@@ -397,7 +400,7 @@ static void receive_buf(struct receive_queue *rq, void *buf, unsigned int len) ...@@ -397,7 +400,7 @@ static void receive_buf(struct receive_queue *rq, void *buf, unsigned int len)
} }
if (hdr->hdr.gso_type & VIRTIO_NET_HDR_GSO_ECN) if (hdr->hdr.gso_type & VIRTIO_NET_HDR_GSO_ECN)
skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_ECN; gso_type |= SKB_GSO_TCP_ECN;
skb_shinfo(skb)->gso_size = hdr->hdr.gso_size; skb_shinfo(skb)->gso_size = hdr->hdr.gso_size;
if (skb_shinfo(skb)->gso_size == 0) { if (skb_shinfo(skb)->gso_size == 0) {
...@@ -405,6 +408,7 @@ static void receive_buf(struct receive_queue *rq, void *buf, unsigned int len) ...@@ -405,6 +408,7 @@ static void receive_buf(struct receive_queue *rq, void *buf, unsigned int len)
goto frame_err; goto frame_err;
} }
skb_shinfo(skb)->gso_type |= gso_type;
/* Header must be checked, and gso_segs computed. */ /* Header must be checked, and gso_segs computed. */
skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY; skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY;
skb_shinfo(skb)->gso_segs = 0; skb_shinfo(skb)->gso_segs = 0;
......
...@@ -307,6 +307,13 @@ enum { ...@@ -307,6 +307,13 @@ enum {
SKB_GSO_TCPV6 = 1 << 4, SKB_GSO_TCPV6 = 1 << 4,
SKB_GSO_FCOE = 1 << 5, SKB_GSO_FCOE = 1 << 5,
/* This indicates at least one fragment might be overwritten
* (as in vmsplice(), sendfile() ...)
* If we need to compute a TX checksum, we'll need to copy
* all frags to avoid possible bad checksum
*/
SKB_GSO_SHARED_FRAG = 1 << 6,
}; };
#if BITS_PER_LONG > 32 #if BITS_PER_LONG > 32
...@@ -2200,6 +2207,18 @@ static inline int skb_linearize(struct sk_buff *skb) ...@@ -2200,6 +2207,18 @@ static inline int skb_linearize(struct sk_buff *skb)
return skb_is_nonlinear(skb) ? __skb_linearize(skb) : 0; return skb_is_nonlinear(skb) ? __skb_linearize(skb) : 0;
} }
/**
* skb_has_shared_frag - can any frag be overwritten
* @skb: buffer to test
*
* Return true if the skb has at least one frag that might be modified
* by an external entity (as in vmsplice()/sendfile())
*/
static inline bool skb_has_shared_frag(const struct sk_buff *skb)
{
return skb_shinfo(skb)->gso_type & SKB_GSO_SHARED_FRAG;
}
/** /**
* skb_linearize_cow - make sure skb is linear and writable * skb_linearize_cow - make sure skb is linear and writable
* @skb: buffer to process * @skb: buffer to process
......
...@@ -2271,6 +2271,15 @@ int skb_checksum_help(struct sk_buff *skb) ...@@ -2271,6 +2271,15 @@ int skb_checksum_help(struct sk_buff *skb)
return -EINVAL; return -EINVAL;
} }
/* Before computing a checksum, we should make sure no frag could
* be modified by an external entity : checksum could be wrong.
*/
if (skb_has_shared_frag(skb)) {
ret = __skb_linearize(skb);
if (ret)
goto out;
}
offset = skb_checksum_start_offset(skb); offset = skb_checksum_start_offset(skb);
BUG_ON(offset >= skb_headlen(skb)); BUG_ON(offset >= skb_headlen(skb));
csum = skb_checksum(skb, offset, skb->len - offset, 0); csum = skb_checksum(skb, offset, skb->len - offset, 0);
......
...@@ -2340,6 +2340,8 @@ void skb_split(struct sk_buff *skb, struct sk_buff *skb1, const u32 len) ...@@ -2340,6 +2340,8 @@ void skb_split(struct sk_buff *skb, struct sk_buff *skb1, const u32 len)
{ {
int pos = skb_headlen(skb); int pos = skb_headlen(skb);
skb_shinfo(skb1)->gso_type = skb_shinfo(skb)->gso_type;
if (len < pos) /* Split line is inside header. */ if (len < pos) /* Split line is inside header. */
skb_split_inside_header(skb, skb1, len, pos); skb_split_inside_header(skb, skb1, len, pos);
else /* Second chunk has no header, nothing to copy. */ else /* Second chunk has no header, nothing to copy. */
...@@ -2845,6 +2847,8 @@ struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features) ...@@ -2845,6 +2847,8 @@ struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features)
skb_copy_from_linear_data_offset(skb, offset, skb_copy_from_linear_data_offset(skb, offset,
skb_put(nskb, hsize), hsize); skb_put(nskb, hsize), hsize);
skb_shinfo(nskb)->gso_type = skb_shinfo(skb)->gso_type;
while (pos < offset + len && i < nfrags) { while (pos < offset + len && i < nfrags) {
*frag = skb_shinfo(skb)->frags[i]; *frag = skb_shinfo(skb)->frags[i];
__skb_frag_ref(frag); __skb_frag_ref(frag);
......
...@@ -1306,6 +1306,7 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb, ...@@ -1306,6 +1306,7 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb,
SKB_GSO_UDP | SKB_GSO_UDP |
SKB_GSO_DODGY | SKB_GSO_DODGY |
SKB_GSO_TCP_ECN | SKB_GSO_TCP_ECN |
SKB_GSO_SHARED_FRAG |
0))) 0)))
goto out; goto out;
......
...@@ -738,7 +738,7 @@ static int ipgre_rcv(struct sk_buff *skb) ...@@ -738,7 +738,7 @@ static int ipgre_rcv(struct sk_buff *skb)
static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
{ {
struct ip_tunnel *tunnel = netdev_priv(dev); struct ip_tunnel *tunnel = netdev_priv(dev);
const struct iphdr *old_iph = ip_hdr(skb); const struct iphdr *old_iph;
const struct iphdr *tiph; const struct iphdr *tiph;
struct flowi4 fl4; struct flowi4 fl4;
u8 tos; u8 tos;
...@@ -756,6 +756,8 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev ...@@ -756,6 +756,8 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev
skb_checksum_help(skb)) skb_checksum_help(skb))
goto tx_error; goto tx_error;
old_iph = ip_hdr(skb);
if (dev->type == ARPHRD_ETHER) if (dev->type == ARPHRD_ETHER)
IPCB(skb)->flags = 0; IPCB(skb)->flags = 0;
......
...@@ -472,7 +472,7 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) ...@@ -472,7 +472,7 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
__be16 df = tiph->frag_off; __be16 df = tiph->frag_off;
struct rtable *rt; /* Route to the other host */ struct rtable *rt; /* Route to the other host */
struct net_device *tdev; /* Device to other host */ struct net_device *tdev; /* Device to other host */
const struct iphdr *old_iph = ip_hdr(skb); const struct iphdr *old_iph;
struct iphdr *iph; /* Our new IP header */ struct iphdr *iph; /* Our new IP header */
unsigned int max_headroom; /* The extra header space needed */ unsigned int max_headroom; /* The extra header space needed */
__be32 dst = tiph->daddr; __be32 dst = tiph->daddr;
...@@ -486,6 +486,8 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) ...@@ -486,6 +486,8 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
skb_checksum_help(skb)) skb_checksum_help(skb))
goto tx_error; goto tx_error;
old_iph = ip_hdr(skb);
if (tos & 1) if (tos & 1)
tos = old_iph->tos; tos = old_iph->tos;
......
...@@ -896,6 +896,8 @@ static ssize_t do_tcp_sendpages(struct sock *sk, struct page *page, int offset, ...@@ -896,6 +896,8 @@ static ssize_t do_tcp_sendpages(struct sock *sk, struct page *page, int offset,
skb_fill_page_desc(skb, i, page, offset, copy); skb_fill_page_desc(skb, i, page, offset, copy);
} }
skb_shinfo(skb)->gso_type |= SKB_GSO_SHARED_FRAG;
skb->len += copy; skb->len += copy;
skb->data_len += copy; skb->data_len += copy;
skb->truesize += copy; skb->truesize += copy;
...@@ -3032,6 +3034,7 @@ struct sk_buff *tcp_tso_segment(struct sk_buff *skb, ...@@ -3032,6 +3034,7 @@ struct sk_buff *tcp_tso_segment(struct sk_buff *skb,
SKB_GSO_DODGY | SKB_GSO_DODGY |
SKB_GSO_TCP_ECN | SKB_GSO_TCP_ECN |
SKB_GSO_TCPV6 | SKB_GSO_TCPV6 |
SKB_GSO_SHARED_FRAG |
0) || 0) ||
!(type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6)))) !(type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6))))
goto out; goto out;
......
...@@ -1240,13 +1240,13 @@ static bool tcp_shifted_skb(struct sock *sk, struct sk_buff *skb, ...@@ -1240,13 +1240,13 @@ static bool tcp_shifted_skb(struct sock *sk, struct sk_buff *skb,
*/ */
if (!skb_shinfo(prev)->gso_size) { if (!skb_shinfo(prev)->gso_size) {
skb_shinfo(prev)->gso_size = mss; skb_shinfo(prev)->gso_size = mss;
skb_shinfo(prev)->gso_type = sk->sk_gso_type; skb_shinfo(prev)->gso_type |= sk->sk_gso_type;
} }
/* CHECKME: To clear or not to clear? Mimics normal skb currently */ /* CHECKME: To clear or not to clear? Mimics normal skb currently */
if (skb_shinfo(skb)->gso_segs <= 1) { if (skb_shinfo(skb)->gso_segs <= 1) {
skb_shinfo(skb)->gso_size = 0; skb_shinfo(skb)->gso_size = 0;
skb_shinfo(skb)->gso_type = 0; skb_shinfo(skb)->gso_type &= SKB_GSO_SHARED_FRAG;
} }
/* Difference in this won't matter, both ACKed by the same cumul. ACK */ /* Difference in this won't matter, both ACKed by the same cumul. ACK */
......
...@@ -1133,6 +1133,7 @@ static void tcp_queue_skb(struct sock *sk, struct sk_buff *skb) ...@@ -1133,6 +1133,7 @@ static void tcp_queue_skb(struct sock *sk, struct sk_buff *skb)
static void tcp_set_skb_tso_segs(const struct sock *sk, struct sk_buff *skb, static void tcp_set_skb_tso_segs(const struct sock *sk, struct sk_buff *skb,
unsigned int mss_now) unsigned int mss_now)
{ {
skb_shinfo(skb)->gso_type &= SKB_GSO_SHARED_FRAG;
if (skb->len <= mss_now || !sk_can_gso(sk) || if (skb->len <= mss_now || !sk_can_gso(sk) ||
skb->ip_summed == CHECKSUM_NONE) { skb->ip_summed == CHECKSUM_NONE) {
/* Avoid the costly divide in the normal /* Avoid the costly divide in the normal
...@@ -1140,11 +1141,10 @@ static void tcp_set_skb_tso_segs(const struct sock *sk, struct sk_buff *skb, ...@@ -1140,11 +1141,10 @@ static void tcp_set_skb_tso_segs(const struct sock *sk, struct sk_buff *skb,
*/ */
skb_shinfo(skb)->gso_segs = 1; skb_shinfo(skb)->gso_segs = 1;
skb_shinfo(skb)->gso_size = 0; skb_shinfo(skb)->gso_size = 0;
skb_shinfo(skb)->gso_type = 0;
} else { } else {
skb_shinfo(skb)->gso_segs = DIV_ROUND_UP(skb->len, mss_now); skb_shinfo(skb)->gso_segs = DIV_ROUND_UP(skb->len, mss_now);
skb_shinfo(skb)->gso_size = mss_now; skb_shinfo(skb)->gso_size = mss_now;
skb_shinfo(skb)->gso_type = sk->sk_gso_type; skb_shinfo(skb)->gso_type |= sk->sk_gso_type;
} }
} }
......
...@@ -100,6 +100,7 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb, ...@@ -100,6 +100,7 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb,
SKB_GSO_DODGY | SKB_GSO_DODGY |
SKB_GSO_TCP_ECN | SKB_GSO_TCP_ECN |
SKB_GSO_TCPV6 | SKB_GSO_TCPV6 |
SKB_GSO_SHARED_FRAG |
0))) 0)))
goto out; goto out;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment