Commit 2eba61d5, authored by Paul Durrant, committed by David S. Miller

xen-netback: add support for IPv6 checksum offload from guest

For performance of VM to VM traffic on a single host it is better to avoid
calculation of the TCP/UDP checksum in the sending frontend. To allow this,
this patch adds the code necessary to set up partial checksum for IPv6 packets
and the xenstore flag feature-ipv6-csum-offload to advertise that fact to
frontends.
Signed-off-by: Paul Durrant <paul.durrant@citrix.com>
Cc: Wei Liu <wei.liu2@citrix.com>
Cc: David Vrabel <david.vrabel@citrix.com>
Cc: Ian Campbell <ian.campbell@citrix.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent 146c8a77
...@@ -109,15 +109,12 @@ static inline unsigned long idx_to_kaddr(struct xenvif *vif, ...@@ -109,15 +109,12 @@ static inline unsigned long idx_to_kaddr(struct xenvif *vif,
return (unsigned long)pfn_to_kaddr(idx_to_pfn(vif, idx)); return (unsigned long)pfn_to_kaddr(idx_to_pfn(vif, idx));
} }
/* /* This is a minimum size for the linear area to avoid lots of
* This is the amount of packet we copy rather than map, so that the * calls to __pskb_pull_tail() as we set up checksum offsets. The
* guest can't fiddle with the contents of the headers while we do * value 128 was chosen as it covers all IPv4 and most likely
* packet processing on them (netfilter, routing, etc). * IPv6 headers.
*/ */
#define PKT_PROT_LEN (ETH_HLEN + \ #define PKT_PROT_LEN 128
VLAN_HLEN + \
sizeof(struct iphdr) + MAX_IPOPTLEN + \
sizeof(struct tcphdr) + MAX_TCP_OPTION_SPACE)
static u16 frag_get_pending_idx(skb_frag_t *frag) static u16 frag_get_pending_idx(skb_frag_t *frag)
{ {
...@@ -1118,61 +1115,74 @@ static int xenvif_set_skb_gso(struct xenvif *vif, ...@@ -1118,61 +1115,74 @@ static int xenvif_set_skb_gso(struct xenvif *vif,
return 0; return 0;
} }
/* maybe_pull_tail() - grow the linear area of @skb when it is shorter
 * than @len.
 *
 * Nothing is done unless the skb is nonlinear and its current head
 * length is below @len. When a pull is performed, the head is extended
 * to min(skb->len, MAX_TCP_HEADER) bytes rather than to @len.
 *
 * NOTE(review): the pull target does not take @len into account, so if
 * @len is larger than MAX_TCP_HEADER the head may still be shorter than
 * @len on return - confirm that callers never rely on more than that.
 * NOTE(review): the result of __pskb_pull_tail() is not checked and this
 * helper returns void, so callers cannot tell whether the pull succeeded.
 */
static int checksum_setup(struct xenvif *vif, struct sk_buff *skb) static inline void maybe_pull_tail(struct sk_buff *skb, unsigned int len)
{
if (skb_is_nonlinear(skb) && skb_headlen(skb) < len) {
/* If we need to pullup then pullup to the max, so we
* won't need to do it again.
*/
/* Never ask for more bytes than the packet actually holds. */
int target = min_t(int, skb->len, MAX_TCP_HEADER);
/* The second argument is the number of extra bytes to pull. */
__pskb_pull_tail(skb, target - skb_headlen(skb));
}
}
static int checksum_setup_ip(struct xenvif *vif, struct sk_buff *skb,
int recalculate_partial_csum)
{ {
struct iphdr *iph; struct iphdr *iph = (void *)skb->data;
unsigned int header_size;
unsigned int off;
int err = -EPROTO; int err = -EPROTO;
int recalculate_partial_csum = 0;
/* off = sizeof(struct iphdr);
* A GSO SKB must be CHECKSUM_PARTIAL. However some buggy
* peers can fail to set NETRXF_csum_blank when sending a GSO
* frame. In this case force the SKB to CHECKSUM_PARTIAL and
* recalculate the partial checksum.
*/
if (skb->ip_summed != CHECKSUM_PARTIAL && skb_is_gso(skb)) {
vif->rx_gso_checksum_fixup++;
skb->ip_summed = CHECKSUM_PARTIAL;
recalculate_partial_csum = 1;
}
/* A non-CHECKSUM_PARTIAL SKB does not require setup. */ header_size = skb->network_header + off + MAX_IPOPTLEN;
if (skb->ip_summed != CHECKSUM_PARTIAL) maybe_pull_tail(skb, header_size);
return 0;
if (skb->protocol != htons(ETH_P_IP)) off = iph->ihl * 4;
goto out;
iph = (void *)skb->data;
switch (iph->protocol) { switch (iph->protocol) {
case IPPROTO_TCP: case IPPROTO_TCP:
if (!skb_partial_csum_set(skb, 4 * iph->ihl, if (!skb_partial_csum_set(skb, off,
offsetof(struct tcphdr, check))) offsetof(struct tcphdr, check)))
goto out; goto out;
if (recalculate_partial_csum) { if (recalculate_partial_csum) {
struct tcphdr *tcph = tcp_hdr(skb); struct tcphdr *tcph = tcp_hdr(skb);
header_size = skb->network_header +
off +
sizeof(struct tcphdr);
maybe_pull_tail(skb, header_size);
tcph->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr, tcph->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr,
skb->len - iph->ihl*4, skb->len - off,
IPPROTO_TCP, 0); IPPROTO_TCP, 0);
} }
break; break;
case IPPROTO_UDP: case IPPROTO_UDP:
if (!skb_partial_csum_set(skb, 4 * iph->ihl, if (!skb_partial_csum_set(skb, off,
offsetof(struct udphdr, check))) offsetof(struct udphdr, check)))
goto out; goto out;
if (recalculate_partial_csum) { if (recalculate_partial_csum) {
struct udphdr *udph = udp_hdr(skb); struct udphdr *udph = udp_hdr(skb);
header_size = skb->network_header +
off +
sizeof(struct udphdr);
maybe_pull_tail(skb, header_size);
udph->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr, udph->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr,
skb->len - iph->ihl*4, skb->len - off,
IPPROTO_UDP, 0); IPPROTO_UDP, 0);
} }
break; break;
default: default:
if (net_ratelimit()) if (net_ratelimit())
netdev_err(vif->dev, netdev_err(vif->dev,
"Attempting to checksum a non-TCP/UDP packet, dropping a protocol %d packet\n", "Attempting to checksum a non-TCP/UDP packet, "
"dropping a protocol %d packet\n",
iph->protocol); iph->protocol);
goto out; goto out;
} }
...@@ -1183,6 +1193,158 @@ static int checksum_setup(struct xenvif *vif, struct sk_buff *skb) ...@@ -1183,6 +1193,158 @@ static int checksum_setup(struct xenvif *vif, struct sk_buff *skb)
return err; return err;
} }
/*
 * Try to make the @hdr_len bytes that start @off bytes past skb->data
 * available in the linear area, pulling from the fragments if needed.
 *
 * Returns true only if those bytes are in the linear area afterwards, so
 * the caller may safely dereference a header located at skb->data + @off.
 * Any pointer into the skb obtained before this call must be re-read
 * afterwards, because the pull is allowed to move the packet data.
 */
static bool checksum_ipv6_may_pull(struct sk_buff *skb, unsigned int off,
				   unsigned int hdr_len)
{
	maybe_pull_tail(skb, skb->network_header + off + hdr_len);

	return skb_headlen(skb) >= off + hdr_len;
}

/*
 * checksum_setup_ipv6() - set up partial checksum offsets for an IPv6 packet.
 * @vif: interface the packet was received on (used for error logging).
 * @skb: packet whose data pointer is at the IPv6 header.
 * @recalculate_partial_csum: non-zero if the pseudo-header checksum must be
 *	written into the TCP/UDP checksum field.
 *
 * Walks the hop-by-hop, destination-options, routing and AH extension
 * headers to find the transport header, then records the checksum offsets
 * with skb_partial_csum_set().
 *
 * Returns 0 on success, or -EPROTO if the header chain cannot be parsed,
 * the packet is a fragment, or the transport protocol is neither TCP nor
 * UDP.
 */
static int checksum_setup_ipv6(struct xenvif *vif, struct sk_buff *skb,
			       int recalculate_partial_csum)
{
	int err = -EPROTO;
	struct ipv6hdr *ipv6h;
	u8 nexthdr;
	unsigned int header_size;
	unsigned int off;
	unsigned int len;
	/* Must start out false: it is only ever set for fragment headers. */
	bool fragment = false;
	bool done = false;

	off = sizeof(struct ipv6hdr);

	if (!checksum_ipv6_may_pull(skb, 0, off))
		goto parse_failed;

	/* Only take the header pointer once the pull has happened. */
	ipv6h = (void *)skb->data;
	nexthdr = ipv6h->nexthdr;

	/* Cache the bound so the loop never touches a stale header pointer. */
	len = sizeof(struct ipv6hdr) + ntohs(ipv6h->payload_len);

	while (off <= len && !done) {
		switch (nexthdr) {
		case IPPROTO_DSTOPTS:
		case IPPROTO_HOPOPTS:
		case IPPROTO_ROUTING: {
			struct ipv6_opt_hdr *hp;

			if (!checksum_ipv6_may_pull(skb, off,
						    sizeof(struct ipv6_opt_hdr)))
				goto parse_failed;

			hp = (void *)(skb->data + off);
			nexthdr = hp->nexthdr;
			off += ipv6_optlen(hp);
			break;
		}
		case IPPROTO_AH: {
			struct ip_auth_hdr *hp;

			if (!checksum_ipv6_may_pull(skb, off,
						    sizeof(struct ip_auth_hdr)))
				goto parse_failed;

			hp = (void *)(skb->data + off);
			nexthdr = hp->nexthdr;
			/* AH length is in 32-bit words, minus two. */
			off += (hp->hdrlen+2)<<2;
			break;
		}
		case IPPROTO_FRAGMENT:
			fragment = true;
			/* fall through */
		default:
			done = true;
			break;
		}
	}

	/* Ran past the payload without reaching an upper-layer header. */
	if (!done)
		goto parse_failed;

	if (fragment) {
		if (net_ratelimit())
			netdev_err(vif->dev, "Packet is a fragment!\n");
		goto out;
	}

	switch (nexthdr) {
	case IPPROTO_TCP:
		if (!skb_partial_csum_set(skb, off,
					  offsetof(struct tcphdr, check)))
			goto out;

		if (recalculate_partial_csum) {
			struct tcphdr *tcph;

			header_size = skb->network_header +
				off +
				sizeof(struct tcphdr);
			maybe_pull_tail(skb, header_size);

			/* Re-read both headers: the pull may have moved them. */
			ipv6h = (void *)skb->data;
			tcph = tcp_hdr(skb);

			tcph->check = ~csum_ipv6_magic(&ipv6h->saddr,
						       &ipv6h->daddr,
						       skb->len - off,
						       IPPROTO_TCP, 0);
		}
		break;
	case IPPROTO_UDP:
		if (!skb_partial_csum_set(skb, off,
					  offsetof(struct udphdr, check)))
			goto out;

		if (recalculate_partial_csum) {
			struct udphdr *udph;

			header_size = skb->network_header +
				off +
				sizeof(struct udphdr);
			maybe_pull_tail(skb, header_size);

			/* Re-read both headers: the pull may have moved them. */
			ipv6h = (void *)skb->data;
			udph = udp_hdr(skb);

			udph->check = ~csum_ipv6_magic(&ipv6h->saddr,
						       &ipv6h->daddr,
						       skb->len - off,
						       IPPROTO_UDP, 0);
		}
		break;
	default:
		if (net_ratelimit())
			netdev_err(vif->dev,
				   "Attempting to checksum a non-TCP/UDP packet, "
				   "dropping a protocol %d packet\n",
				   nexthdr);
		goto out;
	}

	err = 0;

out:
	return err;

parse_failed:
	if (net_ratelimit())
		netdev_err(vif->dev, "Failed to parse packet header\n");
	return err;
}
/*
 * checksum_setup() - prepare checksum offload state for a guest packet.
 * @vif: interface the packet arrived on.
 * @skb: packet to set up.
 *
 * Dispatches to the IPv4 or IPv6 helper according to skb->protocol.
 * Returns 0 when no setup is needed or setup succeeded, the helper's
 * error otherwise, and -EPROTO for any other protocol.
 */
static int checksum_setup(struct xenvif *vif, struct sk_buff *skb)
{
	int fixup_csum = 0;

	if (skb->ip_summed != CHECKSUM_PARTIAL) {
		/* A non-CHECKSUM_PARTIAL, non-GSO SKB needs no setup. */
		if (!skb_is_gso(skb))
			return 0;

		/* A GSO SKB has to be CHECKSUM_PARTIAL, but buggy peers
		 * may send a GSO frame without NETRXF_csum_blank set.
		 * Force CHECKSUM_PARTIAL here and have the partial
		 * checksum recalculated.
		 */
		vif->rx_gso_checksum_fixup++;
		skb->ip_summed = CHECKSUM_PARTIAL;
		fixup_csum = 1;
	}

	if (skb->protocol == htons(ETH_P_IP))
		return checksum_setup_ip(vif, skb, fixup_csum);

	if (skb->protocol == htons(ETH_P_IPV6))
		return checksum_setup_ipv6(vif, skb, fixup_csum);

	/* Only IPv4 and IPv6 can be set up for checksum offload. */
	return -EPROTO;
}
static bool tx_credit_exceeded(struct xenvif *vif, unsigned size) static bool tx_credit_exceeded(struct xenvif *vif, unsigned size)
{ {
unsigned long now = jiffies; unsigned long now = jiffies;
...@@ -1428,12 +1590,7 @@ static int xenvif_tx_submit(struct xenvif *vif, int budget) ...@@ -1428,12 +1590,7 @@ static int xenvif_tx_submit(struct xenvif *vif, int budget)
xenvif_fill_frags(vif, skb); xenvif_fill_frags(vif, skb);
/* if (skb_is_nonlinear(skb) && skb_headlen(skb) < PKT_PROT_LEN) {
* If the initial fragment was < PKT_PROT_LEN then
* pull through some bytes from the other fragments to
* increase the linear region to PKT_PROT_LEN bytes.
*/
if (skb_headlen(skb) < PKT_PROT_LEN && skb_is_nonlinear(skb)) {
int target = min_t(int, skb->len, PKT_PROT_LEN); int target = min_t(int, skb->len, PKT_PROT_LEN);
__pskb_pull_tail(skb, target - skb_headlen(skb)); __pskb_pull_tail(skb, target - skb_headlen(skb));
} }
......
...@@ -105,6 +105,15 @@ static int netback_probe(struct xenbus_device *dev, ...@@ -105,6 +105,15 @@ static int netback_probe(struct xenbus_device *dev,
goto abort_transaction; goto abort_transaction;
} }
/* We support partial checksum setup for IPv6 packets */
err = xenbus_printf(xbt, dev->nodename,
"feature-ipv6-csum-offload",
"%d", 1);
if (err) {
message = "writing feature-ipv6-csum-offload";
goto abort_transaction;
}
/* We support rx-copy path. */ /* We support rx-copy path. */
err = xenbus_printf(xbt, dev->nodename, err = xenbus_printf(xbt, dev->nodename,
"feature-rx-copy", "%d", 1); "feature-rx-copy", "%d", 1);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment