Commit 25e379c4 authored by David S. Miller

Merge branch 'udp_gso'

Tom Herbert says:

====================
udp: Generalize GSO for UDP tunnels

This patch set generalizes the UDP tunnel segmentation functions so
that they can work with various protocol encapsulations. The primary
change is to set the inner_protocol field in the skbuff when creating
the encapsulated packet, and then in skb_udp_tunnel_segment this data
is used to determine the function for segmenting the encapsulated
packet. The inner_protocol field is overloaded to take either an
Ethertype or IP protocol.

The inner_protocol is set on transmit using skb_set_inner_ipproto or
skb_set_inner_protocol functions. VXLAN and IP tunnels (for fou GSO)
were modified to call these.

Notes:
  - GSO for GRE/UDP where GRE checksum is enabled does not work.
    Handling this will require some special case code.
  - Software GSO now supports many varieties of encapsulation with
    SKB_GSO_UDP_TUNNEL{_CSUM}. We still need a mechanism to query
    for device support of particular combinations (I intend to
    add ndo_gso_check for that).
  - MPLS seems to be the only previous user of inner_protocol. I don't
    believe these patches can affect that. For supporting GSO with
    MPLS over UDP, the inner_protocol should be set using the
    helper functions in this patch.
  - GSO for L2TP/UDP should also be straightforward now.

v2:
  - Respin for Eric's restructuring of skbuff.

Tested GRE, IPIP, and SIT over fou as well as VXLAN. This was
done using 200 TCP_STREAMs in netperf.

 GRE
    IPv4, FOU, UDP checksum enabled
      TCP_STREAM TSO enabled on tun interface
        14.04% TX CPU utilization
        13.17% RX CPU utilization
        9211 Mbps
      TCP_STREAM TSO disabled on tun interface
        27.82% TX CPU utilization
        25.41% RX CPU utilization
        9336 Mbps
    IPv4, FOU, UDP checksum disabled
      TCP_STREAM TSO enabled on tun interface
        13.14% TX CPU utilization
        23.18% RX CPU utilization
        9277 Mbps
      TCP_STREAM TSO disabled on tun interface
        30.00% TX CPU utilization
        31.28% RX CPU utilization
        9327 Mbps

  IPIP
    FOU, UDP checksum enabled
      TCP_STREAM TSO enabled on tun interface
        15.28% TX CPU utilization
        13.92% RX CPU utilization
        9342 Mbps
      TCP_STREAM TSO disabled on tun interface
        27.82% TX CPU utilization
        25.41% RX CPU utilization
        9336 Mbps
    FOU, UDP checksum disabled
      TCP_STREAM TSO enabled on tun interface
        15.08% TX CPU utilization
        24.64% RX CPU utilization
        9226 Mbps
      TCP_STREAM TSO disabled on tun interface
        30.00% TX CPU utilization
        31.28% RX CPU utilization
        9327 Mbps

  SIT
    FOU, UDP checksum enabled
      TCP_STREAM TSO enabled on tun interface
        14.47% TX CPU utilization
        14.58% RX CPU utilization
        9106 Mbps
      TCP_STREAM TSO disabled on tun interface
        31.82% TX CPU utilization
        30.82% RX CPU utilization
        9204 Mbps
    FOU, UDP checksum disabled
      TCP_STREAM TSO enabled on tun interface
        15.70% TX CPU utilization
        27.93% RX CPU utilization
        9097 Mbps
      TCP_STREAM TSO disabled on tun interface
        33.48% TX CPU utilization
        37.36% RX CPU utilization
        9197 Mbps

   VXLAN
      TCP_STREAM TSO enabled on tun interface
        16.42% TX CPU utilization
        23.66% RX CPU utilization
        9081 Mbps
      TCP_STREAM TSO disabled on tun interface
        30.32% TX CPU utilization
        30.55% RX CPU utilization
        9185 Mbps

   Baseline (no encap, TSO and LRO enabled)
      TCP_STREAM
        11.85% TX CPU utilization
        15.13% RX CPU utilization
        9452 Mbps
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents f44d61cd 996c9fd1
...@@ -1610,6 +1610,8 @@ static int vxlan6_xmit_skb(struct vxlan_sock *vs, ...@@ -1610,6 +1610,8 @@ static int vxlan6_xmit_skb(struct vxlan_sock *vs,
vxh->vx_flags = htonl(VXLAN_FLAGS); vxh->vx_flags = htonl(VXLAN_FLAGS);
vxh->vx_vni = vni; vxh->vx_vni = vni;
skb_set_inner_protocol(skb, htons(ETH_P_TEB));
udp_tunnel6_xmit_skb(vs->sock, dst, skb, dev, saddr, daddr, prio, udp_tunnel6_xmit_skb(vs->sock, dst, skb, dev, saddr, daddr, prio,
ttl, src_port, dst_port); ttl, src_port, dst_port);
return 0; return 0;
...@@ -1652,6 +1654,8 @@ int vxlan_xmit_skb(struct vxlan_sock *vs, ...@@ -1652,6 +1654,8 @@ int vxlan_xmit_skb(struct vxlan_sock *vs,
vxh->vx_flags = htonl(VXLAN_FLAGS); vxh->vx_flags = htonl(VXLAN_FLAGS);
vxh->vx_vni = vni; vxh->vx_vni = vni;
skb_set_inner_protocol(skb, htons(ETH_P_TEB));
return udp_tunnel_xmit_skb(vs->sock, rt, skb, src, dst, tos, return udp_tunnel_xmit_skb(vs->sock, rt, skb, src, dst, tos,
ttl, df, src_port, dst_port, xnet); ttl, df, src_port, dst_port, xnet);
} }
......
...@@ -596,7 +596,8 @@ struct sk_buff { ...@@ -596,7 +596,8 @@ struct sk_buff {
__u8 ndisc_nodetype:2; __u8 ndisc_nodetype:2;
#endif #endif
__u8 ipvs_property:1; __u8 ipvs_property:1;
/* 5 or 7 bit hole */ __u8 inner_protocol_type:1;
/* 4 or 6 bit hole */
#ifdef CONFIG_NET_SCHED #ifdef CONFIG_NET_SCHED
__u16 tc_index; /* traffic control index */ __u16 tc_index; /* traffic control index */
...@@ -632,7 +633,11 @@ struct sk_buff { ...@@ -632,7 +633,11 @@ struct sk_buff {
__u32 reserved_tailroom; __u32 reserved_tailroom;
}; };
__be16 inner_protocol; union {
__be16 inner_protocol;
__u8 inner_ipproto;
};
__u16 inner_transport_header; __u16 inner_transport_header;
__u16 inner_network_header; __u16 inner_network_header;
__u16 inner_mac_header; __u16 inner_mac_header;
...@@ -1762,6 +1767,23 @@ static inline void skb_reserve(struct sk_buff *skb, int len) ...@@ -1762,6 +1767,23 @@ static inline void skb_reserve(struct sk_buff *skb, int len)
skb->tail += len; skb->tail += len;
} }
/* Values stored in skb->inner_protocol_type by the skb_set_inner_*() helpers:
 * they tell readers whether the inner protocol was recorded through
 * skb->inner_protocol (ENCAP_TYPE_ETHER) or skb->inner_ipproto
 * (ENCAP_TYPE_IPPROTO).
 */
#define ENCAP_TYPE_ETHER 0
#define ENCAP_TYPE_IPPROTO 1
/* Record the encapsulated payload's protocol in skb->inner_protocol and mark
 * the field as holding an Ethertype-style value (ENCAP_TYPE_ETHER).
 *
 * @skb:      buffer being encapsulated
 * @protocol: 16-bit big-endian protocol value of the inner packet
 *            (callers in this change pass e.g. htons(ETH_P_TEB))
 */
static inline void skb_set_inner_protocol(struct sk_buff *skb,
__be16 protocol)
{
skb->inner_protocol = protocol;
skb->inner_protocol_type = ENCAP_TYPE_ETHER;
}
/* Record the encapsulated payload's protocol in skb->inner_ipproto and mark
 * the field as holding an IP protocol number (ENCAP_TYPE_IPPROTO).
 *
 * @skb:     buffer being encapsulated
 * @ipproto: 8-bit IP protocol number of the inner packet
 *           (callers in this change pass e.g. IPPROTO_IPIP, IPPROTO_IPV6)
 */
static inline void skb_set_inner_ipproto(struct sk_buff *skb,
__u8 ipproto)
{
skb->inner_ipproto = ipproto;
skb->inner_protocol_type = ENCAP_TYPE_IPPROTO;
}
static inline void skb_reset_inner_headers(struct sk_buff *skb) static inline void skb_reset_inner_headers(struct sk_buff *skb)
{ {
skb->inner_mac_header = skb->mac_header; skb->inner_mac_header = skb->mac_header;
......
...@@ -239,7 +239,8 @@ int udp_ioctl(struct sock *sk, int cmd, unsigned long arg); ...@@ -239,7 +239,8 @@ int udp_ioctl(struct sock *sk, int cmd, unsigned long arg);
int udp_disconnect(struct sock *sk, int flags); int udp_disconnect(struct sock *sk, int flags);
unsigned int udp_poll(struct file *file, struct socket *sock, poll_table *wait); unsigned int udp_poll(struct file *file, struct socket *sock, poll_table *wait);
struct sk_buff *skb_udp_tunnel_segment(struct sk_buff *skb, struct sk_buff *skb_udp_tunnel_segment(struct sk_buff *skb,
netdev_features_t features); netdev_features_t features,
bool is_ipv6);
int udp_lib_getsockopt(struct sock *sk, int level, int optname, int udp_lib_getsockopt(struct sock *sk, int level, int optname,
char __user *optval, int __user *optlen); char __user *optval, int __user *optlen);
int udp_lib_setsockopt(struct sock *sk, int level, int optname, int udp_lib_setsockopt(struct sock *sk, int level, int optname,
......
...@@ -241,6 +241,8 @@ static void __gre_xmit(struct sk_buff *skb, struct net_device *dev, ...@@ -241,6 +241,8 @@ static void __gre_xmit(struct sk_buff *skb, struct net_device *dev,
/* Push GRE header. */ /* Push GRE header. */
gre_build_header(skb, &tpi, tunnel->tun_hlen); gre_build_header(skb, &tpi, tunnel->tun_hlen);
skb_set_inner_protocol(skb, tpi.proto);
ip_tunnel_xmit(skb, dev, tnl_params, tnl_params->protocol); ip_tunnel_xmit(skb, dev, tnl_params, tnl_params->protocol);
} }
......
...@@ -224,6 +224,8 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) ...@@ -224,6 +224,8 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
if (IS_ERR(skb)) if (IS_ERR(skb))
goto out; goto out;
skb_set_inner_ipproto(skb, IPPROTO_IPIP);
ip_tunnel_xmit(skb, dev, tiph, tiph->protocol); ip_tunnel_xmit(skb, dev, tiph, tiph->protocol);
return NETDEV_TX_OK; return NETDEV_TX_OK;
......
...@@ -25,8 +25,11 @@ struct udp_offload_priv { ...@@ -25,8 +25,11 @@ struct udp_offload_priv {
struct udp_offload_priv __rcu *next; struct udp_offload_priv __rcu *next;
}; };
struct sk_buff *skb_udp_tunnel_segment(struct sk_buff *skb, static struct sk_buff *__skb_udp_tunnel_segment(struct sk_buff *skb,
netdev_features_t features) netdev_features_t features,
struct sk_buff *(*gso_inner_segment)(struct sk_buff *skb,
netdev_features_t features),
__be16 new_protocol)
{ {
struct sk_buff *segs = ERR_PTR(-EINVAL); struct sk_buff *segs = ERR_PTR(-EINVAL);
u16 mac_offset = skb->mac_header; u16 mac_offset = skb->mac_header;
...@@ -48,7 +51,7 @@ struct sk_buff *skb_udp_tunnel_segment(struct sk_buff *skb, ...@@ -48,7 +51,7 @@ struct sk_buff *skb_udp_tunnel_segment(struct sk_buff *skb,
skb_reset_mac_header(skb); skb_reset_mac_header(skb);
skb_set_network_header(skb, skb_inner_network_offset(skb)); skb_set_network_header(skb, skb_inner_network_offset(skb));
skb->mac_len = skb_inner_network_offset(skb); skb->mac_len = skb_inner_network_offset(skb);
skb->protocol = htons(ETH_P_TEB); skb->protocol = new_protocol;
need_csum = !!(skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL_CSUM); need_csum = !!(skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL_CSUM);
if (need_csum) if (need_csum)
...@@ -56,7 +59,7 @@ struct sk_buff *skb_udp_tunnel_segment(struct sk_buff *skb, ...@@ -56,7 +59,7 @@ struct sk_buff *skb_udp_tunnel_segment(struct sk_buff *skb,
/* segment inner packet. */ /* segment inner packet. */
enc_features = skb->dev->hw_enc_features & netif_skb_features(skb); enc_features = skb->dev->hw_enc_features & netif_skb_features(skb);
segs = skb_mac_gso_segment(skb, enc_features); segs = gso_inner_segment(skb, enc_features);
if (IS_ERR_OR_NULL(segs)) { if (IS_ERR_OR_NULL(segs)) {
skb_gso_error_unwind(skb, protocol, tnl_hlen, mac_offset, skb_gso_error_unwind(skb, protocol, tnl_hlen, mac_offset,
mac_len); mac_len);
...@@ -101,6 +104,44 @@ struct sk_buff *skb_udp_tunnel_segment(struct sk_buff *skb, ...@@ -101,6 +104,44 @@ struct sk_buff *skb_udp_tunnel_segment(struct sk_buff *skb,
return segs; return segs;
} }
/* Segment a UDP-tunnelled GSO packet, choosing the inner segmentation
 * routine from skb->inner_protocol_type.
 *
 * @skb:      encapsulated packet to segment
 * @features: device features passed through to __skb_udp_tunnel_segment()
 * @is_ipv6:  selects which offload table (inet6_offloads vs inet_offloads)
 *            is consulted when the inner protocol is an IP protocol number
 *
 * Returns whatever __skb_udp_tunnel_segment() returns, or ERR_PTR(-EINVAL)
 * when the inner protocol type is unknown or no gso_segment callback is
 * registered for the inner IP protocol.
 */
struct sk_buff *skb_udp_tunnel_segment(struct sk_buff *skb,
netdev_features_t features,
bool is_ipv6)
{
/* Default: keep the current skb->protocol unless the Ethertype case below
 * overrides it.
 */
__be16 protocol = skb->protocol;
const struct net_offload **offloads;
const struct net_offload *ops;
struct sk_buff *segs = ERR_PTR(-EINVAL);
struct sk_buff *(*gso_inner_segment)(struct sk_buff *skb,
netdev_features_t features);
/* The read-side critical section spans both the rcu_dereference() of the
 * offload table entry and the call through the callback taken from it.
 */
rcu_read_lock();
switch (skb->inner_protocol_type) {
case ENCAP_TYPE_ETHER:
/* Inner protocol is an Ethertype: hand it to the helper as the new
 * skb->protocol and segment via skb_mac_gso_segment().
 */
protocol = skb->inner_protocol;
gso_inner_segment = skb_mac_gso_segment;
break;
case ENCAP_TYPE_IPPROTO:
/* Inner protocol is an IP protocol number: use it as the index into the
 * offload table matching the address family.
 */
offloads = is_ipv6 ? inet6_offloads : inet_offloads;
ops = rcu_dereference(offloads[skb->inner_ipproto]);
if (!ops || !ops->callbacks.gso_segment)
goto out_unlock;
gso_inner_segment = ops->callbacks.gso_segment;
break;
default:
/* Unknown encapsulation type: fall out with segs == ERR_PTR(-EINVAL). */
goto out_unlock;
}
segs = __skb_udp_tunnel_segment(skb, features, gso_inner_segment,
protocol);
out_unlock:
rcu_read_unlock();
return segs;
}
static struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb, static struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb,
netdev_features_t features) netdev_features_t features)
{ {
...@@ -113,7 +154,7 @@ static struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb, ...@@ -113,7 +154,7 @@ static struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb,
if (skb->encapsulation && if (skb->encapsulation &&
(skb_shinfo(skb)->gso_type & (skb_shinfo(skb)->gso_type &
(SKB_GSO_UDP_TUNNEL|SKB_GSO_UDP_TUNNEL_CSUM))) { (SKB_GSO_UDP_TUNNEL|SKB_GSO_UDP_TUNNEL_CSUM))) {
segs = skb_udp_tunnel_segment(skb, features); segs = skb_udp_tunnel_segment(skb, features, false);
goto out; goto out;
} }
......
...@@ -616,6 +616,7 @@ static netdev_tx_t ip6gre_xmit2(struct sk_buff *skb, ...@@ -616,6 +616,7 @@ static netdev_tx_t ip6gre_xmit2(struct sk_buff *skb,
int err = -1; int err = -1;
u8 proto; u8 proto;
struct sk_buff *new_skb; struct sk_buff *new_skb;
__be16 protocol;
if (dev->type == ARPHRD_ETHER) if (dev->type == ARPHRD_ETHER)
IPCB(skb)->flags = 0; IPCB(skb)->flags = 0;
...@@ -732,8 +733,9 @@ static netdev_tx_t ip6gre_xmit2(struct sk_buff *skb, ...@@ -732,8 +733,9 @@ static netdev_tx_t ip6gre_xmit2(struct sk_buff *skb,
ipv6h->daddr = fl6->daddr; ipv6h->daddr = fl6->daddr;
((__be16 *)(ipv6h + 1))[0] = tunnel->parms.o_flags; ((__be16 *)(ipv6h + 1))[0] = tunnel->parms.o_flags;
((__be16 *)(ipv6h + 1))[1] = (dev->type == ARPHRD_ETHER) ? protocol = (dev->type == ARPHRD_ETHER) ?
htons(ETH_P_TEB) : skb->protocol; htons(ETH_P_TEB) : skb->protocol;
((__be16 *)(ipv6h + 1))[1] = protocol;
if (tunnel->parms.o_flags&(GRE_KEY|GRE_CSUM|GRE_SEQ)) { if (tunnel->parms.o_flags&(GRE_KEY|GRE_CSUM|GRE_SEQ)) {
__be32 *ptr = (__be32 *)(((u8 *)ipv6h) + tunnel->hlen - 4); __be32 *ptr = (__be32 *)(((u8 *)ipv6h) + tunnel->hlen - 4);
...@@ -754,6 +756,8 @@ static netdev_tx_t ip6gre_xmit2(struct sk_buff *skb, ...@@ -754,6 +756,8 @@ static netdev_tx_t ip6gre_xmit2(struct sk_buff *skb,
} }
} }
skb_set_inner_protocol(skb, protocol);
ip6tunnel_xmit(skb, dev); ip6tunnel_xmit(skb, dev);
if (ndst) if (ndst)
ip6_tnl_dst_store(tunnel, ndst); ip6_tnl_dst_store(tunnel, ndst);
......
...@@ -982,6 +982,8 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, ...@@ -982,6 +982,8 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,
goto tx_error; goto tx_error;
} }
skb_set_inner_ipproto(skb, IPPROTO_IPV6);
err = iptunnel_xmit(skb->sk, rt, skb, fl4.saddr, fl4.daddr, err = iptunnel_xmit(skb->sk, rt, skb, fl4.saddr, fl4.daddr,
protocol, tos, ttl, df, protocol, tos, ttl, df,
!net_eq(tunnel->net, dev_net(dev))); !net_eq(tunnel->net, dev_net(dev)));
...@@ -1006,6 +1008,8 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) ...@@ -1006,6 +1008,8 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
if (IS_ERR(skb)) if (IS_ERR(skb))
goto out; goto out;
skb_set_inner_ipproto(skb, IPPROTO_IPIP);
ip_tunnel_xmit(skb, dev, tiph, IPPROTO_IPIP); ip_tunnel_xmit(skb, dev, tiph, IPPROTO_IPIP);
return NETDEV_TX_OK; return NETDEV_TX_OK;
out: out:
......
...@@ -58,7 +58,7 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb, ...@@ -58,7 +58,7 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb,
if (skb->encapsulation && skb_shinfo(skb)->gso_type & if (skb->encapsulation && skb_shinfo(skb)->gso_type &
(SKB_GSO_UDP_TUNNEL|SKB_GSO_UDP_TUNNEL_CSUM)) (SKB_GSO_UDP_TUNNEL|SKB_GSO_UDP_TUNNEL_CSUM))
segs = skb_udp_tunnel_segment(skb, features); segs = skb_udp_tunnel_segment(skb, features, true);
else { else {
const struct ipv6hdr *ipv6h; const struct ipv6hdr *ipv6h;
struct udphdr *uh; struct udphdr *uh;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment