Commit d32aebfd authored by David S. Miller

Merge branch 'gso_forward'

Florian Westphal says:

====================
net: ip: push gso skb forwarding handling down the stack

Turns out doing the segmentation in the forwarding path was not a bright
idea; there are corner cases where this has unintended side-effects.

This series pushes the segmentation down the stack, into the IP output path.

After this, the netif_skb_dev_features() function can be removed again;
it was only added so the forwarding path could fetch the features of the
output device, and after the pushdown we can simply use skb->dev.
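
A minimal sketch of the call-site change (illustration only, not part of
the diff below; the removed helper took the output device explicitly
because skb->dev still pointed at the input device in the forwarding path):

    /* old forwarding slow path: must name the output device */
    features = netif_skb_dev_features(skb, skb_dst(skb)->dev);

    /* after the pushdown: segmentation runs in ip_finish_output(),
     * where skb->dev already is the output device */
    features = netif_skb_features(skb);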

Tested with the following setup:

host -> kvm_router -> kvm_host
  mtu 1500        mtu 1280

- 'host' has a route to kvm_host with a locked mtu of 1500
- gso/gro enabled on all interfaces

Did tests with all of the following combinations:
- netfilter conntrack off and on on kvm_router
- virtio-net and e1000 driver on kvm_router
- tcp and udp bulk xmit from host to kvm_host

For tcp, I added TCPMSS mangling on kvm_host to make it lie about the tcp mss.

Also added a dummy '-t mangle -A POSTROUTING -p udp -f'
rule to make sure no udp fragments are seen in the 'conntrack on'
and 'virtio-net' cases.

Also checked (with ping -M do -s 1400) that it still sends the expected
icmp error message when the size exceeds 1280.
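
A simplified sketch (not part of this series) of the mtu check that
remains in ip_forward() and produces that icmp error; names follow the
kernel source of this era, so treat it as illustrative:

    if (ip_exceeds_mtu(skb, mtu)) {
        /* oversized packet with DF set: tell the sender to fragment;
         * this path is unchanged by this series */
        icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
                  htonl(mtu));
        goto drop;    /* sketch: ip_forward() frees the skb here */
    }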
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents 418a3156 c1e756bf
include/linux/netdevice.h:

@@ -3180,12 +3180,7 @@ void netdev_change_features(struct net_device *dev);
 void netif_stacked_transfer_operstate(const struct net_device *rootdev,
 					struct net_device *dev);
 
-netdev_features_t netif_skb_dev_features(struct sk_buff *skb,
-					 const struct net_device *dev);
-static inline netdev_features_t netif_skb_features(struct sk_buff *skb)
-{
-	return netif_skb_dev_features(skb, skb->dev);
-}
+netdev_features_t netif_skb_features(struct sk_buff *skb);
 
 static inline bool net_gso_ok(netdev_features_t features, int gso_type)
 {
net/core/dev.c:

@@ -2418,7 +2418,7 @@ EXPORT_SYMBOL(netdev_rx_csum_fault);
  * 2. No high memory really exists on this machine.
  */
-static int illegal_highdma(const struct net_device *dev, struct sk_buff *skb)
+static int illegal_highdma(struct net_device *dev, struct sk_buff *skb)
 {
 #ifdef CONFIG_HIGHMEM
 	int i;
@@ -2493,7 +2493,6 @@ static int dev_gso_segment(struct sk_buff *skb, netdev_features_t features)
 }
 
 static netdev_features_t harmonize_features(struct sk_buff *skb,
-					    const struct net_device *dev,
 					    netdev_features_t features)
 {
 	int tmp;
@@ -2501,30 +2500,29 @@ static netdev_features_t harmonize_features(struct sk_buff *skb,
 	if (skb->ip_summed != CHECKSUM_NONE &&
 	    !can_checksum_protocol(features, skb_network_protocol(skb, &tmp))) {
 		features &= ~NETIF_F_ALL_CSUM;
-	} else if (illegal_highdma(dev, skb)) {
+	} else if (illegal_highdma(skb->dev, skb)) {
 		features &= ~NETIF_F_SG;
 	}
 
 	return features;
 }
 
-netdev_features_t netif_skb_dev_features(struct sk_buff *skb,
-					 const struct net_device *dev)
+netdev_features_t netif_skb_features(struct sk_buff *skb)
 {
 	__be16 protocol = skb->protocol;
-	netdev_features_t features = dev->features;
+	netdev_features_t features = skb->dev->features;
 
-	if (skb_shinfo(skb)->gso_segs > dev->gso_max_segs)
+	if (skb_shinfo(skb)->gso_segs > skb->dev->gso_max_segs)
 		features &= ~NETIF_F_GSO_MASK;
 
 	if (protocol == htons(ETH_P_8021Q) || protocol == htons(ETH_P_8021AD)) {
 		struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
 		protocol = veh->h_vlan_encapsulated_proto;
 	} else if (!vlan_tx_tag_present(skb)) {
-		return harmonize_features(skb, dev, features);
+		return harmonize_features(skb, features);
 	}
 
-	features &= (dev->vlan_features | NETIF_F_HW_VLAN_CTAG_TX |
+	features &= (skb->dev->vlan_features | NETIF_F_HW_VLAN_CTAG_TX |
 		     NETIF_F_HW_VLAN_STAG_TX);
 
 	if (protocol == htons(ETH_P_8021Q) || protocol == htons(ETH_P_8021AD))
@@ -2532,9 +2530,9 @@ netdev_features_t netif_skb_dev_features(struct sk_buff *skb,
 		NETIF_F_GEN_CSUM | NETIF_F_HW_VLAN_CTAG_TX |
 		NETIF_F_HW_VLAN_STAG_TX;
 
-	return harmonize_features(skb, dev, features);
+	return harmonize_features(skb, features);
 }
-EXPORT_SYMBOL(netif_skb_dev_features);
+EXPORT_SYMBOL(netif_skb_features);
 
 int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
 			struct netdev_queue *txq)
net/ipv4/ip_forward.c:

@@ -56,53 +56,6 @@ static bool ip_exceeds_mtu(const struct sk_buff *skb, unsigned int mtu)
 	return true;
 }
 
-static bool ip_gso_exceeds_dst_mtu(const struct sk_buff *skb)
-{
-	unsigned int mtu;
-
-	if (skb->local_df || !skb_is_gso(skb))
-		return false;
-
-	mtu = ip_dst_mtu_maybe_forward(skb_dst(skb), true);
-
-	/* if seglen > mtu, do software segmentation for IP fragmentation on
-	 * output. DF bit cannot be set since ip_forward would have sent
-	 * icmp error.
-	 */
-	return skb_gso_network_seglen(skb) > mtu;
-}
-
-/* called if GSO skb needs to be fragmented on forward */
-static int ip_forward_finish_gso(struct sk_buff *skb)
-{
-	struct dst_entry *dst = skb_dst(skb);
-	netdev_features_t features;
-	struct sk_buff *segs;
-	int ret = 0;
-
-	features = netif_skb_dev_features(skb, dst->dev);
-	segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK);
-	if (IS_ERR(segs)) {
-		kfree_skb(skb);
-		return -ENOMEM;
-	}
-
-	consume_skb(skb);
-
-	do {
-		struct sk_buff *nskb = segs->next;
-		int err;
-
-		segs->next = NULL;
-		err = dst_output(segs);
-
-		if (err && ret == 0)
-			ret = err;
-		segs = nskb;
-	} while (segs);
-
-	return ret;
-}
-
 static int ip_forward_finish(struct sk_buff *skb)
 {
@@ -114,9 +67,6 @@ static int ip_forward_finish(struct sk_buff *skb)
 	if (unlikely(opt->optlen))
 		ip_forward_options(skb);
 
-	if (ip_gso_exceeds_dst_mtu(skb))
-		return ip_forward_finish_gso(skb);
-
 	return dst_output(skb);
 }
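
The net effect on the forwarding path, as a sketch (all names appear in
the hunks above and below):

    /* New path of a forwarded over-mtu GSO skb:
     *
     *   ip_forward_finish()
     *     -> dst_output()             // no GSO special case here anymore
     *        -> ip_output()           // skb->dev is now the output device
     *           -> ip_finish_output()
     *              -> ip_finish_output_gso()  // added below in ip_output.c
     *                 -> skb_gso_segment(), then ip_fragment() per segment
     */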
net/ipv4/ip_output.c:

@@ -211,6 +211,48 @@ static inline int ip_finish_output2(struct sk_buff *skb)
 	return -EINVAL;
 }
 
+static int ip_finish_output_gso(struct sk_buff *skb)
+{
+	netdev_features_t features;
+	struct sk_buff *segs;
+	int ret = 0;
+
+	/* common case: locally created skb or seglen is <= mtu */
+	if (((IPCB(skb)->flags & IPSKB_FORWARDED) == 0) ||
+	      skb_gso_network_seglen(skb) <= ip_skb_dst_mtu(skb))
+		return ip_finish_output2(skb);
+
+	/* Slowpath - GSO segment length is exceeding the dst MTU.
+	 *
+	 * This can happen in two cases:
+	 * 1) TCP GRO packet, DF bit not set
+	 * 2) skb arrived via virtio-net, we thus get TSO/GSO skbs directly
+	 *    from host network stack.
+	 */
+	features = netif_skb_features(skb);
+	segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK);
+	if (IS_ERR(segs)) {
+		kfree_skb(skb);
+		return -ENOMEM;
+	}
+
+	consume_skb(skb);
+
+	do {
+		struct sk_buff *nskb = segs->next;
+		int err;
+
+		segs->next = NULL;
+		err = ip_fragment(segs, ip_finish_output2);
+
+		if (err && ret == 0)
+			ret = err;
+		segs = nskb;
+	} while (segs);
+
+	return ret;
+}
+
 static int ip_finish_output(struct sk_buff *skb)
 {
 #if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM)
@@ -220,9 +262,12 @@ static int ip_finish_output(struct sk_buff *skb)
 		return dst_output(skb);
 	}
 #endif
-	if (skb->len > ip_skb_dst_mtu(skb) && !skb_is_gso(skb))
+	if (skb_is_gso(skb))
+		return ip_finish_output_gso(skb);
+
+	if (skb->len > ip_skb_dst_mtu(skb))
 		return ip_fragment(skb, ip_finish_output2);
-	else
-		return ip_finish_output2(skb);
+
+	return ip_finish_output2(skb);
 }
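
For reference, a sketch of skb_gso_network_seglen(), which the
seglen-vs-mtu checks in this series rely on; assumed to match
include/linux/skbuff.h of this era, so treat it as illustrative:

    static inline unsigned int skb_gso_network_seglen(const struct sk_buff *skb)
    {
        /* bytes of network + transport headers in the GSO skb */
        unsigned int hdr_len = skb_transport_header(skb) -
                               skb_network_header(skb);

        /* size of each resulting segment as seen at the network layer */
        return hdr_len + skb_shinfo(skb)->gso_size;
    }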