Commit 80223d51 authored by Alexey Kuznetsov, committed by David S. Miller

[NET]: Add TCP segmentation offload core infrastructure.

parent 22101408
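
This patch adds only the core infrastructure: tso_size and tso_segs fields in skb_shared_info, a NETIF_F_TSO device feature flag, and a loopback emulation that slices an over-MTU TCP "superpacket" back into wire-sized frames. As a rough sketch of the producer side this anticipates (the sender-side TCP changes are not part of this commit), a caller would build one large skb, record the MSS in the shared info, and hand it to the device:

	/* Hypothetical producer, for illustration only. */
	skb_shinfo(skb)->tso_size = mss;	/* payload bytes per segment */
	skb_shinfo(skb)->tso_segs = 0;		/* reserved; merely propagated for now */
	dev_queue_xmit(skb);			/* the device (or the emulation below) segments it */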
--- a/drivers/net/loopback.c
+++ b/drivers/net/loopback.c
@@ -49,11 +49,72 @@
 #include <linux/etherdevice.h>
 #include <linux/skbuff.h>
 #include <net/sock.h>
+#include <net/checksum.h>
 #include <linux/if_ether.h>	/* For the statistics structure. */
 #include <linux/if_arp.h>	/* For ARPHRD_ETHER */
+#include <linux/ip.h>
+#include <linux/tcp.h>
 
 #define LOOPBACK_OVERHEAD (128 + MAX_HEADER + 16 + 16)
 
+/* KISS: just allocate small chunks and copy bits.
+ *
+ * So, in fact, this is documentation, explaining what we expect
+ * of a large-sending device modulo TCP checksum, which is ignored
+ * for loopback.
+ */
+static void emulate_large_send_offload(struct sk_buff *skb)
+{
+	struct iphdr *iph = skb->nh.iph;
+	struct tcphdr *th = (struct tcphdr *)(skb->nh.raw + (iph->ihl * 4));
+	unsigned int doffset = (iph->ihl + th->doff) * 4;	/* IP + TCP header bytes */
+	unsigned int mtu = skb_shinfo(skb)->tso_size + doffset;
+	unsigned int offset = 0;
+	u32 seq = ntohl(th->seq);
+	u16 id = ntohs(iph->id);
+
+	/* Slice the superpacket into MSS-sized segments, each carrying
+	 * a fresh copy of the IP and TCP headers. */
+	while (offset + doffset < skb->len) {
+		unsigned int frag_size = min(mtu, skb->len - offset) - doffset;
+		struct sk_buff *nskb = alloc_skb(mtu + 32, GFP_ATOMIC);
+
+		if (!nskb)
+			break;
+		skb_reserve(nskb, 32);
+		nskb->mac.raw = nskb->data - 14;
+		nskb->nh.raw = nskb->data;
+		iph = nskb->nh.iph;
+		memcpy(nskb->data, skb->nh.raw, doffset);
+		if (skb_copy_bits(skb,
+				  doffset + offset,
+				  nskb->data + doffset,
+				  frag_size))
+			BUG();
+		skb_put(nskb, doffset + frag_size);
+		nskb->ip_summed = CHECKSUM_UNNECESSARY;
+		nskb->dev = skb->dev;
+		nskb->priority = skb->priority;
+		nskb->protocol = skb->protocol;
+		nskb->dst = dst_clone(skb->dst);
+		memcpy(nskb->cb, skb->cb, sizeof(skb->cb));
+		nskb->pkt_type = skb->pkt_type;
+
+		/* Fix up the copied headers: per-segment length, IP id and
+		 * checksum, TCP sequence number. FIN and PSH may survive
+		 * only on the final segment. */
+		th = (struct tcphdr *)(nskb->nh.raw + iph->ihl * 4);
+		iph->tot_len = htons(frag_size + doffset);
+		iph->id = htons(id);
+		iph->check = 0;
+		iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl);
+		th->seq = htonl(seq);
+		if (offset + doffset + frag_size < skb->len)
+			th->fin = th->psh = 0;
+		netif_rx(nskb);
+
+		offset += frag_size;
+		seq += frag_size;
+		id++;
+	}
+
+	dev_kfree_skb(skb);
+}
+
 /*
  * The higher levels take care of making this non-reentrant (it's
  * called with bh's disabled).
@@ -86,6 +147,18 @@ static int loopback_xmit(struct sk_buff *skb, struct net_device *dev)
 	skb->ip_summed = CHECKSUM_UNNECESSARY;
 #endif
 
+	/* A tso_size marks an over-MTU TCP/IPv4 superpacket; anything
+	 * else carrying it is a bug. */
+	if (skb_shinfo(skb)->tso_size) {
+		struct iphdr *iph = skb->nh.iph;
+
+		if (skb->protocol != htons(ETH_P_IP))
+			BUG();
+		if (iph->protocol != IPPROTO_TCP)
+			BUG();
+
+		emulate_large_send_offload(skb);
+		return 0;
+	}
+
 	dev->last_rx = jiffies;
 	stats->rx_bytes+=skb->len;
 	stats->tx_bytes+=skb->len;
@@ -117,6 +190,12 @@ int __init loopback_init(struct net_device *dev)
 	dev->rebuild_header = eth_rebuild_header;
 	dev->flags = IFF_LOOPBACK;
 	dev->features = NETIF_F_SG|NETIF_F_FRAGLIST|NETIF_F_NO_CSUM|NETIF_F_HIGHDMA;
+
+	/* Current netfilter will die with OOM linearizing large skbs,
+	 * however this will be cured before 2.5.x is done.
+	 */
+	dev->features |= NETIF_F_TSO;
+
 	dev->priv = kmalloc(sizeof(struct net_device_stats), GFP_KERNEL);
 	if (dev->priv == NULL)
 		return -ENOMEM;
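
To make the chunking arithmetic in emulate_large_send_offload() concrete, here is a standalone model of its loop in plain C (the numbers are assumed for illustration: 20-byte IP and TCP headers, so doffset = 40, an MSS of 1460, and 7300 bytes of payload):

#include <stdio.h>

int main(void)
{
	unsigned int doffset = 40;		/* assumed: 20B IP + 20B TCP header */
	unsigned int tso_size = 1460;		/* the MSS recorded in skb_shinfo */
	unsigned int mtu = tso_size + doffset;	/* per-segment size: 1500 */
	unsigned int len = doffset + 7300;	/* headers + example payload */
	unsigned int offset = 0, segs = 0;

	while (offset + doffset < len) {
		unsigned int frag = (mtu < len - offset ? mtu : len - offset) - doffset;
		printf("segment %u: %u payload bytes\n", ++segs, frag);
		offset += frag;
	}
	/* Prints five segments of 1460 bytes each: 5 * 1460 == 7300. */
	return 0;
}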
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -365,6 +365,7 @@ struct net_device
 #define NETIF_F_HW_VLAN_RX	256	/* Receive VLAN hw acceleration */
 #define NETIF_F_HW_VLAN_FILTER	512	/* Receive filtering on VLAN */
 #define NETIF_F_VLAN_CHALLENGED	1024	/* Device cannot handle VLAN packets */
+#define NETIF_F_TSO		2048	/* Can offload TCP/IP segmentation */
 
 	/* Called after device is detached from network. */
 	void			(*uninit)(struct net_device *dev);
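
How the flag is consumed is not part of this commit; as a hedged sketch, a sender deciding whether it may build an over-MTU superpacket would test the output device's feature bit first (dev_can_tso is a hypothetical helper, named here only for illustration):

static inline int dev_can_tso(struct net_device *dev)
{
	/* Only devices advertising NETIF_F_TSO may be handed superpackets. */
	return (dev->features & NETIF_F_TSO) != 0;
}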
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -109,7 +109,8 @@ struct sk_buff_head {
 struct sk_buff;
 
-#define MAX_SKB_FRAGS 6
+/* To allow 64K frame to be packed as single skb without frag_list */
+#define MAX_SKB_FRAGS (65536/PAGE_SIZE + 2)
 
 typedef struct skb_frag_struct skb_frag_t;
@@ -125,6 +126,8 @@ struct skb_frag_struct {
 struct skb_shared_info {
 	atomic_t	dataref;
 	unsigned int	nr_frags;
+	unsigned short	tso_size;	/* payload bytes per segment (MSS) */
+	unsigned short	tso_segs;	/* reserved; initialized and propagated */
 	struct sk_buff	*frag_list;
 	skb_frag_t	frags[MAX_SKB_FRAGS];
 };
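
The new MAX_SKB_FRAGS is sized from the 64K IP datagram limit rather than a fixed small count. On a machine with 4 KiB pages (an assumed example), 65536/PAGE_SIZE + 2 = 16 + 2 = 18: sixteen page fragments hold the full 64 KB, and the two extra cover a frame whose first and last bytes land mid-page. A standalone sanity check:

#define PAGE_SIZE 4096			/* assumed: 4 KiB pages */
#define MAX_SKB_FRAGS (65536/PAGE_SIZE + 2)
_Static_assert(MAX_SKB_FRAGS == 18, "64K frame fits in 18 page fragments");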
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -209,6 +209,8 @@ struct sk_buff *alloc_skb(unsigned int size, int gfp_mask)
 	atomic_set(&skb->users, 1);
 	atomic_set(&(skb_shinfo(skb)->dataref), 1);
 	skb_shinfo(skb)->nr_frags = 0;
+	skb_shinfo(skb)->tso_size = 0;
+	skb_shinfo(skb)->tso_segs = 0;
 	skb_shinfo(skb)->frag_list = NULL;
 out:
 	return skb;
@@ -490,6 +492,7 @@ int skb_linearize(struct sk_buff *skb, int gfp_mask)
 	unsigned int size;
 	u8 *data;
 	long offset;
+	struct skb_shared_info *ninfo;
 	int headerlen = skb->data - skb->head;
 	int expand = (skb->tail + skb->data_len) - skb->end;
@@ -509,6 +512,14 @@ int skb_linearize(struct sk_buff *skb, int gfp_mask)
 	if (skb_copy_bits(skb, -headerlen, data, headerlen + skb->len))
 		BUG();
 
+	/* Set up shinfo: the fresh linear buffer gets its own shared info,
+	 * with the TSO parameters carried over from the old one. */
+	ninfo = (struct skb_shared_info *)(data + size);
+	atomic_set(&ninfo->dataref, 1);
+	ninfo->tso_size = skb_shinfo(skb)->tso_size;
+	ninfo->tso_segs = skb_shinfo(skb)->tso_segs;
+	ninfo->nr_frags = 0;
+	ninfo->frag_list = NULL;
+
 	/* Offset between the two in bytes */
 	offset = data - skb->head;
@@ -525,11 +536,6 @@ int skb_linearize(struct sk_buff *skb, int gfp_mask)
 	skb->tail += offset;
 	skb->data += offset;
 
-	/* Set up shinfo */
-	atomic_set(&(skb_shinfo(skb)->dataref), 1);
-	skb_shinfo(skb)->nr_frags = 0;
-	skb_shinfo(skb)->frag_list = NULL;
-
 	/* We are no longer a clone, even if we were. */
 	skb->cloned = 0;
@@ -583,6 +589,8 @@ struct sk_buff *pskb_copy(struct sk_buff *skb, int gfp_mask)
 		}
 		skb_shinfo(n)->nr_frags = i;
 	}
+	skb_shinfo(n)->tso_size = skb_shinfo(skb)->tso_size;
+	skb_shinfo(n)->tso_segs = skb_shinfo(skb)->tso_segs;
 
 	if (skb_shinfo(skb)->frag_list) {
 		skb_shinfo(n)->frag_list = skb_shinfo(skb)->frag_list;
@@ -694,6 +702,9 @@ struct sk_buff *skb_realloc_headroom(struct sk_buff *skb, unsigned int headroom)
  *
  *	You must pass %GFP_ATOMIC as the allocation priority if this function
  *	is called from an interrupt.
+ *
+ *	BUG ALERT: ip_summed is not copied. Why does this work? Is it used
+ *	only by netfilter in the cases when the checksum is recalculated? --ANK
  */
 struct sk_buff *skb_copy_expand(const struct sk_buff *skb,
 				int newheadroom, int newtailroom, int gfp_mask)
@@ -716,6 +727,8 @@ struct sk_buff *skb_copy_expand(const struct sk_buff *skb,
 		BUG();
 
 	copy_skb_header(n, skb);
+	skb_shinfo(n)->tso_size = skb_shinfo(skb)->tso_size;
+	skb_shinfo(n)->tso_segs = skb_shinfo(skb)->tso_segs;
 
 	return n;
 }
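
Every skbuff.c hunk above repeats the same rule: whenever an operation materializes a fresh skb_shared_info, the tso_size and tso_segs values must be carried across, or a superpacket silently loses its segmentation parameters on its way through the stack. A hypothetical helper expressing that pattern (not introduced by this commit) might read:

static inline void skb_copy_tso_info(struct sk_buff *to,
				     const struct sk_buff *from)
{
	/* Preserve the TSO parameters across any skb copy/linearize. */
	skb_shinfo(to)->tso_size = skb_shinfo(from)->tso_size;
	skb_shinfo(to)->tso_segs = skb_shinfo(from)->tso_segs;
}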