Commit d5ae22d9 authored by David S. Miller

Merge

parents 5e051f29 fe4f70cf
...@@ -63,6 +63,7 @@ ...@@ -63,6 +63,7 @@
#include <net/pkt_sched.h> #include <net/pkt_sched.h>
#include <linux/list.h> #include <linux/list.h>
#include <linux/reboot.h> #include <linux/reboot.h>
#include <net/checksum.h>
#include <linux/tqueue.h> #include <linux/tqueue.h>
#include <linux/ethtool.h> #include <linux/ethtool.h>
#include <linux/if_vlan.h> #include <linux/if_vlan.h>
......
...@@ -427,6 +427,11 @@ e1000_probe(struct pci_dev *pdev, ...@@ -427,6 +427,11 @@ e1000_probe(struct pci_dev *pdev,
netdev->features = NETIF_F_SG; netdev->features = NETIF_F_SG;
} }
#ifdef NETIF_F_TSO
if(adapter->hw.mac_type >= e1000_82544)
netdev->features |= NETIF_F_TSO;
#endif
if(pci_using_dac) if(pci_using_dac)
netdev->features |= NETIF_F_HIGHDMA; netdev->features |= NETIF_F_HIGHDMA;
...@@ -1284,9 +1289,62 @@ e1000_watchdog(unsigned long data) ...@@ -1284,9 +1289,62 @@ e1000_watchdog(unsigned long data)
#define E1000_TX_FLAGS_CSUM 0x00000001 #define E1000_TX_FLAGS_CSUM 0x00000001
#define E1000_TX_FLAGS_VLAN 0x00000002 #define E1000_TX_FLAGS_VLAN 0x00000002
#define E1000_TX_FLAGS_TSO 0x00000004
#define E1000_TX_FLAGS_VLAN_MASK 0xffff0000 #define E1000_TX_FLAGS_VLAN_MASK 0xffff0000
#define E1000_TX_FLAGS_VLAN_SHIFT 16 #define E1000_TX_FLAGS_VLAN_SHIFT 16
/*
 * e1000_tso - emit a TCP segmentation offload context descriptor
 * @adapter: adapter whose tx_ring receives the context descriptor
 * @skb: packet about to be transmitted
 * @tx_flags: transmit flags collected so far (not consulted here)
 *
 * If the skb carries a non-zero tso_size, the IP total length and IP
 * checksum fields are zeroed, the TCP checksum field is seeded with the
 * complement of csum_tcpudp_magic() over the source/destination addresses
 * and IPPROTO_TCP, and one context descriptor holding the header offsets,
 * the header length and the MSS is written at tx_ring.next_to_use, which
 * is then advanced by one (modulo the ring size).
 *
 * Returns TRUE when a context descriptor was written, FALSE when the skb
 * did not request segmentation or NETIF_F_TSO is not defined.
 */
static inline boolean_t
e1000_tso(struct e1000_adapter *adapter, struct sk_buff *skb, int tx_flags)
{
#ifdef NETIF_F_TSO
	struct e1000_context_desc *ctx;
	struct iphdr *iph;
	struct tcphdr *th;
	uint16_t ipcse, tucse, mss;
	uint8_t ipcss, ipcso, tucss, tucso, hdr_len;
	int slot;

	/* Nothing to set up unless segmentation was requested. */
	if(!skb_shinfo(skb)->tso_size)
		return FALSE;

	iph = skb->nh.iph;
	th = skb->h.th;

	/* Bytes from skb->data up to the end of the TCP header. */
	hdr_len = ((skb->h.raw - skb->data) + (th->doff << 2));
	mss = skb_shinfo(skb)->tso_size;

	/* Prime the header fields before handing the frame over. */
	iph->tot_len = 0;
	iph->check = 0;
	th->check = ~csum_tcpudp_magic(iph->saddr,
	                               iph->daddr,
	                               0,
	                               IPPROTO_TCP,
	                               0);

	/* Checksum start / offset / end positions, relative to skb->data. */
	ipcss = skb->nh.raw - skb->data;
	ipcso = (void *)&(iph->check) - (void *)skb->data;
	ipcse = skb->h.raw - skb->data - 1;
	tucss = skb->h.raw - skb->data;
	tucso = (void *)&(th->check) - (void *)skb->data;
	tucse = 0;

	slot = adapter->tx_ring.next_to_use;
	ctx = E1000_CONTEXT_DESC(adapter->tx_ring, slot);

	ctx->lower_setup.ip_fields.ipcss = ipcss;
	ctx->lower_setup.ip_fields.ipcso = ipcso;
	ctx->lower_setup.ip_fields.ipcse = cpu_to_le16(ipcse);

	ctx->upper_setup.tcp_fields.tucss = tucss;
	ctx->upper_setup.tcp_fields.tucso = tucso;
	ctx->upper_setup.tcp_fields.tucse = cpu_to_le16(tucse);

	ctx->tcp_seg_setup.fields.mss = cpu_to_le16(mss);
	ctx->tcp_seg_setup.fields.hdr_len = hdr_len;

	/* Low bits of the command word carry the payload length. */
	ctx->cmd_and_length = cpu_to_le32(adapter->txd_cmd |
		E1000_TXD_CMD_DEXT | E1000_TXD_CMD_TSE |
		E1000_TXD_CMD_IP | E1000_TXD_CMD_TCP |
		(skb->len - (hdr_len)));

	/* The context descriptor consumed one ring slot. */
	slot = (slot + 1) % adapter->tx_ring.count;
	adapter->tx_ring.next_to_use = slot;

	return TRUE;
#else
	return FALSE;
#endif
}
static inline boolean_t static inline boolean_t
e1000_tx_csum(struct e1000_adapter *adapter, struct sk_buff *skb) e1000_tx_csum(struct e1000_adapter *adapter, struct sk_buff *skb)
{ {
...@@ -1386,6 +1444,12 @@ e1000_tx_queue(struct e1000_adapter *adapter, int count, int tx_flags) ...@@ -1386,6 +1444,12 @@ e1000_tx_queue(struct e1000_adapter *adapter, int count, int tx_flags)
txd_upper = 0; txd_upper = 0;
txd_lower = adapter->txd_cmd; txd_lower = adapter->txd_cmd;
if(tx_flags & E1000_TX_FLAGS_TSO) {
txd_lower |= E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D |
E1000_TXD_CMD_TSE;
txd_upper |= (E1000_TXD_POPTS_IXSM | E1000_TXD_POPTS_TXSM) << 8;
}
if(tx_flags & E1000_TX_FLAGS_CSUM) { if(tx_flags & E1000_TX_FLAGS_CSUM) {
txd_lower |= E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D; txd_lower |= E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
txd_upper |= E1000_TXD_POPTS_TXSM << 8; txd_upper |= E1000_TXD_POPTS_TXSM << 8;
...@@ -1435,22 +1499,29 @@ e1000_xmit_frame(struct sk_buff *skb, struct net_device *netdev) ...@@ -1435,22 +1499,29 @@ e1000_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
for(f = 0; f < skb_shinfo(skb)->nr_frags; f++) for(f = 0; f < skb_shinfo(skb)->nr_frags; f++)
count += TXD_USE_COUNT(skb_shinfo(skb)->frags[f].size, count += TXD_USE_COUNT(skb_shinfo(skb)->frags[f].size,
adapter->max_data_per_txd); adapter->max_data_per_txd);
#ifdef NETIF_F_TSO
if((skb_shinfo(skb)->tso_size) || (skb->ip_summed == CHECKSUM_HW))
count++;
#else
if(skb->ip_summed == CHECKSUM_HW) if(skb->ip_summed == CHECKSUM_HW)
count++; count++;
#endif
if(E1000_DESC_UNUSED(&adapter->tx_ring) < count) { if(E1000_DESC_UNUSED(&adapter->tx_ring) < count) {
netif_stop_queue(netdev); netif_stop_queue(netdev);
return 1; return 1;
} }
if(e1000_tx_csum(adapter, skb))
tx_flags |= E1000_TX_FLAGS_CSUM;
if(adapter->vlgrp && vlan_tx_tag_present(skb)) { if(adapter->vlgrp && vlan_tx_tag_present(skb)) {
tx_flags |= E1000_TX_FLAGS_VLAN; tx_flags |= E1000_TX_FLAGS_VLAN;
tx_flags |= (vlan_tx_tag_get(skb) << E1000_TX_FLAGS_VLAN_SHIFT); tx_flags |= (vlan_tx_tag_get(skb) << E1000_TX_FLAGS_VLAN_SHIFT);
} }
if(e1000_tso(adapter, skb, tx_flags))
tx_flags |= E1000_TX_FLAGS_TSO;
else if(e1000_tx_csum(adapter, skb))
tx_flags |= E1000_TX_FLAGS_CSUM;
count = e1000_tx_map(adapter, skb); count = e1000_tx_map(adapter, skb);
e1000_tx_queue(adapter, count, tx_flags); e1000_tx_queue(adapter, count, tx_flags);
......
...@@ -622,9 +622,12 @@ e1000_proc_list_setup(struct e1000_adapter *adapter) ...@@ -622,9 +622,12 @@ e1000_proc_list_setup(struct e1000_adapter *adapter)
LIST_ADD_U("Rx_Long_Length_Errors", &adapter->stats.roc); LIST_ADD_U("Rx_Long_Length_Errors", &adapter->stats.roc);
LIST_ADD_U("Rx_Short_Length_Errors", &adapter->stats.ruc); LIST_ADD_U("Rx_Short_Length_Errors", &adapter->stats.ruc);
/* The 82542 does not have an alignment error count register */ /* The 82542 does not have some of these stats */
if(adapter->hw.mac_type >= e1000_82543) if(adapter->hw.mac_type >= e1000_82543) {
LIST_ADD_U("Rx_Align_Errors", &adapter->stats.algnerrc); LIST_ADD_U("Rx_Align_Errors", &adapter->stats.algnerrc);
LIST_ADD_U("Tx_TCP_Seg_Good", &adapter->stats.tsctc);
LIST_ADD_U("Tx_TCP_Seg_Failed", &adapter->stats.tsctfc);
}
LIST_ADD_U("Rx_Flow_Control_XON", &adapter->stats.xonrxc); LIST_ADD_U("Rx_Flow_Control_XON", &adapter->stats.xonrxc);
LIST_ADD_U("Rx_Flow_Control_XOFF", &adapter->stats.xoffrxc); LIST_ADD_U("Rx_Flow_Control_XOFF", &adapter->stats.xoffrxc);
......
...@@ -49,11 +49,72 @@ ...@@ -49,11 +49,72 @@
#include <linux/etherdevice.h> #include <linux/etherdevice.h>
#include <linux/skbuff.h> #include <linux/skbuff.h>
#include <net/sock.h> #include <net/sock.h>
#include <net/checksum.h>
#include <linux/if_ether.h> /* For the statistics structure. */ #include <linux/if_ether.h> /* For the statistics structure. */
#include <linux/if_arp.h> /* For ARPHRD_ETHER */ #include <linux/if_arp.h> /* For ARPHRD_ETHER */
#include <linux/ip.h>
#include <linux/tcp.h>
#define LOOPBACK_OVERHEAD (128 + MAX_HEADER + 16 + 16) #define LOOPBACK_OVERHEAD (128 + MAX_HEADER + 16 + 16)
/* KISS: carve the large frame into small freshly allocated skbs by copying.
 *
 * This routine doubles as documentation of what is expected from a
 * large-send capable device, except for the TCP checksum, which loopback
 * ignores.
 *
 * Each emitted segment gets a copy of the original IP+TCP headers followed
 * by at most tso_size bytes of payload.  Per segment the IP total length
 * and header checksum are recomputed, the IP id is incremented by one, the
 * TCP sequence number is advanced by the payload already emitted, and FIN
 * and PSH are cleared on every segment but the last.  Segments are fed to
 * netif_rx().  If a segment cannot be allocated the remainder is dropped.
 * The original skb is always freed.
 */
static void emulate_large_send_offload(struct sk_buff *skb)
{
	struct iphdr *iph = skb->nh.iph;
	struct tcphdr *th = (struct tcphdr*)(skb->nh.raw + (iph->ihl * 4));
	/* Combined IP + TCP header length in bytes. */
	unsigned int hdrs = (iph->ihl + th->doff) * 4;
	/* Largest segment we emit: headers plus one tso_size of payload. */
	unsigned int seg_max = skb_shinfo(skb)->tso_size + hdrs;
	u32 seq = ntohl(th->seq);
	u16 id = ntohs(iph->id);
	unsigned int done;

	for (done = 0; done + hdrs < skb->len; ) {
		unsigned int payload = min(seg_max, skb->len - done) - hdrs;
		struct sk_buff *seg = alloc_skb(seg_max + 32, GFP_ATOMIC);

		if (!seg)
			break;

		/* Leave headroom; the MAC header pointer sits 14 bytes back. */
		skb_reserve(seg, 32);
		seg->mac.raw = seg->data - 14;
		seg->nh.raw = seg->data;
		iph = seg->nh.iph;

		/* Headers first, then this segment's slice of the payload. */
		memcpy(seg->data, skb->nh.raw, hdrs);
		if (skb_copy_bits(skb,
				  hdrs + done,
				  seg->data + hdrs,
				  payload))
			BUG();
		skb_put(seg, hdrs + payload);

		/* Inherit bookkeeping from the original skb. */
		seg->dev = skb->dev;
		seg->dst = dst_clone(skb->dst);
		seg->protocol = skb->protocol;
		seg->priority = skb->priority;
		seg->pkt_type = skb->pkt_type;
		seg->ip_summed = CHECKSUM_UNNECESSARY;
		memcpy(seg->cb, skb->cb, sizeof(skb->cb));

		/* Patch the copied headers for this particular segment. */
		th = (struct tcphdr*)(seg->nh.raw + iph->ihl*4);
		iph->tot_len = htons(payload + hdrs);
		iph->id = htons(id);
		iph->check = 0;
		iph->check = ip_fast_csum((unsigned char *) iph, iph->ihl);
		th->seq = htonl(seq);
		/* Only the final segment may keep FIN/PSH. */
		if (done + hdrs + payload < skb->len) {
			th->psh = 0;
			th->fin = 0;
		}

		netif_rx(seg);

		done += payload;
		seq += payload;
		id++;
	}

	dev_kfree_skb(skb);
}
/* /*
* The higher levels take care of making this non-reentrant (it's * The higher levels take care of making this non-reentrant (it's
* called with bh's disabled). * called with bh's disabled).
...@@ -86,6 +147,18 @@ static int loopback_xmit(struct sk_buff *skb, struct net_device *dev) ...@@ -86,6 +147,18 @@ static int loopback_xmit(struct sk_buff *skb, struct net_device *dev)
skb->ip_summed = CHECKSUM_UNNECESSARY; skb->ip_summed = CHECKSUM_UNNECESSARY;
#endif #endif
if (skb_shinfo(skb)->tso_size) {
struct iphdr *iph = skb->nh.iph;
if (skb->protocol != htons(ETH_P_IP))
BUG();
if (iph->protocol != IPPROTO_TCP)
BUG();
emulate_large_send_offload(skb);
return 0;
}
dev->last_rx = jiffies; dev->last_rx = jiffies;
stats->rx_bytes+=skb->len; stats->rx_bytes+=skb->len;
stats->tx_bytes+=skb->len; stats->tx_bytes+=skb->len;
...@@ -117,6 +190,12 @@ int __init loopback_init(struct net_device *dev) ...@@ -117,6 +190,12 @@ int __init loopback_init(struct net_device *dev)
dev->rebuild_header = eth_rebuild_header; dev->rebuild_header = eth_rebuild_header;
dev->flags = IFF_LOOPBACK; dev->flags = IFF_LOOPBACK;
dev->features = NETIF_F_SG|NETIF_F_FRAGLIST|NETIF_F_NO_CSUM|NETIF_F_HIGHDMA; dev->features = NETIF_F_SG|NETIF_F_FRAGLIST|NETIF_F_NO_CSUM|NETIF_F_HIGHDMA;
/* Current netfilter will die with oom linearizing large skbs,
* however this will be cured before 2.5.x is done.
*/
dev->features |= NETIF_F_TSO;
dev->priv = kmalloc(sizeof(struct net_device_stats), GFP_KERNEL); dev->priv = kmalloc(sizeof(struct net_device_stats), GFP_KERNEL);
if (dev->priv == NULL) if (dev->priv == NULL)
return -ENOMEM; return -ENOMEM;
......
...@@ -365,6 +365,7 @@ struct net_device ...@@ -365,6 +365,7 @@ struct net_device
#define NETIF_F_HW_VLAN_RX 256 /* Receive VLAN hw acceleration */ #define NETIF_F_HW_VLAN_RX 256 /* Receive VLAN hw acceleration */
#define NETIF_F_HW_VLAN_FILTER 512 /* Receive filtering on VLAN */ #define NETIF_F_HW_VLAN_FILTER 512 /* Receive filtering on VLAN */
#define NETIF_F_VLAN_CHALLENGED 1024 /* Device cannot handle VLAN packets */ #define NETIF_F_VLAN_CHALLENGED 1024 /* Device cannot handle VLAN packets */
#define NETIF_F_TSO 2048 /* Can offload TCP/IP segmentation */
/* Called after device is detached from network. */ /* Called after device is detached from network. */
void (*uninit)(struct net_device *dev); void (*uninit)(struct net_device *dev);
......
...@@ -109,7 +109,8 @@ struct sk_buff_head { ...@@ -109,7 +109,8 @@ struct sk_buff_head {
struct sk_buff; struct sk_buff;
#define MAX_SKB_FRAGS 6 /* To allow 64K frame to be packed as single skb without frag_list */
#define MAX_SKB_FRAGS (65536/PAGE_SIZE + 2)
typedef struct skb_frag_struct skb_frag_t; typedef struct skb_frag_struct skb_frag_t;
...@@ -125,6 +126,8 @@ struct skb_frag_struct { ...@@ -125,6 +126,8 @@ struct skb_frag_struct {
/*
 * Shared-info block of an skb, reached through skb_shinfo(skb).
 * Side-by-side diff: the left column is the pre-merge layout, the right
 * column the post-merge layout, which adds tso_size and tso_segs.
 */
struct skb_shared_info { struct skb_shared_info {
atomic_t dataref; atomic_t dataref;
unsigned int nr_frags; unsigned int nr_frags;
/* Per-segment payload size requested from the device; 0 means no segmentation offload (alloc_skb zeroes it). */
unsigned short tso_size;
/* Number of segments beyond the first; ip_queue_xmit2 passes it on so that many extra IP ids are reserved. */
unsigned short tso_segs;
struct sk_buff *frag_list; struct sk_buff *frag_list;
skb_frag_t frags[MAX_SKB_FRAGS]; skb_frag_t frags[MAX_SKB_FRAGS];
}; };
......
...@@ -241,7 +241,8 @@ struct tcp_opt { ...@@ -241,7 +241,8 @@ struct tcp_opt {
__u32 snd_wnd; /* The window we expect to receive */ __u32 snd_wnd; /* The window we expect to receive */
__u32 max_window; /* Maximal window ever seen from peer */ __u32 max_window; /* Maximal window ever seen from peer */
__u32 pmtu_cookie; /* Last pmtu seen by socket */ __u32 pmtu_cookie; /* Last pmtu seen by socket */
__u16 mss_cache; /* Cached effective mss, not including SACKS */ __u32 mss_cache; /* Cached effective mss, not including SACKS */
__u16 mss_cache_std; /* Like mss_cache, but without TSO */
__u16 mss_clamp; /* Maximal mss, negotiated at connection setup */ __u16 mss_clamp; /* Maximal mss, negotiated at connection setup */
__u16 ext_header_len; /* Network protocol overhead (IP/IPv6 options) */ __u16 ext_header_len; /* Network protocol overhead (IP/IPv6 options) */
__u8 ca_state; /* State of fast-retransmit machine */ __u8 ca_state; /* State of fast-retransmit machine */
......
...@@ -53,12 +53,13 @@ static inline void inet_putpeer(struct inet_peer *p) ...@@ -53,12 +53,13 @@ static inline void inet_putpeer(struct inet_peer *p)
extern spinlock_t inet_peer_idlock; extern spinlock_t inet_peer_idlock;
/* can be called with or without local BH being disabled */ /* can be called with or without local BH being disabled */
/*
 * Hand out an IP id from the peer's counter, under inet_peer_idlock.
 * Side-by-side diff: the left column is the pre-merge version, which
 * advances the counter by exactly one; the right column is the post-merge
 * version, which returns the current counter value and then advances it
 * by 1 + more, so that "more" further ids are set aside for the caller.
 */
static inline __u16 inet_getid(struct inet_peer *p) static inline __u16 inet_getid(struct inet_peer *p, int more)
{ {
__u16 id; __u16 id;
spin_lock_bh(&inet_peer_idlock); spin_lock_bh(&inet_peer_idlock);
id = p->ip_id_count++; id = p->ip_id_count;
p->ip_id_count += 1 + more;
spin_unlock_bh(&inet_peer_idlock); spin_unlock_bh(&inet_peer_idlock);
return id; return id;
} }
......
...@@ -187,7 +187,7 @@ int ip_dont_fragment(struct sock *sk, struct dst_entry *dst) ...@@ -187,7 +187,7 @@ int ip_dont_fragment(struct sock *sk, struct dst_entry *dst)
!(dst->mxlock&(1<<RTAX_MTU)))); !(dst->mxlock&(1<<RTAX_MTU))));
} }
extern void __ip_select_ident(struct iphdr *iph, struct dst_entry *dst); extern void __ip_select_ident(struct iphdr *iph, struct dst_entry *dst, int more);
static inline void ip_select_ident(struct iphdr *iph, struct dst_entry *dst, struct sock *sk) static inline void ip_select_ident(struct iphdr *iph, struct dst_entry *dst, struct sock *sk)
{ {
...@@ -200,7 +200,19 @@ static inline void ip_select_ident(struct iphdr *iph, struct dst_entry *dst, str ...@@ -200,7 +200,19 @@ static inline void ip_select_ident(struct iphdr *iph, struct dst_entry *dst, str
iph->id = (sk && inet_sk(sk)->daddr) ? iph->id = (sk && inet_sk(sk)->daddr) ?
htons(inet_sk(sk)->id++) : 0; htons(inet_sk(sk)->id++) : 0;
} else } else
__ip_select_ident(iph, dst); __ip_select_ident(iph, dst, 0);
}
/*
 * Choose the IP id for a header and set aside "more" further ids after it.
 *
 * With DF set: if sk is non-NULL and has a destination address, the
 * socket's own id counter supplies the id and is advanced by 1 + more;
 * otherwise the id is 0.  Without DF the choice is delegated to
 * __ip_select_ident(), which receives "more" as well.
 *
 * Note: the final "} }" line is shared with the other column of this
 * side-by-side diff.
 */
static inline void ip_select_ident_more(struct iphdr *iph, struct dst_entry *dst, struct sock *sk, int more)
{
if (iph->frag_off&__constant_htons(IP_DF)) {
if (sk && inet_sk(sk)->daddr) {
iph->id = htons(inet_sk(sk)->id);
/* Skip past this id plus the "more" extra ones. */
inet_sk(sk)->id += 1 + more;
} else
iph->id = 0;
} else
__ip_select_ident(iph, dst, more);
} }
/* /*
......
...@@ -130,7 +130,7 @@ struct sock { ...@@ -130,7 +130,7 @@ struct sock {
bsdism; bsdism;
unsigned char debug; unsigned char debug;
unsigned char rcvtstamp; unsigned char rcvtstamp;
/* Hole of 1 byte. Try to pack. */ unsigned char no_largesend;
int route_caps; int route_caps;
int proc; int proc;
unsigned long lingertime; unsigned long lingertime;
......
...@@ -905,16 +905,21 @@ static inline void tcp_reset_xmit_timer(struct sock *sk, int what, unsigned long ...@@ -905,16 +905,21 @@ static inline void tcp_reset_xmit_timer(struct sock *sk, int what, unsigned long
/* Compute the current effective MSS, taking SACKs and IP options, /* Compute the current effective MSS, taking SACKs and IP options,
* and even PMTU discovery events into account. * and even PMTU discovery events into account.
*
* LARGESEND note: !urg_mode is overkill, only frames up to snd_up
* cannot be large. However, taking into account rare use of URG, this
* is not a big flaw.
*/ */
static __inline__ unsigned int tcp_current_mss(struct sock *sk) static __inline__ unsigned int tcp_current_mss(struct sock *sk, int large)
{ {
struct tcp_opt *tp = tcp_sk(sk); struct tcp_opt *tp = tcp_sk(sk);
struct dst_entry *dst = __sk_dst_get(sk); struct dst_entry *dst = __sk_dst_get(sk);
int mss_now = tp->mss_cache; int mss_now = large && (sk->route_caps&NETIF_F_TSO) && !tp->urg_mode ?
tp->mss_cache : tp->mss_cache_std;
if (dst && dst->pmtu != tp->pmtu_cookie) if (dst && dst->pmtu != tp->pmtu_cookie)
mss_now = tcp_sync_mss(sk, dst->pmtu); mss_now = tcp_sync_mss(sk, dst->pmtu);
if (tp->eff_sacks) if (tp->eff_sacks)
mss_now -= (TCPOLEN_SACK_BASE_ALIGNED + mss_now -= (TCPOLEN_SACK_BASE_ALIGNED +
...@@ -933,7 +938,7 @@ static __inline__ unsigned int tcp_current_mss(struct sock *sk) ...@@ -933,7 +938,7 @@ static __inline__ unsigned int tcp_current_mss(struct sock *sk)
static inline void tcp_initialize_rcv_mss(struct sock *sk) static inline void tcp_initialize_rcv_mss(struct sock *sk)
{ {
struct tcp_opt *tp = tcp_sk(sk); struct tcp_opt *tp = tcp_sk(sk);
unsigned int hint = min(tp->advmss, tp->mss_cache); unsigned int hint = min(tp->advmss, tp->mss_cache_std);
hint = min(hint, tp->rcv_wnd/2); hint = min(hint, tp->rcv_wnd/2);
hint = min(hint, TCP_MIN_RCVMSS); hint = min(hint, TCP_MIN_RCVMSS);
...@@ -1269,7 +1274,7 @@ static __inline__ void __tcp_push_pending_frames(struct sock *sk, ...@@ -1269,7 +1274,7 @@ static __inline__ void __tcp_push_pending_frames(struct sock *sk,
/*
 * Convenience wrapper: calls __tcp_push_pending_frames() with the socket's
 * current MSS and its nonagle setting.
 * Side-by-side diff: the post-merge (right) column passes 1 as the second
 * argument of tcp_current_mss(), i.e. the "large" flag of that function.
 */
static __inline__ void tcp_push_pending_frames(struct sock *sk, static __inline__ void tcp_push_pending_frames(struct sock *sk,
struct tcp_opt *tp) struct tcp_opt *tp)
{ {
__tcp_push_pending_frames(sk, tp, tcp_current_mss(sk), tp->nonagle); __tcp_push_pending_frames(sk, tp, tcp_current_mss(sk, 1), tp->nonagle);
} }
static __inline__ int tcp_may_send_now(struct sock *sk, struct tcp_opt *tp) static __inline__ int tcp_may_send_now(struct sock *sk, struct tcp_opt *tp)
...@@ -1277,7 +1282,7 @@ static __inline__ int tcp_may_send_now(struct sock *sk, struct tcp_opt *tp) ...@@ -1277,7 +1282,7 @@ static __inline__ int tcp_may_send_now(struct sock *sk, struct tcp_opt *tp)
struct sk_buff *skb = tp->send_head; struct sk_buff *skb = tp->send_head;
return (skb && return (skb &&
tcp_snd_test(tp, skb, tcp_current_mss(sk), tcp_snd_test(tp, skb, tcp_current_mss(sk, 1),
tcp_skb_is_last(sk, skb) ? 1 : tp->nonagle)); tcp_skb_is_last(sk, skb) ? 1 : tp->nonagle));
} }
...@@ -1839,6 +1844,15 @@ static inline int tcp_paws_check(struct tcp_opt *tp, int rst) ...@@ -1839,6 +1844,15 @@ static inline int tcp_paws_check(struct tcp_opt *tp, int rst)
return 1; return 1;
} }
/*
 * Record the feature flags of the route's output device in sk->route_caps.
 * NETIF_F_TSO is masked out when large sends have been turned off for this
 * socket (sk->no_largesend is set).
 */
static inline void tcp_v4_setup_caps(struct sock *sk, struct dst_entry *dst)
{
	int caps = dst->dev->features;

	if (sk->no_largesend)
		caps &= ~NETIF_F_TSO;

	sk->route_caps = caps;
}
#define TCP_CHECK_TIMER(sk) do { } while (0) #define TCP_CHECK_TIMER(sk) do { } while (0)
#endif /* _TCP_H */ #endif /* _TCP_H */
...@@ -28,12 +28,13 @@ TCP_ECN_send_synack(struct tcp_opt *tp, struct sk_buff *skb) ...@@ -28,12 +28,13 @@ TCP_ECN_send_synack(struct tcp_opt *tp, struct sk_buff *skb)
} }
/*
 * Decide whether an outgoing SYN advertises ECN.
 * Side-by-side diff: left column is the pre-merge version, right column
 * the post-merge version, which takes the socket as an extra first
 * argument.
 *
 * Post-merge behaviour: ecn_flags is cleared first.  ECN is requested
 * (ECE|CWR set on the SYN, ecn_flags = TCP_ECN_OK) only when sysctl_tcp_ecn
 * is enabled and the cached route capabilities do not include NETIF_F_TSO;
 * in that case sk->no_largesend is also set.
 */
static __inline__ void static __inline__ void
TCP_ECN_send_syn(struct tcp_opt *tp, struct sk_buff *skb) TCP_ECN_send_syn(struct sock *sk, struct tcp_opt *tp, struct sk_buff *skb)
{ {
tp->ecn_flags = 0; tp->ecn_flags = 0;
if (sysctl_tcp_ecn) { if (sysctl_tcp_ecn && !(sk->route_caps&NETIF_F_TSO)) {
TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_ECE|TCPCB_FLAG_CWR; TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_ECE|TCPCB_FLAG_CWR;
tp->ecn_flags = TCP_ECN_OK; tp->ecn_flags = TCP_ECN_OK;
sk->no_largesend = 1;
} }
} }
......
...@@ -209,6 +209,8 @@ struct sk_buff *alloc_skb(unsigned int size, int gfp_mask) ...@@ -209,6 +209,8 @@ struct sk_buff *alloc_skb(unsigned int size, int gfp_mask)
atomic_set(&skb->users, 1); atomic_set(&skb->users, 1);
atomic_set(&(skb_shinfo(skb)->dataref), 1); atomic_set(&(skb_shinfo(skb)->dataref), 1);
skb_shinfo(skb)->nr_frags = 0; skb_shinfo(skb)->nr_frags = 0;
skb_shinfo(skb)->tso_size = 0;
skb_shinfo(skb)->tso_segs = 0;
skb_shinfo(skb)->frag_list = NULL; skb_shinfo(skb)->frag_list = NULL;
out: out:
return skb; return skb;
...@@ -490,6 +492,7 @@ int skb_linearize(struct sk_buff *skb, int gfp_mask) ...@@ -490,6 +492,7 @@ int skb_linearize(struct sk_buff *skb, int gfp_mask)
unsigned int size; unsigned int size;
u8 *data; u8 *data;
long offset; long offset;
struct skb_shared_info *ninfo;
int headerlen = skb->data - skb->head; int headerlen = skb->data - skb->head;
int expand = (skb->tail + skb->data_len) - skb->end; int expand = (skb->tail + skb->data_len) - skb->end;
...@@ -509,6 +512,14 @@ int skb_linearize(struct sk_buff *skb, int gfp_mask) ...@@ -509,6 +512,14 @@ int skb_linearize(struct sk_buff *skb, int gfp_mask)
if (skb_copy_bits(skb, -headerlen, data, headerlen + skb->len)) if (skb_copy_bits(skb, -headerlen, data, headerlen + skb->len))
BUG(); BUG();
/* Set up shinfo */
ninfo = (struct skb_shared_info*)(data + size);
atomic_set(&ninfo->dataref, 1);
ninfo->tso_size = skb_shinfo(skb)->tso_size;
ninfo->tso_segs = skb_shinfo(skb)->tso_segs;
ninfo->nr_frags = 0;
ninfo->frag_list = NULL;
/* Offset between the two in bytes */ /* Offset between the two in bytes */
offset = data - skb->head; offset = data - skb->head;
...@@ -525,11 +536,6 @@ int skb_linearize(struct sk_buff *skb, int gfp_mask) ...@@ -525,11 +536,6 @@ int skb_linearize(struct sk_buff *skb, int gfp_mask)
skb->tail += offset; skb->tail += offset;
skb->data += offset; skb->data += offset;
/* Set up shinfo */
atomic_set(&(skb_shinfo(skb)->dataref), 1);
skb_shinfo(skb)->nr_frags = 0;
skb_shinfo(skb)->frag_list = NULL;
/* We are no longer a clone, even if we were. */ /* We are no longer a clone, even if we were. */
skb->cloned = 0; skb->cloned = 0;
...@@ -583,6 +589,8 @@ struct sk_buff *pskb_copy(struct sk_buff *skb, int gfp_mask) ...@@ -583,6 +589,8 @@ struct sk_buff *pskb_copy(struct sk_buff *skb, int gfp_mask)
} }
skb_shinfo(n)->nr_frags = i; skb_shinfo(n)->nr_frags = i;
} }
skb_shinfo(n)->tso_size = skb_shinfo(skb)->tso_size;
skb_shinfo(n)->tso_segs = skb_shinfo(skb)->tso_segs;
if (skb_shinfo(skb)->frag_list) { if (skb_shinfo(skb)->frag_list) {
skb_shinfo(n)->frag_list = skb_shinfo(skb)->frag_list; skb_shinfo(n)->frag_list = skb_shinfo(skb)->frag_list;
...@@ -694,6 +702,9 @@ struct sk_buff *skb_realloc_headroom(struct sk_buff *skb, unsigned int headroom) ...@@ -694,6 +702,9 @@ struct sk_buff *skb_realloc_headroom(struct sk_buff *skb, unsigned int headroom)
* *
* You must pass %GFP_ATOMIC as the allocation priority if this function * You must pass %GFP_ATOMIC as the allocation priority if this function
* is called from an interrupt. * is called from an interrupt.
*
* BUG ALERT: ip_summed is not copied. Why does this work? Is it used
* only by netfilter in the cases when checksum is recalculated? --ANK
*/ */
struct sk_buff *skb_copy_expand(const struct sk_buff *skb, struct sk_buff *skb_copy_expand(const struct sk_buff *skb,
int newheadroom, int newtailroom, int gfp_mask) int newheadroom, int newtailroom, int gfp_mask)
...@@ -716,6 +727,8 @@ struct sk_buff *skb_copy_expand(const struct sk_buff *skb, ...@@ -716,6 +727,8 @@ struct sk_buff *skb_copy_expand(const struct sk_buff *skb,
BUG(); BUG();
copy_skb_header(n, skb); copy_skb_header(n, skb);
skb_shinfo(n)->tso_size = skb_shinfo(skb)->tso_size;
skb_shinfo(n)->tso_segs = skb_shinfo(skb)->tso_segs;
return n; return n;
} }
......
...@@ -306,10 +306,20 @@ static inline int ip_queue_xmit2(struct sk_buff *skb) ...@@ -306,10 +306,20 @@ static inline int ip_queue_xmit2(struct sk_buff *skb)
iph = skb->nh.iph; iph = skb->nh.iph;
} }
if (skb->len > rt->u.dst.pmtu) if (skb->len > rt->u.dst.pmtu) {
goto fragment; unsigned int hlen;
if (!(sk->route_caps&NETIF_F_TSO))
goto fragment;
/* Hack zone: all this must be done by TCP. */
hlen = ((skb->h.raw - skb->data) + (skb->h.th->doff << 2));
skb_shinfo(skb)->tso_size = rt->u.dst.pmtu - hlen;
skb_shinfo(skb)->tso_segs =
(skb->len - hlen + skb_shinfo(skb)->tso_size - 1)/
skb_shinfo(skb)->tso_size - 1;
}
ip_select_ident(iph, &rt->u.dst, sk); ip_select_ident_more(iph, &rt->u.dst, sk, skb_shinfo(skb)->tso_segs);
/* Add an IP checksum. */ /* Add an IP checksum. */
ip_send_check(iph); ip_send_check(iph);
...@@ -371,7 +381,7 @@ int ip_queue_xmit(struct sk_buff *skb) ...@@ -371,7 +381,7 @@ int ip_queue_xmit(struct sk_buff *skb)
sk->bound_dev_if)) sk->bound_dev_if))
goto no_route; goto no_route;
__sk_dst_set(sk, &rt->u.dst); __sk_dst_set(sk, &rt->u.dst);
sk->route_caps = rt->u.dst.dev->features; tcp_v4_setup_caps(sk, &rt->u.dst);
} }
skb->dst = dst_clone(&rt->u.dst); skb->dst = dst_clone(&rt->u.dst);
...@@ -577,7 +587,7 @@ static int ip_build_xmit_slow(struct sock *sk, ...@@ -577,7 +587,7 @@ static int ip_build_xmit_slow(struct sock *sk,
* for packets without DF or having * for packets without DF or having
* been fragmented. * been fragmented.
*/ */
__ip_select_ident(iph, &rt->u.dst); __ip_select_ident(iph, &rt->u.dst, 0);
id = iph->id; id = iph->id;
} }
......
...@@ -729,7 +729,7 @@ static void ip_select_fb_ident(struct iphdr *iph) ...@@ -729,7 +729,7 @@ static void ip_select_fb_ident(struct iphdr *iph)
spin_unlock_bh(&ip_fb_id_lock); spin_unlock_bh(&ip_fb_id_lock);
} }
void __ip_select_ident(struct iphdr *iph, struct dst_entry *dst) void __ip_select_ident(struct iphdr *iph, struct dst_entry *dst, int more)
{ {
struct rtable *rt = (struct rtable *) dst; struct rtable *rt = (struct rtable *) dst;
...@@ -741,7 +741,7 @@ void __ip_select_ident(struct iphdr *iph, struct dst_entry *dst) ...@@ -741,7 +741,7 @@ void __ip_select_ident(struct iphdr *iph, struct dst_entry *dst)
so that we need not to grab a lock to dereference it. so that we need not to grab a lock to dereference it.
*/ */
if (rt->peer) { if (rt->peer) {
iph->id = htons(inet_getid(rt->peer)); iph->id = htons(inet_getid(rt->peer, more));
return; return;
} }
} else } else
......
...@@ -846,7 +846,7 @@ ssize_t do_tcp_sendpages(struct sock *sk, struct page **pages, int poffset, ...@@ -846,7 +846,7 @@ ssize_t do_tcp_sendpages(struct sock *sk, struct page **pages, int poffset,
clear_bit(SOCK_ASYNC_NOSPACE, &sk->socket->flags); clear_bit(SOCK_ASYNC_NOSPACE, &sk->socket->flags);
mss_now = tcp_current_mss(sk); mss_now = tcp_current_mss(sk, !(flags&MSG_OOB));
copied = 0; copied = 0;
err = -EPIPE; err = -EPIPE;
...@@ -921,7 +921,7 @@ ssize_t do_tcp_sendpages(struct sock *sk, struct page **pages, int poffset, ...@@ -921,7 +921,7 @@ ssize_t do_tcp_sendpages(struct sock *sk, struct page **pages, int poffset,
if ((err = wait_for_tcp_memory(sk, &timeo)) != 0) if ((err = wait_for_tcp_memory(sk, &timeo)) != 0)
goto do_error; goto do_error;
mss_now = tcp_current_mss(sk); mss_now = tcp_current_mss(sk, !(flags&MSG_OOB));
} }
out: out:
...@@ -1001,7 +1001,7 @@ static inline int skb_add_data(struct sk_buff *skb, char *from, int copy) ...@@ -1001,7 +1001,7 @@ static inline int skb_add_data(struct sk_buff *skb, char *from, int copy)
static inline int select_size(struct sock *sk, struct tcp_opt *tp) static inline int select_size(struct sock *sk, struct tcp_opt *tp)
{ {
int tmp = tp->mss_cache; int tmp = tp->mss_cache_std;
if (sk->route_caps & NETIF_F_SG) { if (sk->route_caps & NETIF_F_SG) {
int pgbreak = SKB_MAX_HEAD(MAX_TCP_HEADER); int pgbreak = SKB_MAX_HEAD(MAX_TCP_HEADER);
...@@ -1037,7 +1037,7 @@ int tcp_sendmsg(struct sock *sk, struct msghdr *msg, int size) ...@@ -1037,7 +1037,7 @@ int tcp_sendmsg(struct sock *sk, struct msghdr *msg, int size)
/* This should be in poll */ /* This should be in poll */
clear_bit(SOCK_ASYNC_NOSPACE, &sk->socket->flags); clear_bit(SOCK_ASYNC_NOSPACE, &sk->socket->flags);
mss_now = tcp_current_mss(sk); mss_now = tcp_current_mss(sk, !(flags&MSG_OOB));
/* Ok commence sending. */ /* Ok commence sending. */
iovlen = msg->msg_iovlen; iovlen = msg->msg_iovlen;
...@@ -1192,7 +1192,7 @@ int tcp_sendmsg(struct sock *sk, struct msghdr *msg, int size) ...@@ -1192,7 +1192,7 @@ int tcp_sendmsg(struct sock *sk, struct msghdr *msg, int size)
if ((err = wait_for_tcp_memory(sk, &timeo)) != 0) if ((err = wait_for_tcp_memory(sk, &timeo)) != 0)
goto do_error; goto do_error;
mss_now = tcp_current_mss(sk); mss_now = tcp_current_mss(sk, !(flags&MSG_OOB));
} }
} }
...@@ -2444,7 +2444,7 @@ int tcp_getsockopt(struct sock *sk, int level, int optname, char *optval, ...@@ -2444,7 +2444,7 @@ int tcp_getsockopt(struct sock *sk, int level, int optname, char *optval,
switch (optname) { switch (optname) {
case TCP_MAXSEG: case TCP_MAXSEG:
val = tp->mss_cache; val = tp->mss_cache_std;
if (!val && ((1 << sk->state) & (TCPF_CLOSE | TCPF_LISTEN))) if (!val && ((1 << sk->state) & (TCPF_CLOSE | TCPF_LISTEN)))
val = tp->user_mss; val = tp->user_mss;
break; break;
...@@ -2507,7 +2507,7 @@ int tcp_getsockopt(struct sock *sk, int level, int optname, char *optval, ...@@ -2507,7 +2507,7 @@ int tcp_getsockopt(struct sock *sk, int level, int optname, char *optval,
info.tcpi_rto = (1000000 * tp->rto) / HZ; info.tcpi_rto = (1000000 * tp->rto) / HZ;
info.tcpi_ato = (1000000 * tp->ack.ato) / HZ; info.tcpi_ato = (1000000 * tp->ack.ato) / HZ;
info.tcpi_snd_mss = tp->mss_cache; info.tcpi_snd_mss = tp->mss_cache_std;
info.tcpi_rcv_mss = tp->ack.rcv_mss; info.tcpi_rcv_mss = tp->ack.rcv_mss;
info.tcpi_unacked = tp->packets_out; info.tcpi_unacked = tp->packets_out;
......
...@@ -772,6 +772,14 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_ ...@@ -772,6 +772,14 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
int flag = 0; int flag = 0;
int i; int i;
/* So, SACKs for already sent large segments will be lost.
* Not good, but alternative is to resegment the queue. */
if (sk->route_caps&NETIF_F_TSO) {
sk->route_caps &= ~NETIF_F_TSO;
sk->no_largesend = 1;
tp->mss_cache = tp->mss_cache_std;
}
if (!tp->sacked_out) if (!tp->sacked_out)
tp->fackets_out = 0; tp->fackets_out = 0;
prior_fackets = tp->fackets_out; prior_fackets = tp->fackets_out;
...@@ -2963,6 +2971,8 @@ void tcp_cwnd_application_limited(struct sock *sk) ...@@ -2963,6 +2971,8 @@ void tcp_cwnd_application_limited(struct sock *sk)
/* When incoming ACK allowed to free some skb from write_queue, /* When incoming ACK allowed to free some skb from write_queue,
* we remember this event in flag tp->queue_shrunk and wake up socket * we remember this event in flag tp->queue_shrunk and wake up socket
* on the exit from tcp input handler. * on the exit from tcp input handler.
*
* PROBLEM: sndbuf expansion does not work well with largesend.
*/ */
static void tcp_new_space(struct sock *sk) static void tcp_new_space(struct sock *sk)
{ {
...@@ -2972,8 +2982,8 @@ static void tcp_new_space(struct sock *sk) ...@@ -2972,8 +2982,8 @@ static void tcp_new_space(struct sock *sk)
!(sk->userlocks&SOCK_SNDBUF_LOCK) && !(sk->userlocks&SOCK_SNDBUF_LOCK) &&
!tcp_memory_pressure && !tcp_memory_pressure &&
atomic_read(&tcp_memory_allocated) < sysctl_tcp_mem[0]) { atomic_read(&tcp_memory_allocated) < sysctl_tcp_mem[0]) {
int sndmem = tp->mss_clamp + MAX_TCP_HEADER + 16 + int sndmem = max_t(u32, tp->mss_clamp, tp->mss_cache) +
sizeof(struct sk_buff), MAX_TCP_HEADER + 16 + sizeof(struct sk_buff),
demanded = max_t(unsigned int, tp->snd_cwnd, demanded = max_t(unsigned int, tp->snd_cwnd,
tp->reordering + 1); tp->reordering + 1);
sndmem *= 2*demanded; sndmem *= 2*demanded;
...@@ -3502,6 +3512,8 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, ...@@ -3502,6 +3512,8 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
*/ */
TCP_ECN_rcv_synack(tp, th); TCP_ECN_rcv_synack(tp, th);
if (tp->ecn_flags&TCP_ECN_OK)
sk->no_largesend = 1;
tp->snd_wl1 = TCP_SKB_CB(skb)->seq; tp->snd_wl1 = TCP_SKB_CB(skb)->seq;
tcp_ack(sk, skb, FLAG_SLOWPATH); tcp_ack(sk, skb, FLAG_SLOWPATH);
...@@ -3627,10 +3639,13 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, ...@@ -3627,10 +3639,13 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
tp->snd_wl1 = TCP_SKB_CB(skb)->seq; tp->snd_wl1 = TCP_SKB_CB(skb)->seq;
tp->max_window = tp->snd_wnd; tp->max_window = tp->snd_wnd;
TCP_ECN_rcv_syn(tp, th);
if (tp->ecn_flags&TCP_ECN_OK)
sk->no_largesend = 1;
tcp_sync_mss(sk, tp->pmtu_cookie); tcp_sync_mss(sk, tp->pmtu_cookie);
tcp_initialize_rcv_mss(sk); tcp_initialize_rcv_mss(sk);
TCP_ECN_rcv_syn(tp, th);
tcp_send_synack(sk); tcp_send_synack(sk);
#if 0 #if 0
......
...@@ -780,7 +780,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) ...@@ -780,7 +780,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
} }
__sk_dst_set(sk, &rt->u.dst); __sk_dst_set(sk, &rt->u.dst);
sk->route_caps = rt->u.dst.dev->features; tcp_v4_setup_caps(sk, &rt->u.dst);
if (!inet->opt || !inet->opt->srr) if (!inet->opt || !inet->opt->srr)
daddr = rt->rt_dst; daddr = rt->rt_dst;
...@@ -1559,7 +1559,7 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb, ...@@ -1559,7 +1559,7 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
goto exit; goto exit;
newsk->dst_cache = dst; newsk->dst_cache = dst;
newsk->route_caps = dst->dev->features; tcp_v4_setup_caps(newsk, dst);
newtp = tcp_sk(newsk); newtp = tcp_sk(newsk);
newinet = inet_sk(newsk); newinet = inet_sk(newsk);
...@@ -1865,7 +1865,7 @@ static int tcp_v4_reselect_saddr(struct sock *sk) ...@@ -1865,7 +1865,7 @@ static int tcp_v4_reselect_saddr(struct sock *sk)
return err; return err;
__sk_dst_set(sk, &rt->u.dst); __sk_dst_set(sk, &rt->u.dst);
sk->route_caps = rt->u.dst.dev->features; tcp_v4_setup_caps(sk, &rt->u.dst);
new_saddr = rt->rt_src; new_saddr = rt->rt_src;
...@@ -1913,7 +1913,7 @@ int tcp_v4_rebuild_header(struct sock *sk) ...@@ -1913,7 +1913,7 @@ int tcp_v4_rebuild_header(struct sock *sk)
RT_CONN_FLAGS(sk), sk->bound_dev_if); RT_CONN_FLAGS(sk), sk->bound_dev_if);
if (!err) { if (!err) {
__sk_dst_set(sk, &rt->u.dst); __sk_dst_set(sk, &rt->u.dst);
sk->route_caps = rt->u.dst.dev->features; tcp_v4_setup_caps(sk, &rt->u.dst);
return 0; return 0;
} }
......
...@@ -786,6 +786,8 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct open_request *req, ...@@ -786,6 +786,8 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct open_request *req,
newtp->ack.last_seg_size = skb->len-newtp->tcp_header_len; newtp->ack.last_seg_size = skb->len-newtp->tcp_header_len;
newtp->mss_clamp = req->mss; newtp->mss_clamp = req->mss;
TCP_ECN_openreq_child(newtp, req); TCP_ECN_openreq_child(newtp, req);
if (newtp->ecn_flags&TCP_ECN_OK)
newsk->no_largesend = 1;
TCP_INC_STATS_BH(TcpPassiveOpens); TCP_INC_STATS_BH(TcpPassiveOpens);
} }
......
...@@ -531,7 +531,21 @@ int tcp_sync_mss(struct sock *sk, u32 pmtu) ...@@ -531,7 +531,21 @@ int tcp_sync_mss(struct sock *sk, u32 pmtu)
/* And store cached results */ /* And store cached results */
tp->pmtu_cookie = pmtu; tp->pmtu_cookie = pmtu;
tp->mss_cache = mss_now; tp->mss_cache = tp->mss_cache_std = mss_now;
if (sk->route_caps&NETIF_F_TSO) {
int large_mss;
large_mss = 65535 - tp->af_specific->net_header_len -
tp->ext_header_len - tp->tcp_header_len;
if (tp->max_window && large_mss > (tp->max_window>>1))
large_mss = max((tp->max_window>>1), 68U - tp->tcp_header_len);
/* Always keep large mss multiple of real mss. */
tp->mss_cache = mss_now*(large_mss/mss_now);
}
return mss_now; return mss_now;
} }
...@@ -561,7 +575,7 @@ int tcp_write_xmit(struct sock *sk, int nonagle) ...@@ -561,7 +575,7 @@ int tcp_write_xmit(struct sock *sk, int nonagle)
* We also handle things correctly when the user adds some * We also handle things correctly when the user adds some
* IP options mid-stream. Silly to do, but cover it. * IP options mid-stream. Silly to do, but cover it.
*/ */
mss_now = tcp_current_mss(sk); mss_now = tcp_current_mss(sk, 1);
while((skb = tp->send_head) && while((skb = tp->send_head) &&
tcp_snd_test(tp, skb, mss_now, tcp_skb_is_last(sk, skb) ? nonagle : 1)) { tcp_snd_test(tp, skb, mss_now, tcp_skb_is_last(sk, skb) ? nonagle : 1)) {
...@@ -767,7 +781,7 @@ void tcp_simple_retransmit(struct sock *sk) ...@@ -767,7 +781,7 @@ void tcp_simple_retransmit(struct sock *sk)
{ {
struct tcp_opt *tp = tcp_sk(sk); struct tcp_opt *tp = tcp_sk(sk);
struct sk_buff *skb; struct sk_buff *skb;
unsigned int mss = tcp_current_mss(sk); unsigned int mss = tcp_current_mss(sk, 0);
int lost = 0; int lost = 0;
for_retrans_queue(skb, sk, tp) { for_retrans_queue(skb, sk, tp) {
...@@ -812,7 +826,7 @@ void tcp_simple_retransmit(struct sock *sk) ...@@ -812,7 +826,7 @@ void tcp_simple_retransmit(struct sock *sk)
int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb) int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
{ {
struct tcp_opt *tp = tcp_sk(sk); struct tcp_opt *tp = tcp_sk(sk);
unsigned int cur_mss = tcp_current_mss(sk); unsigned int cur_mss = tcp_current_mss(sk, 0);
int err; int err;
/* Do not send more than we queued. 1/4 is reserved for possible /* Do not send more than we queued. 1/4 is reserved for possible
...@@ -821,6 +835,27 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb) ...@@ -821,6 +835,27 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
if (atomic_read(&sk->wmem_alloc) > min(sk->wmem_queued+(sk->wmem_queued>>2),sk->sndbuf)) if (atomic_read(&sk->wmem_alloc) > min(sk->wmem_queued+(sk->wmem_queued>>2),sk->sndbuf))
return -EAGAIN; return -EAGAIN;
if (before(TCP_SKB_CB(skb)->seq, tp->snd_una)) {
struct sk_buff *skb2;
if (before(TCP_SKB_CB(skb)->end_seq, tp->snd_una))
BUG();
if (sk->route_caps&NETIF_F_TSO) {
sk->route_caps &= ~NETIF_F_TSO;
sk->no_largesend = 1;
tp->mss_cache = tp->mss_cache_std;
}
if(tcp_fragment(sk, skb, tp->snd_una - TCP_SKB_CB(skb)->seq))
return -ENOMEM;
skb2 = skb->next;
__skb_unlink(skb, skb->list);
tcp_free_skb(sk, skb);
skb = skb2;
}
/* If receiver has shrunk his window, and skb is out of /* If receiver has shrunk his window, and skb is out of
* new window, do not retransmit it. The exception is the * new window, do not retransmit it. The exception is the
* case, when window is shrunk to zero. In this case * case, when window is shrunk to zero. In this case
...@@ -998,7 +1033,7 @@ void tcp_send_fin(struct sock *sk) ...@@ -998,7 +1033,7 @@ void tcp_send_fin(struct sock *sk)
* unsent frames. But be careful about outgoing SACKS * unsent frames. But be careful about outgoing SACKS
* and IP options. * and IP options.
*/ */
mss_now = tcp_current_mss(sk); mss_now = tcp_current_mss(sk, 1);
if(tp->send_head != NULL) { if(tp->send_head != NULL) {
TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_FIN; TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_FIN;
...@@ -1121,6 +1156,8 @@ struct sk_buff * tcp_make_synack(struct sock *sk, struct dst_entry *dst, ...@@ -1121,6 +1156,8 @@ struct sk_buff * tcp_make_synack(struct sock *sk, struct dst_entry *dst,
memset(th, 0, sizeof(struct tcphdr)); memset(th, 0, sizeof(struct tcphdr));
th->syn = 1; th->syn = 1;
th->ack = 1; th->ack = 1;
if (dst->dev->features&NETIF_F_TSO)
req->ecn_ok = 0;
TCP_ECN_make_synack(req, th); TCP_ECN_make_synack(req, th);
th->source = inet_sk(sk)->sport; th->source = inet_sk(sk)->sport;
th->dest = req->rmt_port; th->dest = req->rmt_port;
...@@ -1224,7 +1261,7 @@ int tcp_connect(struct sock *sk) ...@@ -1224,7 +1261,7 @@ int tcp_connect(struct sock *sk)
skb_reserve(buff, MAX_TCP_HEADER); skb_reserve(buff, MAX_TCP_HEADER);
TCP_SKB_CB(buff)->flags = TCPCB_FLAG_SYN; TCP_SKB_CB(buff)->flags = TCPCB_FLAG_SYN;
TCP_ECN_send_syn(tp, buff); TCP_ECN_send_syn(sk, tp, buff);
TCP_SKB_CB(buff)->sacked = 0; TCP_SKB_CB(buff)->sacked = 0;
buff->csum = 0; buff->csum = 0;
TCP_SKB_CB(buff)->seq = tp->write_seq++; TCP_SKB_CB(buff)->seq = tp->write_seq++;
...@@ -1379,7 +1416,7 @@ int tcp_write_wakeup(struct sock *sk) ...@@ -1379,7 +1416,7 @@ int tcp_write_wakeup(struct sock *sk)
if ((skb = tp->send_head) != NULL && if ((skb = tp->send_head) != NULL &&
before(TCP_SKB_CB(skb)->seq, tp->snd_una+tp->snd_wnd)) { before(TCP_SKB_CB(skb)->seq, tp->snd_una+tp->snd_wnd)) {
int err; int err;
int mss = tcp_current_mss(sk); int mss = tcp_current_mss(sk, 0);
int seg_size = tp->snd_una+tp->snd_wnd-TCP_SKB_CB(skb)->seq; int seg_size = tp->snd_una+tp->snd_wnd-TCP_SKB_CB(skb)->seq;
if (before(tp->pushed_seq, TCP_SKB_CB(skb)->end_seq)) if (before(tp->pushed_seq, TCP_SKB_CB(skb)->end_seq))
...@@ -1395,6 +1432,13 @@ int tcp_write_wakeup(struct sock *sk) ...@@ -1395,6 +1432,13 @@ int tcp_write_wakeup(struct sock *sk)
TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_PSH; TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_PSH;
if (tcp_fragment(sk, skb, seg_size)) if (tcp_fragment(sk, skb, seg_size))
return -1; return -1;
/* SWS override triggered forced fragmentation.
* Disable TSO, the connection is too sick. */
if (sk->route_caps&NETIF_F_TSO) {
sk->no_largesend = 1;
sk->route_caps &= ~NETIF_F_TSO;
tp->mss_cache = tp->mss_cache_std;
}
} }
TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_PSH; TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_PSH;
TCP_SKB_CB(skb)->when = tcp_time_stamp; TCP_SKB_CB(skb)->when = tcp_time_stamp;
......
...@@ -659,7 +659,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, ...@@ -659,7 +659,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
} }
ip6_dst_store(sk, dst, NULL); ip6_dst_store(sk, dst, NULL);
sk->route_caps = dst->dev->features&~NETIF_F_IP_CSUM; sk->route_caps = dst->dev->features&~(NETIF_F_IP_CSUM|NETIF_F_TSO);
if (saddr == NULL) { if (saddr == NULL) {
err = ipv6_get_saddr(dst, &np->daddr, &saddr_buf); err = ipv6_get_saddr(dst, &np->daddr, &saddr_buf);
...@@ -1333,7 +1333,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, ...@@ -1333,7 +1333,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
MOD_INC_USE_COUNT; MOD_INC_USE_COUNT;
ip6_dst_store(newsk, dst, NULL); ip6_dst_store(newsk, dst, NULL);
sk->route_caps = dst->dev->features&~NETIF_F_IP_CSUM; sk->route_caps = dst->dev->features&~(NETIF_F_IP_CSUM|NETIF_F_TSO);
newtcp6sk = (struct tcp6_sock *)newsk; newtcp6sk = (struct tcp6_sock *)newsk;
newtcp6sk->pinet6 = &newtcp6sk->inet6; newtcp6sk->pinet6 = &newtcp6sk->inet6;
...@@ -1721,7 +1721,7 @@ static int tcp_v6_rebuild_header(struct sock *sk) ...@@ -1721,7 +1721,7 @@ static int tcp_v6_rebuild_header(struct sock *sk)
} }
ip6_dst_store(sk, dst, NULL); ip6_dst_store(sk, dst, NULL);
sk->route_caps = dst->dev->features&~NETIF_F_IP_CSUM; sk->route_caps = dst->dev->features&~(NETIF_F_IP_CSUM|NETIF_F_TSO);
} }
return 0; return 0;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment