Commit 2ade4361 authored by Jeff Garzik's avatar Jeff Garzik

Merge branch 'lro'

parents 75e47b36 79dc1901
...@@ -57,16 +57,20 @@ ...@@ -57,16 +57,20 @@
#include <linux/ethtool.h> #include <linux/ethtool.h>
#include <linux/workqueue.h> #include <linux/workqueue.h>
#include <linux/if_vlan.h> #include <linux/if_vlan.h>
#include <linux/ip.h>
#include <linux/tcp.h>
#include <net/tcp.h>
#include <asm/system.h> #include <asm/system.h>
#include <asm/uaccess.h> #include <asm/uaccess.h>
#include <asm/io.h> #include <asm/io.h>
#include <asm/div64.h>
/* local include */ /* local include */
#include "s2io.h" #include "s2io.h"
#include "s2io-regs.h" #include "s2io-regs.h"
#define DRV_VERSION "Version 2.0.9.4" #define DRV_VERSION "2.0.11.2"
/* S2io Driver name & version. */ /* S2io Driver name & version. */
static char s2io_driver_name[] = "Neterion"; static char s2io_driver_name[] = "Neterion";
...@@ -168,6 +172,11 @@ static char ethtool_stats_keys[][ETH_GSTRING_LEN] = { ...@@ -168,6 +172,11 @@ static char ethtool_stats_keys[][ETH_GSTRING_LEN] = {
{"\n DRIVER STATISTICS"}, {"\n DRIVER STATISTICS"},
{"single_bit_ecc_errs"}, {"single_bit_ecc_errs"},
{"double_bit_ecc_errs"}, {"double_bit_ecc_errs"},
("lro_aggregated_pkts"),
("lro_flush_both_count"),
("lro_out_of_sequence_pkts"),
("lro_flush_due_to_max_pkts"),
("lro_avg_aggr_pkts"),
}; };
#define S2IO_STAT_LEN sizeof(ethtool_stats_keys)/ ETH_GSTRING_LEN #define S2IO_STAT_LEN sizeof(ethtool_stats_keys)/ ETH_GSTRING_LEN
...@@ -317,6 +326,12 @@ static unsigned int indicate_max_pkts; ...@@ -317,6 +326,12 @@ static unsigned int indicate_max_pkts;
static unsigned int rxsync_frequency = 3; static unsigned int rxsync_frequency = 3;
/* Interrupt type. Values can be 0(INTA), 1(MSI), 2(MSI_X) */ /* Interrupt type. Values can be 0(INTA), 1(MSI), 2(MSI_X) */
static unsigned int intr_type = 0; static unsigned int intr_type = 0;
/* Large receive offload feature */
static unsigned int lro = 0;
/* Max pkts to be aggregated by LRO at one time. If not specified,
* aggregation happens until we hit max IP pkt size(64K)
*/
static unsigned int lro_max_pkts = 0xFFFF;
/* /*
* S2IO device table. * S2IO device table.
...@@ -1476,6 +1491,19 @@ static int init_nic(struct s2io_nic *nic) ...@@ -1476,6 +1491,19 @@ static int init_nic(struct s2io_nic *nic)
writel((u32) (val64 >> 32), (add + 4)); writel((u32) (val64 >> 32), (add + 4));
val64 = readq(&bar0->mac_cfg); val64 = readq(&bar0->mac_cfg);
/* Enable FCS stripping by adapter */
add = &bar0->mac_cfg;
val64 = readq(&bar0->mac_cfg);
val64 |= MAC_CFG_RMAC_STRIP_FCS;
if (nic->device_type == XFRAME_II_DEVICE)
writeq(val64, &bar0->mac_cfg);
else {
writeq(RMAC_CFG_KEY(0x4C0D), &bar0->rmac_cfg_key);
writel((u32) (val64), add);
writeq(RMAC_CFG_KEY(0x4C0D), &bar0->rmac_cfg_key);
writel((u32) (val64 >> 32), (add + 4));
}
/* /*
* Set the time value to be inserted in the pause frame * Set the time value to be inserted in the pause frame
* generated by xena. * generated by xena.
...@@ -2569,6 +2597,8 @@ static void rx_intr_handler(ring_info_t *ring_data) ...@@ -2569,6 +2597,8 @@ static void rx_intr_handler(ring_info_t *ring_data)
#ifndef CONFIG_S2IO_NAPI #ifndef CONFIG_S2IO_NAPI
int pkt_cnt = 0; int pkt_cnt = 0;
#endif #endif
int i;
spin_lock(&nic->rx_lock); spin_lock(&nic->rx_lock);
if (atomic_read(&nic->card_state) == CARD_DOWN) { if (atomic_read(&nic->card_state) == CARD_DOWN) {
DBG_PRINT(INTR_DBG, "%s: %s going down for reset\n", DBG_PRINT(INTR_DBG, "%s: %s going down for reset\n",
...@@ -2661,6 +2691,18 @@ static void rx_intr_handler(ring_info_t *ring_data) ...@@ -2661,6 +2691,18 @@ static void rx_intr_handler(ring_info_t *ring_data)
break; break;
#endif #endif
} }
if (nic->lro) {
/* Clear all LRO sessions before exiting */
for (i=0; i<MAX_LRO_SESSIONS; i++) {
lro_t *lro = &nic->lro0_n[i];
if (lro->in_use) {
update_L3L4_header(nic, lro);
queue_rx_frame(lro->parent);
clear_lro_session(lro);
}
}
}
spin_unlock(&nic->rx_lock); spin_unlock(&nic->rx_lock);
} }
...@@ -3668,23 +3710,32 @@ s2io_msi_handle(int irq, void *dev_id, struct pt_regs *regs) ...@@ -3668,23 +3710,32 @@ s2io_msi_handle(int irq, void *dev_id, struct pt_regs *regs)
* else schedule a tasklet to reallocate the buffers. * else schedule a tasklet to reallocate the buffers.
*/ */
for (i = 0; i < config->rx_ring_num; i++) { for (i = 0; i < config->rx_ring_num; i++) {
int rxb_size = atomic_read(&sp->rx_bufs_left[i]); if (!sp->lro) {
int level = rx_buffer_level(sp, rxb_size, i); int rxb_size = atomic_read(&sp->rx_bufs_left[i]);
int level = rx_buffer_level(sp, rxb_size, i);
if ((level == PANIC) && (!TASKLET_IN_USE)) {
DBG_PRINT(INTR_DBG, "%s: Rx BD hit ", dev->name); if ((level == PANIC) && (!TASKLET_IN_USE)) {
DBG_PRINT(INTR_DBG, "PANIC levels\n"); DBG_PRINT(INTR_DBG, "%s: Rx BD hit ",
if ((ret = fill_rx_buffers(sp, i)) == -ENOMEM) { dev->name);
DBG_PRINT(ERR_DBG, "%s:Out of memory", DBG_PRINT(INTR_DBG, "PANIC levels\n");
dev->name); if ((ret = fill_rx_buffers(sp, i)) == -ENOMEM) {
DBG_PRINT(ERR_DBG, " in ISR!!\n"); DBG_PRINT(ERR_DBG, "%s:Out of memory",
dev->name);
DBG_PRINT(ERR_DBG, " in ISR!!\n");
clear_bit(0, (&sp->tasklet_status));
atomic_dec(&sp->isr_cnt);
return IRQ_HANDLED;
}
clear_bit(0, (&sp->tasklet_status)); clear_bit(0, (&sp->tasklet_status));
atomic_dec(&sp->isr_cnt); } else if (level == LOW) {
return IRQ_HANDLED; tasklet_schedule(&sp->task);
} }
clear_bit(0, (&sp->tasklet_status)); }
} else if (level == LOW) { else if (fill_rx_buffers(sp, i) == -ENOMEM) {
tasklet_schedule(&sp->task); DBG_PRINT(ERR_DBG, "%s:Out of memory",
dev->name);
DBG_PRINT(ERR_DBG, " in Rx Intr!!\n");
break;
} }
} }
...@@ -3697,29 +3748,37 @@ s2io_msix_ring_handle(int irq, void *dev_id, struct pt_regs *regs) ...@@ -3697,29 +3748,37 @@ s2io_msix_ring_handle(int irq, void *dev_id, struct pt_regs *regs)
{ {
ring_info_t *ring = (ring_info_t *)dev_id; ring_info_t *ring = (ring_info_t *)dev_id;
nic_t *sp = ring->nic; nic_t *sp = ring->nic;
struct net_device *dev = (struct net_device *) dev_id;
int rxb_size, level, rng_n; int rxb_size, level, rng_n;
atomic_inc(&sp->isr_cnt); atomic_inc(&sp->isr_cnt);
rx_intr_handler(ring); rx_intr_handler(ring);
rng_n = ring->ring_no; rng_n = ring->ring_no;
rxb_size = atomic_read(&sp->rx_bufs_left[rng_n]); if (!sp->lro) {
level = rx_buffer_level(sp, rxb_size, rng_n); rxb_size = atomic_read(&sp->rx_bufs_left[rng_n]);
level = rx_buffer_level(sp, rxb_size, rng_n);
if ((level == PANIC) && (!TASKLET_IN_USE)) {
int ret; if ((level == PANIC) && (!TASKLET_IN_USE)) {
DBG_PRINT(INTR_DBG, "%s: Rx BD hit ", __FUNCTION__); int ret;
DBG_PRINT(INTR_DBG, "PANIC levels\n"); DBG_PRINT(INTR_DBG, "%s: Rx BD hit ", __FUNCTION__);
if ((ret = fill_rx_buffers(sp, rng_n)) == -ENOMEM) { DBG_PRINT(INTR_DBG, "PANIC levels\n");
DBG_PRINT(ERR_DBG, "Out of memory in %s", if ((ret = fill_rx_buffers(sp, rng_n)) == -ENOMEM) {
__FUNCTION__); DBG_PRINT(ERR_DBG, "Out of memory in %s",
__FUNCTION__);
clear_bit(0, (&sp->tasklet_status));
return IRQ_HANDLED;
}
clear_bit(0, (&sp->tasklet_status)); clear_bit(0, (&sp->tasklet_status));
return IRQ_HANDLED; } else if (level == LOW) {
tasklet_schedule(&sp->task);
} }
clear_bit(0, (&sp->tasklet_status));
} else if (level == LOW) {
tasklet_schedule(&sp->task);
} }
else if (fill_rx_buffers(sp, rng_n) == -ENOMEM) {
DBG_PRINT(ERR_DBG, "%s:Out of memory", dev->name);
DBG_PRINT(ERR_DBG, " in Rx Intr!!\n");
}
atomic_dec(&sp->isr_cnt); atomic_dec(&sp->isr_cnt);
return IRQ_HANDLED; return IRQ_HANDLED;
...@@ -3875,24 +3934,33 @@ static irqreturn_t s2io_isr(int irq, void *dev_id, struct pt_regs *regs) ...@@ -3875,24 +3934,33 @@ static irqreturn_t s2io_isr(int irq, void *dev_id, struct pt_regs *regs)
*/ */
#ifndef CONFIG_S2IO_NAPI #ifndef CONFIG_S2IO_NAPI
for (i = 0; i < config->rx_ring_num; i++) { for (i = 0; i < config->rx_ring_num; i++) {
int ret; if (!sp->lro) {
int rxb_size = atomic_read(&sp->rx_bufs_left[i]); int ret;
int level = rx_buffer_level(sp, rxb_size, i); int rxb_size = atomic_read(&sp->rx_bufs_left[i]);
int level = rx_buffer_level(sp, rxb_size, i);
if ((level == PANIC) && (!TASKLET_IN_USE)) {
DBG_PRINT(INTR_DBG, "%s: Rx BD hit ", dev->name); if ((level == PANIC) && (!TASKLET_IN_USE)) {
DBG_PRINT(INTR_DBG, "PANIC levels\n"); DBG_PRINT(INTR_DBG, "%s: Rx BD hit ",
if ((ret = fill_rx_buffers(sp, i)) == -ENOMEM) { dev->name);
DBG_PRINT(ERR_DBG, "%s:Out of memory", DBG_PRINT(INTR_DBG, "PANIC levels\n");
dev->name); if ((ret = fill_rx_buffers(sp, i)) == -ENOMEM) {
DBG_PRINT(ERR_DBG, " in ISR!!\n"); DBG_PRINT(ERR_DBG, "%s:Out of memory",
dev->name);
DBG_PRINT(ERR_DBG, " in ISR!!\n");
clear_bit(0, (&sp->tasklet_status));
atomic_dec(&sp->isr_cnt);
return IRQ_HANDLED;
}
clear_bit(0, (&sp->tasklet_status)); clear_bit(0, (&sp->tasklet_status));
atomic_dec(&sp->isr_cnt); } else if (level == LOW) {
return IRQ_HANDLED; tasklet_schedule(&sp->task);
} }
clear_bit(0, (&sp->tasklet_status)); }
} else if (level == LOW) { else if (fill_rx_buffers(sp, i) == -ENOMEM) {
tasklet_schedule(&sp->task); DBG_PRINT(ERR_DBG, "%s:Out of memory",
dev->name);
DBG_PRINT(ERR_DBG, " in Rx intr!!\n");
break;
} }
} }
#endif #endif
...@@ -5043,6 +5111,7 @@ static void s2io_get_ethtool_stats(struct net_device *dev, ...@@ -5043,6 +5111,7 @@ static void s2io_get_ethtool_stats(struct net_device *dev,
int i = 0; int i = 0;
nic_t *sp = dev->priv; nic_t *sp = dev->priv;
StatInfo_t *stat_info = sp->mac_control.stats_info; StatInfo_t *stat_info = sp->mac_control.stats_info;
u64 tmp;
s2io_updt_stats(sp); s2io_updt_stats(sp);
tmp_stats[i++] = tmp_stats[i++] =
...@@ -5134,6 +5203,16 @@ static void s2io_get_ethtool_stats(struct net_device *dev, ...@@ -5134,6 +5203,16 @@ static void s2io_get_ethtool_stats(struct net_device *dev,
tmp_stats[i++] = 0; tmp_stats[i++] = 0;
tmp_stats[i++] = stat_info->sw_stat.single_ecc_errs; tmp_stats[i++] = stat_info->sw_stat.single_ecc_errs;
tmp_stats[i++] = stat_info->sw_stat.double_ecc_errs; tmp_stats[i++] = stat_info->sw_stat.double_ecc_errs;
tmp_stats[i++] = stat_info->sw_stat.clubbed_frms_cnt;
tmp_stats[i++] = stat_info->sw_stat.sending_both;
tmp_stats[i++] = stat_info->sw_stat.outof_sequence_pkts;
tmp_stats[i++] = stat_info->sw_stat.flush_max_pkts;
tmp = 0;
if (stat_info->sw_stat.num_aggregations) {
tmp = stat_info->sw_stat.sum_avg_pkts_aggregated;
do_div(tmp, stat_info->sw_stat.num_aggregations);
}
tmp_stats[i++] = tmp;
} }
static int s2io_ethtool_get_regs_len(struct net_device *dev) static int s2io_ethtool_get_regs_len(struct net_device *dev)
...@@ -5515,6 +5594,14 @@ static int s2io_card_up(nic_t * sp) ...@@ -5515,6 +5594,14 @@ static int s2io_card_up(nic_t * sp)
/* Setting its receive mode */ /* Setting its receive mode */
s2io_set_multicast(dev); s2io_set_multicast(dev);
if (sp->lro) {
/* Initialize max aggregatable pkts based on MTU */
sp->lro_max_aggr_per_sess = ((1<<16) - 1) / dev->mtu;
/* Check if we can use(if specified) user provided value */
if (lro_max_pkts < sp->lro_max_aggr_per_sess)
sp->lro_max_aggr_per_sess = lro_max_pkts;
}
/* Enable tasklet for the device */ /* Enable tasklet for the device */
tasklet_init(&sp->task, s2io_tasklet, (unsigned long) dev); tasklet_init(&sp->task, s2io_tasklet, (unsigned long) dev);
...@@ -5607,6 +5694,7 @@ static int rx_osm_handler(ring_info_t *ring_data, RxD_t * rxdp) ...@@ -5607,6 +5694,7 @@ static int rx_osm_handler(ring_info_t *ring_data, RxD_t * rxdp)
((unsigned long) rxdp->Host_Control); ((unsigned long) rxdp->Host_Control);
int ring_no = ring_data->ring_no; int ring_no = ring_data->ring_no;
u16 l3_csum, l4_csum; u16 l3_csum, l4_csum;
lro_t *lro;
skb->dev = dev; skb->dev = dev;
if (rxdp->Control_1 & RXD_T_CODE) { if (rxdp->Control_1 & RXD_T_CODE) {
...@@ -5655,7 +5743,8 @@ static int rx_osm_handler(ring_info_t *ring_data, RxD_t * rxdp) ...@@ -5655,7 +5743,8 @@ static int rx_osm_handler(ring_info_t *ring_data, RxD_t * rxdp)
skb_put(skb, buf2_len); skb_put(skb, buf2_len);
} }
if ((rxdp->Control_1 & TCP_OR_UDP_FRAME) && if ((rxdp->Control_1 & TCP_OR_UDP_FRAME) && ((!sp->lro) ||
(sp->lro && (!(rxdp->Control_1 & RXD_FRAME_IP_FRAG)))) &&
(sp->rx_csum)) { (sp->rx_csum)) {
l3_csum = RXD_GET_L3_CKSUM(rxdp->Control_1); l3_csum = RXD_GET_L3_CKSUM(rxdp->Control_1);
l4_csum = RXD_GET_L4_CKSUM(rxdp->Control_1); l4_csum = RXD_GET_L4_CKSUM(rxdp->Control_1);
...@@ -5666,6 +5755,54 @@ static int rx_osm_handler(ring_info_t *ring_data, RxD_t * rxdp) ...@@ -5666,6 +5755,54 @@ static int rx_osm_handler(ring_info_t *ring_data, RxD_t * rxdp)
* a flag in the RxD. * a flag in the RxD.
*/ */
skb->ip_summed = CHECKSUM_UNNECESSARY; skb->ip_summed = CHECKSUM_UNNECESSARY;
if (sp->lro) {
u32 tcp_len;
u8 *tcp;
int ret = 0;
ret = s2io_club_tcp_session(skb->data, &tcp,
&tcp_len, &lro, rxdp, sp);
switch (ret) {
case 3: /* Begin anew */
lro->parent = skb;
goto aggregate;
case 1: /* Aggregate */
{
lro_append_pkt(sp, lro,
skb, tcp_len);
goto aggregate;
}
case 4: /* Flush session */
{
lro_append_pkt(sp, lro,
skb, tcp_len);
queue_rx_frame(lro->parent);
clear_lro_session(lro);
sp->mac_control.stats_info->
sw_stat.flush_max_pkts++;
goto aggregate;
}
case 2: /* Flush both */
lro->parent->data_len =
lro->frags_len;
sp->mac_control.stats_info->
sw_stat.sending_both++;
queue_rx_frame(lro->parent);
clear_lro_session(lro);
goto send_up;
case 0: /* sessions exceeded */
case 5: /*
* First pkt in session not
* L3/L4 aggregatable
*/
break;
default:
DBG_PRINT(ERR_DBG,
"%s: Samadhana!!\n",
__FUNCTION__);
BUG();
}
}
} else { } else {
/* /*
* Packet with erroneous checksum, let the * Packet with erroneous checksum, let the
...@@ -5677,25 +5814,31 @@ static int rx_osm_handler(ring_info_t *ring_data, RxD_t * rxdp) ...@@ -5677,25 +5814,31 @@ static int rx_osm_handler(ring_info_t *ring_data, RxD_t * rxdp)
skb->ip_summed = CHECKSUM_NONE; skb->ip_summed = CHECKSUM_NONE;
} }
skb->protocol = eth_type_trans(skb, dev); if (!sp->lro) {
skb->protocol = eth_type_trans(skb, dev);
#ifdef CONFIG_S2IO_NAPI #ifdef CONFIG_S2IO_NAPI
if (sp->vlgrp && RXD_GET_VLAN_TAG(rxdp->Control_2)) { if (sp->vlgrp && RXD_GET_VLAN_TAG(rxdp->Control_2)) {
/* Queueing the vlan frame to the upper layer */ /* Queueing the vlan frame to the upper layer */
vlan_hwaccel_receive_skb(skb, sp->vlgrp, vlan_hwaccel_receive_skb(skb, sp->vlgrp,
RXD_GET_VLAN_TAG(rxdp->Control_2)); RXD_GET_VLAN_TAG(rxdp->Control_2));
} else { } else {
netif_receive_skb(skb); netif_receive_skb(skb);
} }
#else #else
if (sp->vlgrp && RXD_GET_VLAN_TAG(rxdp->Control_2)) { if (sp->vlgrp && RXD_GET_VLAN_TAG(rxdp->Control_2)) {
/* Queueing the vlan frame to the upper layer */ /* Queueing the vlan frame to the upper layer */
vlan_hwaccel_rx(skb, sp->vlgrp, vlan_hwaccel_rx(skb, sp->vlgrp,
RXD_GET_VLAN_TAG(rxdp->Control_2)); RXD_GET_VLAN_TAG(rxdp->Control_2));
} else { } else {
netif_rx(skb); netif_rx(skb);
} }
#endif #endif
} else {
send_up:
queue_rx_frame(skb);
}
dev->last_rx = jiffies; dev->last_rx = jiffies;
aggregate:
atomic_dec(&sp->rx_bufs_left[ring_no]); atomic_dec(&sp->rx_bufs_left[ring_no]);
return SUCCESS; return SUCCESS;
} }
...@@ -5807,6 +5950,8 @@ module_param(indicate_max_pkts, int, 0); ...@@ -5807,6 +5950,8 @@ module_param(indicate_max_pkts, int, 0);
#endif #endif
module_param(rxsync_frequency, int, 0); module_param(rxsync_frequency, int, 0);
module_param(intr_type, int, 0); module_param(intr_type, int, 0);
module_param(lro, int, 0);
module_param(lro_max_pkts, int, 0);
/** /**
* s2io_init_nic - Initialization of the adapter . * s2io_init_nic - Initialization of the adapter .
...@@ -5938,6 +6083,7 @@ Defaulting to INTA\n"); ...@@ -5938,6 +6083,7 @@ Defaulting to INTA\n");
else else
sp->device_type = XFRAME_I_DEVICE; sp->device_type = XFRAME_I_DEVICE;
sp->lro = lro;
/* Initialize some PCI/PCI-X fields of the NIC. */ /* Initialize some PCI/PCI-X fields of the NIC. */
s2io_init_pci(sp); s2io_init_pci(sp);
...@@ -6241,6 +6387,10 @@ Defaulting to INTA\n"); ...@@ -6241,6 +6387,10 @@ Defaulting to INTA\n");
DBG_PRINT(ERR_DBG, "%s: 3-Buffer mode support has been " DBG_PRINT(ERR_DBG, "%s: 3-Buffer mode support has been "
"enabled\n",dev->name); "enabled\n",dev->name);
if (sp->lro)
DBG_PRINT(ERR_DBG, "%s: Large receive offload enabled\n",
dev->name);
/* Initialize device name */ /* Initialize device name */
strcpy(sp->name, dev->name); strcpy(sp->name, dev->name);
if (sp->device_type & XFRAME_II_DEVICE) if (sp->device_type & XFRAME_II_DEVICE)
...@@ -6351,3 +6501,318 @@ static void s2io_closer(void) ...@@ -6351,3 +6501,318 @@ static void s2io_closer(void)
module_init(s2io_starter); module_init(s2io_starter);
module_exit(s2io_closer); module_exit(s2io_closer);
static int check_L2_lro_capable(u8 *buffer, struct iphdr **ip,
struct tcphdr **tcp, RxD_t *rxdp)
{
int ip_off;
u8 l2_type = (u8)((rxdp->Control_1 >> 37) & 0x7), ip_len;
if (!(rxdp->Control_1 & RXD_FRAME_PROTO_TCP)) {
DBG_PRINT(INIT_DBG,"%s: Non-TCP frames not supported for LRO\n",
__FUNCTION__);
return -1;
}
/* TODO:
* By default the VLAN field in the MAC is stripped by the card, if this
* feature is turned off in rx_pa_cfg register, then the ip_off field
* has to be shifted by a further 2 bytes
*/
switch (l2_type) {
case 0: /* DIX type */
case 4: /* DIX type with VLAN */
ip_off = HEADER_ETHERNET_II_802_3_SIZE;
break;
/* LLC, SNAP etc are considered non-mergeable */
default:
return -1;
}
*ip = (struct iphdr *)((u8 *)buffer + ip_off);
ip_len = (u8)((*ip)->ihl);
ip_len <<= 2;
*tcp = (struct tcphdr *)((unsigned long)*ip + ip_len);
return 0;
}
static int check_for_socket_match(lro_t *lro, struct iphdr *ip,
struct tcphdr *tcp)
{
DBG_PRINT(INFO_DBG,"%s: Been here...\n", __FUNCTION__);
if ((lro->iph->saddr != ip->saddr) || (lro->iph->daddr != ip->daddr) ||
(lro->tcph->source != tcp->source) || (lro->tcph->dest != tcp->dest))
return -1;
return 0;
}
static inline int get_l4_pyld_length(struct iphdr *ip, struct tcphdr *tcp)
{
return(ntohs(ip->tot_len) - (ip->ihl << 2) - (tcp->doff << 2));
}
static void initiate_new_session(lro_t *lro, u8 *l2h,
struct iphdr *ip, struct tcphdr *tcp, u32 tcp_pyld_len)
{
DBG_PRINT(INFO_DBG,"%s: Been here...\n", __FUNCTION__);
lro->l2h = l2h;
lro->iph = ip;
lro->tcph = tcp;
lro->tcp_next_seq = tcp_pyld_len + ntohl(tcp->seq);
lro->tcp_ack = ntohl(tcp->ack_seq);
lro->sg_num = 1;
lro->total_len = ntohs(ip->tot_len);
lro->frags_len = 0;
/*
* check if we saw TCP timestamp. Other consistency checks have
* already been done.
*/
if (tcp->doff == 8) {
u32 *ptr;
ptr = (u32 *)(tcp+1);
lro->saw_ts = 1;
lro->cur_tsval = *(ptr+1);
lro->cur_tsecr = *(ptr+2);
}
lro->in_use = 1;
}
static void update_L3L4_header(nic_t *sp, lro_t *lro)
{
struct iphdr *ip = lro->iph;
struct tcphdr *tcp = lro->tcph;
u16 nchk;
StatInfo_t *statinfo = sp->mac_control.stats_info;
DBG_PRINT(INFO_DBG,"%s: Been here...\n", __FUNCTION__);
/* Update L3 header */
ip->tot_len = htons(lro->total_len);
ip->check = 0;
nchk = ip_fast_csum((u8 *)lro->iph, ip->ihl);
ip->check = nchk;
/* Update L4 header */
tcp->ack_seq = lro->tcp_ack;
tcp->window = lro->window;
/* Update tsecr field if this session has timestamps enabled */
if (lro->saw_ts) {
u32 *ptr = (u32 *)(tcp + 1);
*(ptr+2) = lro->cur_tsecr;
}
/* Update counters required for calculation of
* average no. of packets aggregated.
*/
statinfo->sw_stat.sum_avg_pkts_aggregated += lro->sg_num;
statinfo->sw_stat.num_aggregations++;
}
static void aggregate_new_rx(lro_t *lro, struct iphdr *ip,
struct tcphdr *tcp, u32 l4_pyld)
{
DBG_PRINT(INFO_DBG,"%s: Been here...\n", __FUNCTION__);
lro->total_len += l4_pyld;
lro->frags_len += l4_pyld;
lro->tcp_next_seq += l4_pyld;
lro->sg_num++;
/* Update ack seq no. and window ad(from this pkt) in LRO object */
lro->tcp_ack = tcp->ack_seq;
lro->window = tcp->window;
if (lro->saw_ts) {
u32 *ptr;
/* Update tsecr and tsval from this packet */
ptr = (u32 *) (tcp + 1);
lro->cur_tsval = *(ptr + 1);
lro->cur_tsecr = *(ptr + 2);
}
}
static int verify_l3_l4_lro_capable(lro_t *l_lro, struct iphdr *ip,
struct tcphdr *tcp, u32 tcp_pyld_len)
{
u8 *ptr;
DBG_PRINT(INFO_DBG,"%s: Been here...\n", __FUNCTION__);
if (!tcp_pyld_len) {
/* Runt frame or a pure ack */
return -1;
}
if (ip->ihl != 5) /* IP has options */
return -1;
if (tcp->urg || tcp->psh || tcp->rst || tcp->syn || tcp->fin ||
!tcp->ack) {
/*
* Currently recognize only the ack control word and
* any other control field being set would result in
* flushing the LRO session
*/
return -1;
}
/*
* Allow only one TCP timestamp option. Don't aggregate if
* any other options are detected.
*/
if (tcp->doff != 5 && tcp->doff != 8)
return -1;
if (tcp->doff == 8) {
ptr = (u8 *)(tcp + 1);
while (*ptr == TCPOPT_NOP)
ptr++;
if (*ptr != TCPOPT_TIMESTAMP || *(ptr+1) != TCPOLEN_TIMESTAMP)
return -1;
/* Ensure timestamp value increases monotonically */
if (l_lro)
if (l_lro->cur_tsval > *((u32 *)(ptr+2)))
return -1;
/* timestamp echo reply should be non-zero */
if (*((u32 *)(ptr+6)) == 0)
return -1;
}
return 0;
}
static int
s2io_club_tcp_session(u8 *buffer, u8 **tcp, u32 *tcp_len, lro_t **lro,
RxD_t *rxdp, nic_t *sp)
{
struct iphdr *ip;
struct tcphdr *tcph;
int ret = 0, i;
if (!(ret = check_L2_lro_capable(buffer, &ip, (struct tcphdr **)tcp,
rxdp))) {
DBG_PRINT(INFO_DBG,"IP Saddr: %x Daddr: %x\n",
ip->saddr, ip->daddr);
} else {
return ret;
}
tcph = (struct tcphdr *)*tcp;
*tcp_len = get_l4_pyld_length(ip, tcph);
for (i=0; i<MAX_LRO_SESSIONS; i++) {
lro_t *l_lro = &sp->lro0_n[i];
if (l_lro->in_use) {
if (check_for_socket_match(l_lro, ip, tcph))
continue;
/* Sock pair matched */
*lro = l_lro;
if ((*lro)->tcp_next_seq != ntohl(tcph->seq)) {
DBG_PRINT(INFO_DBG, "%s:Out of order. expected "
"0x%x, actual 0x%x\n", __FUNCTION__,
(*lro)->tcp_next_seq,
ntohl(tcph->seq));
sp->mac_control.stats_info->
sw_stat.outof_sequence_pkts++;
ret = 2;
break;
}
if (!verify_l3_l4_lro_capable(l_lro, ip, tcph,*tcp_len))
ret = 1; /* Aggregate */
else
ret = 2; /* Flush both */
break;
}
}
if (ret == 0) {
/* Before searching for available LRO objects,
* check if the pkt is L3/L4 aggregatable. If not
* don't create new LRO session. Just send this
* packet up.
*/
if (verify_l3_l4_lro_capable(NULL, ip, tcph, *tcp_len)) {
return 5;
}
for (i=0; i<MAX_LRO_SESSIONS; i++) {
lro_t *l_lro = &sp->lro0_n[i];
if (!(l_lro->in_use)) {
*lro = l_lro;
ret = 3; /* Begin anew */
break;
}
}
}
if (ret == 0) { /* sessions exceeded */
DBG_PRINT(INFO_DBG,"%s:All LRO sessions already in use\n",
__FUNCTION__);
*lro = NULL;
return ret;
}
switch (ret) {
case 3:
initiate_new_session(*lro, buffer, ip, tcph, *tcp_len);
break;
case 2:
update_L3L4_header(sp, *lro);
break;
case 1:
aggregate_new_rx(*lro, ip, tcph, *tcp_len);
if ((*lro)->sg_num == sp->lro_max_aggr_per_sess) {
update_L3L4_header(sp, *lro);
ret = 4; /* Flush the LRO */
}
break;
default:
DBG_PRINT(ERR_DBG,"%s:Dont know, can't say!!\n",
__FUNCTION__);
break;
}
return ret;
}
static void clear_lro_session(lro_t *lro)
{
static u16 lro_struct_size = sizeof(lro_t);
memset(lro, 0, lro_struct_size);
}
static void queue_rx_frame(struct sk_buff *skb)
{
struct net_device *dev = skb->dev;
skb->protocol = eth_type_trans(skb, dev);
#ifdef CONFIG_S2IO_NAPI
netif_receive_skb(skb);
#else
netif_rx(skb);
#endif
}
static void lro_append_pkt(nic_t *sp, lro_t *lro, struct sk_buff *skb,
u32 tcp_len)
{
struct sk_buff *tmp, *first = lro->parent;
first->len += tcp_len;
first->data_len = lro->frags_len;
skb_pull(skb, (skb->len - tcp_len));
if ((tmp = skb_shinfo(first)->frag_list)) {
while (tmp->next)
tmp = tmp->next;
tmp->next = skb;
}
else
skb_shinfo(first)->frag_list = skb;
sp->mac_control.stats_info->sw_stat.clubbed_frms_cnt++;
return;
}
...@@ -78,6 +78,13 @@ static int debug_level = ERR_DBG; ...@@ -78,6 +78,13 @@ static int debug_level = ERR_DBG;
typedef struct { typedef struct {
unsigned long long single_ecc_errs; unsigned long long single_ecc_errs;
unsigned long long double_ecc_errs; unsigned long long double_ecc_errs;
/* LRO statistics */
unsigned long long clubbed_frms_cnt;
unsigned long long sending_both;
unsigned long long outof_sequence_pkts;
unsigned long long flush_max_pkts;
unsigned long long sum_avg_pkts_aggregated;
unsigned long long num_aggregations;
} swStat_t; } swStat_t;
/* The statistics block of Xena */ /* The statistics block of Xena */
...@@ -680,6 +687,24 @@ struct msix_info_st { ...@@ -680,6 +687,24 @@ struct msix_info_st {
u64 data; u64 data;
}; };
/* Data structure to represent a LRO session */
typedef struct lro {
struct sk_buff *parent;
u8 *l2h;
struct iphdr *iph;
struct tcphdr *tcph;
u32 tcp_next_seq;
u32 tcp_ack;
int total_len;
int frags_len;
int sg_num;
int in_use;
u16 window;
u32 cur_tsval;
u32 cur_tsecr;
u8 saw_ts;
}lro_t;
/* Structure representing one instance of the NIC */ /* Structure representing one instance of the NIC */
struct s2io_nic { struct s2io_nic {
int rxd_mode; int rxd_mode;
...@@ -784,6 +809,13 @@ struct s2io_nic { ...@@ -784,6 +809,13 @@ struct s2io_nic {
#define XFRAME_II_DEVICE 2 #define XFRAME_II_DEVICE 2
u8 device_type; u8 device_type;
#define MAX_LRO_SESSIONS 32
lro_t lro0_n[MAX_LRO_SESSIONS];
unsigned long clubbed_frms_cnt;
unsigned long sending_both;
u8 lro;
u16 lro_max_aggr_per_sess;
#define INTA 0 #define INTA 0
#define MSI 1 #define MSI 1
#define MSI_X 2 #define MSI_X 2
...@@ -937,4 +969,10 @@ static void s2io_card_down(nic_t *nic); ...@@ -937,4 +969,10 @@ static void s2io_card_down(nic_t *nic);
static int s2io_card_up(nic_t *nic); static int s2io_card_up(nic_t *nic);
static int get_xena_rev_id(struct pci_dev *pdev); static int get_xena_rev_id(struct pci_dev *pdev);
static void restore_xmsi_data(nic_t *nic); static void restore_xmsi_data(nic_t *nic);
static int s2io_club_tcp_session(u8 *buffer, u8 **tcp, u32 *tcp_len, lro_t **lro, RxD_t *rxdp, nic_t *sp);
static void clear_lro_session(lro_t *lro);
static void queue_rx_frame(struct sk_buff *skb);
static void update_L3L4_header(nic_t *sp, lro_t *lro);
static void lro_append_pkt(nic_t *sp, lro_t *lro, struct sk_buff *skb, u32 tcp_len);
#endif /* _S2IO_H */ #endif /* _S2IO_H */
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment