Commit 6e909283, authored by Nimrod Andy, committed by David S. Miller

net: fec: Add Scatter/gather support

Add Scatter/gather support for FEC.
This feature improves outbound throughput performance.

Tested on imx6dl sabresd board:
Running iperf tests shows a 55.4% improvement.

$ ethtool -K eth0 sg off
$ iperf -c 10.192.242.167 -t 3 &
[  3] local 10.192.242.108 port 52618 connected with 10.192.242.167 port 5001
[ ID] Interval       Transfer     Bandwidth
[  3]  0.0- 3.0 sec  99.5 MBytes   278 Mbits/sec

$ ethtool -K eth0 sg on
$ iperf -c 10.192.242.167 -t 3 &
[  3] local 10.192.242.108 port 52617 connected with 10.192.242.167 port 5001
[ ID] Interval       Transfer     Bandwidth
[  3]  0.0- 3.0 sec   154 MBytes   432 Mbits/sec

CC: Li Frank <B20596@freescale.com>
Signed-off-by: Fugang Duan <B38611@freescale.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent 55d0218a
...@@ -221,7 +221,7 @@ struct bufdesc_ex { ...@@ -221,7 +221,7 @@ struct bufdesc_ex {
#define BD_ENET_TX_RCMASK ((ushort)0x003c) #define BD_ENET_TX_RCMASK ((ushort)0x003c)
#define BD_ENET_TX_UN ((ushort)0x0002) #define BD_ENET_TX_UN ((ushort)0x0002)
#define BD_ENET_TX_CSL ((ushort)0x0001) #define BD_ENET_TX_CSL ((ushort)0x0001)
#define BD_ENET_TX_STATS ((ushort)0x03ff) /* All status bits */ #define BD_ENET_TX_STATS ((ushort)0x0fff) /* All status bits */
/*enhanced buffer descriptor control/status used by Ethernet transmit*/ /*enhanced buffer descriptor control/status used by Ethernet transmit*/
#define BD_ENET_TX_INT 0x40000000 #define BD_ENET_TX_INT 0x40000000
......
...@@ -289,6 +289,16 @@ static int fec_enet_get_bd_index(struct bufdesc *base, struct bufdesc *bdp, ...@@ -289,6 +289,16 @@ static int fec_enet_get_bd_index(struct bufdesc *base, struct bufdesc *bdp,
return ((const char *)bdp - (const char *)base) / fep->bufdesc_size; return ((const char *)bdp - (const char *)base) / fep->bufdesc_size;
} }
/*
 * Return the number of free Tx descriptors.
 *
 * The count is the distance, in descriptors of fep->bufdesc_size bytes, from
 * fep->cur_tx to fep->dirty_tx, minus one.  When cur_tx is at or beyond
 * dirty_tx in memory the raw value is negative and fep->tx_ring_size is
 * added to wrap it, so for pointers inside the ring the result lies in
 * [0, tx_ring_size - 1].
 */
static int fec_enet_get_free_txdesc_num(struct fec_enet_private *fep)
{
	int entries;

	entries = ((const char *)fep->dirty_tx -
			(const char *)fep->cur_tx) / fep->bufdesc_size - 1;

	/*
	 * Only a negative value needs the wrap-around correction.  Zero means
	 * dirty_tx is exactly one descriptor ahead of cur_tx and must be
	 * reported as 0 free entries, not bumped up to tx_ring_size.
	 */
	return entries >= 0 ? entries : entries + fep->tx_ring_size;
}
static void *swap_buffer(void *bufaddr, int len) static void *swap_buffer(void *bufaddr, int len)
{ {
int i; int i;
...@@ -316,20 +326,119 @@ fec_enet_clear_csum(struct sk_buff *skb, struct net_device *ndev) ...@@ -316,20 +326,119 @@ fec_enet_clear_csum(struct sk_buff *skb, struct net_device *ndev)
return 0; return 0;
} }
static int txq_submit_skb(struct sk_buff *skb, struct net_device *ndev) static void
fec_enet_submit_work(struct bufdesc *bdp, struct fec_enet_private *fep)
{
/*
 * fec_enet_submit_work: workaround hook for devices flagged with
 * FEC_QUIRK_ERR006358.  @bdp is a Tx descriptor; @fep is the driver
 * private data.  Nothing is done on devices without the quirk.
 */
const struct platform_device_id *id_entry =
platform_get_device_id(fep->pdev);
struct bufdesc *bdp_pre;
/* Look at the descriptor that precedes @bdp in the ring. */
bdp_pre = fec_enet_get_prevdesc(bdp, fep);
/*
 * If the preceding descriptor no longer has BD_ENET_TX_READY set, flag
 * trig_tx and schedule the delayed work to run after 1 ms.
 * NOTE(review): presumably the delayed-work handler re-triggers
 * transmission; the handler is not visible here -- confirm.
 */
if ((id_entry->driver_data & FEC_QUIRK_ERR006358) &&
!(bdp_pre->cbd_sc & BD_ENET_TX_READY)) {
fep->delay_work.trig_tx = true;
schedule_delayed_work(&(fep->delay_work.delay_work),
msecs_to_jiffies(1));
}
}
static int
fec_enet_txq_submit_frag_skb(struct sk_buff *skb, struct net_device *ndev)
{ {
/*
 * fec_enet_txq_submit_frag_skb: fill one Tx descriptor per page fragment
 * of @skb, starting with the descriptor that follows fep->cur_tx.  On
 * success fep->cur_tx is advanced to the descriptor of the last fragment
 * and 0 is returned.  On a DMA mapping failure the skb is freed, the
 * fragments mapped so far are unmapped and NETDEV_TX_OK is returned.
 */
struct fec_enet_private *fep = netdev_priv(ndev); struct fec_enet_private *fep = netdev_priv(ndev);
const struct platform_device_id *id_entry = const struct platform_device_id *id_entry =
platform_get_device_id(fep->pdev); platform_get_device_id(fep->pdev);
struct bufdesc *bdp, *bdp_pre; struct bufdesc *bdp = fep->cur_tx;
void *bufaddr; struct bufdesc_ex *ebdp;
unsigned short status; int nr_frags = skb_shinfo(skb)->nr_frags;
int frag, frag_len;
unsigned short status;
unsigned int estatus = 0;
skb_frag_t *this_frag;
unsigned int index; unsigned int index;
void *bufaddr;
int i;
/* Fill in a Tx ring entry */ for (frag = 0; frag < nr_frags; frag++) {
this_frag = &skb_shinfo(skb)->frags[frag];
bdp = fec_enet_get_nextdesc(bdp, fep);
ebdp = (struct bufdesc_ex *)bdp;
/* Start from the descriptor's current control word with the status bits cleared. */
status = bdp->cbd_sc;
status &= ~BD_ENET_TX_STATS;
status |= (BD_ENET_TX_TC | BD_ENET_TX_READY);
frag_len = skb_shinfo(skb)->frags[frag].size;
/* Handle the last BD specially */
if (frag == nr_frags - 1) {
status |= (BD_ENET_TX_INTR | BD_ENET_TX_LAST);
if (fep->bufdesc_ex) {
estatus |= BD_ENET_TX_INT;
if (unlikely(skb_shinfo(skb)->tx_flags &
SKBTX_HW_TSTAMP && fep->hwts_tx_en))
estatus |= BD_ENET_TX_TS;
}
}
/* Extended descriptors also carry the checksum-insertion flags. */
if (fep->bufdesc_ex) {
if (skb->ip_summed == CHECKSUM_PARTIAL)
estatus |= BD_ENET_TX_PINS | BD_ENET_TX_IINS;
ebdp->cbd_bdu = 0;
ebdp->cbd_esc = estatus;
}
bufaddr = page_address(this_frag->page.p) + this_frag->page_offset;
index = fec_enet_get_bd_index(fep->tx_bd_base, bdp, fep);
/*
 * Copy the fragment into this descriptor's bounce buffer when its
 * address has any FEC_ALIGNMENT bits set or the device has the
 * FEC_QUIRK_SWAP_FRAME quirk; in the latter case the copy is also
 * passed through swap_buffer().
 */
if (((unsigned long) bufaddr) & FEC_ALIGNMENT ||
id_entry->driver_data & FEC_QUIRK_SWAP_FRAME) {
memcpy(fep->tx_bounce[index], bufaddr, frag_len);
bufaddr = fep->tx_bounce[index];
if (id_entry->driver_data & FEC_QUIRK_SWAP_FRAME)
swap_buffer(bufaddr, frag_len);
}
bdp->cbd_bufaddr = dma_map_single(&fep->pdev->dev, bufaddr,
frag_len, DMA_TO_DEVICE);
if (dma_mapping_error(&fep->pdev->dev, bdp->cbd_bufaddr)) {
/*
 * NOTE(review): the skb is freed here and the error path returns
 * NETDEV_TX_OK.  The caller fec_enet_txq_submit_skb() only checks
 * "if (ret)"; if NETDEV_TX_OK is 0 the caller would carry on using
 * the freed skb -- confirm and consider returning a non-zero error.
 */
dev_kfree_skb_any(skb);
if (net_ratelimit())
netdev_err(ndev, "Tx DMA memory map failed\n");
goto dma_mapping_error;
}
bdp->cbd_datlen = frag_len;
bdp->cbd_sc = status;
}
/* All fragments queued: cur_tx now names the last descriptor used. */
fep->cur_tx = bdp;
return 0;
dma_mapping_error:
/*
 * Unmap the fragments that were mapped before the failure, walking the
 * ring again from the descriptor after cur_tx.
 * NOTE(review): the head buffer that fec_enet_txq_submit_skb() maps
 * before calling this function is not unmapped here -- verify it is
 * released elsewhere.
 */
bdp = fep->cur_tx; bdp = fep->cur_tx;
for (i = 0; i < frag; i++) {
bdp = fec_enet_get_nextdesc(bdp, fep);
dma_unmap_single(&fep->pdev->dev, bdp->cbd_bufaddr,
bdp->cbd_datlen, DMA_TO_DEVICE);
}
return NETDEV_TX_OK;
}
status = bdp->cbd_sc; static int fec_enet_txq_submit_skb(struct sk_buff *skb, struct net_device *ndev)
{
struct fec_enet_private *fep = netdev_priv(ndev);
const struct platform_device_id *id_entry =
platform_get_device_id(fep->pdev);
int nr_frags = skb_shinfo(skb)->nr_frags;
struct bufdesc *bdp, *last_bdp;
void *bufaddr;
unsigned short status;
unsigned short buflen;
unsigned int estatus = 0;
unsigned int index;
int ret;
/* Protocol checksum off-load for TCP and UDP. */ /* Protocol checksum off-load for TCP and UDP. */
if (fec_enet_clear_csum(skb, ndev)) { if (fec_enet_clear_csum(skb, ndev)) {
...@@ -337,82 +446,83 @@ static int txq_submit_skb(struct sk_buff *skb, struct net_device *ndev) ...@@ -337,82 +446,83 @@ static int txq_submit_skb(struct sk_buff *skb, struct net_device *ndev)
return NETDEV_TX_OK; return NETDEV_TX_OK;
} }
/* Clear all of the status flags */ /* Fill in a Tx ring entry */
bdp = fep->cur_tx;
status = bdp->cbd_sc;
status &= ~BD_ENET_TX_STATS; status &= ~BD_ENET_TX_STATS;
/* Set buffer length and buffer pointer */ /* Set buffer length and buffer pointer */
bufaddr = skb->data; bufaddr = skb->data;
bdp->cbd_datlen = skb->len; buflen = skb_headlen(skb);
index = fec_enet_get_bd_index(fep->tx_bd_base, bdp, fep); index = fec_enet_get_bd_index(fep->tx_bd_base, bdp, fep);
if (((unsigned long) bufaddr) & FEC_ALIGNMENT ||
if (((unsigned long) bufaddr) & FEC_ALIGNMENT) { id_entry->driver_data & FEC_QUIRK_SWAP_FRAME) {
memcpy(fep->tx_bounce[index], skb->data, skb->len); memcpy(fep->tx_bounce[index], skb->data, buflen);
bufaddr = fep->tx_bounce[index]; bufaddr = fep->tx_bounce[index];
}
/* if (id_entry->driver_data & FEC_QUIRK_SWAP_FRAME)
* Some design made an incorrect assumption on endian mode of swap_buffer(bufaddr, buflen);
* the system that it's running on. As the result, driver has to }
* swap every frame going to and coming from the controller.
*/
if (id_entry->driver_data & FEC_QUIRK_SWAP_FRAME)
swap_buffer(bufaddr, skb->len);
/* Save skb pointer */
fep->tx_skbuff[index] = skb;
/* Push the data cache so the CPM does not get stale memory /* Push the data cache so the CPM does not get stale memory
* data. * data.
*/ */
bdp->cbd_bufaddr = dma_map_single(&fep->pdev->dev, bufaddr, bdp->cbd_bufaddr = dma_map_single(&fep->pdev->dev, bufaddr,
skb->len, DMA_TO_DEVICE); buflen, DMA_TO_DEVICE);
if (dma_mapping_error(&fep->pdev->dev, bdp->cbd_bufaddr)) { if (dma_mapping_error(&fep->pdev->dev, bdp->cbd_bufaddr)) {
bdp->cbd_bufaddr = 0;
fep->tx_skbuff[index] = NULL;
dev_kfree_skb_any(skb); dev_kfree_skb_any(skb);
if (net_ratelimit()) if (net_ratelimit())
netdev_err(ndev, "Tx DMA memory map failed\n"); netdev_err(ndev, "Tx DMA memory map failed\n");
return NETDEV_TX_OK; return NETDEV_TX_OK;
} }
if (nr_frags) {
ret = fec_enet_txq_submit_frag_skb(skb, ndev);
if (ret)
return ret;
} else {
status |= (BD_ENET_TX_INTR | BD_ENET_TX_LAST);
if (fep->bufdesc_ex) {
estatus = BD_ENET_TX_INT;
if (unlikely(skb_shinfo(skb)->tx_flags &
SKBTX_HW_TSTAMP && fep->hwts_tx_en))
estatus |= BD_ENET_TX_TS;
}
}
if (fep->bufdesc_ex) { if (fep->bufdesc_ex) {
struct bufdesc_ex *ebdp = (struct bufdesc_ex *)bdp; struct bufdesc_ex *ebdp = (struct bufdesc_ex *)bdp;
ebdp->cbd_bdu = 0;
if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP && if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP &&
fep->hwts_tx_en)) { fep->hwts_tx_en))
ebdp->cbd_esc = (BD_ENET_TX_TS | BD_ENET_TX_INT);
skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS; skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
} else {
ebdp->cbd_esc = BD_ENET_TX_INT;
/* Enable protocol checksum flags if (skb->ip_summed == CHECKSUM_PARTIAL)
* We do not bother with the IP Checksum bits as they estatus |= BD_ENET_TX_PINS | BD_ENET_TX_IINS;
* are done by the kernel
*/ ebdp->cbd_bdu = 0;
if (skb->ip_summed == CHECKSUM_PARTIAL) ebdp->cbd_esc = estatus;
ebdp->cbd_esc |= BD_ENET_TX_PINS | BD_ENET_TX_IINS;
}
} }
last_bdp = fep->cur_tx;
index = fec_enet_get_bd_index(fep->tx_bd_base, last_bdp, fep);
/* Save skb pointer */
fep->tx_skbuff[index] = skb;
bdp->cbd_datlen = buflen;
/* Send it on its way. Tell FEC it's ready, interrupt when done, /* Send it on its way. Tell FEC it's ready, interrupt when done,
* it's the last BD of the frame, and to put the CRC on the end. * it's the last BD of the frame, and to put the CRC on the end.
*/ */
status |= (BD_ENET_TX_READY | BD_ENET_TX_INTR status |= (BD_ENET_TX_READY | BD_ENET_TX_TC);
| BD_ENET_TX_LAST | BD_ENET_TX_TC);
bdp->cbd_sc = status; bdp->cbd_sc = status;
bdp_pre = fec_enet_get_prevdesc(bdp, fep); fec_enet_submit_work(bdp, fep);
if ((id_entry->driver_data & FEC_QUIRK_ERR006358) &&
!(bdp_pre->cbd_sc & BD_ENET_TX_READY)) {
fep->delay_work.trig_tx = true;
schedule_delayed_work(&(fep->delay_work.delay_work),
msecs_to_jiffies(1));
}
/* If this was the last BD in the ring, start at the beginning again. */ /* If this was the last BD in the ring, start at the beginning again. */
bdp = fec_enet_get_nextdesc(bdp, fep); bdp = fec_enet_get_nextdesc(last_bdp, fep);
skb_tx_timestamp(skb); skb_tx_timestamp(skb);
...@@ -421,7 +531,7 @@ static int txq_submit_skb(struct sk_buff *skb, struct net_device *ndev) ...@@ -421,7 +531,7 @@ static int txq_submit_skb(struct sk_buff *skb, struct net_device *ndev)
/* Trigger transmission start */ /* Trigger transmission start */
writel(0, fep->hwp + FEC_X_DES_ACTIVE); writel(0, fep->hwp + FEC_X_DES_ACTIVE);
return NETDEV_TX_OK; return 0;
} }
static netdev_tx_t static netdev_tx_t
...@@ -430,6 +540,7 @@ fec_enet_start_xmit(struct sk_buff *skb, struct net_device *ndev) ...@@ -430,6 +540,7 @@ fec_enet_start_xmit(struct sk_buff *skb, struct net_device *ndev)
struct fec_enet_private *fep = netdev_priv(ndev); struct fec_enet_private *fep = netdev_priv(ndev);
struct bufdesc *bdp; struct bufdesc *bdp;
unsigned short status; unsigned short status;
int entries_free;
int ret; int ret;
/* Fill in a Tx ring entry */ /* Fill in a Tx ring entry */
...@@ -441,15 +552,17 @@ fec_enet_start_xmit(struct sk_buff *skb, struct net_device *ndev) ...@@ -441,15 +552,17 @@ fec_enet_start_xmit(struct sk_buff *skb, struct net_device *ndev)
/* Ooops. All transmit buffers are full. Bail out. /* Ooops. All transmit buffers are full. Bail out.
* This should not happen, since ndev->tbusy should be set. * This should not happen, since ndev->tbusy should be set.
*/ */
netdev_err(ndev, "tx queue full!\n"); if (net_ratelimit())
netdev_err(ndev, "tx queue full!\n");
return NETDEV_TX_BUSY; return NETDEV_TX_BUSY;
} }
ret = txq_submit_skb(skb, ndev); ret = fec_enet_txq_submit_skb(skb, ndev);
if (ret == -EBUSY) if (ret)
return NETDEV_TX_BUSY; return ret;
if (fep->cur_tx == fep->dirty_tx) entries_free = fec_enet_get_free_txdesc_num(fep);
if (entries_free < MAX_SKB_FRAGS + 1)
netif_stop_queue(ndev); netif_stop_queue(ndev);
return NETDEV_TX_OK; return NETDEV_TX_OK;
...@@ -770,6 +883,7 @@ fec_enet_tx(struct net_device *ndev) ...@@ -770,6 +883,7 @@ fec_enet_tx(struct net_device *ndev)
unsigned short status; unsigned short status;
struct sk_buff *skb; struct sk_buff *skb;
int index = 0; int index = 0;
int entries;
fep = netdev_priv(ndev); fep = netdev_priv(ndev);
bdp = fep->dirty_tx; bdp = fep->dirty_tx;
...@@ -786,9 +900,13 @@ fec_enet_tx(struct net_device *ndev) ...@@ -786,9 +900,13 @@ fec_enet_tx(struct net_device *ndev)
index = fec_enet_get_bd_index(fep->tx_bd_base, bdp, fep); index = fec_enet_get_bd_index(fep->tx_bd_base, bdp, fep);
skb = fep->tx_skbuff[index]; skb = fep->tx_skbuff[index];
dma_unmap_single(&fep->pdev->dev, bdp->cbd_bufaddr, skb->len, dma_unmap_single(&fep->pdev->dev, bdp->cbd_bufaddr, bdp->cbd_datlen,
DMA_TO_DEVICE); DMA_TO_DEVICE);
bdp->cbd_bufaddr = 0; bdp->cbd_bufaddr = 0;
if (!skb) {
bdp = fec_enet_get_nextdesc(bdp, fep);
continue;
}
/* Check for errors. */ /* Check for errors. */
if (status & (BD_ENET_TX_HB | BD_ENET_TX_LC | if (status & (BD_ENET_TX_HB | BD_ENET_TX_LC |
...@@ -807,7 +925,7 @@ fec_enet_tx(struct net_device *ndev) ...@@ -807,7 +925,7 @@ fec_enet_tx(struct net_device *ndev)
ndev->stats.tx_carrier_errors++; ndev->stats.tx_carrier_errors++;
} else { } else {
ndev->stats.tx_packets++; ndev->stats.tx_packets++;
ndev->stats.tx_bytes += bdp->cbd_datlen; ndev->stats.tx_bytes += skb->len;
} }
if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_IN_PROGRESS) && if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_IN_PROGRESS) &&
...@@ -844,15 +962,13 @@ fec_enet_tx(struct net_device *ndev) ...@@ -844,15 +962,13 @@ fec_enet_tx(struct net_device *ndev)
/* Since we have freed up a buffer, the ring is no longer full /* Since we have freed up a buffer, the ring is no longer full
*/ */
if (fep->dirty_tx != fep->cur_tx) { entries = fec_enet_get_free_txdesc_num(fep);
if (netif_queue_stopped(ndev)) if (entries >= MAX_SKB_FRAGS + 1 && netif_queue_stopped(ndev))
netif_wake_queue(ndev); netif_wake_queue(ndev);
}
} }
return; return;
} }
/* During a receive, the cur_rx points to the current incoming buffer. /* During a receive, the cur_rx points to the current incoming buffer.
* When we update through the ring, if the next incoming buffer has * When we update through the ring, if the next incoming buffer has
* not been given to the system, we just set the empty indicator, * not been given to the system, we just set the empty indicator,
...@@ -2095,7 +2211,7 @@ static int fec_enet_init(struct net_device *ndev) ...@@ -2095,7 +2211,7 @@ static int fec_enet_init(struct net_device *ndev)
if (id_entry->driver_data & FEC_QUIRK_HAS_CSUM) { if (id_entry->driver_data & FEC_QUIRK_HAS_CSUM) {
/* enable hw accelerator */ /* enable hw accelerator */
ndev->features |= (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM ndev->features |= (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM
| NETIF_F_RXCSUM); | NETIF_F_RXCSUM | NETIF_F_SG);
fep->csum_flags |= FLAG_RX_CSUM_ENABLED; fep->csum_flags |= FLAG_RX_CSUM_ENABLED;
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment