Commit fddb8872 authored by David S. Miller

Merge branch 'sw_tso'

Ezequiel Garcia says:

====================
net: Introduce a software TSO helper API

Here's a first proposal for a generic software TSO helper API, following
David's suggestion.

There are at least two drivers that currently implement some form of software
TSO: the Solarflare network driver (drivers/net/ethernet/sfc) and the Tilera GX
network driver (drivers/net/ethernet/tile/tilegx.c).

The rationale behind adding a generic API is to provide boilerplate for the
segmentation and other common tasks, making this support easier to add to other
drivers.

When designing the API, I've considered mainly two design choices:

  1. Implement a series of callbacks that each driver would implement
     and that the net core code would call to fill in descriptors and
     egress the data.

  2. Implement an API for drivers to use in their own tx_tso functions.
     Such a function would exhaust the sk_buff payload, using the API as
     a helper for building the headers and the segmented data.

I've chosen (2), to avoid function pointers (which was Willy's concern) and
because it seemed less fragile. Of course, this is arguable. A rough sketch of
how a driver would use the API is shown below.
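
To make the intended usage concrete, here is a minimal, hypothetical sketch of
such a tx_tso path built on the proposed helpers (tso_count_descs(),
tso_start(), tso_build_hdr() and tso_build_data()). The struct my_tx_queue and
my_*() names are placeholders for driver-specific ring handling, not part of
the API; the real implementations are txq_submit_tso() and mvneta_tx_tso() in
the patches below.

#include <linux/kernel.h>
#include <linux/skbuff.h>
#include <linux/tcp.h>
#include <net/tso.h>

/* Hypothetical sketch only: my_free_descs(), my_next_header_buffer(),
 * my_put_header_desc() and my_put_data_desc() stand in for the driver's own
 * descriptor handling.
 */
static int my_xmit_tso(struct my_tx_queue *txq, struct sk_buff *skb)
{
    int hdr_len = skb_transport_offset(skb) + tcp_hdrlen(skb);
    int total_len = skb->len - hdr_len;
    struct tso_t tso;

    /* Reserve the worst-case number of descriptors up front */
    if (tso_count_descs(skb) > my_free_descs(txq))
        return -EBUSY;

    tso_start(skb, &tso);

    while (total_len > 0) {
        int data_left = min_t(int, skb_shinfo(skb)->gso_size, total_len);
        char *hdr = my_next_header_buffer(txq);

        total_len -= data_left;

        /* Build the MAC + IP + TCP headers for this segment */
        tso_build_hdr(skb, hdr, &tso, data_left, total_len == 0);
        my_put_header_desc(txq, hdr, hdr_len);

        /* Attach the segment payload, possibly split across buffers */
        while (data_left > 0) {
            int size = min_t(int, tso.size, data_left);

            my_put_data_desc(txq, tso.data, size,
                             size == data_left, total_len == 0);
            data_left -= size;
            tso_build_data(skb, &tso, size);
        }
    }
    return 0;
}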

The API is by no means complete and lacks some features; however, it is enough
to support TSO in the mv643xx_eth and mvneta network drivers, with very good
performance results. I've added that support as an example of the API in
action.

In particular the following needs some revisiting:

  1. IPv6 support is lacking.

  2. The required descriptor counting needs some verification. The current
     implementation might be too "sketchy". The tilegx implementation could
     be a good starting point (see the note after this list).

  3. The implementation assumes the hardware can compute the TCP and IP
     checksums for the built headers. However, some controllers (tilegx,
     for instance) may need an initial checksum to be calculated and
     passed in.
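
For reference, the descriptor estimate currently used by the helper
(tso_count_descs() in the last patch) is a simple worst-case bound rather than
an exact count; the function below restates it, with a made-up name for
illustration only.

#include <linux/skbuff.h>

/* Worst case: one header descriptor plus one data descriptor per segment,
 * plus one extra descriptor for each page fragment the payload may span.
 */
static int tso_estimate_descs(struct sk_buff *skb)
{
    return skb_shinfo(skb)->gso_segs * 2 + skb_shinfo(skb)->nr_frags;
}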

Despite this, I hope this proposal is good enough to trigger some discussion
and to check if I'm on the right track. Feedback is much appreciated!
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents 8af750d7 3ae8f4e0
@@ -42,6 +42,7 @@
 #include <linux/dma-mapping.h>
 #include <linux/in.h>
 #include <linux/ip.h>
+#include <net/tso.h>
 #include <linux/tcp.h>
 #include <linux/udp.h>
 #include <linux/etherdevice.h>
@@ -179,9 +180,10 @@ static char mv643xx_eth_driver_version[] = "1.4";
  * Misc definitions.
  */
 #define DEFAULT_RX_QUEUE_SIZE 128
-#define DEFAULT_TX_QUEUE_SIZE 256
+#define DEFAULT_TX_QUEUE_SIZE 512
 #define SKB_DMA_REALIGN ((PAGE_SIZE - NET_SKB_PAD) % SMP_CACHE_BYTES)
+#define TSO_HEADER_SIZE 128
 /*
  * RX/TX descriptors.
@@ -250,6 +252,7 @@ struct tx_desc {
 #define GEN_TCP_UDP_CHECKSUM 0x00020000
 #define UDP_FRAME 0x00010000
 #define MAC_HDR_EXTRA_4_BYTES 0x00008000
+#define GEN_TCP_UDP_CHK_FULL 0x00000400
 #define MAC_HDR_EXTRA_8_BYTES 0x00000200
 #define TX_IHL_SHIFT 11
@@ -345,6 +348,9 @@ struct tx_queue {
 int tx_curr_desc;
 int tx_used_desc;
+char *tso_hdrs;
+dma_addr_t tso_hdrs_dma;
 struct tx_desc *tx_desc_area;
 dma_addr_t tx_desc_dma;
 int tx_desc_area_size;
@@ -661,6 +667,198 @@ static inline unsigned int has_tiny_unaligned_frags(struct sk_buff *skb)
 return 0;
 }
static inline __be16 sum16_as_be(__sum16 sum)
{
return (__force __be16)sum;
}
static int skb_tx_csum(struct mv643xx_eth_private *mp, struct sk_buff *skb,
u16 *l4i_chk, u32 *command, int length)
{
int ret;
u32 cmd = 0;
if (skb->ip_summed == CHECKSUM_PARTIAL) {
int hdr_len;
int tag_bytes;
BUG_ON(skb->protocol != htons(ETH_P_IP) &&
skb->protocol != htons(ETH_P_8021Q));
hdr_len = (void *)ip_hdr(skb) - (void *)skb->data;
tag_bytes = hdr_len - ETH_HLEN;
if (length - hdr_len > mp->shared->tx_csum_limit ||
unlikely(tag_bytes & ~12)) {
ret = skb_checksum_help(skb);
if (!ret)
goto no_csum;
return ret;
}
if (tag_bytes & 4)
cmd |= MAC_HDR_EXTRA_4_BYTES;
if (tag_bytes & 8)
cmd |= MAC_HDR_EXTRA_8_BYTES;
cmd |= GEN_TCP_UDP_CHECKSUM | GEN_TCP_UDP_CHK_FULL |
GEN_IP_V4_CHECKSUM |
ip_hdr(skb)->ihl << TX_IHL_SHIFT;
/* TODO: Revisit this. With the usage of GEN_TCP_UDP_CHK_FULL
* it seems we don't need to pass the initial checksum. */
switch (ip_hdr(skb)->protocol) {
case IPPROTO_UDP:
cmd |= UDP_FRAME;
*l4i_chk = 0;
break;
case IPPROTO_TCP:
*l4i_chk = 0;
break;
default:
WARN(1, "protocol not supported");
}
} else {
no_csum:
/* Errata BTS #50, IHL must be 5 if no HW checksum */
cmd |= 5 << TX_IHL_SHIFT;
}
*command = cmd;
return 0;
}
static inline int
txq_put_data_tso(struct net_device *dev, struct tx_queue *txq,
struct sk_buff *skb, char *data, int length,
bool last_tcp, bool is_last)
{
int tx_index;
u32 cmd_sts;
struct tx_desc *desc;
tx_index = txq->tx_curr_desc++;
if (txq->tx_curr_desc == txq->tx_ring_size)
txq->tx_curr_desc = 0;
desc = &txq->tx_desc_area[tx_index];
desc->l4i_chk = 0;
desc->byte_cnt = length;
desc->buf_ptr = dma_map_single(dev->dev.parent, data,
length, DMA_TO_DEVICE);
if (unlikely(dma_mapping_error(dev->dev.parent, desc->buf_ptr))) {
WARN(1, "dma_map_single failed!\n");
return -ENOMEM;
}
cmd_sts = BUFFER_OWNED_BY_DMA;
if (last_tcp) {
/* last descriptor in the TCP packet */
cmd_sts |= ZERO_PADDING | TX_LAST_DESC;
/* last descriptor in SKB */
if (is_last)
cmd_sts |= TX_ENABLE_INTERRUPT;
}
desc->cmd_sts = cmd_sts;
return 0;
}
static inline void
txq_put_hdr_tso(struct sk_buff *skb, struct tx_queue *txq, int length)
{
struct mv643xx_eth_private *mp = txq_to_mp(txq);
int hdr_len = skb_transport_offset(skb) + tcp_hdrlen(skb);
int tx_index;
struct tx_desc *desc;
int ret;
u32 cmd_csum = 0;
u16 l4i_chk = 0;
tx_index = txq->tx_curr_desc;
desc = &txq->tx_desc_area[tx_index];
ret = skb_tx_csum(mp, skb, &l4i_chk, &cmd_csum, length);
if (ret)
WARN(1, "failed to prepare checksum!");
/* Should we set this? Can't use the value from skb_tx_csum()
* as it's not the correct initial L4 checksum to use. */
desc->l4i_chk = 0;
desc->byte_cnt = hdr_len;
desc->buf_ptr = txq->tso_hdrs_dma +
txq->tx_curr_desc * TSO_HEADER_SIZE;
desc->cmd_sts = cmd_csum | BUFFER_OWNED_BY_DMA | TX_FIRST_DESC |
GEN_CRC;
txq->tx_curr_desc++;
if (txq->tx_curr_desc == txq->tx_ring_size)
txq->tx_curr_desc = 0;
}
static int txq_submit_tso(struct tx_queue *txq, struct sk_buff *skb,
struct net_device *dev)
{
struct mv643xx_eth_private *mp = txq_to_mp(txq);
int total_len, data_left, ret;
int desc_count = 0;
struct tso_t tso;
int hdr_len = skb_transport_offset(skb) + tcp_hdrlen(skb);
/* Count needed descriptors */
if ((txq->tx_desc_count + tso_count_descs(skb)) >= txq->tx_ring_size) {
netdev_dbg(dev, "not enough descriptors for TSO!\n");
return -EBUSY;
}
/* Initialize the TSO handler, and prepare the first payload */
tso_start(skb, &tso);
total_len = skb->len - hdr_len;
while (total_len > 0) {
char *hdr;
data_left = min_t(int, skb_shinfo(skb)->gso_size, total_len);
total_len -= data_left;
desc_count++;
/* prepare packet headers: MAC + IP + TCP */
hdr = txq->tso_hdrs + txq->tx_curr_desc * TSO_HEADER_SIZE;
tso_build_hdr(skb, hdr, &tso, data_left, total_len == 0);
txq_put_hdr_tso(skb, txq, data_left);
while (data_left > 0) {
int size;
desc_count++;
size = min_t(int, tso.size, data_left);
ret = txq_put_data_tso(dev, txq, skb, tso.data, size,
size == data_left,
total_len == 0);
if (ret)
goto err_release;
data_left -= size;
tso_build_data(skb, &tso, size);
}
}
__skb_queue_tail(&txq->tx_skb, skb);
skb_tx_timestamp(skb);
/* clear TX_END status */
mp->work_tx_end &= ~(1 << txq->index);
/* ensure all descriptors are written before poking hardware */
wmb();
txq_enable(txq);
txq->tx_desc_count += desc_count;
return 0;
err_release:
/* TODO: Release all used data descriptors; header descriptors must not
* be DMA-unmapped.
*/
return ret;
}
 static void txq_submit_frag_skb(struct tx_queue *txq, struct sk_buff *skb)
 {
 struct mv643xx_eth_private *mp = txq_to_mp(txq);
@@ -671,8 +869,10 @@ static void txq_submit_frag_skb(struct tx_queue *txq, struct sk_buff *skb)
 skb_frag_t *this_frag;
 int tx_index;
 struct tx_desc *desc;
+void *addr;
 this_frag = &skb_shinfo(skb)->frags[frag];
+addr = page_address(this_frag->page.p) + this_frag->page_offset;
 tx_index = txq->tx_curr_desc++;
 if (txq->tx_curr_desc == txq->tx_ring_size)
 txq->tx_curr_desc = 0;
@@ -692,18 +892,11 @@ static void txq_submit_frag_skb(struct tx_queue *txq, struct sk_buff *skb)
 desc->l4i_chk = 0;
 desc->byte_cnt = skb_frag_size(this_frag);
-desc->buf_ptr = skb_frag_dma_map(mp->dev->dev.parent,
-this_frag, 0,
-skb_frag_size(this_frag),
-DMA_TO_DEVICE);
+desc->buf_ptr = dma_map_single(mp->dev->dev.parent, addr,
+desc->byte_cnt, DMA_TO_DEVICE);
 }
 }
-static inline __be16 sum16_as_be(__sum16 sum)
-{
-return (__force __be16)sum;
-}
 static int txq_submit_skb(struct tx_queue *txq, struct sk_buff *skb)
 {
 struct mv643xx_eth_private *mp = txq_to_mp(txq);
@@ -712,53 +905,17 @@ static int txq_submit_skb(struct tx_queue *txq, struct sk_buff *skb)
 struct tx_desc *desc;
 u32 cmd_sts;
 u16 l4i_chk;
-int length;
+int length, ret;
-cmd_sts = TX_FIRST_DESC | GEN_CRC | BUFFER_OWNED_BY_DMA;
+cmd_sts = 0;
 l4i_chk = 0;
-if (skb->ip_summed == CHECKSUM_PARTIAL) {
-int hdr_len;
-int tag_bytes;
-BUG_ON(skb->protocol != htons(ETH_P_IP) &&
-skb->protocol != htons(ETH_P_8021Q));
-hdr_len = (void *)ip_hdr(skb) - (void *)skb->data;
-tag_bytes = hdr_len - ETH_HLEN;
-if (skb->len - hdr_len > mp->shared->tx_csum_limit ||
-unlikely(tag_bytes & ~12)) {
-if (skb_checksum_help(skb) == 0)
-goto no_csum;
+ret = skb_tx_csum(mp, skb, &l4i_chk, &cmd_sts, skb->len);
+if (ret) {
 dev_kfree_skb_any(skb);
-return 1;
+return ret;
-}
-if (tag_bytes & 4)
-cmd_sts |= MAC_HDR_EXTRA_4_BYTES;
-if (tag_bytes & 8)
-cmd_sts |= MAC_HDR_EXTRA_8_BYTES;
-cmd_sts |= GEN_TCP_UDP_CHECKSUM |
-GEN_IP_V4_CHECKSUM |
-ip_hdr(skb)->ihl << TX_IHL_SHIFT;
-switch (ip_hdr(skb)->protocol) {
-case IPPROTO_UDP:
-cmd_sts |= UDP_FRAME;
-l4i_chk = ntohs(sum16_as_be(udp_hdr(skb)->check));
-break;
-case IPPROTO_TCP:
-l4i_chk = ntohs(sum16_as_be(tcp_hdr(skb)->check));
-break;
-default:
-BUG();
-}
-} else {
-no_csum:
-/* Errata BTS #50, IHL must be 5 if no HW checksum */
-cmd_sts |= 5 << TX_IHL_SHIFT;
 }
+cmd_sts |= TX_FIRST_DESC | GEN_CRC | BUFFER_OWNED_BY_DMA;
 tx_index = txq->tx_curr_desc++;
 if (txq->tx_curr_desc == txq->tx_ring_size)
@@ -801,7 +958,7 @@ static int txq_submit_skb(struct tx_queue *txq, struct sk_buff *skb)
 static netdev_tx_t mv643xx_eth_xmit(struct sk_buff *skb, struct net_device *dev)
 {
 struct mv643xx_eth_private *mp = netdev_priv(dev);
-int length, queue;
+int length, queue, ret;
 struct tx_queue *txq;
 struct netdev_queue *nq;
@@ -825,7 +982,11 @@ static netdev_tx_t mv643xx_eth_xmit(struct sk_buff *skb, struct net_device *dev)
 length = skb->len;
-if (!txq_submit_skb(txq, skb)) {
+if (skb_is_gso(skb))
+ret = txq_submit_tso(txq, skb, dev);
+else
+ret = txq_submit_skb(txq, skb);
+if (!ret) {
 int entries_left;
 txq->tx_bytes += length;
@@ -834,6 +995,8 @@ static netdev_tx_t mv643xx_eth_xmit(struct sk_buff *skb, struct net_device *dev)
 entries_left = txq->tx_ring_size - txq->tx_desc_count;
 if (entries_left < MAX_SKB_FRAGS + 1)
 netif_tx_stop_queue(nq);
+} else if (ret == -EBUSY) {
+return NETDEV_TX_BUSY;
 }
 return NETDEV_TX_OK;
@@ -907,14 +1070,8 @@ static int txq_reclaim(struct tx_queue *txq, int budget, int force)
 mp->dev->stats.tx_errors++;
 }
-if (cmd_sts & TX_FIRST_DESC) {
-dma_unmap_single(mp->dev->dev.parent, desc->buf_ptr,
-desc->byte_cnt, DMA_TO_DEVICE);
-} else {
-dma_unmap_page(mp->dev->dev.parent, desc->buf_ptr,
-desc->byte_cnt, DMA_TO_DEVICE);
-}
+dma_unmap_single(mp->dev->dev.parent, desc->buf_ptr,
+desc->byte_cnt, DMA_TO_DEVICE);
 dev_kfree_skb(skb);
 }
@@ -1871,6 +2028,15 @@ static int txq_init(struct mv643xx_eth_private *mp, int index)
 nexti * sizeof(struct tx_desc);
 }
+/* Allocate DMA buffers for TSO MAC/IP/TCP headers */
+txq->tso_hdrs = dma_alloc_coherent(mp->dev->dev.parent,
+txq->tx_ring_size * TSO_HEADER_SIZE,
+&txq->tso_hdrs_dma, GFP_KERNEL);
+if (txq->tso_hdrs == NULL) {
+dma_free_coherent(mp->dev->dev.parent, txq->tx_desc_area_size,
+txq->tx_desc_area, txq->tx_desc_dma);
+return -ENOMEM;
+}
 skb_queue_head_init(&txq->tx_skb);
 return 0;
@@ -1891,6 +2057,10 @@ static void txq_deinit(struct tx_queue *txq)
 else
 dma_free_coherent(mp->dev->dev.parent, txq->tx_desc_area_size,
 txq->tx_desc_area, txq->tx_desc_dma);
+if (txq->tso_hdrs)
+dma_free_coherent(mp->dev->dev.parent,
+txq->tx_ring_size * TSO_HEADER_SIZE,
+txq->tso_hdrs, txq->tso_hdrs_dma);
 }
@@ -2921,9 +3091,11 @@ static int mv643xx_eth_probe(struct platform_device *pdev)
 dev->watchdog_timeo = 2 * HZ;
 dev->base_addr = 0;
-dev->hw_features = NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_RXCSUM;
-dev->features = NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_RXCSUM;
-dev->vlan_features = NETIF_F_SG | NETIF_F_IP_CSUM;
+dev->features = NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_TSO;
+dev->vlan_features = dev->features;
+dev->features |= NETIF_F_RXCSUM;
+dev->hw_features = dev->features;
 dev->priv_flags |= IFF_UNICAST_FLT;
...
@@ -23,6 +23,7 @@
 #include <net/ip.h>
 #include <net/ipv6.h>
 #include <linux/io.h>
+#include <net/tso.h>
 #include <linux/of.h>
 #include <linux/of_irq.h>
 #include <linux/of_mdio.h>
@@ -244,6 +245,9 @@
 #define MVNETA_TX_MTU_MAX 0x3ffff
+/* TSO header size */
+#define TSO_HEADER_SIZE 128
 /* Max number of Rx descriptors */
 #define MVNETA_MAX_RXD 128
@@ -413,6 +417,12 @@ struct mvneta_tx_queue {
 /* Index of the next TX DMA descriptor to process */
 int next_desc_to_proc;
+/* DMA buffers for TSO headers */
+char *tso_hdrs;
+/* DMA address of TSO headers */
+dma_addr_t tso_hdrs_phys;
 };
 struct mvneta_rx_queue {
@@ -1519,6 +1529,126 @@ static int mvneta_rx(struct mvneta_port *pp, int rx_todo,
 return rx_done;
 }
static inline void
mvneta_tso_put_hdr(struct sk_buff *skb,
struct mvneta_port *pp, struct mvneta_tx_queue *txq)
{
struct mvneta_tx_desc *tx_desc;
int hdr_len = skb_transport_offset(skb) + tcp_hdrlen(skb);
txq->tx_skb[txq->txq_put_index] = NULL;
tx_desc = mvneta_txq_next_desc_get(txq);
tx_desc->data_size = hdr_len;
tx_desc->command = mvneta_skb_tx_csum(pp, skb);
tx_desc->command |= MVNETA_TXD_F_DESC;
tx_desc->buf_phys_addr = txq->tso_hdrs_phys +
txq->txq_put_index * TSO_HEADER_SIZE;
mvneta_txq_inc_put(txq);
}
static inline int
mvneta_tso_put_data(struct net_device *dev, struct mvneta_tx_queue *txq,
struct sk_buff *skb, char *data, int size,
bool last_tcp, bool is_last)
{
struct mvneta_tx_desc *tx_desc;
tx_desc = mvneta_txq_next_desc_get(txq);
tx_desc->data_size = size;
tx_desc->buf_phys_addr = dma_map_single(dev->dev.parent, data,
size, DMA_TO_DEVICE);
if (unlikely(dma_mapping_error(dev->dev.parent,
tx_desc->buf_phys_addr))) {
mvneta_txq_desc_put(txq);
return -ENOMEM;
}
tx_desc->command = 0;
txq->tx_skb[txq->txq_put_index] = NULL;
if (last_tcp) {
/* last descriptor in the TCP packet */
tx_desc->command = MVNETA_TXD_L_DESC;
/* last descriptor in SKB */
if (is_last)
txq->tx_skb[txq->txq_put_index] = skb;
}
mvneta_txq_inc_put(txq);
return 0;
}
static int mvneta_tx_tso(struct sk_buff *skb, struct net_device *dev,
struct mvneta_tx_queue *txq)
{
int total_len, data_left;
int desc_count = 0;
struct mvneta_port *pp = netdev_priv(dev);
struct tso_t tso;
int hdr_len = skb_transport_offset(skb) + tcp_hdrlen(skb);
int i;
/* Count needed descriptors */
if ((txq->count + tso_count_descs(skb)) >= txq->size)
return 0;
if (skb_headlen(skb) < (skb_transport_offset(skb) + tcp_hdrlen(skb))) {
pr_info("*** Is this even possible???!?!?\n");
return 0;
}
/* Initialize the TSO handler, and prepare the first payload */
tso_start(skb, &tso);
total_len = skb->len - hdr_len;
while (total_len > 0) {
char *hdr;
data_left = min_t(int, skb_shinfo(skb)->gso_size, total_len);
total_len -= data_left;
desc_count++;
/* prepare packet headers: MAC + IP + TCP */
hdr = txq->tso_hdrs + txq->txq_put_index * TSO_HEADER_SIZE;
tso_build_hdr(skb, hdr, &tso, data_left, total_len == 0);
mvneta_tso_put_hdr(skb, pp, txq);
while (data_left > 0) {
int size;
desc_count++;
size = min_t(int, tso.size, data_left);
if (mvneta_tso_put_data(dev, txq, skb,
tso.data, size,
size == data_left,
total_len == 0))
goto err_release;
data_left -= size;
tso_build_data(skb, &tso, size);
}
}
return desc_count;
err_release:
/* Release all used data descriptors; header descriptors must not
* be DMA-unmapped.
*/
for (i = desc_count - 1; i >= 0; i--) {
struct mvneta_tx_desc *tx_desc = txq->descs + i;
if (!(tx_desc->command & MVNETA_TXD_F_DESC))
dma_unmap_single(pp->dev->dev.parent,
tx_desc->buf_phys_addr,
tx_desc->data_size,
DMA_TO_DEVICE);
mvneta_txq_desc_put(txq);
}
return 0;
}
 /* Handle tx fragmentation processing */
 static int mvneta_tx_frag_process(struct mvneta_port *pp, struct sk_buff *skb,
 struct mvneta_tx_queue *txq)
@@ -1584,15 +1714,18 @@ static int mvneta_tx(struct sk_buff *skb, struct net_device *dev)
 u16 txq_id = skb_get_queue_mapping(skb);
 struct mvneta_tx_queue *txq = &pp->txqs[txq_id];
 struct mvneta_tx_desc *tx_desc;
-struct netdev_queue *nq;
 int frags = 0;
 u32 tx_cmd;
 if (!netif_running(dev))
 goto out;
+if (skb_is_gso(skb)) {
+frags = mvneta_tx_tso(skb, dev, txq);
+goto out;
+}
 frags = skb_shinfo(skb)->nr_frags + 1;
-nq = netdev_get_tx_queue(dev, txq_id);
 /* Get a descriptor for the first part of the packet */
 tx_desc = mvneta_txq_next_desc_get(txq);
@@ -1635,16 +1768,17 @@ static int mvneta_tx(struct sk_buff *skb, struct net_device *dev)
 }
 }
+out:
+if (frags > 0) {
+struct mvneta_pcpu_stats *stats = this_cpu_ptr(pp->stats);
+struct netdev_queue *nq = netdev_get_tx_queue(dev, txq_id);
 txq->count += frags;
 mvneta_txq_pend_desc_add(pp, txq, frags);
 if (txq->size - txq->count < MAX_SKB_FRAGS + 1)
 netif_tx_stop_queue(nq);
-out:
-if (frags > 0) {
-struct mvneta_pcpu_stats *stats = this_cpu_ptr(pp->stats);
 u64_stats_update_begin(&stats->syncp);
 stats->tx_packets++;
 stats->tx_bytes += skb->len;
@@ -2109,6 +2243,18 @@ static int mvneta_txq_init(struct mvneta_port *pp,
 txq->descs, txq->descs_phys);
 return -ENOMEM;
 }
+/* Allocate DMA buffers for TSO MAC/IP/TCP headers */
+txq->tso_hdrs = dma_alloc_coherent(pp->dev->dev.parent,
+txq->size * TSO_HEADER_SIZE,
+&txq->tso_hdrs_phys, GFP_KERNEL);
+if (txq->tso_hdrs == NULL) {
+kfree(txq->tx_skb);
+dma_free_coherent(pp->dev->dev.parent,
+txq->size * MVNETA_DESC_ALIGNED_SIZE,
+txq->descs, txq->descs_phys);
+return -ENOMEM;
+}
 mvneta_tx_done_pkts_coal_set(pp, txq, txq->done_pkts_coal);
 return 0;
@@ -2120,6 +2266,10 @@ static void mvneta_txq_deinit(struct mvneta_port *pp,
 {
 kfree(txq->tx_skb);
+if (txq->tso_hdrs)
+dma_free_coherent(pp->dev->dev.parent,
+txq->size * TSO_HEADER_SIZE,
+txq->tso_hdrs, txq->tso_hdrs_phys);
 if (txq->descs)
 dma_free_coherent(pp->dev->dev.parent,
 txq->size * MVNETA_DESC_ALIGNED_SIZE,
@@ -2895,9 +3045,9 @@ static int mvneta_probe(struct platform_device *pdev)
 netif_napi_add(dev, &pp->napi, mvneta_poll, pp->weight);
-dev->features = NETIF_F_SG | NETIF_F_IP_CSUM;
-dev->hw_features |= NETIF_F_SG | NETIF_F_IP_CSUM;
-dev->vlan_features |= NETIF_F_SG | NETIF_F_IP_CSUM;
+dev->features = NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_TSO;
+dev->hw_features |= dev->features;
+dev->vlan_features |= dev->features;
 dev->priv_flags |= IFF_UNICAST_FLT;
 err = register_netdev(dev);
...
#ifndef _TSO_H
#define _TSO_H
#include <net/ip.h>
struct tso_t {
int next_frag_idx;
void *data;
size_t size;
u16 ip_id;
u32 tcp_seq;
};
int tso_count_descs(struct sk_buff *skb);
void tso_build_hdr(struct sk_buff *skb, char *hdr, struct tso_t *tso,
int size, bool is_last);
void tso_build_data(struct sk_buff *skb, struct tso_t *tso, int size);
void tso_start(struct sk_buff *skb, struct tso_t *tso);
#endif /* _TSO_H */
@@ -9,7 +9,7 @@ obj-$(CONFIG_SYSCTL) += sysctl_net_core.o
 obj-y += dev.o ethtool.o dev_addr_lists.o dst.o netevent.o \
 neighbour.o rtnetlink.o utils.o link_watch.o filter.o \
-sock_diag.o dev_ioctl.o
+sock_diag.o dev_ioctl.o tso.o
 obj-$(CONFIG_XFRM) += flow.o
 obj-y += net-sysfs.o
...
#include <net/ip.h>
#include <net/tso.h>
/* Calculate expected number of TX descriptors */
int tso_count_descs(struct sk_buff *skb)
{
/* The Marvell Way */
return skb_shinfo(skb)->gso_segs * 2 + skb_shinfo(skb)->nr_frags;
}
void tso_build_hdr(struct sk_buff *skb, char *hdr, struct tso_t *tso,
int size, bool is_last)
{
struct iphdr *iph;
struct tcphdr *tcph;
int hdr_len = skb_transport_offset(skb) + tcp_hdrlen(skb);
int mac_hdr_len = skb_network_offset(skb);
memcpy(hdr, skb->data, hdr_len);
iph = (struct iphdr *)(hdr + mac_hdr_len);
iph->id = htons(tso->ip_id);
iph->tot_len = htons(size + hdr_len - mac_hdr_len);
tcph = (struct tcphdr *)(hdr + skb_transport_offset(skb));
tcph->seq = htonl(tso->tcp_seq);
tso->ip_id++;
if (!is_last) {
/* Clear all special flags for not last packet */
tcph->psh = 0;
tcph->fin = 0;
tcph->rst = 0;
}
}
void tso_build_data(struct sk_buff *skb, struct tso_t *tso, int size)
{
tso->tcp_seq += size;
tso->size -= size;
tso->data += size;
if ((tso->size == 0) &&
(tso->next_frag_idx < skb_shinfo(skb)->nr_frags)) {
skb_frag_t *frag = &skb_shinfo(skb)->frags[tso->next_frag_idx];
/* Move to next segment */
tso->size = frag->size;
tso->data = page_address(frag->page.p) + frag->page_offset;
tso->next_frag_idx++;
}
}
void tso_start(struct sk_buff *skb, struct tso_t *tso)
{
int hdr_len = skb_transport_offset(skb) + tcp_hdrlen(skb);
tso->ip_id = ntohs(ip_hdr(skb)->id);
tso->tcp_seq = ntohl(tcp_hdr(skb)->seq);
tso->next_frag_idx = 0;
/* Build first data */
tso->size = skb_headlen(skb) - hdr_len;
tso->data = skb->data + hdr_len;
if ((tso->size == 0) &&
(tso->next_frag_idx < skb_shinfo(skb)->nr_frags)) {
skb_frag_t *frag = &skb_shinfo(skb)->frags[tso->next_frag_idx];
/* Move to next segment */
tso->size = frag->size;
tso->data = page_address(frag->page.p) + frag->page_offset;
tso->next_frag_idx++;
}
}