Commit 95862749 authored by Jakub Kicinski's avatar Jakub Kicinski Committed by David S. Miller

nfp: copy only the relevant part of the TX descriptor for frags

Chained descriptors for fragments need to duplicate all the descriptor
fields of the skb head, so we copy the descriptor and then modify the
relevant fields.  This is wasteful, because the top half of the descriptor
will get overwritten entirely while the bottom half is not modified at all.
Copy only the bottom half.  This saves us 0.3% of CPU in a GSO test.
Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Reviewed-by: Dirk van der Merwe <dirk.vandermerwe@netronome.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent 6015c71e
@@ -158,6 +158,7 @@ struct nfp_net_tx_desc {
 			__le16 data_len; /* Length of frame + meta data */
 		} __packed;
 		__le32 vals[4];
+		__le64 vals8[2];
 	};
 };
...
@@ -786,11 +786,11 @@ static int nfp_net_tx(struct sk_buff *skb, struct net_device *netdev)
 {
 	struct nfp_net *nn = netdev_priv(netdev);
 	const struct skb_frag_struct *frag;
-	struct nfp_net_tx_desc *txd, txdg;
 	int f, nr_frags, wr_idx, md_bytes;
 	struct nfp_net_tx_ring *tx_ring;
 	struct nfp_net_r_vector *r_vec;
 	struct nfp_net_tx_buf *txbuf;
+	struct nfp_net_tx_desc *txd;
 	struct netdev_queue *nd_q;
 	struct nfp_net_dp *dp;
 	dma_addr_t dma_addr;
@@ -860,8 +860,10 @@ static int nfp_net_tx(struct sk_buff *skb, struct net_device *netdev)
 	/* Gather DMA */
 	if (nr_frags > 0) {
+		__le64 second_half;
+
 		/* all descs must match except for in addr, length and eop */
-		txdg = *txd;
+		second_half = txd->vals8[1];
 		for (f = 0; f < nr_frags; f++) {
 			frag = &skb_shinfo(skb)->frags[f];
@@ -878,11 +880,11 @@ static int nfp_net_tx(struct sk_buff *skb, struct net_device *netdev)
 			tx_ring->txbufs[wr_idx].fidx = f;

 			txd = &tx_ring->txds[wr_idx];
-			*txd = txdg;
 			txd->dma_len = cpu_to_le16(fsize);
 			nfp_desc_set_dma_addr(txd, dma_addr);
-			txd->offset_eop |=
-				(f == nr_frags - 1) ? PCIE_DESC_TX_EOP : 0;
+			txd->offset_eop = md_bytes |
+				((f == nr_frags - 1) ? PCIE_DESC_TX_EOP : 0);
+			txd->vals8[1] = second_half;
 		}

 		u64_stats_update_begin(&r_vec->tx_sync);
...
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment